pax_global_header00006660000000000000000000000064141072215550014514gustar00rootroot0000000000000052 comment=f8731f89d581b9839cdbdd5dec256f78d3d788f1 ReadStat-1.1.7/000077500000000000000000000000001410722155500132315ustar00rootroot00000000000000ReadStat-1.1.7/.github/000077500000000000000000000000001410722155500145715ustar00rootroot00000000000000ReadStat-1.1.7/.github/workflows/000077500000000000000000000000001410722155500166265ustar00rootroot00000000000000ReadStat-1.1.7/.github/workflows/build.yml000066400000000000000000000034601410722155500204530ustar00rootroot00000000000000name: build on: [ push, pull_request ] jobs: linux: runs-on: ubuntu-latest strategy: matrix: compiler: [ clang, gcc, gcc-8, gcc-9, gcc-10 ] steps: - name: Add repository run: sudo apt-add-repository -y "ppa:ubuntu-toolchain-r/test" - name: Install packages run: sudo apt install gettext gcc-8 gcc-9 gcc-10 - uses: actions/checkout@v2 - name: Autoconf run: autoreconf -i -f - name: Configure run: ./configure env: CC: ${{ matrix.compiler }} - name: Make run: make - name: Run tests run: make check - name: Generate corpus run: make generate_corpus macos: runs-on: macos-latest strategy: matrix: compiler: [ clang, gcc ] steps: - name: Install packages run: brew install automake - uses: actions/checkout@v2 - name: Autoconf run: autoreconf -i -f - name: Configure run: ./configure env: CC: ${{ matrix.compiler }} - name: Make run: make - name: Run tests run: make check - name: Generate corpus run: make generate_corpus windows: runs-on: windows-latest env: MSYSTEM: MINGW64 steps: - uses: actions/checkout@v2 - name: Autoconf run: C:\msys64\usr\bin\bash -c -l 'cd "$GITHUB_WORKSPACE" && autoreconf -i -f' - name: Configure run: C:\msys64\usr\bin\bash -c -l 'cd "$GITHUB_WORKSPACE" && ./configure' - name: Make run: C:\msys64\usr\bin\bash -c -l 'cd "$GITHUB_WORKSPACE" && make' - name: Test run: C:\msys64\usr\bin\bash -c -l 'cd "$GITHUB_WORKSPACE" && make check' - name: Log run: C:\msys64\usr\bin\bash -c -l 'cd 
"$GITHUB_WORKSPACE" && cat ./test-suite.log' ReadStat-1.1.7/.github/workflows/codecov.yml000066400000000000000000000013011410722155500207660ustar00rootroot00000000000000name: Codecov on: [ push, pull_request ] jobs: codecov: runs-on: ubuntu-latest steps: - name: Install packages run: sudo apt install gettext - uses: actions/checkout@v2 - name: Autoconf run: autoreconf -i -f - name: Configure run: ./configure --enable-code-coverage - name: Make run: make - name: Run tests run: make check - name: Generate corpus run: make generate_corpus - name: Move coverage files run: mv src/.libs/*.gc* . - name: Move more coverage files run: mv src/*/.libs/*.gc* . - name: Upload coverage files run: bash <(curl -s https://codecov.io/bash) ReadStat-1.1.7/.github/workflows/fuzz.yml000066400000000000000000000011351410722155500203470ustar00rootroot00000000000000name: CIFuzz on: [pull_request] jobs: Fuzzing: runs-on: ubuntu-latest steps: - name: Build Fuzzers uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master with: oss-fuzz-project-name: 'readstat' dry-run: false - name: Run Fuzzers uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master with: oss-fuzz-project-name: 'readstat' fuzz-seconds: 600 dry-run: false - name: Upload Crash uses: actions/upload-artifact@v1 if: failure() with: name: artifacts path: ./out/artifacts ReadStat-1.1.7/.gitignore000066400000000000000000000010621410722155500152200ustar00rootroot00000000000000.Rproj.user .Rhistory .RData *.Rproj **/.deps **/.libs **/.dirstamp **/*.lo **/*.o aclocal.m4 compile config.guess config.log config.status config.sub configure depcomp install-sh libreadstat.la libtool ltmain.sh Makefile Makefile.in missing readstat test-driver autom4te.cache/* output.* test-suite.log test_readstat *.log *.trs test_csv_to_dta* .vscode/ *.swp dev/ tmp.* test_dta_days prof_output gmon.out gen.json extract_metadata test_sav_date test_double_decimals generate_corpus *.csv *.json *.py *.tar.gz *.zip .DS_Store **/*.vcxproj.user Debug/ 
Release/ ReadStat-1.1.7/LICENSE000066400000000000000000000021031410722155500142320ustar00rootroot00000000000000Copyright (c) 2013-2016 Evan Miller (except where otherwise noted) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
ReadStat-1.1.7/Makefile.am000066400000000000000000000327721410722155500153000ustar00rootroot00000000000000AUTOMAKE_OPTIONS = foreign dist-zip lib_LTLIBRARIES = libreadstat.la libreadstat_la_SOURCES = \ src/CKHashTable.c \ src/readstat_bits.c \ src/readstat_convert.c \ src/readstat_error.c \ src/readstat_io_unistd.c \ src/readstat_malloc.c \ src/readstat_metadata.c \ src/readstat_parser.c \ src/readstat_value.c \ src/readstat_variable.c \ src/readstat_writer.c \ src/sas/ieee.c \ src/sas/readstat_sas.c \ src/sas/readstat_sas7bcat_read.c \ src/sas/readstat_sas7bcat_write.c \ src/sas/readstat_sas7bdat_read.c \ src/sas/readstat_sas7bdat_write.c \ src/sas/readstat_sas_rle.c \ src/sas/readstat_xport.c \ src/sas/readstat_xport_read.c \ src/sas/readstat_xport_write.c \ src/spss/readstat_por.c \ src/spss/readstat_por_parse.c \ src/spss/readstat_por_read.c \ src/spss/readstat_por_write.c \ src/spss/readstat_sav.c \ src/spss/readstat_sav_compress.c \ src/spss/readstat_sav_parse.c \ src/spss/readstat_sav_parse_timestamp.c \ src/spss/readstat_sav_read.c \ src/spss/readstat_sav_write.c \ src/spss/readstat_spss.c \ src/spss/readstat_spss_parse.c \ src/stata/readstat_dta.c \ src/stata/readstat_dta_parse_timestamp.c \ src/stata/readstat_dta_read.c \ src/stata/readstat_dta_write.c \ src/txt/commands_util.c \ src/txt/readstat_copy.c \ src/txt/readstat_sas_commands_read.c \ src/txt/readstat_spss_commands_read.c \ src/txt/readstat_schema.c \ src/txt/readstat_stata_dictionary_read.c \ src/txt/readstat_txt_read.c if HAVE_ZLIB libreadstat_la_SOURCES += \ src/spss/readstat_zsav_compress.c \ src/spss/readstat_zsav_read.c \ src/spss/readstat_zsav_write.c endif if HAVE_RAGEL .rl.c: $(AM_V_GEN)$(RAGEL) $(RAGELFLAGS) $< -o $@ endif libreadstat_la_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 libreadstat_la_LIBADD = @EXTRA_LIBS@ libreadstat_la_LDFLAGS = @EXTRA_LDFLAGS@ -version-info @READSTAT_VERSION_INFO@ if HAVE_ZLIB libreadstat_la_LIBADD += -lz libreadstat_la_CFLAGS += 
-DHAVE_ZLIB=1 endif if CODE_COVERAGE_ENABLED libreadstat_la_CFLAGS += -O0 -fprofile-arcs -ftest-coverage endif if FUZZER_ENABLED libreadstat_la_CFLAGS += -fsanitize=fuzzer-no-link libreadstat_la_LDFLAGS += -fsanitize=fuzzer endif dist_man1_MANS = man/readstat.man man/extract_metadata.man include_HEADERS = src/readstat.h noinst_HEADERS = \ src/CKHashTable.h \ src/readstat_bits.h \ src/readstat_convert.h \ src/readstat_iconv.h \ src/readstat_io_unistd.h \ src/readstat_malloc.h \ src/readstat_strings.h \ src/readstat_writer.h \ src/sas/ieee.h \ src/sas/readstat_sas.h \ src/sas/readstat_sas_rle.h \ src/sas/readstat_xport.h \ src/spss/readstat_por.h \ src/spss/readstat_por_parse.h \ src/spss/readstat_sav.h \ src/spss/readstat_sav_compress.h \ src/spss/readstat_sav_parse.h \ src/spss/readstat_sav_parse_timestamp.h \ src/spss/readstat_spss.h \ src/spss/readstat_spss_parse.h \ src/spss/readstat_zsav_compress.h \ src/spss/readstat_zsav_read.h \ src/spss/readstat_zsav_write.h \ src/stata/readstat_dta.h \ src/stata/readstat_dta_parse_timestamp.h \ src/txt/commands_util.h \ src/txt/readstat_copy.h \ src/txt/readstat_schema.h \ src/bin/extract_metadata.h \ src/bin/read_csv/csv_metadata.h \ src/bin/read_csv/jsmn.h \ src/bin/read_csv/json_metadata.h \ src/bin/read_csv/mod_csv.h \ src/bin/read_csv/mod_dta.h \ src/bin/read_csv/mod_sav.h \ src/bin/read_csv/read_csv.h \ src/bin/read_csv/read_module.h \ src/bin/read_csv/value.h \ src/bin/write/double_decimals.h \ src/bin/write/json/write_missing_values.h \ src/bin/write/json/write_value_labels.h \ src/bin/write/mod_csv.h \ src/bin/write/mod_readstat.h \ src/bin/write/mod_xlsx.h \ src/bin/write/module.h \ src/bin/write/module_util.h \ src/bin/util/main.h \ src/bin/util/file_format.h \ src/bin/util/quote_and_escape.h \ src/bin/util/readstat_dta_days.h \ src/bin/util/readstat_sav_date.h \ src/fuzz/fuzz_format.h \ src/test/test_buffer.h \ src/test/test_buffer_io.h \ src/test/test_dta.h \ src/test/test_error.h \ src/test/test_list.h \ 
src/test/test_read.h \ src/test/test_readstat.h \ src/test/test_sas.h \ src/test/test_sav.h \ src/test/test_types.h \ src/test/test_write.h bin_PROGRAMS = \ extract_metadata \ readstat readstat_SOURCES = \ src/bin/readstat.c \ src/bin/read_csv/jsmn.c \ src/bin/read_csv/json_metadata.c \ src/bin/read_csv/mod_csv.c \ src/bin/read_csv/mod_dta.c \ src/bin/read_csv/mod_sav.c \ src/bin/read_csv/value.c \ src/bin/write/double_decimals.c \ src/bin/write/mod_csv.c \ src/bin/write/mod_readstat.c \ src/bin/write/module_util.c \ src/bin/util/file_format.c \ src/bin/util/quote_and_escape.c \ src/bin/util/readstat_dta_days.c \ src/bin/util/readstat_sav_date.c readstat_LDADD = libreadstat.la readstat_CFLAGS = -DREADSTAT_VERSION=\"@READSTAT_VERSION@\" -Wall -Werror -pedantic-errors -std=c99 if HAVE_ZLIB readstat_CFLAGS += -DHAVE_ZLIB=1 endif extract_metadata_SOURCES = \ src/bin/extract_metadata.c \ src/bin/write/json/write_missing_values.c \ src/bin/write/json/write_value_labels.c \ src/bin/util/file_format.c \ src/bin/util/quote_and_escape.c \ src/bin/util/readstat_dta_days.c \ src/bin/util/readstat_sav_date.c extract_metadata_LDADD = libreadstat.la extract_metadata_CFLAGS = -Wall -Werror -pedantic-errors -std=c99 if HAVE_XLSXWRITER readstat_SOURCES += src/bin/write/mod_xlsx.c readstat_LDADD += -lxlsxwriter readstat_CFLAGS += -DHAVE_XLSXWRITER=1 endif if HAVE_CSVREADER readstat_SOURCES += src/bin/read_csv/read_csv.c readstat_LDADD += -lcsv readstat_CFLAGS += -DHAVE_CSVREADER=1 endif check_PROGRAMS = \ test_readstat \ test_dta_days \ test_sav_date \ test_double_decimals test_readstat_SOURCES = \ src/test/test_buffer.c \ src/test/test_buffer_io.c \ src/test/test_dta.c \ src/test/test_error.c \ src/test/test_read.c \ src/test/test_readstat.c \ src/test/test_sas.c \ src/test/test_sav.c \ src/test/test_write.c test_readstat_LDADD = libreadstat.la test_readstat_CFLAGS = -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 -DDEBUG=1 test_dta_days_SOURCES = \ 
src/bin/util/readstat_dta_days.c \ src/test/test_dta_days.c test_dta_days_LDADD = libreadstat.la test_dta_days_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 test_sav_date_SOURCES = \ src/bin/util/readstat_sav_date.c \ src/test/test_sav_date.c test_sav_date_LDADD = libreadstat.la test_sav_date_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 test_double_decimals_SOURCES = \ src/bin/write/double_decimals.c \ src/test/test_double_decimals.c test_double_decimals_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 TESTS = test_readstat test_dta_days test_sav_date test_double_decimals EXTRA_PROGRAMS = \ generate_corpus generate_corpus_SOURCES = \ src/fuzz/generate_corpus.c \ src/test/test_buffer.c \ src/test/test_write.c \ src/test/test_read.c \ src/test/test_buffer_io.c \ src/test/test_error.c \ src/test/test_dta.c \ src/test/test_sas.c \ src/test/test_sav.c generate_corpus_LDADD = libreadstat.la generate_corpus_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 EXTRA_PROGRAMS += \ fuzz_compression_sas_rle \ fuzz_compression_sav \ fuzz_format_dta \ fuzz_format_por \ fuzz_format_sav \ fuzz_format_sas7bcat \ fuzz_format_sas7bdat \ fuzz_format_xport \ fuzz_format_sas_commands \ fuzz_format_spss_commands \ fuzz_format_stata_dictionary \ fuzz_grammar_dta_timestamp \ fuzz_grammar_por_double \ fuzz_grammar_sav_date \ fuzz_grammar_sav_time \ fuzz_grammar_spss_format # Force C++ linking for fuzz targets nodist_EXTRA_fuzz_compression_sas_rle_SOURCES = dummy.cxx nodist_EXTRA_fuzz_compression_sav_SOURCES = dummy.cxx nodist_EXTRA_fuzz_format_dta_SOURCES = dummy.cxx nodist_EXTRA_fuzz_format_por_SOURCES = dummy.cxx nodist_EXTRA_fuzz_format_sav_SOURCES = dummy.cxx nodist_EXTRA_fuzz_format_sas7bcat_SOURCES = dummy.cxx nodist_EXTRA_fuzz_format_sas7bdat_SOURCES = dummy.cxx nodist_EXTRA_fuzz_format_xport_SOURCES = dummy.cxx nodist_EXTRA_fuzz_format_sas_commands_SOURCES = dummy.cxx 
nodist_EXTRA_fuzz_format_spss_commands_SOURCES = dummy.cxx nodist_EXTRA_fuzz_format_stata_dictionary_SOURCES = dummy.cxx nodist_EXTRA_fuzz_grammar_dta_timestamp_SOURCES = dummy.cxx nodist_EXTRA_fuzz_grammar_por_double_SOURCES = dummy.cxx nodist_EXTRA_fuzz_grammar_sav_date_SOURCES = dummy.cxx nodist_EXTRA_fuzz_grammar_sav_time_SOURCES = dummy.cxx nodist_EXTRA_fuzz_grammar_spss_format_SOURCES = dummy.cxx fuzz_grammar_dta_timestamp_SOURCES = \ src/fuzz/fuzz_grammar_dta_timestamp.c fuzz_grammar_dta_timestamp_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_grammar_dta_timestamp_LDFLAGS = -static fuzz_grammar_dta_timestamp_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_grammar_por_double_SOURCES = \ src/fuzz/fuzz_grammar_por_double.c fuzz_grammar_por_double_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_grammar_por_double_LDFLAGS = -static fuzz_grammar_por_double_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_grammar_sav_date_SOURCES = \ src/fuzz/fuzz_grammar_sav_date.c fuzz_grammar_sav_date_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_grammar_sav_date_LDFLAGS = -static fuzz_grammar_sav_date_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_grammar_sav_time_SOURCES = \ src/fuzz/fuzz_grammar_sav_time.c fuzz_grammar_sav_time_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_grammar_sav_time_LDFLAGS = -static fuzz_grammar_sav_time_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_grammar_spss_format_SOURCES = \ src/fuzz/fuzz_grammar_spss_format.c fuzz_grammar_spss_format_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_grammar_spss_format_LDFLAGS = -static fuzz_grammar_spss_format_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_format_dta_SOURCES = \ src/fuzz/fuzz_format.c \ src/fuzz/fuzz_format_dta.c \ src/test/test_buffer_io.c fuzz_format_dta_LDADD = libreadstat.la 
@LIB_FUZZING_ENGINE@ fuzz_format_dta_LDFLAGS = -static fuzz_format_dta_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_format_por_SOURCES = \ src/fuzz/fuzz_format.c \ src/fuzz/fuzz_format_por.c \ src/test/test_buffer_io.c fuzz_format_por_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_format_por_LDFLAGS = -static fuzz_format_por_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_format_sas7bcat_SOURCES = \ src/fuzz/fuzz_format.c \ src/fuzz/fuzz_format_sas7bcat.c \ src/test/test_buffer_io.c fuzz_format_sas7bcat_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_format_sas7bcat_LDFLAGS = -static fuzz_format_sas7bcat_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_format_sas7bdat_SOURCES = \ src/fuzz/fuzz_format.c \ src/fuzz/fuzz_format_sas7bdat.c \ src/test/test_buffer_io.c fuzz_format_sas7bdat_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_format_sas7bdat_LDFLAGS = -static fuzz_format_sas7bdat_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_format_sav_SOURCES = \ src/fuzz/fuzz_format.c \ src/fuzz/fuzz_format_sav.c \ src/test/test_buffer_io.c fuzz_format_sav_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_format_sav_LDFLAGS = -static fuzz_format_sav_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_format_xport_SOURCES = \ src/fuzz/fuzz_format.c \ src/fuzz/fuzz_format_xport.c \ src/test/test_buffer_io.c fuzz_format_xport_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_format_xport_LDFLAGS = -static fuzz_format_xport_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_format_sas_commands_SOURCES = \ src/fuzz/fuzz_format.c \ src/fuzz/fuzz_format_sas_commands.c \ src/test/test_buffer_io.c fuzz_format_sas_commands_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_format_sas_commands_LDFLAGS = -static fuzz_format_sas_commands_CFLAGS = -g -Wall 
@EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_format_spss_commands_SOURCES = \ src/fuzz/fuzz_format.c \ src/fuzz/fuzz_format_spss_commands.c \ src/test/test_buffer_io.c fuzz_format_spss_commands_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_format_spss_commands_LDFLAGS = -static fuzz_format_spss_commands_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_format_stata_dictionary_SOURCES = \ src/fuzz/fuzz_format.c \ src/fuzz/fuzz_format_stata_dictionary.c \ src/test/test_buffer_io.c fuzz_format_stata_dictionary_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_format_stata_dictionary_LDFLAGS = -static fuzz_format_stata_dictionary_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_compression_sas_rle_SOURCES = \ src/fuzz/fuzz_compression_sas_rle.c fuzz_compression_sas_rle_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_compression_sas_rle_LDFLAGS = -static fuzz_compression_sas_rle_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ fuzz_compression_sav_SOURCES = \ src/fuzz/fuzz_compression_sav.c fuzz_compression_sav_LDADD = libreadstat.la @LIB_FUZZING_ENGINE@ fuzz_compression_sav_LDFLAGS = -static fuzz_compression_sav_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ ReadStat-1.1.7/NEWS000066400000000000000000000117431410722155500137360ustar00rootroot00000000000000New in 1.1.7: * SAV writer: Permit missing ranges for integer variables #251 * SAV writer: Fix format-truncation warning #247 * DTA reader/writer: Improved support for string refs on older platforms * DTA reader: Fix incorrect ordering in StrL comparison functions #248 * SAS7BDAT writer: Improved support for tagged missing values on big-endian platforms * SAS readers: Allow header sizes up to 16MiB #249 * Visual Studio: Fix paths to make them consistent between x86 and x64 archs #242 New in 1.1.6: * Migrate to GitHub Actions * Regenerate parsers with 
Ragel 7 and update build script * SAS7BDAT reader: Improved large file support on Windows #226 * SAV reader: Skip null bytes in UTF-8 data https://github.com/tidyverse/haven/issues/560 * SAV reader: Fix hang (oss-fuzz/23485) * DTA reader: Disallow str0 type * DTA reader: Fix encoding error when garbage values are present beyond the end of a string * Command file readers: Fix integer overflow (oss-fuzz/15778) * `extract_metadata`: Implement duration support #223 (thanks to @basgys) * Support for SAS files created with SAS Visual Forecaster #232 * Report format widths for date/time SAS formats #233 * Document the meaning of a -1 return value from `readstat_get_row_count` #234 * Fix SAS file creation / modification times on Windows #238 #240 New in 1.1.5: * Support for building with MSVC #214 (thanks to @zebrys and @jonathon-love) * CLI tools: Support non-ASCII file paths on Windows #200 #216 (thanks to @zebrys) * DTA reader: Ignore bad timestamps * DTA writer: Fix memory leak * DTA writer: Improved support for empty value labels #219 * POR reader: Improved support for date/time formats #160 * SAS7BDAT reader: Added support for reading the dataset label #180 #213 (thanks to @reikoch) * SAS7BDAT reader: Improved detection of compressed files * SAS7BDAT reader: Improved bounds checking OSS-Fuzz/28312 * SAS7BDAT reader: Support for more character encodings * SAV reader: Tolerate illegal lowercase variable names #217 * SAV reader: Better support for non-UTF-8 variable names * SAV reader: Fix format widths for very long strings https://github.com/Roche/pyreadstat/issues/77 * SAV reader: Fix undefined behavior with negative row counts OSS-Fuzz/23423 New in 1.1.4: * SAS7BDAT reader: Add support for binary-compressed files #21 * XPT v8 writer: Improve compatibility with SAS #207 (thanks to @reikoch) * XPT reader: Fix reading of long variable names #208 (thanks to @reikoch) * SAS readers: Support for more character encodings * SAV reader: Clients sometimes received truncated 
UTF-8 strings * SPSS writers: Improve compatibility with PSPP with DATETIME fields #211 * All formats: Improved support for setting / getting the `display_width` #210 New in 1.1.3: * Fix warnings when compiling with GCC 10 #202 * SAS RLE compressor: Fixes for large files #201 * SAV reader: Improved support for UTF-8 column names #206 * SAV reader: Return a better error message if the magic number doesn't match * SAV reader: Support for dash-separated timestamps New in 1.1.2: * DTA reader: support for Spanish-locale timestamps * SAS reader: support for "any" encoding tidyverse/haven#482 * CLI tool: Allow uppercase filename extensions * Improved support for reading SPSS and SAS command files * Improved support for reading POR files with format widths >100 * Improved support for reading SAV files containing space-padded timestamps #197 * Improved support for writing SAV files with a large number of variables #199 * Improved support for reading SAS7BDAT files created by Stat/Transfer #198 * Fix several integer overflows and undefined values #192 #193 #194 #195 #196 New in 1.1.1: * Support row limits in the plain-text parsers * SAV reader: Allow spaces in timestamp strings * README: Fix Windows / pacman instructions #189 * Fix errors opening files in Stata 15 (tidyverse/haven#461) New in 1.1: * New function: readstat_set_row_offset (#185). 
Thanks to @mikmart * Fix segfault when localtime fails on Windows * Fix implicit float conversion warning (oss-fuzz/16372) * New error code: READSTAT_ERROR_BAD_TIMESTAMP_VALUE * Renamed error code: READSTAT_ERROR_BAD_TIMESTAMP => READSTAT_ERROR_BAD_TIMESTAMP_STRING New in 1.0.2: * Compilation: Fix -Wstringop-truncation warnings on GCC 8.2 and later (#151) * SPSS command parser: Fix signed integer overflow (oss-fuzz/15049) * POR parser: Use doubles internally to prevent integer overflows with very large exponents (#182) New in 1.0.1: * SAV writer: Validate variable names * Fix a buffer overflow reading SPSS commands (oss-fuzz/15050) * New error code READSTAT_ERROR_NAME_IS_ZERO_LENGTH when a blank variable name is provided * New fuzzing dictionary files in fuzz/dict for parsing plain-text file formats * Move corpus files from corpus to fuzz/corpus ReadStat-1.1.7/README.md000066400000000000000000000363421410722155500145200ustar00rootroot00000000000000[![GitHub CI build status](https://github.com/WizardMac/ReadStat/workflows/build/badge.svg)](https://github.com/WizardMac/ReadStat/actions) [![Appveyor build status](https://ci.appveyor.com/api/projects/status/76ctatpy3grlrd9x/branch/master?svg=true)](https://ci.appveyor.com/project/evanmiller/readstat/branch/master) [![codecov](https://codecov.io/gh/WizardMac/ReadStat/branch/master/graph/badge.svg)](https://codecov.io/gh/WizardMac/ReadStat) [![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/readstat.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:readstat) ReadStat: Read (and write) data sets from SAS, Stata, and SPSS == Originally developed for [Wizard](https://www.wizardmac.com/), ReadStat is a command-line tool and MIT-licensed C library for reading files from popular stats packages. 
Supported data formats include: * SAS: SAS7BDAT (binary file) and XPORT (transport file) * Stata: DTA (binary file) versions 104-119 * SPSS: POR (portable file), SAV (binary file), and ZSAV (compressed binary) Supported metadata formats include: * SAS: SAS7BCAT (catalog file) and .sas (command file) * Stata: .dct (dictionary file) * SPSS: .sps (command file) There is also write support for all the data formats, but not the metadata formats. *The produced SAS7BDAT files still cannot be read by SAS*, but feel free to contribute your binary-format expertise here. For reading in R data files, please see the related [librdata](https://github.com/WizardMac/librdata) project. Installation on Unix / macOS -- Grab the latest [release](https://github.com/WizardMac/ReadStat/releases) and then proceed as usual: ./configure make sudo make install If you're cloning the repository, first make sure you have autotools installed, and then run `./autogen.sh` to generate the configure file. If you're on Mac and see errors about `AM_ICONV` when you run `./autogen.sh`, you'll need to install [gettext](https://www.gnu.org/software/gettext/). Installation on Windows -- ReadStat now includes a Microsoft Visual Studio project file that includes build targets for the library and tests. See the [VS17](./VS17) folder in the downloaded release for a "one-click" Windows build. Alternatively, you can build ReadStat on the command line using an [msys2](https://msys2.github.io/) environment. After installing msys2, download some other packages: pacman -S autoconf automake libtool make mingw-w64-x86_64-toolchain mingw-w64-x86_64-cmake mingw-w64-x86_64-libiconv Then start a MINGW command line (not the msys2 prompt!) and follow the UNIX install instructions above for this package. 
Language Bindings -- * Julia: [ReadStat.jl](https://github.com/queryverse/ReadStat.jl) * Perl 6: [ReadStat.pm6](https://github.com/WizardMac/ReadStat.pm6) * Python: [pyreadstat](https://github.com/Roche/pyreadstat) * R: [haven](https://github.com/tidyverse/haven) Docker -- A dockerized version is available [here](https://github.com/jbn/readstat) Command-line Usage -- Standard usage: readstat [-f] Where: * `` ends with `.dta`, `.por`, `.sav`, `.sas7bdat`, or `.xpt`and * `` ends with `.dta`, `.por`, `.sav`, `.sas7bdat`, `.xpt` or `.csv` If [libxlsxwriter](http://libxlsxwriter.github.io) is found at compile-time, an XLSX file (ending in `.xlsx`) can be written instead. If zlib is found at compile-time, compressed SPSS files (`.zsav`) can be read and written as well. Use the `-f` option to overwrite an existing output file. If you have a plain-text file described by a Stata dictionary file, a SAS command file, or an SPSS command file, a second invocation style is supported: readstat Where: * `` can be anything * `` ends with `.dct`, `.sas`, or `.sps` * `` ends with `.dta`, `.por`, `.sav`, `.xpt`, or `.csv` If you have a SAS catalog file containing the data set's value labels, you can use the same invocation: readstat Except where: * `` ends with `.sas7bdat` * `` ends with `.sas7bcat` * `` ends with `.dta`, `.por`, `.sav`, `.xpt`, or `.csv` If the file conversion succeeds, ReadStat will report the number of rows and variables converted, e.g. Converted 111 variables and 160851 rows in 12.36 seconds At the moment value labels are supported, but the finer nuances of converting format strings (e.g. `%8.2g`) are not. Command-line Usage with CSV input -- A prerequisite for CSV input is that the [libcsv](https://github.com/rgamble/libcsv) library is found at compile time. CSV input is supported together with a metadata file describing the data: readstat The `` should end with `.dta`, `.sav`, or `.csv`. The `` is a regular CSV file. 
The `` is a JSON file describing column types, value labels and missing values. The easiest way to create such a metadata file is to use the provided `extract_metadata` program on an existing file: $ extract_metadata The schema of this JSON file is fully described in [variablemetadata_schema.json](variablemetadata_schema.json) using [JSON Schema](http://json-schema.org/). The following is an example of a valid metadata file: { "type": "SPSS", "variables": [ { "type": "NUMERIC", "name": "citizenship", "label": "Citizenship of respondent", "categories": [ { "code": 1, "label": "Afghanistan" }, { "code": 2, "label": "Albania" }, { "code": 98, "label": "No answer" }, { "code": 99, "label": "Not applicable" } ], "missing": { "type": "DISCRETE", "values": [ 98, 99 ] } } ] } Here the column `citizenship` is a numeric column with four possible values `1`, `2`, `98`, and `99`. `1` has the label `Afghanistan`, `2` has `Albania`, `98` has `No answer` and `99` has `Not applicable`. `98` and `99` are defined as missing values. Other column types are `STRING` and `DATE`. All values in `DATE` columns are expected to conform to [ISO 8601 date](https://en.wikipedia.org/wiki/ISO_8601). Here is an example of `DATE` metadata: { "type": "SPSS", "variables": [ { "type": "DATE", "name": "startdate", "label": "Start date", "categories": [ { "code": "6666-01-01", "label": "no date available" } ], "missing": { "type": "DISCRETE", "values": [ "6666-01-01", "9999-01-01" ] } } ] } Value labels are supported for `DATE`. The last column type is `STRING`: { "type": "SPSS", "variables": [ { "type": "STRING", "name": "somestring", "label": "Label of column", "missing": { "type": "DISCRETE", "values": [ "NA", "N/A" ] } } ] } Value labels are not supported for `STRING`. Library Usage: Reading Files -- The ReadStat API is callback-based. It uses very little memory, and is suitable for programs with progress bars. 
ReadStat uses [iconv](https://en.wikipedia.org/wiki/Iconv) to automatically transcode text data into UTF-8, so you don't have to worry about character encodings. See src/readstat.h for the complete API. In general you'll provide a filename and a set of optional callback functions for handling various information and data found in the file. It's up to the user to store this information in an appropriate data structure. If a context pointer is passed to the parse_* functions, it will be made available to the various callback functions. Callback functions should return `READSTAT_HANDLER_OK` (zero) on success. Returning `READSTAT_HANDLER_ABORT` will abort the parsing process. Example: Return the number of records in a DTA file. ```c #include "readstat.h" int handle_metadata(readstat_metadata_t *metadata, void *ctx) { int *my_count = (int *)ctx; *my_count = readstat_get_row_count(metadata); return READSTAT_HANDLER_OK; } int main(int argc, char *argv[]) { if (argc != 2) { printf("Usage: %s \n", argv[0]); return 1; } int my_count = 0; readstat_error_t error = READSTAT_OK; readstat_parser_t *parser = readstat_parser_init(); readstat_set_metadata_handler(parser, &handle_metadata); error = readstat_parse_dta(parser, argv[1], &my_count); readstat_parser_free(parser); if (error != READSTAT_OK) { printf("Error processing %s: %d\n", argv[1], error); return 1; } printf("Found %d records\n", my_count); return 0; } ``` Example: Convert a DTA to a tab-separated file. 
```c #include "readstat.h" int handle_metadata(readstat_metadata_t *metadata, void *ctx) { int *my_var_count = (int *)ctx; *my_var_count = readstat_get_var_count(metadata); return READSTAT_HANDLER_OK; } int handle_variable(int index, readstat_variable_t *variable, const char *val_labels, void *ctx) { int *my_var_count = (int *)ctx; printf("%s", readstat_variable_get_name(variable)); if (index == *my_var_count - 1) { printf("\n"); } else { printf("\t"); } return READSTAT_HANDLER_OK; } int handle_value(int obs_index, readstat_variable_t *variable, readstat_value_t value, void *ctx) { int *my_var_count = (int *)ctx; int var_index = readstat_variable_get_index(variable); readstat_type_t type = readstat_value_type(value); if (!readstat_value_is_system_missing(value)) { if (type == READSTAT_TYPE_STRING) { printf("%s", readstat_string_value(value)); } else if (type == READSTAT_TYPE_INT8) { printf("%hhd", readstat_int8_value(value)); } else if (type == READSTAT_TYPE_INT16) { printf("%hd", readstat_int16_value(value)); } else if (type == READSTAT_TYPE_INT32) { printf("%d", readstat_int32_value(value)); } else if (type == READSTAT_TYPE_FLOAT) { printf("%f", readstat_float_value(value)); } else if (type == READSTAT_TYPE_DOUBLE) { printf("%lf", readstat_double_value(value)); } } if (var_index == *my_var_count - 1) { printf("\n"); } else { printf("\t"); } return READSTAT_HANDLER_OK; } int main(int argc, char *argv[]) { if (argc != 2) { printf("Usage: %s \n", argv[0]); return 1; } int my_var_count = 0; readstat_error_t error = READSTAT_OK; readstat_parser_t *parser = readstat_parser_init(); readstat_set_metadata_handler(parser, &handle_metadata); readstat_set_variable_handler(parser, &handle_variable); readstat_set_value_handler(parser, &handle_value); error = readstat_parse_dta(parser, argv[1], &my_var_count); readstat_parser_free(parser); if (error != READSTAT_OK) { printf("Error processing %s: %d\n", argv[1], error); return 1; } return 0; } ``` Library Usage: Writing Files -- 
ReadStat can write data sets to a number of file formats, and uses largely the same API for each of them. Files are written incrementally, with the header written first, followed by individual rows of data, and ending with some kind of trailer. (So the full data file never resides in memory.) Unlike the callback-based API for reading files, the writer API consists of functions that the developer must call in a particular order. The complete API can be found in [readstat.h](./src/readstat.h). Basic usage: ```c #include "readstat.h" /* A callback for writing bytes to your file descriptor of choice */ /* The ctx argument comes from the readstat_begin_writing_xxx function */ static ssize_t write_bytes(const void *data, size_t len, void *ctx) { int fd = *(int *)ctx; return write(fd, data, len); } int main(int argc, char *argv[]) { readstat_writer_t *writer = readstat_writer_init(); readstat_set_data_writer(writer, &write_bytes); readstat_writer_set_file_label(writer, "My data set"); int row_count = 1; readstat_variable_t *variable = readstat_add_variable(writer, "Var1", READSTAT_TYPE_DOUBLE, 0); readstat_variable_set_label(variable, "First variable"); /* Call one of: * readstat_begin_writing_dta * readstat_begin_writing_por * readstat_begin_writing_sas7bdat * readstat_begin_writing_sav * readstat_begin_writing_xport */ int fd = open("something.dta", O_CREAT | O_WRONLY); readstat_begin_writing_dta(writer, &fd, row_count); int i; for (i=0; i READSTAT_VERSION="1.1.7";_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) ReadStat-1.1.7/VS17/ReadStat.sln000066400000000000000000000137041410722155500161630ustar00rootroot00000000000000 Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio 15 VisualStudioVersion = 15.0.27703.2047 MinimumVisualStudioVersion = 10.0.40219.1 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ReadStat", "ReadStat.vcxproj", "{7D9D3258-8850-466E-8A74-4F8025E6FA49}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = 
"ReadStat_Test_readstat", "ReadStat_Test_readstat\ReadStat_Test_readstat.vcxproj", "{E9408E27-6E13-46C9-B07D-9569BD078EF6}" ProjectSection(ProjectDependencies) = postProject {7D9D3258-8850-466E-8A74-4F8025E6FA49} = {7D9D3258-8850-466E-8A74-4F8025E6FA49} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ReadStat_App", "ReadStat_App\ReadStat_App.vcxproj", "{80188B4B-81A0-4113-A848-CB6483D185EA}" ProjectSection(ProjectDependencies) = postProject {7D9D3258-8850-466E-8A74-4F8025E6FA49} = {7D9D3258-8850-466E-8A74-4F8025E6FA49} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ReadStat_Test_double_decimals", "ReadStat_Test_double_decimals\ReadStat_Test_double_decimals.vcxproj", "{96AB8E29-11AC-40F0-871A-856DD9C78EAB}" ProjectSection(ProjectDependencies) = postProject {7D9D3258-8850-466E-8A74-4F8025E6FA49} = {7D9D3258-8850-466E-8A74-4F8025E6FA49} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ReadStat_Test_dta_days", "ReadStat_Test_dta_days\ReadStat_Test_dta_days.vcxproj", "{D6F08523-A8F5-4F2A-A57B-EC013A136DA6}" ProjectSection(ProjectDependencies) = postProject {7D9D3258-8850-466E-8A74-4F8025E6FA49} = {7D9D3258-8850-466E-8A74-4F8025E6FA49} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ReadStat_Test_sav_date", "ReadStat_Test_sav_date\ReadStat_Test_sav_date.vcxproj", "{D5265192-6034-4689-94FF-8B2060FBC309}" ProjectSection(ProjectDependencies) = postProject {7D9D3258-8850-466E-8A74-4F8025E6FA49} = {7D9D3258-8850-466E-8A74-4F8025E6FA49} EndProjectSection EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 Debug|x86 = Debug|x86 Release|x64 = Release|x64 Release|x86 = Release|x86 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {7D9D3258-8850-466E-8A74-4F8025E6FA49}.Debug|x64.ActiveCfg = Debug|x64 {7D9D3258-8850-466E-8A74-4F8025E6FA49}.Debug|x64.Build.0 = Debug|x64 
{7D9D3258-8850-466E-8A74-4F8025E6FA49}.Debug|x86.ActiveCfg = Debug|Win32 {7D9D3258-8850-466E-8A74-4F8025E6FA49}.Debug|x86.Build.0 = Debug|Win32 {7D9D3258-8850-466E-8A74-4F8025E6FA49}.Release|x64.ActiveCfg = Release|x64 {7D9D3258-8850-466E-8A74-4F8025E6FA49}.Release|x64.Build.0 = Release|x64 {7D9D3258-8850-466E-8A74-4F8025E6FA49}.Release|x86.ActiveCfg = Release|Win32 {7D9D3258-8850-466E-8A74-4F8025E6FA49}.Release|x86.Build.0 = Release|Win32 {E9408E27-6E13-46C9-B07D-9569BD078EF6}.Debug|x64.ActiveCfg = Debug|x64 {E9408E27-6E13-46C9-B07D-9569BD078EF6}.Debug|x64.Build.0 = Debug|x64 {E9408E27-6E13-46C9-B07D-9569BD078EF6}.Debug|x86.ActiveCfg = Debug|Win32 {E9408E27-6E13-46C9-B07D-9569BD078EF6}.Debug|x86.Build.0 = Debug|Win32 {E9408E27-6E13-46C9-B07D-9569BD078EF6}.Release|x64.ActiveCfg = Release|x64 {E9408E27-6E13-46C9-B07D-9569BD078EF6}.Release|x64.Build.0 = Release|x64 {E9408E27-6E13-46C9-B07D-9569BD078EF6}.Release|x86.ActiveCfg = Release|Win32 {E9408E27-6E13-46C9-B07D-9569BD078EF6}.Release|x86.Build.0 = Release|Win32 {80188B4B-81A0-4113-A848-CB6483D185EA}.Debug|x64.ActiveCfg = Debug|x64 {80188B4B-81A0-4113-A848-CB6483D185EA}.Debug|x64.Build.0 = Debug|x64 {80188B4B-81A0-4113-A848-CB6483D185EA}.Debug|x86.ActiveCfg = Debug|Win32 {80188B4B-81A0-4113-A848-CB6483D185EA}.Debug|x86.Build.0 = Debug|Win32 {80188B4B-81A0-4113-A848-CB6483D185EA}.Release|x64.ActiveCfg = Release|x64 {80188B4B-81A0-4113-A848-CB6483D185EA}.Release|x64.Build.0 = Release|x64 {80188B4B-81A0-4113-A848-CB6483D185EA}.Release|x86.ActiveCfg = Release|Win32 {80188B4B-81A0-4113-A848-CB6483D185EA}.Release|x86.Build.0 = Release|Win32 {96AB8E29-11AC-40F0-871A-856DD9C78EAB}.Debug|x64.ActiveCfg = Debug|x64 {96AB8E29-11AC-40F0-871A-856DD9C78EAB}.Debug|x64.Build.0 = Debug|x64 {96AB8E29-11AC-40F0-871A-856DD9C78EAB}.Debug|x86.ActiveCfg = Debug|Win32 {96AB8E29-11AC-40F0-871A-856DD9C78EAB}.Debug|x86.Build.0 = Debug|Win32 {96AB8E29-11AC-40F0-871A-856DD9C78EAB}.Release|x64.ActiveCfg = Release|x64 
{96AB8E29-11AC-40F0-871A-856DD9C78EAB}.Release|x64.Build.0 = Release|x64 {96AB8E29-11AC-40F0-871A-856DD9C78EAB}.Release|x86.ActiveCfg = Release|Win32 {96AB8E29-11AC-40F0-871A-856DD9C78EAB}.Release|x86.Build.0 = Release|Win32 {D6F08523-A8F5-4F2A-A57B-EC013A136DA6}.Debug|x64.ActiveCfg = Debug|x64 {D6F08523-A8F5-4F2A-A57B-EC013A136DA6}.Debug|x64.Build.0 = Debug|x64 {D6F08523-A8F5-4F2A-A57B-EC013A136DA6}.Debug|x86.ActiveCfg = Debug|Win32 {D6F08523-A8F5-4F2A-A57B-EC013A136DA6}.Debug|x86.Build.0 = Debug|Win32 {D6F08523-A8F5-4F2A-A57B-EC013A136DA6}.Release|x64.ActiveCfg = Release|x64 {D6F08523-A8F5-4F2A-A57B-EC013A136DA6}.Release|x64.Build.0 = Release|x64 {D6F08523-A8F5-4F2A-A57B-EC013A136DA6}.Release|x86.ActiveCfg = Release|Win32 {D6F08523-A8F5-4F2A-A57B-EC013A136DA6}.Release|x86.Build.0 = Release|Win32 {D5265192-6034-4689-94FF-8B2060FBC309}.Debug|x64.ActiveCfg = Debug|x64 {D5265192-6034-4689-94FF-8B2060FBC309}.Debug|x64.Build.0 = Debug|x64 {D5265192-6034-4689-94FF-8B2060FBC309}.Debug|x86.ActiveCfg = Debug|Win32 {D5265192-6034-4689-94FF-8B2060FBC309}.Debug|x86.Build.0 = Debug|Win32 {D5265192-6034-4689-94FF-8B2060FBC309}.Release|x64.ActiveCfg = Release|x64 {D5265192-6034-4689-94FF-8B2060FBC309}.Release|x64.Build.0 = Release|x64 {D5265192-6034-4689-94FF-8B2060FBC309}.Release|x86.ActiveCfg = Release|Win32 {D5265192-6034-4689-94FF-8B2060FBC309}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {B01A5CE6-8035-4FD5-A80B-1E63884D1506} EndGlobalSection EndGlobal ReadStat-1.1.7/VS17/ReadStat.vcxproj000066400000000000000000000327671410722155500170740ustar00rootroot00000000000000 Debug Win32 Release Win32 Debug x64 Release x64 15.0 {7D9D3258-8850-466E-8A74-4F8025E6FA49} Win32Proj ReadStat 10.0.17763.0 StaticLibrary true v141 Unicode StaticLibrary false v141 true Unicode StaticLibrary true v141 Unicode StaticLibrary false v141 
true Unicode true $(SolutionDir)\x86\$(Configuration)\ x86\$(Configuration)\ true false $(SolutionDir)x86\$(Configuration)\ x86\$(Configuration)\ false NotUsing Level3 Disabled true WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions) true false $(ProjectDir)..\..\iconv\include;%(AdditionalIncludeDirectories) $(IntDir)FakePath\%(RelativeDir) $(IntDir)FakePath\%(RelativeDir)\ Windows true NotUsing Level3 Disabled true WIN64;_DEBUG;_LIB;%(PreprocessorDefinitions) true $(ProjectDir)..\..\iconv\include;%(AdditionalIncludeDirectories) $(IntDir)FakePath\%(RelativeDir) $(IntDir)FakePath\%(RelativeDir)\ Windows true NotUsing Level3 MaxSpeed true true true WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions) true false $(ProjectDir)..\..\iconv\include;%(AdditionalIncludeDirectories) $(IntDir)FakePath\%(RelativeDir) $(IntDir)FakePath\%(RelativeDir)\ Windows true true true NotUsing Level3 MaxSpeed true true true WIN64;NDEBUG;_LIB;%(PreprocessorDefinitions) true $(ProjectDir)..\..\iconv\include;%(AdditionalIncludeDirectories) $(IntDir)FakePath\%(RelativeDir) $(IntDir)FakePath\%(RelativeDir)\ Windows true true true ReadStat-1.1.7/VS17/ReadStat.vcxproj.filters000066400000000000000000000221031410722155500205220ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;hm;inl;inc;ipp;xsd {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms {36d69d1d-b944-4abc-9981-ca8572816f7a} {88abd1ce-1660-482c-87b3-9d210ee57a1b} {f7d7807e-0c88-4fd8-8d4a-4e1c18783cae} Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files\sas Source Files\sas Source Files\sas Source Files\sas Source Files\sas Source Files\sas Source Files\sas Source Files\sas Source Files\sas Source Files\sas Source Files\spss Source Files\spss Source Files\spss Source 
Files\spss Source Files\spss Source Files\spss Source Files\spss Source Files\spss Source Files\spss Source Files\spss Source Files\spss Source Files\spss Source Files\stata Source Files\stata Source Files\stata Source Files\stata Source Files Source Files Source Files Source Files Source Files Source Files Source Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files ReadStat-1.1.7/VS17/ReadStat_App/000077500000000000000000000000001410722155500162405ustar00rootroot00000000000000ReadStat-1.1.7/VS17/ReadStat_App/ReadStat_App.vcxproj000066400000000000000000000310061410722155500221640ustar00rootroot00000000000000 Debug Win32 Release Win32 Debug x64 Release x64 15.0 {80188B4B-81A0-4113-A848-CB6483D185EA} Win32Proj ReadStatApp 10.0.17763.0 ReadStat_App Application true v141 Unicode Application false v141 true Unicode Application true v141 Unicode Application false v141 true Unicode true x86\$(Configuration)\ $(SolutionDir)\x86\$(Configuration)\ true false x86\$(Configuration)\ $(SolutionDir)x86\$(Configuration)\ false NotUsing Level3 Disabled true WIN32;_DEBUG;DEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true ReadStat.lib;libiconvD.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib;..\x86\$(ConfigurationName); NotUsing Level3 Disabled true _DEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true 
ReadStat.lib;libiconvD.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib64;..\x64\$(ConfigurationName) NotUsing Level3 MaxSpeed true true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true true false ReadStat.lib;libiconv.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib;..\x86\$(ConfigurationName); NotUsing Level3 MaxSpeed true true true NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true true false ReadStat.lib;libiconv.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib64;..\x64\$(ConfigurationName) ReadStat-1.1.7/VS17/ReadStat_App/ReadStat_App.vcxproj.filters000066400000000000000000000132151410722155500236350ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;hm;inl;inc;ipp;xsd {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms {91d99aae-1d30-4dab-923c-4e331303b7c8} {de4b7ac4-3816-4eea-a44f-949af9bb3a37} {8f2a21ef-feb4-4b94-bfa9-40433fb84f36} {ddbd8e6f-15a8-42dc-ac41-a8dbf6b67fea} Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files\util Source Files\util Header Files Header Files Header Files Header Files 
Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files\util Header Files\util ReadStat-1.1.7/VS17/ReadStat_Test_double_decimals/000077500000000000000000000000001410722155500216325ustar00rootroot00000000000000ReadStat-1.1.7/VS17/ReadStat_Test_double_decimals/ReadStat_Test_double_decimals.vcxproj000066400000000000000000000250501410722155500311520ustar00rootroot00000000000000 Debug Win32 Release Win32 Debug x64 Release x64 15.0 {96AB8E29-11AC-40F0-871A-856DD9C78EAB} Win32Proj ReadStatTests 10.0.17763.0 ReadStat_Test_double_decimals Application true v141 Unicode Application false v141 true Unicode Application true v141 Unicode Application false v141 true Unicode false x86\$(Configuration)\ $(SolutionDir)x86\$(Configuration)\ true x86\$(Configuration)\ $(SolutionDir)\x86\$(Configuration)\ true false NotUsing Level3 MaxSpeed true true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true true false $(ProjectDir)..\..\..\iconv\lib;..\x86\$(ConfigurationName);%(AdditionalLibraryDirectories) ReadStat.lib;%(AdditionalDependencies) NotUsing Level3 Disabled true WIN32;_DEBUG;DEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true $(ProjectDir)..\..\..\iconv\lib;..\x86\$(ConfigurationName);%(AdditionalLibraryDirectories) ReadStat.lib;%(AdditionalDependencies) NotUsing Level3 Disabled true WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true 
ReadStat.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib64;..\x64\$(ConfigurationName) NotUsing Level3 MaxSpeed true true true WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true true false ReadStat.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib64;..\x64\$(ConfigurationName) ReadStat-1.1.7/VS17/ReadStat_Test_double_decimals/ReadStat_Test_double_decimals.vcxproj.filters000066400000000000000000000027571410722155500326320ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;hm;inl;inc;ipp;xsd {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms {bc66b4d8-b052-45e3-83e6-c757da36b820} {4429e582-f8a5-49eb-b347-4bd29ed708c1} Source Files Source Files\write Header Files\write ReadStat-1.1.7/VS17/ReadStat_Test_dta_days/000077500000000000000000000000001410722155500203075ustar00rootroot00000000000000ReadStat-1.1.7/VS17/ReadStat_Test_dta_days/ReadStat_Test_dta_days.vcxproj000066400000000000000000000250341410722155500263060ustar00rootroot00000000000000 Debug Win32 Release Win32 Debug x64 Release x64 15.0 {D6F08523-A8F5-4F2A-A57B-EC013A136DA6} Win32Proj ReadStatTests 10.0.17763.0 ReadStat_Test_dta_days Application true v141 Unicode Application false v141 true Unicode Application true v141 Unicode Application false v141 true Unicode false x86\$(Configuration)\ $(SolutionDir)x86\$(Configuration)\ true x86\$(Configuration)\ $(SolutionDir)\x86\$(Configuration)\ true 
false NotUsing Level3 MaxSpeed true true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true true false $(ProjectDir)..\..\..\iconv\lib;..\x86\$(ConfigurationName);%(AdditionalLibraryDirectories) ReadStat.lib;%(AdditionalDependencies) NotUsing Level3 Disabled true WIN32;_DEBUG;DEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true $(ProjectDir)..\..\..\iconv\lib;..\x86\$(ConfigurationName);%(AdditionalLibraryDirectories) ReadStat.lib;%(AdditionalDependencies) NotUsing Level3 Disabled true WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true ReadStat.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib64;..\x64\$(ConfigurationName) NotUsing Level3 MaxSpeed true true true WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true true false ReadStat.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib64;..\x64\$(ConfigurationName) ReadStat-1.1.7/VS17/ReadStat_Test_dta_days/ReadStat_Test_dta_days.vcxproj.filters000066400000000000000000000027461410722155500277620ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;hm;inl;inc;ipp;xsd 
{67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms {a9dc55c0-eadc-4894-9f72-c59adba6368d} {f0d789bb-6100-449e-9c28-91496e119e6a} Header Files\util Source Files Source Files\util ReadStat-1.1.7/VS17/ReadStat_Test_readstat/000077500000000000000000000000001410722155500203265ustar00rootroot00000000000000ReadStat-1.1.7/VS17/ReadStat_Test_readstat/ReadStat_Test_readstat.vcxproj000066400000000000000000000267621410722155500263550ustar00rootroot00000000000000 Debug Win32 Release Win32 Debug x64 Release x64 15.0 {E9408E27-6E13-46C9-B07D-9569BD078EF6} Win32Proj ReadStatTests 10.0.17763.0 ReadStat_Test_readstat Application true v141 Unicode Application false v141 true Unicode Application true v141 Unicode Application false v141 true Unicode false x86\$(Configuration)\ $(SolutionDir)x86\$(Configuration)\ true x86\$(Configuration)\ $(SolutionDir)\x86\$(Configuration)\ true false NotUsing Level3 MaxSpeed true true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true true false $(ProjectDir)..\..\..\iconv\lib;..\x86\$(ConfigurationName);%(AdditionalLibraryDirectories) ReadStat.lib;libiconv.lib;%(AdditionalDependencies) NotUsing Level3 Disabled true WIN32;_DEBUG;DEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true $(ProjectDir)..\..\..\iconv\lib;..\x86\$(ConfigurationName);%(AdditionalLibraryDirectories) ReadStat.lib;libiconvD.lib;%(AdditionalDependencies) NotUsing Level3 Disabled true WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true 
ReadStat.lib;libiconvD.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib64;..\x64\$(ConfigurationName) NotUsing Level3 MaxSpeed true true true WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true true false ReadStat.lib;libiconv.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib64;..\x64\$(ConfigurationName) ReadStat-1.1.7/VS17/ReadStat_Test_readstat/ReadStat_Test_readstat.vcxproj.filters000066400000000000000000000057151410722155500300170ustar00rootroot00000000000000 {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;hm;inl;inc;ipp;xsd {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Header Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files Source Files ReadStat-1.1.7/VS17/ReadStat_Test_sav_date/000077500000000000000000000000001410722155500203055ustar00rootroot00000000000000ReadStat-1.1.7/VS17/ReadStat_Test_sav_date/ReadStat_Test_sav_date.vcxproj000066400000000000000000000250341410722155500263020ustar00rootroot00000000000000 Debug Win32 Release Win32 Debug x64 Release x64 15.0 {D5265192-6034-4689-94FF-8B2060FBC309} Win32Proj ReadStatTests 10.0.17763.0 ReadStat_Test_sav_date Application true v141 Unicode Application false v141 true Unicode Application true v141 Unicode Application false v141 
true Unicode false x86\$(Configuration)\ $(SolutionDir)x86\$(Configuration)\ true x86\$(Configuration)\ $(SolutionDir)\x86\$(Configuration)\ true false NotUsing Level3 MaxSpeed true true true WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true true false $(ProjectDir)..\..\..\iconv\lib;..\x86\$(ConfigurationName);%(AdditionalLibraryDirectories) ReadStat.lib;%(AdditionalDependencies) NotUsing Level3 Disabled true WIN32;_DEBUG;DEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true $(ProjectDir)..\..\..\iconv\lib;..\x86\$(ConfigurationName);%(AdditionalLibraryDirectories) ReadStat.lib;%(AdditionalDependencies) NotUsing Level3 Disabled true WIN64;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true ReadStat.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib64;..\x64\$(ConfigurationName) NotUsing Level3 MaxSpeed true true true WIN64;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) true $(IntDir)FakePath\FakePath\%(RelativeDir) $(ProjectDir)..\..\..\iconv\include $(IntDir)FakePath\FakePath\%(RelativeDir)\ Console true true false ReadStat.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(ProjectDir)..\..\..\iconv\lib64;..\x64\$(ConfigurationName) ReadStat-1.1.7/VS17/ReadStat_Test_sav_date/ReadStat_Test_sav_date.vcxproj.filters000066400000000000000000000027461410722155500277560ustar00rootroot00000000000000 
{4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx {93995380-89BD-4b04-88EB-625FBE52EBFB} h;hh;hpp;hxx;hm;inl;inc;ipp;xsd {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms {2348db41-955b-434d-b332-e856065fa485} {46e899c2-0b26-4c91-877e-7ce755c899d7} Source Files Source Files\util Header Files\util ReadStat-1.1.7/appveyor.yml000066400000000000000000000040331410722155500156210ustar00rootroot00000000000000version: 0.1.{build} os: Windows Server 2016 image: Visual Studio 2017 platform: x64 environment: matrix: - TOOLCHAIN: "msvc" - TOOLCHAIN: "cygwin" skip_tags: true install: - git clone https://github.com/pffang/libiconv-for-Windows.git C:\projects\iconv - cd C:\projects\iconv - git checkout 0e36d95873 for: - matrix: only: - TOOLCHAIN: "msvc" build: parallel: true project: VS17/ReadStat.sln verbosity: minimal - matrix: except: - TOOLCHAIN: "msvc" build_script: - ps: | if ($env:TOOLCHAIN -eq "cygwin") { C:\cygwin64\setup-x86_64.exe -qnNdO -s http://cygwin.mirror.constant.com -l C:/cygwin64/var/cache/setup -P libiconv-devel -P ragel -P zlib-devel C:\cygwin64\bin\sh -lc "cd /cygdrive/c/projects/readstat && ./autogen.sh" C:\cygwin64\bin\sh -lc "cd /cygdrive/c/projects/readstat && ./configure" C:\cygwin64\bin\sh -lc "cd /cygdrive/c/projects/readstat && make" } test_script: - ps: | if ($env:TOOLCHAIN -eq "cygwin") { C:\cygwin64\bin\sh -lc "cd /cygdrive/c/projects/readstat && make check" } else { $state = "PASS" $tests = @("readstat","double_decimals","sav_date","dta_days") Copy-Item "C:\projects\iconv\lib64\libiconvD.dll" -Destination "C:\projects\readstat\VS17\x64\Debug\" for($i=0 ; $i -lt $tests.Length ; $i++) { $currTest = $tests[$i] $currExec = 'C:\projects\readstat\VS17\x64\Debug\ReadStat_Test_' + $currTest + '.exe;$?' echo "Running test $currTest..." 
$success = Invoke-Expression $currExec if(-not $success){ $state = "FAIL" echo "Test result: FAIL" } else{ echo "Test result: PASS" } } if($state -eq "FAIL") { exit 1 } } on_success: - ps: | if ($env:TOOLCHAIN -eq "cygwin") { C:\cygwin64\bin\sh -lc "cd /cygdrive/c/projects/readstat && cat ./test-suite.log" } ReadStat-1.1.7/autogen.sh000077500000000000000000000000441410722155500152300ustar00rootroot00000000000000#! /usr/bin/env bash autoreconf -i ReadStat-1.1.7/config.rpath000066400000000000000000000000001410722155500155240ustar00rootroot00000000000000ReadStat-1.1.7/configure.ac000066400000000000000000000065721410722155500155310ustar00rootroot00000000000000# Update this with each release. Be sure to change VS17/PropertySheet.props too AC_INIT([ReadStat], [1.1.7], [https://github.com/WizardMac/ReadStat/issues], [readstat], [https://github.com/WizardMac/ReadStat]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AM_SILENT_RULES([yes]) # Don't forget to update these too READSTAT_MAJOR_VERSION=1 READSTAT_MINOR_VERSION=1 READSTAT_MICRO_VERSION=7 # libtool stuff, see https://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html#Updating-version-info READSTAT_VERSION=$READSTAT_MAJOR_VERSION.$READSTAT_MINOR_VERSION.$READSTAT_MICRO_VERSION # TODO the following logic is broken. Fix it during the next minor update. 
# See https://github.com/libxls/libxls/commit/cf5b0500e78e77a70361c6de12b08230590857dc
READSTAT_VERSION_INFO=`expr $READSTAT_MAJOR_VERSION + $READSTAT_MINOR_VERSION`:$READSTAT_MICRO_VERSION:$READSTAT_MINOR_VERSION

AC_SUBST(READSTAT_VERSION)
AC_SUBST(READSTAT_VERSION_INFO)

LT_INIT([disable-static])

AC_PROG_CC
AC_PROG_CXX

# Code coverage is off by default. Take the value from $enableval so that
# --disable-code-coverage and --enable-code-coverage=no really turn it off;
# a hard-coded "yes" here would switch coverage on for those spellings too.
AC_ARG_ENABLE([code-coverage],
    AS_HELP_STRING([--enable-code-coverage], [Enable code coverage profiling]),
    [code_coverage=$enableval], [code_coverage=no])

# Fuzz testing is off by default. When requested, verify that the compiler
# can link with -fsanitize=fuzzer and abort configuration if it cannot.
# NOTE(review): the check below does not consult $enableval, so it also runs
# for --disable-fuzz-testing; confirm whether that is intended.
AC_ARG_ENABLE([fuzz-testing],
    AS_HELP_STRING([--enable-fuzz-testing], ["Enable fuzz testing (requires Clang 6 or later)"]),
    [
     AC_MSG_CHECKING([whether $CC accepts -fsanitize=fuzzer])
     tmp_saved_flags=$[]_AC_LANG_PREFIX[]FLAGS
     _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS -fsanitize=fuzzer"
     AC_LINK_IFELSE([AC_LANG_PROGRAM()],
        [
         AC_MSG_RESULT(yes)
         SANITIZERS="-fsanitize=fuzzer"
         fuzzer=yes],
        AC_MSG_RESULT(no)
        AC_MSG_FAILURE([-fsanitize=fuzzer not supported (Required with --enable-fuzz-testing)]))
     _AC_LANG_PREFIX[]FLAGS=$tmp_saved_flags
    ],
    [SANITIZERS=""
     fuzzer=no])
AM_CONDITIONAL([FUZZER_ENABLED], test "x$fuzzer" = "xyes")
AC_SUBST([SANITIZERS])

AM_ICONV

AC_CANONICAL_HOST

# Hosts matching these patterns get -lm added to the link line.
AS_CASE([$host],
    [*linux*|*bsd*|*mingw*|*cygwin*], [EXTRA_LIBS="-lm"],
    [EXTRA_LIBS=""])
AC_SUBST([EXTRA_LIBS])

# Host-specific warning suppressions.
AS_CASE([$host],
    [*mingw*], [EXTRA_WARNINGS="-Wno-pedantic-ms-format -Wno-stringop-truncation"],
    [*cygwin*], [EXTRA_WARNINGS="-Wno-unused-const-variable"],
    [EXTRA_WARNINGS=""])
AC_SUBST([EXTRA_WARNINGS])

# MinGW and Cygwin hosts additionally get libtool's -no-undefined flag.
AS_CASE([$host],
    [*mingw*|*cygwin*], [EXTRA_LDFLAGS="$LTLIBICONV -no-undefined"],
    [EXTRA_LDFLAGS="$LTLIBICONV"])
AC_SUBST([EXTRA_LDFLAGS])

AC_ARG_VAR([LIB_FUZZING_ENGINE], [Location of prebuilt fuzzing engine library])
AC_SUBST([LIB_FUZZING_ENGINE])

# RAGEL falls back to the "true" command when ragel is not found; HAVE_RAGEL
# is therefore keyed on that sentinel value.
AC_ARG_VAR([RAGEL], [Ragel generator command])
AC_ARG_VAR([RAGELFLAGS], [Ragel generator flags])
AC_PATH_PROG([RAGEL], [ragel], [true])
AM_CONDITIONAL([HAVE_RAGEL], test "$RAGEL" != "true")

AC_CHECK_LIB([xlsxwriter], [workbook_new], [true], [false])
AM_CONDITIONAL([HAVE_XLSXWRITER],
test "$ac_cv_lib_xlsxwriter_workbook_new" = yes) AC_CHECK_LIB([csv], [csv_parse], [true], [false]) AM_CONDITIONAL([HAVE_CSVREADER], test "$ac_cv_lib_csv_csv_parse" = yes) AC_CHECK_LIB([z], [deflate], [true], [false]) AM_CONDITIONAL([HAVE_ZLIB], test "$ac_cv_lib_z_deflate" = yes) AM_CONDITIONAL([CODE_COVERAGE_ENABLED], test "x$code_coverage" = "xyes") AC_OUTPUT([Makefile]) AC_MSG_RESULT([ Configuration: C compiler: $CC CFLAGS: $CFLAGS LD: $LD C++ compiler: $CXX CXXFLAGS: $CXXFLAGS CXXLD: $CXXLD Host: $host Extra warnings: $EXTRA_WARNINGS Extra libs: $EXTRA_LIBS Extra ld flags: $EXTRA_LDFLAGS Fuzzing engine: $LIB_FUZZING_ENGINE Ragel: $RAGEL Ragel flags: $RAGELFLAGS]) ReadStat-1.1.7/fuzz/000077500000000000000000000000001410722155500142275ustar00rootroot00000000000000ReadStat-1.1.7/fuzz/dict/000077500000000000000000000000001410722155500151525ustar00rootroot00000000000000ReadStat-1.1.7/fuzz/dict/fuzz_format_sas_commands.dict000066400000000000000000000004221410722155500231120ustar00rootroot00000000000000" " "#" "$" "$CHAR" "%LET" "(" ")" "*/" "/*" ";" "@" "ATTRIB" "CLEAR" "CONTENTS" "DATA" "DATE" "FILENAME" "FOOTNOTE" "FORMAT" "IF" "INFILE" "INPUT" "INVALUE" "LABEL" "LENGTH" "LIBNAME" "LIST" "MISSING" "OPTIONS" "OTHER" "PRINT" "PROC" "RUN" "VALUE" "_ALL_" "dlm" "firstobs" ReadStat-1.1.7/fuzz/dict/fuzz_format_spss_commands.dict000066400000000000000000000005701410722155500233200ustar00rootroot00000000000000"(" "(NOMINAL)" "(ORDINAL)" "(SCALE)" ")" "." 
"/" "/VARIABLES" "=" " " "A" "ADATE" "COMMENT" "DATA" "DATASET" "DATE" "DELIMITERS" "DICTIONARY" "DISPLAY" "END" "EXECUTE" "F" "FILE" "FIRSTCASE" "FIXED" "FORMATS" "HANDLE" "IF" "INPUT" "LABEL" "LABELS" "LEVEL" "LIST" "NAME" "OUTFILE" "PROGRAM" "RECODE" "RECORD" "SAVE" "SELECT" "SET" "SYSMIS" "TABLE" "VALUE" "VARIABLE" "WINDOW" ReadStat-1.1.7/fuzz/dict/fuzz_format_stata_commands.dict000066400000000000000000000002631410722155500234430ustar00rootroot00000000000000" " ")" "*/" "/*" "_column(" "_firstlineoffile(" "_line(" "_lines(" "_lrecl(" "_newline" "_skip(" "byte" "dictionary" "double" "float" "infile" "int" "long" "str" "using" "{" "}" ReadStat-1.1.7/fuzz/dict/fuzz_grammar_dta_timestamp.dict000066400000000000000000000001561410722155500234400ustar00rootroot00000000000000" " ":" "Jan" "Feb" "Mar" "Apr" "May" "Mai" "Jun" "Jul" "Aug" "Sep" "Oct" "Okt" "Nov" "Dec" "Dez" "12" "2000" ReadStat-1.1.7/fuzz/dict/fuzz_grammar_por_double.dict000066400000000000000000000000311410722155500227270ustar00rootroot00000000000000"*." "+" "-" "/" "0" "." 
ReadStat-1.1.7/fuzz/dict/fuzz_grammar_sav_date.dict000066400000000000000000000002311410722155500223650ustar00rootroot00000000000000"12" " " "Jan" "JAN" "Feb" "FEB" "Mar" "MAR" "Apr" "APR" "May" "MAY" "Jun" "JUN" "Jul" "JUL" "Aug" "AUG" "Sep" "SEP" "Oct" "OCT" "Nov" "NOV" "Dec" "DEC" ReadStat-1.1.7/fuzz/dict/fuzz_grammar_sav_time.dict000066400000000000000000000000111410722155500224020ustar00rootroot00000000000000"12" ":" ReadStat-1.1.7/fuzz/dict/fuzz_grammar_spss_format.dict000066400000000000000000000003661410722155500231500ustar00rootroot00000000000000"A" "ADATE" "AHEX" "CCA" "CCB" "CCC" "CCD" "CCE" "COMMA" "DATE" "DATETIME" "DOLLAR" "DOT" "DTIME" "E" "EDATE" "F" "IB" "JDATE" "MONTH" "MOYR" "MTIME" "N" "P" "PCT" "PIB" "PIBHEX" "PK" "QYR" "RB" "RBHEX" "SDATE" "TIME" "WKDAY" "WKYR" "YMDHMS" "Z" ReadStat-1.1.7/man/000077500000000000000000000000001410722155500140045ustar00rootroot00000000000000ReadStat-1.1.7/man/extract_metadata.man000066400000000000000000000025771410722155500200260ustar00rootroot00000000000000.TH EXTRACT_METADATA 1 "01 February 2019" .SH NAME extract_metadata \- read column metadata from SPSS and Stata binary files, and format it as JSON .SH SYNOPSIS .B extract_metadata .IR input-file .IR output-file .SH DESCRIPTION .B extract_metadata reads column metadata from existing binary data files, so that \fBreadstat\fR can produce new, column-compatible binary files from CSV input files. Both programs use JSON as a metadata interchange format. .PP The .IR input\-file should be a file with one of the following extensions: .TP .IR dta Stata binary file, version 104 or newer .TP .IR sav SPSS uncompressed binary file .TP .IR zsav SPSS compressed binary file .PP In all cases, \fIoutput-file\fR should end in .json. .SH EXAMPLE Suppose you have a Stata file with last year's survey data, and want to produce a compatible Stata file containing this year's survey data. 
First, extract the metadata: .PP .nf .RS extract_metadata last-year.dta survey-metadata.json .RE .fi .PP Now apply it to this year's data, which is stored in a CSV file: .PP .nf .RS readstat this-year.csv survey-metadata.json this-year.dta .RE .fi .PP The first line of the CSV file should contain column names which match the column names in last-year.dta. If everything went well, your new binary data set is now stored in this-year.dta. .SH OPTIONS .PP None. .SH AUTHOR Copyright (C) 2012-2019 Evan Miller, and others where indicated. ReadStat-1.1.7/man/readstat.man000066400000000000000000000055121410722155500163130ustar00rootroot00000000000000.TH READSTAT 1 "23 January 2019" .SH NAME readstat \- read and write data set files from SAS, SPSS, and Stata .SH SYNOPSIS .B readstat .IR input-file .P .B readstat [\fB-f\fR] .IR input-file .IR output-file .P .B readstat [\fB-f\fR] .IR input-file .IR metadata-file .IR output-file .SH DESCRIPTION .B readstat converts data set files from popular statistics packages stored in both plain-text and binary formats. .PP In the first invocation style, .B readstat displays metadata from \fIinput\-file\fR, including the row count, column count, text encoding, and timestamp. .IR input\-file should be a file with one of the following extensions: .TP .IR sas7bdat SAS binary file, created with SAS version 7 or newer .TP .IR xpt SAS portable file, version 5 or version 8, created with the SAS XPORT command .TP .IR sav SPSS uncompressed binary file .TP .IR zsav SPSS compressed binary file .TP .IR por SPSS portable file .TP .IR dta Stata binary file, version 104 or newer .PP If the row count cannot be determined from the file header, which is sometimes the case with SPSS binary files and always the case with SPSS portable files, .B readstat will report a value of -1. .PP In the second invocation style, .B readstat converts .IR input-file to \fIoutput-file\fR, e.g. a SAS portable file to a Stata binary file. 
In addition to the preceding extension list, \fIoutput-file\fR may have extension .IR csv or \fIxlsx\fR, which creates a CSV or Excel file, respectively. .PP The third invocation style is used when additional metadata about the input file, such as value labels or column widths, is stored in a separate file. Several types of metadata file are supported: .TP .IR sas7bcat SAS binary "catalog" file, created with SAS version 7 or newer, containing value labels .TP .IR json JavaScript Object Notation (JSON) file, containing column metadata that cannot be gleaned from the input CSV. For details, see the manual page for the \fBextract_metadata\fR command. .TP .IR dct Stata dictionary file, containing the data layout and column metadata for a plain-text input file. .TP .IR sps SPSS command file, describing the data layout and column metadata for a plain-text input file. .TP .IR sas SAS command file, describing the data layout and column metadata for a plain-text input file. .PP The last three formats can be used for both fixed-width and delimiter-separated (e.g. tab-separated) input files. These are commonly distributed along with plain-text ASCII data sets. .PP Both input and output formats are implied by the file extension. .SH OPTIONS .TP .BR \-f Overwrite any existing \fIoutput-file\fR. .SH BUGS SAS binary files created by \fBreadstat\fR do not open with current versions of SAS. .PP The finer details of format strings (e.g. "%8.2g") are not properly converted between file formats. .SH AUTHOR Copyright (C) 2012-2019 Evan Miller, and others where indicated. 
ReadStat-1.1.7/src/000077500000000000000000000000001410722155500140205ustar00rootroot00000000000000ReadStat-1.1.7/src/CKHashTable.c000066400000000000000000000213221410722155500162350ustar00rootroot00000000000000// CKHashTable - A simple hash table // Copyright 2010-2020 Evan Miller (see LICENSE) #include "CKHashTable.h" /* SipHash reference C implementation Copyright (c) 2012 Jean-Philippe Aumasson Copyright (c) 2012 Daniel J. Bernstein To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see . */ #include #include #include typedef uint64_t u64; typedef uint32_t u32; typedef uint8_t u8; #define ROTL(x,b) (u64)( ((x) << (b)) | ( (x) >> (64 - (b))) ) #define U32TO8_LE(p, v) \ (p)[0] = (u8)((v) ); (p)[1] = (u8)((v) >> 8); \ (p)[2] = (u8)((v) >> 16); (p)[3] = (u8)((v) >> 24); #define U64TO8_LE(p, v) \ U32TO8_LE((p), (u32)((v) )); \ U32TO8_LE((p) + 4, (u32)((v) >> 32)); #define U8TO64_LE(p) \ (((u64)((p)[0]) ) | \ ((u64)((p)[1]) << 8) | \ ((u64)((p)[2]) << 16) | \ ((u64)((p)[3]) << 24) | \ ((u64)((p)[4]) << 32) | \ ((u64)((p)[5]) << 40) | \ ((u64)((p)[6]) << 48) | \ ((u64)((p)[7]) << 56)) #define SIPROUND \ do { \ v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \ v2 += v3; v3=ROTL(v3,16); v3 ^= v2; \ v0 += v3; v3=ROTL(v3,21); v3 ^= v0; \ v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \ } while(0) /* SipHash-1-2 */ static int siphash( unsigned char *out, const unsigned char *in, unsigned long long inlen, const unsigned char *k ) { /* "somepseudorandomlygeneratedbytes" */ u64 v0 = 0x736f6d6570736575ULL; u64 v1 = 0x646f72616e646f6dULL; u64 v2 = 0x6c7967656e657261ULL; u64 v3 = 0x7465646279746573ULL; u64 b; u64 k0 = U8TO64_LE( k ); u64 k1 = U8TO64_LE( k + 8 ); u64 m; const u8 *end = in + inlen - ( inlen % 
sizeof( u64 ) ); const int left = inlen & 7; b = ( ( u64 )inlen ) << 56; v3 ^= k1; v2 ^= k0; v1 ^= k1; v0 ^= k0; for ( ; in != end; in += 8 ) { m = U8TO64_LE( in ); v3 ^= m; SIPROUND; v0 ^= m; } switch( left ) { case 7: b |= ( ( u64 )in[ 6] ) << 48; case 6: b |= ( ( u64 )in[ 5] ) << 40; case 5: b |= ( ( u64 )in[ 4] ) << 32; case 4: b |= ( ( u64 )in[ 3] ) << 24; case 3: b |= ( ( u64 )in[ 2] ) << 16; case 2: b |= ( ( u64 )in[ 1] ) << 8; case 1: b |= ( ( u64 )in[ 0] ); break; case 0: break; } v3 ^= b; SIPROUND; v0 ^= b; v2 ^= 0xff; SIPROUND; SIPROUND; b = v0 ^ v1 ^ v2 ^ v3; U64TO8_LE( out, b ); return 0; } static uint64_t ck_hash_str(const char *str, size_t keylen) { uint64_t hash; unsigned char k[16] = { 0 }; siphash((unsigned char *)&hash, (const unsigned char *)str, keylen, k); return hash; } const void *ck_float_hash_lookup(float key, ck_hash_table_t *table) { return ck_str_n_hash_lookup((const char *)&key, sizeof(float), table); } int ck_float_hash_insert(float key, const void *value, ck_hash_table_t *table) { return ck_str_n_hash_insert((const char *)&key, sizeof(float), value, table); } const void *ck_double_hash_lookup(double key, ck_hash_table_t *table) { return ck_str_n_hash_lookup((const char *)&key, sizeof(double), table); } int ck_double_hash_insert(double key, const void *value, ck_hash_table_t *table) { return ck_str_n_hash_insert((const char *)&key, sizeof(double), value, table); } const void *ck_str_hash_lookup(const char *key, ck_hash_table_t *table) { size_t keylen = strlen(key); return ck_str_n_hash_lookup(key, keylen, table); } const void *ck_str_n_hash_lookup(const char *key, size_t keylen, ck_hash_table_t *table) { if (table->count == 0) return NULL; if (keylen == 0) return NULL; uint64_t hash_key = ck_hash_str(key, keylen); hash_key %= table->capacity; uint64_t end = hash_key; do { char *this_key = &table->keys[table->entries[hash_key].key_offset]; size_t this_keylen = table->entries[hash_key].key_length; if (this_keylen == 0) return NULL; if 
(this_keylen == keylen && memcmp(this_key, key, keylen) == 0) { return table->entries[hash_key].value; } hash_key++; hash_key %= table->capacity; } while (hash_key != end); return NULL; } int ck_str_hash_insert(const char *key, const void *value, ck_hash_table_t *table) { size_t keylen = strlen(key); return ck_str_n_hash_insert(key, keylen, value, table); } static int ck_hash_insert_nocopy(off_t key_offset, size_t keylen, uint64_t hash_key, const void *value, ck_hash_table_t *table) { if (table->capacity == 0) return 0; hash_key %= table->capacity; uint64_t end = (hash_key + table->capacity - 1) % table->capacity; while (hash_key != end) { ck_hash_entry_t *entry = &table->entries[hash_key]; if (table->entries[hash_key].key_length == 0) { table->count++; entry->key_offset = key_offset; entry->key_length = keylen; entry->value = value; return 1; } else if (entry->key_length == keylen && entry->key_offset == key_offset) { entry->value = value; return 1; } hash_key++; hash_key %= table->capacity; } return 0; } int ck_str_n_hash_insert(const char *key, size_t keylen, const void *value, ck_hash_table_t *table) { if (table->capacity == 0) return 0; if (keylen == 0) return 0; if (table->count >= 0.75 * table->capacity) { if (ck_hash_table_grow(table) == -1) { return 0; } } uint64_t hash_key = ck_hash_str(key, keylen); hash_key %= table->capacity; uint64_t end = hash_key; do { ck_hash_entry_t *entry = &table->entries[hash_key]; char *this_key = &table->keys[entry->key_offset]; if (entry->key_length == 0) { table->count++; while (table->keys_used + keylen > table->keys_capacity) { table->keys_capacity *= 2; table->keys = realloc(table->keys, table->keys_capacity); } memcpy(table->keys + table->keys_used, key, keylen); entry->key_offset = table->keys_used; entry->key_length = keylen; table->keys_used += keylen; entry->value = value; return 1; } else if (entry->key_length == keylen && memcmp(this_key, key, keylen) == 0) { table->entries[hash_key].value = value; return 1; } 
hash_key++; hash_key %= table->capacity; } while (hash_key != end); return 0; } ck_hash_table_t *ck_hash_table_init(size_t num_entries, size_t mean_key_length) { ck_hash_table_t *table; if ((table = malloc(sizeof(ck_hash_table_t))) == NULL) return NULL; if ((table->keys = malloc(num_entries * mean_key_length)) == NULL) { free(table); return NULL; } table->keys_capacity = num_entries * mean_key_length; num_entries *= 2; if ((table->entries = malloc(num_entries * sizeof(ck_hash_entry_t))) == NULL) { free(table->keys); free(table); return NULL; } table->capacity = num_entries; ck_hash_table_wipe(table); return table; } void ck_hash_table_free(ck_hash_table_t *table) { free(table->entries); if (table->keys) free(table->keys); free(table); } void ck_hash_table_wipe(ck_hash_table_t *table) { table->keys_used = 0; table->count = 0; memset(table->entries, 0, table->capacity * sizeof(ck_hash_entry_t)); } int ck_hash_table_grow(ck_hash_table_t *table) { ck_hash_entry_t *old_entries = table->entries; uint64_t old_capacity = table->capacity; uint64_t new_capacity = 2 * table->capacity; if ((table->entries = calloc(new_capacity, sizeof(ck_hash_entry_t))) == NULL) { return -1; } table->capacity = new_capacity; table->count = 0; for (int i=0; ikeys[old_entries[i].key_offset]; uint64_t hash_key = ck_hash_str(this_key, old_entries[i].key_length); if (!ck_hash_insert_nocopy(old_entries[i].key_offset, old_entries[i].key_length, hash_key, old_entries[i].value, table)) return -1; } } free(old_entries); return 0; } ReadStat-1.1.7/src/CKHashTable.h000066400000000000000000000024601410722155500162440ustar00rootroot00000000000000// CKHashTable - A simple hash table // Copyright 2010-2020 Evan Miller (see LICENSE) #include #include typedef struct ck_hash_entry_s { off_t key_offset; size_t key_length; const void *value; } ck_hash_entry_t; typedef struct ck_hash_table_s { size_t capacity; size_t count; ck_hash_entry_t *entries; char *keys; size_t keys_used; size_t keys_capacity; } 
ck_hash_table_t; int ck_str_hash_insert(const char *key, const void *value, ck_hash_table_t *table); const void *ck_str_hash_lookup(const char *key, ck_hash_table_t *table); int ck_str_n_hash_insert(const char *key, size_t keylen, const void *value, ck_hash_table_t *table); const void *ck_str_n_hash_lookup(const char *key, size_t keylen, ck_hash_table_t *table); int ck_float_hash_insert(float key, const void *value, ck_hash_table_t *table); const void *ck_float_hash_lookup(float key, ck_hash_table_t *table); int ck_double_hash_insert(double key, const void *value, ck_hash_table_t *table); const void *ck_double_hash_lookup(double key, ck_hash_table_t *table); ck_hash_table_t *ck_hash_table_init(size_t num_entries, size_t mean_key_length); void ck_hash_table_wipe(ck_hash_table_t *table); int ck_hash_table_grow(ck_hash_table_t *table); void ck_hash_table_free(ck_hash_table_t *table); ReadStat-1.1.7/src/bin/000077500000000000000000000000001410722155500145705ustar00rootroot00000000000000ReadStat-1.1.7/src/bin/extract_metadata.c000066400000000000000000000471741410722155500202630ustar00rootroot00000000000000#include "../readstat.h" #include #include #include #include #include #include "util/readstat_sav_date.h" #include "util/readstat_dta_days.h" #include "util/quote_and_escape.h" #include "util/file_format.h" #include "util/main.h" #include "extract_metadata.h" #include "write/json/write_missing_values.h" #include "write/json/write_value_labels.h" static const char* extract_metadata_type_str(extract_metadata_type_t t) { switch (t) { case EXTRACT_METADATA_TYPE_NUMERIC: return "NUMERIC"; case EXTRACT_METADATA_TYPE_STRING: return "STRING"; case EXTRACT_METADATA_TYPE_UNKNOWN: return "UNKNOWN"; } return "UNKNOWN"; } static const char* extract_metadata_format_str(extract_metadata_format_t format) { switch (format) { case EXTRACT_METADATA_FORMAT_NUMBER: return "NUMBER"; case EXTRACT_METADATA_FORMAT_PERCENT: return "PERCENT"; case EXTRACT_METADATA_FORMAT_CURRENCY: return 
"CURRENCY"; case EXTRACT_METADATA_FORMAT_DATE: return "DATE"; case EXTRACT_METADATA_FORMAT_TIME: return "TIME"; case EXTRACT_METADATA_FORMAT_DATE_TIME: return "DATE_TIME"; case EXTRACT_METADATA_FORMAT_UNSPECIFIED: return "UNSPECIFIED"; } return "UNSPECIFIED"; } static const char* readstat_type_str(readstat_type_t type) { switch (type) { case READSTAT_TYPE_STRING: return "READSTAT_TYPE_STRING"; case READSTAT_TYPE_INT8: return "READSTAT_TYPE_INT8"; case READSTAT_TYPE_INT16: return "READSTAT_TYPE_INT16"; case READSTAT_TYPE_INT32: return "READSTAT_TYPE_INT32"; case READSTAT_TYPE_FLOAT: return "READSTAT_TYPE_FLOAT"; case READSTAT_TYPE_DOUBLE: return "READSTAT_TYPE_DOUBLE"; case READSTAT_TYPE_STRING_REF: return "READSTAT_TYPE_STRING_REF"; } return "UNKNOWN TYPE"; } static int extract_decimals(const char *s, char prefix) { if (s && s[0] && s[0]==prefix) { int decimals; if (sscanf(s, "%*c%*d.%d", &decimals) == 1) { if (decimals < 0 || decimals > 16) { fprintf(stderr, "%s:%d decimals was %d, expected to be [0, 16]\n", __FILE__, __LINE__, decimals); exit(EXIT_FAILURE); } return decimals; } fprintf(stderr, "%s:%d not a number: %s\n", __FILE__, __LINE__, &s[1]); exit(EXIT_FAILURE); } else { return -1; } } int hasPrefix(const char *str, char *prefix) { return strncmp(str, prefix, sizeof(prefix)-1); } static int handle_variable_sav(int index, readstat_variable_t *variable, const char *val_labels, struct context *ctx) { extract_metadata_type_t type = EXTRACT_METADATA_TYPE_UNKNOWN; extract_metadata_format_t format = EXTRACT_METADATA_FORMAT_UNSPECIFIED; char *pattern = ""; int decimals = -1; const char *vformat = readstat_variable_get_format(variable); const char *label = readstat_variable_get_label(variable); switch (readstat_variable_get_type_class(variable)) { case READSTAT_TYPE_CLASS_STRING: type = EXTRACT_METADATA_TYPE_STRING; break; case READSTAT_TYPE_CLASS_NUMERIC: type = EXTRACT_METADATA_TYPE_NUMERIC; // Extract format // SPSS data types: 
https://libguides.library.kent.edu/SPSS/DatesTime // Pattern formats: https://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns // TODO: Extract currency if (vformat) { if (hasPrefix(vformat, "DATE9") == 0) { // e.g. 31-JAN-13 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "dd-MMM-yy"; } else if (hasPrefix(vformat, "DATE11") == 0) { // e.g. 31-JAN-13 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "dd-MMM-yyyy"; } else if (hasPrefix(vformat, "ADATE8") == 0) { // e.g. 01/31/13 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "MM/dd/yy"; } else if (hasPrefix(vformat, "ADATE10") == 0) { // e.g. 01/31/2013 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "MM/dd/yyyy"; } else if (hasPrefix(vformat, "EDATE8") == 0) { // e.g. 31.01.13 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "dd.MM.yy"; } else if (hasPrefix(vformat, "EDATE10") == 0) { // e.g. 31.01.2013 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "dd.MM.yyyy"; } else if (hasPrefix(vformat, "SDATE8") == 0) { // e.g. 13/01/31 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "yy/MM/dd"; } else if (hasPrefix(vformat, "SDATE10") == 0) { // e.g. 2013/01/31 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "yyyy/MM/dd"; } else if (hasPrefix(vformat, "DATETIME17") == 0) { // e.g. 31-JAN-2013 01:02 format = EXTRACT_METADATA_FORMAT_DATE_TIME; pattern = "dd-MMM-yyyy hh:mm"; } else if (hasPrefix(vformat, "DATETIME20") == 0) { // e.g. 31-JAN-2013 01:02:33 format = EXTRACT_METADATA_FORMAT_DATE_TIME; pattern = "dd-MMM-yyyy hh:mm:ss"; } else if (hasPrefix(vformat, "DATETIME23.2") == 0) { // e.g. 31-JAN-2013 01:02:33.72 format = EXTRACT_METADATA_FORMAT_DATE_TIME; pattern = "dd-MMM-yyyy hh:mm:ss.SS+"; } else if (hasPrefix(vformat, "YMDHMS16") == 0) { // e.g. 2013-01-31 1:02 format = EXTRACT_METADATA_FORMAT_DATE_TIME; pattern = "yyyy-MM-dd h:mm"; } else if (hasPrefix(vformat, "YMDHMS19") == 0) { // e.g. 
2013-01-31 1:02:33 format = EXTRACT_METADATA_FORMAT_DATE_TIME; pattern = "yyyy-MM-dd h:mm:ss"; } else if (hasPrefix(vformat, "YMDHMS19.2") == 0) { // e.g. 2013-01-31 1:02:33.72 format = EXTRACT_METADATA_FORMAT_DATE_TIME; pattern = "yyyy-MM-dd h:mm:ss.SS+"; } else if (hasPrefix(vformat, "MTIME5") == 0) { // e.g. 1754:36 format = EXTRACT_METADATA_FORMAT_TIME; pattern = "[m+]:[s+]"; } else if (hasPrefix(vformat, "MTIME8.2") == 0) { // e.g. 1754:36.58 format = EXTRACT_METADATA_FORMAT_TIME; pattern = "[m+]:[s+]"; } else if (hasPrefix(vformat, "TIME5") == 0) { // e.g. 29:14 format = EXTRACT_METADATA_FORMAT_TIME; pattern = "[h+]:[m+]"; } else if (hasPrefix(vformat, "TIME8") == 0) { // e.g. 29:14:36 format = EXTRACT_METADATA_FORMAT_TIME; pattern = "[h+]:[m+]:[s+]"; } else if (hasPrefix(vformat, "TIME11.2") == 0) { // e.g. 29:14:36.58 format = EXTRACT_METADATA_FORMAT_TIME; pattern = "[h+]:[m+]:[s+]"; } else if (hasPrefix(vformat, "DTIME9") == 0) { // e.g. 1 05:14 format = EXTRACT_METADATA_FORMAT_TIME; pattern = "[d+] [h+]:[m+]"; } else if (hasPrefix(vformat, "DTIME12") == 0) { // e.g. 1 05:14:36 format = EXTRACT_METADATA_FORMAT_TIME; pattern = "[d+] [h+]:[m+]:[s+]"; } else if (hasPrefix(vformat, "DTIME15.2") == 0) { // e.g. 1 05:14:36.58 format = EXTRACT_METADATA_FORMAT_TIME; pattern = "[d+] [h+]:[m+]:[s+]"; } else if (hasPrefix(vformat, "JDATE5") == 0) { // e.g. 13031 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "yyddd"; } else if (hasPrefix(vformat, "JDATE7") == 0) { // e.g. 2013031 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "yyyyddd"; } else if (hasPrefix(vformat, "QYR6") == 0) { // e.g. 1 Q 13 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "Q 'Q' y"; } else if (hasPrefix(vformat, "QYR8") == 0) { // e.g. 1 Q 2013 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "Q 'Q' yyyy"; } else if (hasPrefix(vformat, "MOYR6") == 0) { // e.g. JAN 13 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "mmm yy"; } else if (hasPrefix(vformat, "MOYR8") == 0) { // e.g. 
JAN 2013 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "mmm yyyy"; } else if (hasPrefix(vformat, "WKYR8") == 0) { // e.g. 5 WK 13 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "w 'WK' yy"; } else if (hasPrefix(vformat, "WKYR10") == 0) { // e.g. 5 WK 2013 format = EXTRACT_METADATA_FORMAT_DATE; pattern = "w 'WK' yyyy"; } else if (hasPrefix(vformat, "WKDAY3") == 0) { // Day of the week, three letter abbreviation (e.g. "Mon"). format = EXTRACT_METADATA_FORMAT_DATE; pattern = "eee"; } else if (hasPrefix(vformat, "WKDAY9") == 0) { // Day of the week, full name. (e.g. "Monday") format = EXTRACT_METADATA_FORMAT_DATE; pattern = "eeee"; } else if (hasPrefix(vformat, "MONTH3") == 0) { // Three letter month abbreviation (e.g. "Feb"). format = EXTRACT_METADATA_FORMAT_DATE; pattern = "MMM"; } else if (hasPrefix(vformat, "MONTH9") == 0) { // Full month name. (e.g. "February") format = EXTRACT_METADATA_FORMAT_DATE; pattern = "MMMM"; } else { format = EXTRACT_METADATA_FORMAT_NUMBER; decimals = extract_decimals(vformat, 'F'); } } else { format = EXTRACT_METADATA_FORMAT_UNSPECIFIED; } break; default: fprintf(stderr, "%s:%d unhandled type %s\n", __FILE__, __LINE__, readstat_type_str(variable->type)); exit(EXIT_FAILURE); break; } if (ctx->count == 0) { ctx->count = 1; fprintf(ctx->fp, "{\"type\": \"SPSS\",\n \"variables\": [\n"); } else { fprintf(ctx->fp, ",\n"); } fprintf(ctx->fp, "{\"type\": \"%s\", \"name\": \"%s\"", extract_metadata_type_str(type), variable->name ); if (type == EXTRACT_METADATA_TYPE_NUMERIC) { fprintf(ctx->fp, ", \"format\": \"%s\"", extract_metadata_format_str(format)); if (pattern && pattern[0]) { fprintf(ctx->fp, ", \"pattern\": \"%s\"", pattern); } } if (decimals > 0) { fprintf(ctx->fp, ", \"decimals\": %d", decimals); } if (label) { char* quoted_label = quote_and_escape(label); fprintf(ctx->fp, ", \"label\": %s", quoted_label); free(quoted_label); } add_val_labels(ctx, variable, val_labels); add_missing_values(ctx, variable); fprintf(ctx->fp, "}"); 
return 0; } static int handle_variable_dta(int index, readstat_variable_t *variable, const char *val_labels, struct context *ctx) { extract_metadata_type_t type = EXTRACT_METADATA_TYPE_UNKNOWN; extract_metadata_format_t format = EXTRACT_METADATA_FORMAT_UNSPECIFIED; char *pattern = ""; const char *vformat = readstat_variable_get_format(variable); const char *label = readstat_variable_get_label(variable); int decimals = -1; switch (readstat_variable_get_type_class(variable)) { case READSTAT_TYPE_CLASS_STRING: type = EXTRACT_METADATA_TYPE_STRING; break; case READSTAT_TYPE_CLASS_NUMERIC: type = EXTRACT_METADATA_TYPE_NUMERIC; // Extract format // Pattern formats: https://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns if (vformat) { if (strcmp(vformat, "%d") == 0) { format = EXTRACT_METADATA_FORMAT_DATE; } else if (strcmp(vformat, "%td") == 0) { format = EXTRACT_METADATA_FORMAT_DATE_TIME; } } else { format = EXTRACT_METADATA_FORMAT_NUMBER; decimals = extract_decimals(vformat, '%'); } break; default: fprintf(stderr, "%s:%d unhandled type %s\n", __FILE__, __LINE__, readstat_type_str(variable->type)); exit(EXIT_FAILURE); break; } if (ctx->count == 0) { ctx->count = 1; fprintf(ctx->fp, "{\"type\": \"STATA\",\n \"variables\": [\n"); } else { fprintf(ctx->fp, ",\n"); } fprintf(ctx->fp, "{\"type\": \"%s\", \"name\": \"%s\"", extract_metadata_type_str(type), variable->name ); if (type == EXTRACT_METADATA_TYPE_NUMERIC) { fprintf(ctx->fp, ", \"format\": \"%s\"", extract_metadata_format_str(format)); if (pattern && pattern[0]) { fprintf(ctx->fp, ", \"pattern\": \"%s\"", pattern); } } if (decimals > 0) { fprintf(ctx->fp, ", \"decimals\": %d", decimals); } if (label) { char* quoted_label = quote_and_escape(label); fprintf(ctx->fp, ", \"label\": %s", quoted_label); free(quoted_label); } add_val_labels(ctx, variable, val_labels); add_missing_values(ctx, variable); fprintf(ctx->fp, "}"); return 0; } static int handle_variable(int index, readstat_variable_t *variable, 
const char *val_labels, void *my_ctx) { struct context *ctx = (struct context *)my_ctx; if (ctx->input_format == RS_FORMAT_SAV) { return handle_variable_sav(index, variable, val_labels, ctx); } else if (ctx->input_format == RS_FORMAT_DTA) { return handle_variable_dta(index, variable, val_labels, ctx); } else { fprintf(stderr, "%s:%d unsupported output format %d\n", __FILE__, __LINE__, ctx->input_format); exit(EXIT_FAILURE); } } static readstat_label_set_t * get_or_create_label_set(const char *val_labels, struct context *ctx) { for (int i=0; ivariable_count; i++) { readstat_label_set_t * lbl = &ctx->label_set[i]; if (0 == strcmp(lbl->name, val_labels)) { return lbl; } } ctx->variable_count++; ctx->label_set = realloc(ctx->label_set, ctx->variable_count*sizeof(readstat_label_set_t)); if (!ctx->label_set) { fprintf(stderr, "%s:%d realloc error: %s\n", __FILE__, __LINE__, strerror(errno)); return NULL; } readstat_label_set_t * lbl = &ctx->label_set[ctx->variable_count-1]; memset(lbl, 0, sizeof(readstat_label_set_t)); snprintf(lbl->name, sizeof(lbl->name), "%s", val_labels); return lbl; } static int handle_value_label(const char *val_labels, readstat_value_t value, const char *label, void *c) { struct context *ctx = (struct context*)c; if (value.type == READSTAT_TYPE_DOUBLE || value.type == READSTAT_TYPE_STRING || value.type == READSTAT_TYPE_INT32) { readstat_label_set_t * label_set = get_or_create_label_set(val_labels, ctx); if (!label_set) { return READSTAT_ERROR_MALLOC; } long label_idx = label_set->value_labels_count; label_set->value_labels = realloc(label_set->value_labels, (1 + label_idx) * sizeof(readstat_value_label_t)); if (!label_set->value_labels) { fprintf(stderr, "%s:%d realloc error: %s\n", __FILE__, __LINE__, strerror(errno)); return READSTAT_ERROR_MALLOC; } readstat_value_label_t* value_label = &label_set->value_labels[label_idx]; memset(value_label, 0, sizeof(readstat_value_label_t)); if (value.type == READSTAT_TYPE_DOUBLE) { value_label->double_key = 
value.v.double_value; } else if (value.type == READSTAT_TYPE_STRING) { char *string_key = malloc(strlen(value.v.string_value) + 1); strcpy(string_key, value.v.string_value); value_label->string_key = string_key; value_label->string_key_len = strlen(value.v.string_value); } else if (value.type == READSTAT_TYPE_INT32) { value_label->int32_key = value.v.i32_value; } else { fprintf(stderr, "%s:%d unsupported type!\n", __FILE__, __LINE__); exit(EXIT_FAILURE); } char *lbl = malloc(strlen(label) + 1); strcpy(lbl, label); value_label->label = lbl; value_label->label_len = strlen(label); label_set->value_labels_count++; } else { fprintf(stderr, "%s:%d Unhandled value.type %d\n", __FILE__, __LINE__, value.type); exit(EXIT_FAILURE); } return READSTAT_OK; } int pass(struct context *ctx, char *input, char *output, int pass) { if (pass==2) { FILE* fp = fopen(output, "w"); if (fp == NULL) { fprintf(stderr, "Could not open %s for writing: %s\n", output, strerror(errno)); exit(EXIT_FAILURE); } ctx->fp = fp; } else { ctx->fp = NULL; } int ret = 0; readstat_error_t error = READSTAT_OK; readstat_parser_t *parser = readstat_parser_init(); if (pass == 1) { readstat_set_value_label_handler(parser, &handle_value_label); } else if (pass == 2) { readstat_set_variable_handler(parser, &handle_variable); } const char *filename = input; size_t len = strlen(filename); if (len < sizeof(".dta") -1) { fprintf(stderr, "Unknown input format\n"); ret = 1; goto cleanup; } if (strncmp(filename + len - 4, ".sav", 4) == 0) { fprintf(stdout, "parsing sav file\n"); error = readstat_parse_sav(parser, input, ctx); } else if (strncmp(filename + len - 4, ".dta", 4) == 0) { fprintf(stdout, "parsing dta file\n"); error = readstat_parse_dta(parser, input, ctx); } else { fprintf(stderr, "Unsupported input format\n"); ret = 1; goto cleanup; } if (error != READSTAT_OK) { fprintf(stderr, "Error processing %s: %s (%d)\n", input, readstat_error_message(error), error); ret = 1; } else { if (ctx->fp) { fprintf(ctx->fp, 
"]}\n"); fprintf(ctx->fp, "\n"); } } cleanup: readstat_parser_free(parser); if (ctx->fp) { fclose(ctx->fp); } if (pass==2 && ctx->variable_count >=1) { for (int i=0; ivariable_count; i++) { readstat_label_set_t * label_set = &ctx->label_set[i]; for (int j=0; jvalue_labels_count; j++) { readstat_value_label_t* value_label = &label_set->value_labels[j]; if (value_label->string_key) { free(value_label->string_key); } if (value_label->label) { free(value_label->label); } } free(label_set->value_labels); } free(ctx->label_set); } fprintf(stdout, "pass %d done\n", pass); return ret; } int portable_main(int argc, char *argv[]) { if (argc != 3) { printf("Usage: %s \n", argv[0]); return 1; } int ret = 0; struct context ctx; memset(&ctx, 0, sizeof(struct context)); ctx.input_format = readstat_format(argv[1]); ret = pass(&ctx, argv[1], argv[2], 1); if (!ret) { ret = pass(&ctx, argv[1], argv[2], 2); } printf("extract_metadata exiting\n"); return ret; } ReadStat-1.1.7/src/bin/extract_metadata.h000066400000000000000000000013441410722155500202550ustar00rootroot00000000000000#ifndef __EXTRACT_METADATA_H #define __EXTRACT_METADATA_H #include "../readstat.h" typedef struct context { int count; FILE* fp; int variable_count; int input_format; readstat_label_set_t *label_set; } context; typedef enum extract_metadata_type_e { EXTRACT_METADATA_TYPE_NUMERIC, EXTRACT_METADATA_TYPE_STRING, EXTRACT_METADATA_TYPE_UNKNOWN } extract_metadata_type_t; typedef enum extract_metadata_format_e { EXTRACT_METADATA_FORMAT_NUMBER, EXTRACT_METADATA_FORMAT_PERCENT, EXTRACT_METADATA_FORMAT_CURRENCY, EXTRACT_METADATA_FORMAT_DATE, EXTRACT_METADATA_FORMAT_TIME, EXTRACT_METADATA_FORMAT_DATE_TIME, EXTRACT_METADATA_FORMAT_UNSPECIFIED } extract_metadata_format_t; #endif ReadStat-1.1.7/src/bin/read_csv/000077500000000000000000000000001410722155500163565ustar00rootroot00000000000000ReadStat-1.1.7/src/bin/read_csv/LICENSE000066400000000000000000000021631410722155500173650ustar00rootroot00000000000000jsmn.* is from 
https://github.com/zserge/jsmn with the following LICENSE file: Copyright (c) 2010 Serge A. Zaitsev Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.ReadStat-1.1.7/src/bin/read_csv/csv_metadata.h000066400000000000000000000005671410722155500211720ustar00rootroot00000000000000 typedef struct csv_metadata { int pass; long rows; long columns; long _columns; long _rows; int output_format; size_t* column_width; int open_row; readstat_callbacks_t handle; void *user_ctx; readstat_variable_t *variables; int* is_date; struct json_metadata *json_md; rs_read_module_t *output_module; } csv_metadata; ReadStat-1.1.7/src/bin/read_csv/jsmn.c000066400000000000000000000171111410722155500174720ustar00rootroot00000000000000#include "jsmn.h" /** * Allocates a fresh unused token from the token pull. 
*/
/*
 * Hands out tokens[parser->toknext] and advances toknext.
 * The token is reset to start = end = -1 and size = 0 (and parent = -1 when
 * JSMN_PARENT_LINKS is defined). Returns NULL when toknext has reached
 * num_tokens, i.e. the caller-supplied array is exhausted.
 */
static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, jsmntok_t *tokens, size_t num_tokens) {
    jsmntok_t *tok;
    if (parser->toknext >= num_tokens) {
        return NULL;
    }
    tok = &tokens[parser->toknext++];
    tok->start = tok->end = -1;
    tok->size = 0;
#ifdef JSMN_PARENT_LINKS
    tok->parent = -1;
#endif
    return tok;
}

/**
 * Fills token type and boundaries.
 * Also resets the token's size to 0.
 */
static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, int start, int end) {
    token->type = type;
    token->start = start;
    token->end = end;
    token->size = 0;
}

/**
 * Fills next available token with JSON primitive.
 *
 * Scans from parser->pos until a delimiter (whitespace, ',', ']', '}', and
 * ':' unless JSMN_STRICT is defined), the end of the buffer or a NUL byte.
 *
 * Returns 0 on success, leaving parser->pos on the last character of the
 * primitive. On failure parser->pos is restored to where the primitive
 * began and one of these is returned:
 *   JSMN_ERROR_INVAL - a byte below 32 or at/above 127 was met;
 *   JSMN_ERROR_PART  - (JSMN_STRICT only) input ended before a delimiter;
 *   JSMN_ERROR_NOMEM - no free token was available.
 * When tokens is NULL nothing is stored; the primitive is only skipped.
 */
static int jsmn_parse_primitive(jsmn_parser *parser, const char *js, size_t len, jsmntok_t *tokens, size_t num_tokens) {
    jsmntok_t *token;
    int start;

    start = parser->pos;

    for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) {
        switch (js[parser->pos]) {
#ifndef JSMN_STRICT
            /* In strict mode primitive must be followed by "," or "}" or "]" */
            case ':':
#endif
            case '\t' : case '\r' : case '\n' : case ' ' :
            case ',' : case ']' : case '}' :
                goto found;
        }
        if (js[parser->pos] < 32 || js[parser->pos] >= 127) {
            parser->pos = start;
            return JSMN_ERROR_INVAL;
        }
    }
#ifdef JSMN_STRICT
    /* In strict mode primitive must be followed by a comma/object/array */
    parser->pos = start;
    return JSMN_ERROR_PART;
#endif

found:
    if (tokens == NULL) {
        /* Step back so the caller's loop increment lands on the delimiter. */
        parser->pos--;
        return 0;
    }
    token = jsmn_alloc_token(parser, tokens, num_tokens);
    if (token == NULL) {
        parser->pos = start;
        return JSMN_ERROR_NOMEM;
    }
    jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos);
#ifdef JSMN_PARENT_LINKS
    token->parent = parser->toksuper;
#endif
    /* Step back so the caller's loop increment lands on the delimiter. */
    parser->pos--;
    return 0;
}

/**
 * Fills next token with JSON string.
 */
/*
 * Expects parser->pos to be on the opening quote.
 *
 * Returns 0 once the closing quote is found, leaving parser->pos on that
 * quote; the stored token spans the characters between the two quotes.
 * On failure parser->pos is restored to the opening quote and one of these
 * is returned:
 *   JSMN_ERROR_NOMEM - no free token was available;
 *   JSMN_ERROR_INVAL - unknown escape, or a non-hex digit after \u;
 *   JSMN_ERROR_PART  - input ended before the closing quote.
 * When tokens is NULL the string is validated and skipped without being
 * stored.
 */
static int jsmn_parse_string(jsmn_parser *parser, const char *js, size_t len, jsmntok_t *tokens, size_t num_tokens) {
    jsmntok_t *token;

    int start = parser->pos;

    parser->pos++;

    /* Skip starting quote */
    for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) {
        char c = js[parser->pos];

        /* Quote: end of string */
        if (c == '\"') {
            if (tokens == NULL) {
                return 0;
            }
            token = jsmn_alloc_token(parser, tokens, num_tokens);
            if (token == NULL) {
                parser->pos = start;
                return JSMN_ERROR_NOMEM;
            }
            jsmn_fill_token(token, JSMN_STRING, start+1, parser->pos);
#ifdef JSMN_PARENT_LINKS
            token->parent = parser->toksuper;
#endif
            return 0;
        }

        /* Backslash: Quoted symbol expected */
        if (c == '\\' && parser->pos + 1 < len) {
            int i;
            parser->pos++;
            switch (js[parser->pos]) {
                /* Allowed escaped symbols */
                case '\"': case '/' : case '\\' : case 'b' :
                case 'f' : case 'r' : case 'n' : case 't' :
                    break;
                /* Allows escaped symbol \uXXXX */
                case 'u':
                    parser->pos++;
                    for(i = 0; i < 4 && parser->pos < len && js[parser->pos] != '\0'; i++) {
                        /* If it isn't a hex character we have an error */
                        if(!((js[parser->pos] >= 48 && js[parser->pos] <= 57) || /* 0-9 */
                                    (js[parser->pos] >= 65 && js[parser->pos] <= 70) || /* A-F */
                                    (js[parser->pos] >= 97 && js[parser->pos] <= 102))) { /* a-f */
                            parser->pos = start;
                            return JSMN_ERROR_INVAL;
                        }
                        parser->pos++;
                    }
                    /* Undo the last advance; the outer loop increments again. */
                    parser->pos--;
                    break;
                /* Unexpected symbol */
                default:
                    parser->pos = start;
                    return JSMN_ERROR_INVAL;
            }
        }
    }
    /* Ran out of input before the closing quote. */
    parser->pos = start;
    return JSMN_ERROR_PART;
}

/**
 * Parse JSON string and fill tokens.
*/ int jsmn_parse(jsmn_parser *parser, const char *js, size_t len, jsmntok_t *tokens, unsigned int num_tokens) { int r; int i; jsmntok_t *token; int count = parser->toknext; for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { char c; jsmntype_t type; c = js[parser->pos]; switch (c) { case '{': case '[': count++; if (tokens == NULL) { break; } token = jsmn_alloc_token(parser, tokens, num_tokens); if (token == NULL) return JSMN_ERROR_NOMEM; if (parser->toksuper != -1) { tokens[parser->toksuper].size++; #ifdef JSMN_PARENT_LINKS token->parent = parser->toksuper; #endif } token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY); token->start = parser->pos; parser->toksuper = parser->toknext - 1; break; case '}': case ']': if (tokens == NULL) break; type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY); #ifdef JSMN_PARENT_LINKS if (parser->toknext < 1) { return JSMN_ERROR_INVAL; } token = &tokens[parser->toknext - 1]; for (;;) { if (token->start != -1 && token->end == -1) { if (token->type != type) { return JSMN_ERROR_INVAL; } token->end = parser->pos + 1; parser->toksuper = token->parent; break; } if (token->parent == -1) { break; } token = &tokens[token->parent]; } #else for (i = parser->toknext - 1; i >= 0; i--) { token = &tokens[i]; if (token->start != -1 && token->end == -1) { if (token->type != type) { return JSMN_ERROR_INVAL; } parser->toksuper = -1; token->end = parser->pos + 1; break; } } /* Error if unmatched closing bracket */ if (i == -1) return JSMN_ERROR_INVAL; for (; i >= 0; i--) { token = &tokens[i]; if (token->start != -1 && token->end == -1) { parser->toksuper = i; break; } } #endif break; case '\"': r = jsmn_parse_string(parser, js, len, tokens, num_tokens); if (r < 0) return r; count++; if (parser->toksuper != -1 && tokens != NULL) tokens[parser->toksuper].size++; break; case '\t' : case '\r' : case '\n' : case ' ': break; case ':': parser->toksuper = parser->toknext - 1; break; case ',': if (tokens != NULL && parser->toksuper != -1 && 
tokens[parser->toksuper].type != JSMN_ARRAY && tokens[parser->toksuper].type != JSMN_OBJECT) { #ifdef JSMN_PARENT_LINKS parser->toksuper = tokens[parser->toksuper].parent; #else for (i = parser->toknext - 1; i >= 0; i--) { if (tokens[i].type == JSMN_ARRAY || tokens[i].type == JSMN_OBJECT) { if (tokens[i].start != -1 && tokens[i].end == -1) { parser->toksuper = i; break; } } } #endif } break; #ifdef JSMN_STRICT /* In strict mode primitives are: numbers and booleans */ case '-': case '0': case '1' : case '2': case '3' : case '4': case '5': case '6': case '7' : case '8': case '9': case 't': case 'f': case 'n' : /* And they must not be keys of the object */ if (tokens != NULL && parser->toksuper != -1) { jsmntok_t *t = &tokens[parser->toksuper]; if (t->type == JSMN_OBJECT || (t->type == JSMN_STRING && t->size != 0)) { return JSMN_ERROR_INVAL; } } #else /* In non-strict mode every unquoted value is a primitive */ default: #endif r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens); if (r < 0) return r; count++; if (parser->toksuper != -1 && tokens != NULL) tokens[parser->toksuper].size++; break; #ifdef JSMN_STRICT /* Unexpected char in strict mode */ default: return JSMN_ERROR_INVAL; #endif } } if (tokens != NULL) { for (i = parser->toknext - 1; i >= 0; i--) { /* Unmatched opened object or array */ if (tokens[i].start != -1 && tokens[i].end == -1) { return JSMN_ERROR_PART; } } } return count; } /** * Creates a new parser based over a given buffer with an array of tokens * available. */ void jsmn_init(jsmn_parser *parser) { parser->pos = 0; parser->toknext = 0; parser->toksuper = -1; } ReadStat-1.1.7/src/bin/read_csv/jsmn.h000066400000000000000000000031361410722155500175010ustar00rootroot00000000000000#ifndef __JSMN_H_ #define __JSMN_H_ #include #ifdef __cplusplus extern "C" { #endif /** * JSON type identifier. 
Basic types are: * o Object * o Array * o String * o Other primitive: number, boolean (true/false) or null */ typedef enum { JSMN_UNDEFINED = 0, JSMN_OBJECT = 1, JSMN_ARRAY = 2, JSMN_STRING = 3, JSMN_PRIMITIVE = 4 } jsmntype_t; enum jsmnerr { /* Not enough tokens were provided */ JSMN_ERROR_NOMEM = -1, /* Invalid character inside JSON string */ JSMN_ERROR_INVAL = -2, /* The string is not a full JSON packet, more bytes expected */ JSMN_ERROR_PART = -3 }; /** * JSON token description. * type type (object, array, string etc.) * start start position in JSON data string * end end position in JSON data string */ typedef struct { jsmntype_t type; int start; int end; int size; #ifdef JSMN_PARENT_LINKS int parent; #endif } jsmntok_t; /** * JSON parser. Contains an array of token blocks available. Also stores * the string being parsed now and current position in that string */ typedef struct { unsigned int pos; /* offset in the JSON string */ unsigned int toknext; /* next token to allocate */ int toksuper; /* superior token node, e.g parent object or array */ } jsmn_parser; /** * Create JSON parser over an array of tokens */ void jsmn_init(jsmn_parser *parser); /** * Run JSON parser. It parses a JSON data string into and array of tokens, each describing * a single JSON object. */ int jsmn_parse(jsmn_parser *parser, const char *js, size_t len, jsmntok_t *tokens, unsigned int num_tokens); #ifdef __cplusplus } #endif #endif /* __JSMN_H_ */ ReadStat-1.1.7/src/bin/read_csv/json_metadata.c000066400000000000000000000214071410722155500213370ustar00rootroot00000000000000#include #include #include #include #include "jsmn.h" #include "json_metadata.h" #include "../../readstat.h" #include "../util/file_format.h" /* Function realloc_it() is a wrapper function for standart realloc() * with one difference - it frees old memory pointer in case of realloc * failure. Thus, DO NOT use old data pointer in anyway after call to * realloc_it(). 
If your code has some kind of fallback algorithm if * memory can't be re-allocated - use standart realloc() instead. */ static inline void *realloc_it(void *ptrmem, size_t size) { void *p = realloc(ptrmem, size); if (!p) { free (ptrmem); fprintf(stderr, "realloc(): errno=%d\n", errno); } return p; } int slurp_object(jsmntok_t *t) { int res = 1; for (int i=0; isize; i++) { res+= slurp_object((t+res)); } return res; } int match_token(const char *js, jsmntok_t *tok, const char* name) { unsigned int len = tok->end - tok->start; return (tok->type == JSMN_STRING) && (len == strlen(name)) && (0==strncmp(js+tok->start, name, len)); } jsmntok_t* find_object_property(const char *js, jsmntok_t *t, const char* propname) { int j = 0; for (int i = 0; i < t->size; i++) { jsmntok_t* tok = t+1+j; if (match_token(js, tok, propname)) { return tok+1; } j+= slurp_object(tok); } return 0; } char* get_object_property(const char *js, jsmntok_t *t, const char* propname, char* dest, size_t size) { jsmntok_t* tok = find_object_property(js, t, propname); if (!tok) { return NULL; } snprintf(dest, size, "%.*s", tok->end-tok->start, js+tok->start); return dest; } unsigned char get_separator(struct json_metadata* md) { jsmntok_t* token = find_object_property(md->js, md->tok, "separator"); if (!token) { return ','; } else { int len = token->end - token->start; const char *tokenstr = md->js + token->start; if (len == 1) { return tokenstr[0]; } else if (len == 2 && tokenstr[0] == '\\' && tokenstr[1]=='t') { return '\t'; } else { return ','; } } } jsmntok_t* find_variable_property(const char *js, jsmntok_t *t, const char* varname, const char* property) { if (t->type != JSMN_OBJECT) { fprintf(stderr, "expected root token to be OBJECT\n"); return 0; } jsmntok_t* variables = find_object_property(js, t, "variables"); if (!variables) { fprintf(stderr, "Could not find variables property\n"); return 0; } int j = 0; for (int i=0; isize; i++) { jsmntok_t* variable = variables+1+j; jsmntok_t* name = 
find_object_property(js, variable, "name"); if (name && match_token(js, name, varname)) { return find_object_property(js, variable, property); } else if (name == 0) { fprintf(stderr, "name property not found\n"); } j += slurp_object(variable); } return 0; } char* copy_variable_property(struct json_metadata* md, const char* varname, const char* property, char* dest, size_t maxsize) { jsmntok_t* tok = find_variable_property(md->js, md->tok, varname, property); if (tok == NULL) { return NULL; } int len = tok->end - tok->start; if (len == 0) { return NULL; } snprintf(dest, maxsize, "%.*s", len, md->js+tok->start); return dest; } int missing_string_idx(struct json_metadata* md, const char* varname, char* v) { jsmntok_t* missing = find_variable_property(md->js, md->tok, varname, "missing"); if (!missing) { return 0; } jsmntok_t* values = find_object_property(md->js, missing, "values"); if (!values) { return 0; } int j = 1; for (int i=0; isize; i++) { jsmntok_t* value = values+j; int len = value->end - value->start; if (len == strlen(v)) { if (0 == strncmp(v, md->js + value->start, len)) { return i+1; } } j+= slurp_object(value); } return 0; } int missing_double_idx(struct json_metadata* md, const char* varname, double v) { jsmntok_t* missing = find_variable_property(md->js, md->tok, varname, "missing"); if (!missing) { return 0; } jsmntok_t* values = find_object_property(md->js, missing, "values"); if (!values) { return 0; } int j = 1; for (int i=0; isize; i++) { jsmntok_t* value = values+j; int len = value->end - value->start; char tmp[1024]; snprintf(tmp, sizeof(tmp), "%.*s", len, md->js + value->start); char *dest; double vv = strtod(tmp, &dest); if (dest == tmp) { fprintf(stderr, "Expected a number: %s\n", tmp); exit(EXIT_FAILURE); } if (vv == v) { return i+1; } j+= slurp_object(value); } return 0; } int get_decimals(struct json_metadata* md, const char* varname) { jsmntok_t* decimals_tok = find_variable_property(md->js, md->tok, varname, "decimals"); if 
(!decimals_tok) { return 0; } else { char *dest; char *buf = md->js + decimals_tok->start; long int decimals = strtol(buf, &dest, 10); if (dest == buf) { fprintf(stderr, "%s:%d not a number: %.*s\n", __FILE__, __LINE__, decimals_tok->end-decimals_tok->start, buf); exit(EXIT_FAILURE); } return decimals; } } extract_metadata_type_t column_type(struct json_metadata* md, const char* varname, int output_format) { jsmntok_t* typ = find_variable_property(md->js, md->tok, varname, "type"); if (!typ) { fprintf(stderr, "Could not find type of variable %s in metadata\n", varname); exit(EXIT_FAILURE); } if (match_token(md->js, typ, "NUMERIC")) { return EXTRACT_METADATA_TYPE_NUMERIC; } else if (match_token(md->js, typ, "STRING")) { return EXTRACT_METADATA_TYPE_STRING; } else { fprintf(stderr, "%s: %d: Unknown metadata type for variable %s\n", __FILE__, __LINE__, varname); exit(EXIT_FAILURE); } } extract_metadata_format_t column_format(struct json_metadata* md, const char* varname) { jsmntok_t* typ = find_variable_property(md->js, md->tok, varname, "format"); if (!typ) { return EXTRACT_METADATA_FORMAT_UNSPECIFIED; } if (match_token(md->js, typ, "NUMBER")) { return EXTRACT_METADATA_FORMAT_NUMBER; } else if (match_token(md->js, typ, "PERCENT")) { return EXTRACT_METADATA_FORMAT_PERCENT; } else if (match_token(md->js, typ, "CURRENCY")) { return EXTRACT_METADATA_FORMAT_CURRENCY; } else if (match_token(md->js, typ, "DATE")) { return EXTRACT_METADATA_FORMAT_DATE; } else if (match_token(md->js, typ, "TIME")) { return EXTRACT_METADATA_FORMAT_TIME; } else if (match_token(md->js, typ, "DATE_TIME")) { return EXTRACT_METADATA_FORMAT_DATE_TIME; } return EXTRACT_METADATA_FORMAT_UNSPECIFIED; } double get_double_from_token(const char *js, jsmntok_t* token) { char buf[255]; char *dest; int len = token->end - token->start; snprintf(buf, sizeof(buf), "%.*s", len, js + token->start); double val = strtod(buf, &dest); if (buf == dest) { fprintf(stderr, "%s:%d failed to parse double: %s\n", __FILE__, 
__LINE__, buf); exit(EXIT_FAILURE); } return val; } struct json_metadata* get_json_metadata(const char* filename) { struct json_metadata* result = malloc(sizeof(struct json_metadata)); if (result == NULL) { fprintf(stderr, "%s: %d: malloc failed: %s\n", __FILE__, __LINE__, strerror(errno)); return 0; } int r; int eof_expected = 0; char *js = NULL; size_t jslen = 0; char buf[BUFSIZ]; FILE* fd = NULL; jsmn_parser p; jsmntok_t *tok = NULL; size_t tokcount = 10; /* Prepare parser */ jsmn_init(&p); /* Allocate some tokens as a start */ tok = malloc(sizeof(*tok) * tokcount); if (tok == NULL) { fprintf(stderr, "malloc(): error:%s\n", strerror(errno)); goto errexit; } fd = fopen(filename, "rb"); if (fd == NULL) { fprintf(stderr, "Could not open %s: %s\n", filename, strerror(errno)); goto errexit; } for (;;) { /* Read another chunk */ r = fread(buf, 1, sizeof(buf), fd); if (r < 0) { fprintf(stderr, "fread(): %s\n", strerror(errno)); goto errexit; } if (r == 0) { if (eof_expected != 0) { break; } else { fprintf(stderr, "fread(): unexpected EOF\n"); goto errexit; } } js = realloc_it(js, jslen + r + 1); if (js == NULL) { goto errexit; } strncpy(js + jslen, buf, r); jslen = jslen + r; again: r = jsmn_parse(&p, js, jslen, tok, tokcount); if (r < 0) { if (r == JSMN_ERROR_NOMEM) { tokcount = tokcount * 2; tok = realloc_it(tok, sizeof(*tok) * tokcount); if (tok == NULL) { goto errexit; } goto again; } } else { eof_expected = 1; } } fclose(fd); result->tok = tok; result->js = js; return result; errexit: fprintf(stderr, "error during json metadata parsing\n"); if (fd) { fclose(fd); fd = NULL; } if (tok) { free(tok); tok = NULL; } if (js) { free(js); js = NULL; } if (result) { free(result); result = NULL; } return NULL; } void free_json_metadata(struct json_metadata* md) { free(md->tok); free(md->js); free(md); } ReadStat-1.1.7/src/bin/read_csv/json_metadata.h000066400000000000000000000026211410722155500213410ustar00rootroot00000000000000#include "jsmn.h" #include "../../readstat.h" 
#include "../extract_metadata.h" #ifndef __JSON_METADATA_H_ #define __JSON_METADATA_H_ typedef struct json_metadata { char* js; jsmntok_t* tok; } json_metadata; struct json_metadata* get_json_metadata(const char* filename); extract_metadata_type_t column_type(struct json_metadata* md, const char* varname, int output_format); extract_metadata_format_t column_format(struct json_metadata* md, const char* varname); void free_json_metadata(struct json_metadata*); int get_decimals(struct json_metadata* md, const char* varname); unsigned char get_separator(struct json_metadata* md); int missing_double_idx(struct json_metadata* md, const char* varname, double v); int missing_string_idx(struct json_metadata* md, const char* varname, char* v); char* copy_variable_property(struct json_metadata* md, const char* varname, const char* property, char* dest, size_t maxsize); jsmntok_t* find_variable_property(const char *js, jsmntok_t *t, const char* varname, const char* property); int slurp_object(jsmntok_t *t); jsmntok_t* find_object_property(const char *js, jsmntok_t *t, const char* propname); char* get_object_property(const char *js, jsmntok_t *t, const char* propname, char* dest, size_t size); int match_token(const char *js, jsmntok_t *tok, const char* name); double get_double_from_token(const char *js, jsmntok_t* token); #endif /* __JSON_METADATA_H_ */ ReadStat-1.1.7/src/bin/read_csv/mod_csv.c000066400000000000000000000052351410722155500201610ustar00rootroot00000000000000#include #include "../../readstat.h" #include "../extract_metadata.h" #include "json_metadata.h" #include "read_module.h" #include "csv_metadata.h" #include "value.h" #include "../util/file_format.h" static void produce_column_header_csv(void *csv_metadata, const char *column, readstat_variable_t* var); void produce_csv_value_csv(void *csv_metadata, const char *s, size_t len); rs_read_module_t rs_read_mod_csv = { .format = RS_FORMAT_CSV, .header = &produce_column_header_csv, .csv_value = &produce_csv_value_csv 
}; static void produce_column_header_csv(void *csv_metadata, const char *column, readstat_variable_t* var) { struct csv_metadata *c = (struct csv_metadata *)csv_metadata; extract_metadata_type_t coltype = column_type(c->json_md, column, c->output_format); switch (coltype) { case EXTRACT_METADATA_TYPE_NUMERIC:; extract_metadata_format_t colformat = column_format(c->json_md, column); switch (colformat) { case EXTRACT_METADATA_FORMAT_NUMBER: var->type = READSTAT_TYPE_DOUBLE; break; case EXTRACT_METADATA_FORMAT_PERCENT: var->type = READSTAT_TYPE_DOUBLE; break; case EXTRACT_METADATA_FORMAT_CURRENCY: var->type = READSTAT_TYPE_DOUBLE; break; case EXTRACT_METADATA_FORMAT_DATE: var->type = READSTAT_TYPE_STRING; break; case EXTRACT_METADATA_FORMAT_TIME: var->type = READSTAT_TYPE_STRING; break; case EXTRACT_METADATA_FORMAT_DATE_TIME: var->type = READSTAT_TYPE_STRING; break; default: var->type = READSTAT_TYPE_DOUBLE; } break; case EXTRACT_METADATA_TYPE_STRING: var->type = READSTAT_TYPE_STRING; break; case EXTRACT_METADATA_TYPE_UNKNOWN: // ... break; } } void produce_csv_value_csv(void *csv_metadata, const char *s, size_t len) { struct csv_metadata *c = (struct csv_metadata *)csv_metadata; readstat_variable_t *var = &c->variables[c->columns]; int is_date = c->is_date[c->columns]; int obs_index = c->rows - 1; // TODO: ??? 
readstat_value_t value; if (len == 0) { value = value_sysmiss(s, len, c); } else if (is_date) { value = value_string(s, len, c); } else if (var->type == READSTAT_TYPE_DOUBLE) { value = value_double(s, len, c); } else if (var->type == READSTAT_TYPE_STRING) { value = value_string(s, len, c); } else { fprintf(stderr, "%s:%d unsupported variable type %d\n", __FILE__, __LINE__, var->type); exit(EXIT_FAILURE); } c->handle.value(obs_index, var, value, c->user_ctx); } ReadStat-1.1.7/src/bin/read_csv/mod_csv.h000066400000000000000000000000521410722155500201560ustar00rootroot00000000000000 extern rs_read_module_t rs_read_mod_csv; ReadStat-1.1.7/src/bin/read_csv/mod_dta.c000066400000000000000000000377161410722155500201470ustar00rootroot00000000000000#include #include #include "../../readstat.h" #include "json_metadata.h" #include "read_module.h" #include "csv_metadata.h" #include "value.h" #include "../util/file_format.h" #include "../util/readstat_dta_days.h" void produce_column_header_dta(void *csv_metadata, const char *column, readstat_variable_t* var); void produce_missingness_dta(void *csv_metadata, const char* column); void produce_value_label_dta(void *csv_metadata, const char* column); void produce_csv_value_dta(void *csv_metadata, const char *s, size_t len); rs_read_module_t rs_read_mod_dta = { .format = RS_FORMAT_DTA, .header = &produce_column_header_dta, .missingness = &produce_missingness_dta, .value_label = &produce_value_label_dta, .csv_value = &produce_csv_value_dta }; static double get_dta_days_from_token(const char *js, jsmntok_t* token) { char buf[255]; int len = token->end - token->start; snprintf(buf, sizeof(buf), "%.*s", len, js + token->start); char* dest; int days = readstat_dta_num_days(buf, &dest); if (dest == buf) { fprintf(stderr, "%s:%d error parsing date %s\n", __FILE__, __LINE__, buf); exit(EXIT_FAILURE); } return days; } static char dta_add_missing_date(readstat_variable_t* var, double v) { int idx = var->missingness.missing_ranges_count; char 
tagg = 'a' + idx; if (tagg > 'z') { fprintf(stderr, "%s:%d missing tag reached %c, aborting ...\n", __FILE__, __LINE__, tagg); exit(EXIT_FAILURE); } readstat_value_t value = { .type = READSTAT_TYPE_INT32, .is_system_missing = 0, .is_tagged_missing = 1, .tag = tagg, .v = { .i32_value = v } }; var->missingness.missing_ranges[(idx*2)] = value; var->missingness.missing_ranges[(idx*2)+1] = value; var->missingness.missing_ranges_count++; return tagg; } static char dta_add_missing_double(readstat_variable_t* var, double v) { int idx = var->missingness.missing_ranges_count; char tagg = 'a' + idx; if (tagg > 'z') { fprintf(stderr, "%s:%d missing tag reached %c, aborting ...\n", __FILE__, __LINE__, tagg); exit(EXIT_FAILURE); } readstat_value_t value = { .type = READSTAT_TYPE_DOUBLE, .is_system_missing = 0, .is_tagged_missing = 1, .tag = tagg, .v = { .double_value = v } }; var->missingness.missing_ranges[(idx*2)] = value; var->missingness.missing_ranges[(idx*2)+1] = value; var->missingness.missing_ranges_count++; return tagg; } static void produce_missingness_range_dta(struct csv_metadata *c, jsmntok_t* missing, const char* column) { readstat_variable_t* var = &c->variables[c->columns]; const char *js = c->json_md->js; int is_date = c->is_date[c->columns]; jsmntok_t* low = find_object_property(js, missing, "low"); jsmntok_t* high = find_object_property(js, missing, "high"); jsmntok_t* discrete = find_object_property(js, missing, "discrete-value"); jsmntok_t* categories = find_variable_property(js, c->json_md->tok, column, "categories"); if (!categories && (low || high || discrete)) { fprintf(stderr, "%s:%d expected to find categories for column %s\n", __FILE__, __LINE__, column); exit(EXIT_FAILURE); } else if (!categories) { return; } if (low && !high) { fprintf(stderr, "%s:%d missing.low specified for column %s, but missing.high not specified\n", __FILE__, __LINE__, column); exit(EXIT_FAILURE); } if (high && !low) { fprintf(stderr, "%s:%d missing.high specified for column 
%s, but missing.low not specified\n", __FILE__, __LINE__, column); exit(EXIT_FAILURE); } char label_buf[1024]; int j = 1; for (int i=0; isize; i++) { jsmntok_t* tok = categories+j; jsmntok_t* code = find_object_property(js, tok, "code"); char* label = get_object_property(c->json_md->js, tok, "label", label_buf, sizeof(label_buf)); if (!code || !label) { fprintf(stderr, "%s:%d bogus JSON metadata input. Missing code/label for column %s\n", __FILE__, __LINE__, column); exit(EXIT_FAILURE); } double cod = is_date ? get_dta_days_from_token(js, code) : get_double_from_token(js, code); if (low && high) { double lo = is_date ? get_dta_days_from_token(js, low) : get_double_from_token(js, low); double hi = is_date ? get_dta_days_from_token(js, high) : get_double_from_token(js, high); if (cod >= lo && cod <= hi) { is_date ? dta_add_missing_date(var, cod) : dta_add_missing_double(var, cod); } } if (discrete) { double v = is_date ? get_dta_days_from_token(js, discrete) : get_double_from_token(js, discrete); if (cod == v) { is_date ? 
dta_add_missing_date(var, cod) : dta_add_missing_double(var, cod); } } j += slurp_object(tok); } } static void produce_missingness_discrete_dta(struct csv_metadata *c, jsmntok_t* missing, const char* column) { readstat_variable_t* var = &c->variables[c->columns]; int is_date = c->is_date[c->columns]; const char *js = c->json_md->js; jsmntok_t* values = find_object_property(js, missing, "values"); if (!values) { fprintf(stderr, "%s:%d Expected to find missing 'values' property\n", __FILE__, __LINE__); exit(EXIT_FAILURE); } int j = 1; for (int i=0; isize; i++) { jsmntok_t* missing_value_token = values + j; if (is_date) { dta_add_missing_date(var, get_dta_days_from_token(js, missing_value_token)); } else if (var->type == READSTAT_TYPE_DOUBLE) { dta_add_missing_double(var, get_double_from_token(js, missing_value_token)); } else if (var->type == READSTAT_TYPE_STRING) { } else { fprintf(stderr, "%s:%d Unsupported column type %d\n", __FILE__, __LINE__, var->type); exit(EXIT_FAILURE); } j += slurp_object(missing_value_token); } } void produce_missingness_dta(void *csv_metadata, const char* column) { struct csv_metadata *c = (struct csv_metadata *)csv_metadata; const char *js = c->json_md->js; readstat_variable_t* var = &c->variables[c->columns]; var->missingness.missing_ranges_count = 0; jsmntok_t* missing = find_variable_property(js, c->json_md->tok, column, "missing"); if (!missing) { return; } jsmntok_t* missing_type = find_object_property(js, missing, "type"); if (!missing_type) { fprintf(stderr, "%s:%d expected to find missing.type for column %s\n", __FILE__, __LINE__, column); exit(EXIT_FAILURE); } if (match_token(js, missing_type, "DISCRETE")) { produce_missingness_discrete_dta(c, missing, column); } else if (match_token(js, missing_type, "RANGE")) { produce_missingness_range_dta(c, missing, column); } else { fprintf(stderr, "%s:%d unknown missing type %.*s\n", __FILE__, __LINE__, missing_type->end - missing_type->start, js+missing_type->start); exit(EXIT_FAILURE); 
/*
 * Fill in the storage type and display format of `var` for DTA output,
 * based on the JSON metadata for `column`.
 *
 * String columns only get their type set. Numeric columns are mapped by
 * format: dates become INT32 with "%td", times and date-times become
 * INT32 with "%tC", and everything else becomes DOUBLE with a "%9.<n>f"
 * format where <n> comes from get_decimals(). Any other column type
 * leaves `var` untouched.
 */
void produce_column_header_dta(void *csv_metadata, const char *column, readstat_variable_t* var) {
    struct csv_metadata *md = csv_metadata;
    extract_metadata_type_t kind = column_type(md->json_md, column, md->output_format);

    if (kind == EXTRACT_METADATA_TYPE_STRING) {
        var->type = READSTAT_TYPE_STRING;
        return;
    }
    if (kind != EXTRACT_METADATA_TYPE_NUMERIC) {
        return;
    }

    switch (column_format(md->json_md, column)) {
        case EXTRACT_METADATA_FORMAT_DATE:
            var->type = READSTAT_TYPE_INT32;
            snprintf(var->format, sizeof(var->format), "%s", "%td");
            return;
        case EXTRACT_METADATA_FORMAT_TIME:
        case EXTRACT_METADATA_FORMAT_DATE_TIME:
            var->type = READSTAT_TYPE_INT32;
            // %tC => is equivalent to coordinated universal time (UTC)
            snprintf(var->format, sizeof(var->format), "%s", "%tC");
            return;
        case EXTRACT_METADATA_FORMAT_NUMBER:
        case EXTRACT_METADATA_FORMAT_PERCENT:
        case EXTRACT_METADATA_FORMAT_CURRENCY:
        default:
            /* plain numbers and any unrecognised format share one layout */
            var->type = READSTAT_TYPE_DOUBLE;
            snprintf(var->format, sizeof(var->format), "%%9.%df", get_decimals(md->json_md, column));
            return;
    }
}
*code, const char *label) { readstat_variable_t* variable = &c->variables[c->columns]; char *endptr; double v = strtod(code, &endptr); if (endptr == code) { fprintf(stderr, "%s:%d not a number: %s\n", __FILE__, __LINE__, code); exit(EXIT_FAILURE); } readstat_value_t value = { .v = { .double_value = v }, .type = READSTAT_TYPE_DOUBLE, }; int missing_ranges_count = readstat_variable_get_missing_ranges_count(variable); for (int i=0; i= lo && v <= hi) { value.is_tagged_missing = 1; value.tag = 'a' + i; } } } c->handle.value_label(column, value, label, c->user_ctx); } void produce_value_label_dta(void *csv_metadata, const char* column) { struct csv_metadata *c = (struct csv_metadata *)csv_metadata; jsmntok_t* categories = find_variable_property(c->json_md->js, c->json_md->tok, column, "categories"); if (categories==NULL) { return; } readstat_variable_t* variable = &c->variables[c->columns]; readstat_type_t coltype = variable->type; int is_date = c->is_date[c->columns]; int j = 1; char code_buf[1024]; char label_buf[1024]; for (int i=0; isize; i++) { jsmntok_t* tok = categories+j; char* code = get_object_property(c->json_md->js, tok, "code", code_buf, sizeof(code_buf)); char* label = get_object_property(c->json_md->js, tok, "label", label_buf, sizeof(label_buf)); if (!code || !label) { fprintf(stderr, "%s:%d bogus JSON metadata input. 
Missing code/label for column %s\n", __FILE__, __LINE__, column); exit(EXIT_FAILURE); } if (is_date) { produce_value_label_int32_date_dta(column, c, code, label); } else if (coltype == READSTAT_TYPE_DOUBLE) { produce_value_label_double_dta(column, c, code, label); } else if (coltype == READSTAT_TYPE_STRING) { } else { fprintf(stderr, "%s:%d unsupported column type %d for value label for column %s\n", __FILE__, __LINE__, coltype, column); exit(EXIT_FAILURE); } j += slurp_object(tok); } } static readstat_value_t value_int32_date_dta(const char *s, size_t len, struct csv_metadata *c) { readstat_variable_t *var = &c->variables[c->columns]; char* dest; int val = readstat_dta_num_days(s, &dest); if (dest == s) { fprintf(stderr, "%s:%d not a date: %s\n", __FILE__, __LINE__, (char*)s); exit(EXIT_FAILURE); } int missing_ranges_count = readstat_variable_get_missing_ranges_count(var); for (int i=0; i= lo && val <= hi) { readstat_value_t value = { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 1, .tag = 'a' + i, .v = { .i32_value = val } }; return value; } } readstat_value_t value = { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 0, .v = { .i32_value = val } }; return value; } static readstat_value_t value_double_dta(const char *s, size_t len, struct csv_metadata *c) { char *dest; readstat_variable_t *var = &c->variables[c->columns]; double val = strtod(s, &dest); if (dest == s) { fprintf(stderr, "not a number: %s\n", (char*)s); exit(EXIT_FAILURE); } int missing_ranges_count = readstat_variable_get_missing_ranges_count(var); for (int i=0; i= lo && val <= hi) { readstat_value_t value = { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'a' + i, .v = { .double_value = val } }; return value; } } readstat_value_t value = { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 0, .v = { .double_value = val } }; return value; } void produce_csv_value_dta(void *csv_metadata, const char *s, size_t len) { struct csv_metadata *c = (struct csv_metadata *)csv_metadata; 
readstat_variable_t *var = &c->variables[c->columns]; int is_date = c->is_date[c->columns]; int obs_index = c->rows - 1; // TODO: ??? readstat_value_t value; if (len == 0) { value = value_sysmiss(s, len, c); } else if (is_date) { value = value_int32_date_dta(s, len, c); } else if (var->type == READSTAT_TYPE_DOUBLE) { value = value_double_dta(s, len, c); } else if (var->type == READSTAT_TYPE_STRING) { value = value_string(s, len, c); } else { fprintf(stderr, "%s:%d unsupported variable type %d\n", __FILE__, __LINE__, var->type); exit(EXIT_FAILURE); } c->handle.value(obs_index, var, value, c->user_ctx); } ReadStat-1.1.7/src/bin/read_csv/mod_dta.h000066400000000000000000000000511410722155500201320ustar00rootroot00000000000000extern rs_read_module_t rs_read_mod_dta; ReadStat-1.1.7/src/bin/read_csv/mod_sav.c000066400000000000000000000242231410722155500201550ustar00rootroot00000000000000#include #include #include "../../readstat.h" #include "read_module.h" #include "csv_metadata.h" #include "json_metadata.h" #include "value.h" #include "../util/file_format.h" #include "../util/readstat_sav_date.h" void produce_column_header_sav(void *csv_metadata, const char *column, readstat_variable_t* var); void produce_value_label_sav(void *csv_metadata, const char* column); void produce_missingness_sav(void *csv_metadata, const char* column); void produce_csv_value_sav(void *csv_metadata, const char *s, size_t len); rs_read_module_t rs_read_mod_sav = { .format = RS_FORMAT_SAV, .header = &produce_column_header_sav, .missingness = &produce_missingness_sav, .value_label = &produce_value_label_sav, .csv_value = &produce_csv_value_sav }; static double get_double_date_missing_sav(const char *js, jsmntok_t* missing_value_token) { // SAV missing date char buf[255]; char *dest; int len = missing_value_token->end - missing_value_token->start; snprintf(buf, sizeof(buf), "%.*s", len, js + missing_value_token->start); double val = readstat_sav_date_parse(buf, &dest); if (buf == dest) { 
fprintf(stderr, "%s:%d failed to parse double: %s\n", __FILE__, __LINE__, buf); exit(EXIT_FAILURE); } else { fprintf(stdout, "added double date missing %s\n", buf); } return val; } static void produce_missingness_discrete_sav(struct csv_metadata *c, jsmntok_t* missing, const char* column) { readstat_variable_t* var = &c->variables[c->columns]; int is_date = c->is_date[c->columns]; const char *js = c->json_md->js; jsmntok_t* values = find_object_property(js, missing, "values"); if (!values) { fprintf(stderr, "%s:%d Expected to find missing 'values' property\n", __FILE__, __LINE__); exit(EXIT_FAILURE); } int j = 1; for (int i=0; isize; i++) { jsmntok_t* missing_value_token = values + j; if (is_date) { readstat_variable_add_missing_double_value(var, get_double_date_missing_sav(js, missing_value_token)); } else if (var->type == READSTAT_TYPE_DOUBLE) { readstat_variable_add_missing_double_value(var, get_double_from_token(js, missing_value_token)); } else if (var->type == READSTAT_TYPE_STRING) { } else { fprintf(stderr, "%s:%d Unsupported column type %d\n", __FILE__, __LINE__, var->type); exit(EXIT_FAILURE); } j += slurp_object(missing_value_token); } } static void produce_missingness_range_sav(struct csv_metadata *c, jsmntok_t* missing, const char* column) { readstat_variable_t* var = &c->variables[c->columns]; int is_date = c->is_date[c->columns]; const char *js = c->json_md->js; jsmntok_t* low = find_object_property(js, missing, "low"); jsmntok_t* high = find_object_property(js, missing, "high"); jsmntok_t* discrete = find_object_property(js, missing, "discrete-value"); if (low && !high) { fprintf(stderr, "%s:%d missing.low specified for column %s, but missing.high not specified\n", __FILE__, __LINE__, column); exit(EXIT_FAILURE); } if (high && !low) { fprintf(stderr, "%s:%d missing.high specified for column %s, but missing.low not specified\n", __FILE__, __LINE__, column); exit(EXIT_FAILURE); } if (low && high) { double lo = is_date ? 
get_double_date_missing_sav(js, low) : get_double_from_token(js, low); double hi = is_date ? get_double_date_missing_sav(js, high) : get_double_from_token(js, high); readstat_variable_add_missing_double_range(var, lo, hi); } if (discrete) { double v = is_date ? get_double_date_missing_sav(js, discrete) : get_double_from_token(js, discrete); readstat_variable_add_missing_double_value(var, v); } } void produce_missingness_sav(void *csv_metadata, const char* column) { struct csv_metadata *c = (struct csv_metadata *)csv_metadata; const char *js = c->json_md->js; readstat_variable_t* var = &c->variables[c->columns]; var->missingness.missing_ranges_count = 0; jsmntok_t* missing = find_variable_property(js, c->json_md->tok, column, "missing"); if (!missing) { return; } jsmntok_t* missing_type = find_object_property(js, missing, "type"); if (!missing_type) { fprintf(stderr, "%s:%d expected to find missing.type for column %s\n", __FILE__, __LINE__, column); exit(EXIT_FAILURE); } if (match_token(js, missing_type, "DISCRETE")) { produce_missingness_discrete_sav(c, missing, column); } else if (match_token(js, missing_type, "RANGE")) { produce_missingness_range_sav(c, missing, column); } else { fprintf(stderr, "%s:%d unknown missing type %.*s\n", __FILE__, __LINE__, missing_type->end - missing_type->start, js+missing_type->start); exit(EXIT_FAILURE); } } void produce_column_header_sav(void *csv_metadata, const char *column, readstat_variable_t* var) { struct csv_metadata *c = (struct csv_metadata *)csv_metadata; extract_metadata_type_t coltype = column_type(c->json_md, column, c->output_format); if (coltype == EXTRACT_METADATA_TYPE_NUMERIC) { extract_metadata_format_t colformat = column_format(c->json_md, column); switch (colformat) { case EXTRACT_METADATA_FORMAT_NUMBER: case EXTRACT_METADATA_FORMAT_PERCENT: case EXTRACT_METADATA_FORMAT_CURRENCY: var->type = READSTAT_TYPE_DOUBLE; snprintf(var->format, sizeof(var->format), "F8.%d", get_decimals(c->json_md, column)); break; case 
EXTRACT_METADATA_FORMAT_DATE: case EXTRACT_METADATA_FORMAT_TIME: case EXTRACT_METADATA_FORMAT_DATE_TIME: var->type = READSTAT_TYPE_DOUBLE; snprintf(var->format, sizeof(var->format), "%s", "EDATE40"); break; default: var->type = READSTAT_TYPE_DOUBLE; snprintf(var->format, sizeof(var->format), "F8.%d", get_decimals(c->json_md, column)); } } else if (coltype == EXTRACT_METADATA_TYPE_STRING) { var->type = READSTAT_TYPE_STRING; } } static void produce_value_label_double_date_sav(const char* column, struct csv_metadata *c, const char *code, const char *label) { char *endptr; double v = readstat_sav_date_parse(code, &endptr); if (endptr == code) { fprintf(stderr, "%s:%d not a valid date: %s\n", __FILE__, __LINE__, code); exit(EXIT_FAILURE); } readstat_value_t value = { .v = { .double_value = v }, .type = READSTAT_TYPE_DOUBLE, }; c->handle.value_label(column, value, label, c->user_ctx); } static void produce_value_label_string(const char* column, struct csv_metadata *c, const char *code, const char *label) { readstat_value_t value = { .v = { .string_value = code }, .type = READSTAT_TYPE_STRING, }; c->handle.value_label(column, value, label, c->user_ctx); } static void produce_value_label_double_sav(const char* column, struct csv_metadata *c, const char *code, const char *label) { char *endptr; double v = strtod(code, &endptr); if (endptr == code) { fprintf(stderr, "%s:%d not a number: %s\n", __FILE__, __LINE__, code); exit(EXIT_FAILURE); } readstat_value_t value = { .v = { .double_value = v }, .type = READSTAT_TYPE_DOUBLE, }; c->handle.value_label(column, value, label, c->user_ctx); } void produce_value_label_sav(void *csv_metadata, const char* column) { struct csv_metadata *c = (struct csv_metadata *)csv_metadata; readstat_variable_t* variable = &c->variables[c->columns]; readstat_type_t coltype = variable->type; jsmntok_t* categories = find_variable_property(c->json_md->js, c->json_md->tok, column, "categories"); if (categories==NULL) { return; } int is_date = 
c->is_date[c->columns]; int j = 1; char code_buf[1024]; char label_buf[1024]; for (int i=0; isize; i++) { jsmntok_t* tok = categories+j; char* code = get_object_property(c->json_md->js, tok, "code", code_buf, sizeof(code_buf)); char* label = get_object_property(c->json_md->js, tok, "label", label_buf, sizeof(label_buf)); if (!code || !label) { fprintf(stderr, "%s:%d bogus JSON metadata input. Missing code/label for column %s\n", __FILE__, __LINE__, column); exit(EXIT_FAILURE); } if (is_date) { produce_value_label_double_date_sav(column, c, code, label); } else if (coltype == READSTAT_TYPE_DOUBLE) { produce_value_label_double_sav(column, c, code, label); } else if (coltype == READSTAT_TYPE_STRING) { produce_value_label_string(column, c, code, label); } else { fprintf(stderr, "%s:%d unsupported column type %d for value label %s\n", __FILE__, __LINE__, coltype, column); exit(EXIT_FAILURE); } j += slurp_object(tok); } } static readstat_value_t value_double_date_sav(const char *s, size_t len, struct csv_metadata *c) { char *dest; double val = readstat_sav_date_parse(s, &dest); if (dest == s) { fprintf(stderr, "%s:%d not a valid date: %s\n", __FILE__, __LINE__, (char*)s); exit(EXIT_FAILURE); } readstat_value_t value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = val } }; return value; } void produce_csv_value_sav(void *csv_metadata, const char *s, size_t len) { struct csv_metadata *c = (struct csv_metadata *)csv_metadata; readstat_variable_t *var = &c->variables[c->columns]; int is_date = c->is_date[c->columns]; int obs_index = c->rows - 1; // TODO: ??? 
readstat_value_t value; if (len == 0) { value = value_sysmiss(s, len, c); } else if (is_date) { value = value_double_date_sav(s, len, c); } else if (var->type == READSTAT_TYPE_DOUBLE) { value = value_double(s, len, c); } else if (var->type == READSTAT_TYPE_STRING) { value = value_string(s, len, c); } else { fprintf(stderr, "%s:%d unsupported variable type %d\n", __FILE__, __LINE__, var->type); exit(EXIT_FAILURE); } c->handle.value(obs_index, var, value, c->user_ctx); } ReadStat-1.1.7/src/bin/read_csv/mod_sav.h000066400000000000000000000000511410722155500201530ustar00rootroot00000000000000extern rs_read_module_t rs_read_mod_sav; ReadStat-1.1.7/src/bin/read_csv/read_csv.c000066400000000000000000000131251410722155500203120ustar00rootroot00000000000000#include #include #include #include #include #include "../../readstat.h" #include "../util/readstat_dta_days.h" #include "json_metadata.h" #include "read_module.h" #include "csv_metadata.h" #include "mod_csv.h" #include "mod_dta.h" #include "mod_sav.h" #define UNUSED(x) (void)(x) rs_read_module_t *rs_read_module_for_filename(rs_read_module_t *modules, long module_count, int output_format) { int i; for (i=0; ivariables[c->columns]; memset(var, 0, sizeof(readstat_variable_t)); extract_metadata_type_t coltype = column_type(c->json_md, column, c->output_format); switch (coltype) { case EXTRACT_METADATA_TYPE_STRING: var->alignment = READSTAT_ALIGNMENT_LEFT; break; case EXTRACT_METADATA_TYPE_NUMERIC: var->alignment = READSTAT_ALIGNMENT_RIGHT; break; default: var->alignment = READSTAT_ALIGNMENT_LEFT; } extract_metadata_format_t colformat = column_format(c->json_md, column); c->is_date[c->columns] = colformat == EXTRACT_METADATA_FORMAT_DATE; if (c->output_module->header) { c->output_module->header(c, column, var); } if (c->pass == 2 && coltype == EXTRACT_METADATA_TYPE_STRING) { var->storage_width = c->column_width[c->columns]; } var->index = c->columns; copy_variable_property(c->json_md, column, "label", var->label, 
sizeof(var->label)); snprintf(var->name, sizeof(var->name), "%.*s", (int)len, column); if (c->output_module->missingness) { c->output_module->missingness(c, column); } if (c->output_module->value_label && c->handle.value_label) { c->output_module->value_label(c, column); } if (c->handle.variable) { c->handle.variable(c->columns, var, column, c->user_ctx); } } static void csv_metadata_cell(void *s, size_t len, void *data) { struct csv_metadata *c = (struct csv_metadata *)data; if (c->rows == 0) { c->variables = realloc(c->variables, (c->columns+1) * sizeof(readstat_variable_t)); c->is_date = realloc(c->is_date, (c->columns+1) * sizeof(int)); produce_column_header(c, s, len); } else if (c->rows >= 1 && c->handle.value && c->output_module->csv_value) { c->output_module->csv_value(c, s, len); } if (c->rows >= 1 && c->pass == 1) { size_t w = c->column_width[c->columns]; c->column_width[c->columns] = (len>w) ? len : w; } c->open_row = 1; c->columns++; } static void csv_metadata_row(int cc, void *data) { UNUSED(cc); struct csv_metadata *c = (struct csv_metadata *)data; c->rows++; if (c->rows == 1 && c->pass == 1) { c->column_width = malloc(c->columns * sizeof(size_t)); for (int i=0; icolumns; i++) { c->column_width[i] = 1; } c->_columns = c->columns; } c->columns = 0; c->open_row = 0; } readstat_error_t readstat_parse_csv(readstat_parser_t *parser, const char *path, struct csv_metadata* md, void *user_ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = parser->io; size_t file_size = 0; size_t bytes_read; struct csv_parser csvparser; struct csv_parser *p = &csvparser; char buf[BUFSIZ]; size_t* column_width = md->column_width; md->pass = column_width ? 
2 : 1; md->open_row = 0; md->columns = 0; md->_rows = md->rows; md->rows = 0; md->user_ctx = user_ctx; md->handle = parser->handlers; rs_read_module_t modules[3] = { rs_read_mod_csv, rs_read_mod_dta, rs_read_mod_sav }; if ((md->output_module = rs_read_module_for_filename(modules, 3, md->output_format)) == NULL) { fprintf(stderr, "Unsupported file format\n"); retval = READSTAT_ERROR_WRITE; goto cleanup; } if (io->open(path, io->io_ctx) == -1) { retval = READSTAT_ERROR_OPEN; goto cleanup; } file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx); if (file_size == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (csv_init(p, CSV_APPEND_NULL) != 0) { retval = READSTAT_ERROR_OPEN; goto cleanup; } unsigned char sep = get_separator(md->json_md); csv_set_delim(p, sep); while ((bytes_read = io->read(buf, sizeof(buf), io->io_ctx)) > 0) { if (csv_parse(p, buf, bytes_read, csv_metadata_cell, csv_metadata_row, md) != bytes_read) { fprintf(stderr, "Error while parsing file: %s\n", csv_strerror(csv_error(p))); retval = READSTAT_ERROR_PARSE; goto cleanup; } } csv_fini(p, csv_metadata_cell, csv_metadata_row, md); if (!md->open_row) { md->rows--; } if (md->handle.metadata && md->pass == 1) { readstat_metadata_t metadata = { .row_count = md->rows, .var_count = md->_columns }; if (md->handle.metadata(&metadata, user_ctx) == READSTAT_HANDLER_ABORT) { retval = READSTAT_ERROR_TOO_FEW_COLUMNS; } } cleanup: if (md->variables) { free(md->variables); md->variables = NULL; } if (md->is_date) { free(md->is_date); md->is_date = NULL; } csv_free(p); io->close(io->io_ctx); return retval; } ReadStat-1.1.7/src/bin/read_csv/read_csv.h000066400000000000000000000003031410722155500203110ustar00rootroot00000000000000#ifndef __MOD_CSV_READER_H #define __MOD_CSV_READER_H readstat_error_t readstat_parse_csv(readstat_parser_t *parser, const char *path, struct csv_metadata* md2, void *user_ctx); #endif 
ReadStat-1.1.7/src/bin/read_csv/read_module.h000066400000000000000000000011361410722155500210100ustar00rootroot00000000000000typedef void (*rs_produce_column_header)(void *csv_metadata, const char *column, readstat_variable_t* var); typedef void (*rs_produce_missingness)(void *csv_metadata, const char *column); typedef void (*rs_produce_value_label)(void *csv_metadata, const char *column); typedef void (*rs_produce_csv_value)(void *csv_metadata, const char *s, size_t len); typedef struct rs_read_module_s { int format; rs_produce_column_header header; rs_produce_missingness missingness; rs_produce_value_label value_label; rs_produce_csv_value csv_value; } rs_read_module_t; ReadStat-1.1.7/src/bin/read_csv/value.c000066400000000000000000000021331410722155500176350ustar00rootroot00000000000000#include #include #include "../../readstat.h" #include "read_module.h" #include "csv_metadata.h" readstat_value_t value_sysmiss(const char *s, size_t len, struct csv_metadata *c) { readstat_variable_t *var = &c->variables[c->columns]; readstat_value_t value = { .is_system_missing = 1, .is_tagged_missing = 0, .type = var->type }; return value; } readstat_value_t value_string(const char *s, size_t len, struct csv_metadata *c) { readstat_value_t value = { .is_system_missing = 0, .is_tagged_missing = 0, .v = { .string_value = s }, .type = READSTAT_TYPE_STRING }; return value; } readstat_value_t value_double(const char *s, size_t len, struct csv_metadata *c) { char *dest; double val = strtod(s, &dest); if (dest == s) { fprintf(stderr, "%s:%d not a number: %s\n", __FILE__, __LINE__, (char*)s); exit(EXIT_FAILURE); } readstat_value_t value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = val } }; return value; } ReadStat-1.1.7/src/bin/read_csv/value.h000066400000000000000000000004741410722155500176500ustar00rootroot00000000000000#ifndef __PRODUCE_CSV_VALUE_H #define __PRODUCE_CSV_VALUE_H readstat_value_t value_sysmiss(const char *s, size_t len, struct csv_metadata *c); 
readstat_value_t value_string(const char *s, size_t len, struct csv_metadata *c); readstat_value_t value_double(const char *s, size_t len, struct csv_metadata *c); #endif ReadStat-1.1.7/src/bin/readstat.c000066400000000000000000000443501410722155500165510ustar00rootroot00000000000000#include #include #include #include #include #if !defined _MSC_VER # include # include #else # include # include # include # define __need_clock_t # include int gettimeofday(struct timeval* t, void* timezone) { struct _timeb timebuffer; _ftime_s(&timebuffer); t->tv_sec = timebuffer.time; t->tv_usec = 1000 * timebuffer.millitm; return 0; } #endif #include #include "../readstat.h" #include "../txt/readstat_schema.h" #include "write/module.h" #include "write/mod_readstat.h" #include "write/mod_csv.h" #if HAVE_CSVREADER #include "read_csv/json_metadata.h" #include "read_csv/read_module.h" #include "read_csv/csv_metadata.h" #include "read_csv/read_csv.h" #endif #if HAVE_XLSXWRITER #include "write/mod_xlsx.h" #endif #include "util/file_format.h" #include "util/main.h" #if defined _MSC_VER #define unlink _unlink #endif typedef struct rs_ctx_s { rs_module_t *module; void *module_ctx; const char *error_filename; long row_count; long var_count; } rs_ctx_t; rs_module_t *rs_module_for_filename(rs_module_t *modules, long module_count, const char *filename) { int i; for (i=0; imodule->handle.fweight) { return rs_ctx->module->handle.fweight(variable, rs_ctx->module_ctx); } return READSTAT_HANDLER_OK; } static int handle_metadata(readstat_metadata_t *metadata, void *ctx) { rs_ctx_t *rs_ctx = (rs_ctx_t *)ctx; if (rs_ctx->module->handle.metadata) { return rs_ctx->module->handle.metadata(metadata, rs_ctx->module_ctx); } return READSTAT_HANDLER_OK; } static int handle_note(int note_index, const char *note, void *ctx) { rs_ctx_t *rs_ctx = (rs_ctx_t *)ctx; if (rs_ctx->module->handle.note) { return rs_ctx->module->handle.note(note_index, note, rs_ctx->module_ctx); } return READSTAT_HANDLER_OK; } static int 
handle_value_label(const char *val_labels, readstat_value_t value, const char *label, void *ctx) { rs_ctx_t *rs_ctx = (rs_ctx_t *)ctx; if (rs_ctx->module->handle.value_label) { return rs_ctx->module->handle.value_label(val_labels, value, label, rs_ctx->module_ctx); } return READSTAT_HANDLER_OK; } static int handle_variable(int index, readstat_variable_t *variable, const char *val_labels, void *ctx) { rs_ctx_t *rs_ctx = (rs_ctx_t *)ctx; if (rs_ctx->module->handle.variable) { return rs_ctx->module->handle.variable(index, variable, val_labels, rs_ctx->module_ctx); } return READSTAT_HANDLER_OK; } static int handle_value(int obs_index, readstat_variable_t *variable, readstat_value_t value, void *ctx) { rs_ctx_t *rs_ctx = (rs_ctx_t *)ctx; int var_index = readstat_variable_get_index(variable); if (var_index == 0) { rs_ctx->row_count++; } if (obs_index == 0) { rs_ctx->var_count++; } if (rs_ctx->module->handle.value) { return rs_ctx->module->handle.value(obs_index, variable, value, rs_ctx->module_ctx); } return READSTAT_HANDLER_OK; } readstat_error_t parse_file(readstat_parser_t *parser, const char *input_filename, int input_format, void *ctx) { readstat_error_t error = READSTAT_OK; if (input_format == RS_FORMAT_DTA) { error = readstat_parse_dta(parser, input_filename, ctx); } else if (input_format == RS_FORMAT_SAV || input_format == RS_FORMAT_ZSAV) { error = readstat_parse_sav(parser, input_filename, ctx); } else if (input_format == RS_FORMAT_POR) { error = readstat_parse_por(parser, input_filename, ctx); } else if (input_format == RS_FORMAT_SAS_DATA) { error = readstat_parse_sas7bdat(parser, input_filename, ctx); } else if (input_format == RS_FORMAT_SAS_CATALOG) { error = readstat_parse_sas7bcat(parser, input_filename, ctx); } else if (input_format == RS_FORMAT_XPORT) { error = readstat_parse_xport(parser, input_filename, ctx); } return error; } static void print_version() { fprintf(stdout, "ReadStat version " READSTAT_VERSION "\n"); } #if HAVE_ZLIB #define INPUT_FORMATS 
"dta|por|sav|sas7bdat|xpt|zsav" #else #define INPUT_FORMATS "dta|por|sav|sas7bdat|xpt" #endif #if HAVE_XLSXWRITER #define OUTPUT_FORMATS INPUT_FORMATS "|csv|xlsx" #else #define OUTPUT_FORMATS INPUT_FORMATS "|csv" #endif static void print_usage(const char *cmd) { print_version(); fprintf(stdout, "\n View a file's metadata:\n"); fprintf(stdout, "\n %s input.(" INPUT_FORMATS ")\n", cmd); fprintf(stdout, "\n Read a file, and write CSV to standard out:\n"); fprintf(stdout, "\n %s input.(" INPUT_FORMATS ") -\n", cmd); fprintf(stdout, "\n Convert a file:\n"); fprintf(stdout, "\n %s input.(" INPUT_FORMATS ") output.(" OUTPUT_FORMATS ")\n", cmd); #if HAVE_CSVREADER fprintf(stdout, "\n Convert a CSV file with column metadata stored in a separate JSON file (see extract_metadata):\n"); fprintf(stdout, "\n %s input.csv metadata.json output.(" OUTPUT_FORMATS ")\n", cmd); #endif fprintf(stdout, "\n Convert a text file with column metadata stored in a SAS command files, SPSS command file, or Stata dictionary file:\n"); fprintf(stdout, "\n %s input.xxx metadata.(dct|sas|sps) output.(" OUTPUT_FORMATS ")\n", cmd); fprintf(stdout, "\n Convert a SAS7BDAT file with value labels stored in a separate SAS catalog file:\n"); fprintf(stdout, "\n %s input.sas7bdat catalog.sas7bcat output.(dta|por|sav|xpt" #if HAVE_ZLIB "|zsav" #endif "|csv" #if HAVE_XLSXWRITER "|xlsx" #endif ")\n\n", cmd); } #if HAVE_CSVREADER static readstat_error_t parse_csv_plus_json(const char *input_filename, const char *json_filename, int output_format, rs_ctx_t *rs_ctx) { readstat_error_t error = READSTAT_OK; struct csv_metadata csv_meta = { .output_format = output_format }; struct json_metadata *json_md = NULL; readstat_parser_t *pass1_parser = NULL; readstat_parser_t *pass2_parser = NULL; json_md = get_json_metadata(json_filename); if (json_md == NULL) { rs_ctx->error_filename = json_filename; error = READSTAT_ERROR_PARSE; goto cleanup; } csv_meta.json_md = json_md; rs_ctx->error_filename = input_filename; // The two 
passes are necessary because we need to set the variable storage // width and # rows before passing the actual values to the write API pass1_parser = readstat_parser_init(); readstat_set_error_handler(pass1_parser, &handle_error); readstat_set_value_label_handler(pass1_parser, &handle_value_label); readstat_set_metadata_handler(pass1_parser, &handle_metadata); error = readstat_parse_csv(pass1_parser, input_filename, &csv_meta, rs_ctx); if (error != READSTAT_OK) goto cleanup; pass2_parser = readstat_parser_init(); readstat_set_error_handler(pass2_parser, &handle_error); readstat_set_variable_handler(pass2_parser, &handle_variable); readstat_set_value_handler(pass2_parser, &handle_value); error = readstat_parse_csv(pass2_parser, input_filename, &csv_meta, rs_ctx); if (error != READSTAT_OK) goto cleanup; cleanup: if (json_md) free_json_metadata(json_md); if (pass1_parser) readstat_parser_free(pass1_parser); if (pass2_parser) readstat_parser_free(pass2_parser); if (csv_meta.column_width) free(csv_meta.column_width); return error; } #endif static readstat_error_t parse_text_plus_dct(const char *input_filename, const char *dct_filename, rs_ctx_t *rs_ctx) { rs_format_e dct_format = readstat_format(dct_filename); readstat_error_t error = READSTAT_OK; readstat_schema_t *schema = NULL; readstat_parser_t *parser = NULL; parser = readstat_parser_init(); readstat_set_error_handler(parser, &handle_error); readstat_set_value_label_handler(parser, &handle_value_label); readstat_set_variable_handler(parser, &handle_variable); if (dct_format == RS_FORMAT_STATA_DICTIONARY) { schema = readstat_parse_stata_dictionary(parser, dct_filename, rs_ctx, &error); } else if (dct_format == RS_FORMAT_SAS_COMMANDS) { schema = readstat_parse_sas_commands(parser, dct_filename, rs_ctx, &error); } else if (dct_format == RS_FORMAT_SPSS_COMMANDS) { schema = readstat_parse_spss_commands(parser, dct_filename, rs_ctx, &error); } rs_ctx->error_filename = dct_filename; readstat_parser_free(parser); if 
(schema == NULL) goto cleanup; rs_ctx->error_filename = input_filename; parser = readstat_parser_init(); readstat_set_error_handler(parser, &handle_error); readstat_set_metadata_handler(parser, &handle_metadata); error = readstat_parse_txt(parser, input_filename, schema, rs_ctx); readstat_parser_free(parser); if (error != READSTAT_OK) goto cleanup; parser = readstat_parser_init(); readstat_set_error_handler(parser, &handle_error); readstat_set_value_handler(parser, &handle_value); error = readstat_parse_txt(parser, input_filename, schema, rs_ctx); readstat_parser_free(parser); if (error != READSTAT_OK) goto cleanup; cleanup: if (schema) readstat_schema_free(schema); return error; } static readstat_error_t parse_binary_file(const char *input_filename, const char *catalog_filename, rs_ctx_t *rs_ctx) { readstat_error_t error = READSTAT_OK; rs_format_e input_format = readstat_format(input_filename); readstat_parser_t *pass1_parser = readstat_parser_init(); readstat_parser_t *pass2_parser = readstat_parser_init(); // Pass 1 - Collect fweight and value labels readstat_set_error_handler(pass1_parser, &handle_error); readstat_set_value_label_handler(pass1_parser, &handle_value_label); readstat_set_fweight_handler(pass1_parser, &handle_fweight); if (catalog_filename) { error = parse_file(pass1_parser, catalog_filename, RS_FORMAT_SAS_CATALOG, rs_ctx); rs_ctx->error_filename = catalog_filename; } else { error = parse_file(pass1_parser, input_filename, input_format, rs_ctx); rs_ctx->error_filename = input_filename; } if (error != READSTAT_OK) goto cleanup; // Pass 2 - Parse full file readstat_set_error_handler(pass2_parser, &handle_error); readstat_set_metadata_handler(pass2_parser, &handle_metadata); readstat_set_note_handler(pass2_parser, &handle_note); readstat_set_variable_handler(pass2_parser, &handle_variable); readstat_set_value_handler(pass2_parser, &handle_value); error = parse_file(pass2_parser, input_filename, input_format, rs_ctx); rs_ctx->error_filename = 
input_filename; if (error != READSTAT_OK) goto cleanup; cleanup: if (pass1_parser) readstat_parser_free(pass1_parser); if (pass2_parser) readstat_parser_free(pass2_parser); return error; } static int convert_file(const char *input_filename, const char *catalog_filename, const char *output_filename, rs_module_t *modules, int modules_count, int force) { readstat_error_t error = READSTAT_OK; struct timeval start_time, end_time; rs_module_t *module = rs_module_for_filename(modules, modules_count, output_filename); rs_ctx_t *rs_ctx = calloc(1, sizeof(rs_ctx_t)); void *module_ctx = NULL; int file_exists = 0; struct stat filestat; gettimeofday(&start_time, NULL); if (!force && stat(output_filename, &filestat) == 0) { error = READSTAT_ERROR_OPEN; file_exists = 1; goto cleanup; } module_ctx = module->init(output_filename); if (module_ctx == NULL) { error = READSTAT_ERROR_OPEN; rs_ctx->error_filename = output_filename; goto cleanup; } rs_ctx->module = module; rs_ctx->module_ctx = module_ctx; if (is_json(catalog_filename)) { #if HAVE_CSVREADER error = parse_csv_plus_json(input_filename, catalog_filename, readstat_format(output_filename), rs_ctx); #endif } else if (is_dictionary(catalog_filename)) { error = parse_text_plus_dct(input_filename, catalog_filename, rs_ctx); } else { error = parse_binary_file(input_filename, catalog_filename, rs_ctx); } gettimeofday(&end_time, NULL); fprintf(stderr, "Converted %ld variables and %ld rows in %.2lf seconds\n", rs_ctx->var_count, rs_ctx->row_count, (end_time.tv_sec + 1e-6 * end_time.tv_usec) - (start_time.tv_sec + 1e-6 * start_time.tv_usec)); cleanup: if (module->finish) { module->finish(rs_ctx->module_ctx); } free(rs_ctx); if (error != READSTAT_OK) { if (file_exists) { fprintf(stderr, "Error opening %s: File exists (Use -f to overwrite)\n", output_filename); } else { fprintf(stderr, "Error processing %s: %s\n", rs_ctx->error_filename, readstat_error_message(error)); unlink(output_filename); } return 1; } return 0; } size_t 
readstat_strftime(char *s, size_t maxsize, const char *format, time_t timestamp) { #if !defined _MSC_VER return strftime(s, maxsize, format, localtime(×tamp)); #else struct tm ltm; localtime_s(<m, ×tamp); return strftime(s, maxsize, format, <m); #endif } static int dump_metadata(readstat_metadata_t *metadata, void *ctx) { printf("Columns: %d\n", readstat_get_var_count(metadata)); printf("Rows: %d\n", readstat_get_row_count(metadata)); const char *table_name = readstat_get_table_name(metadata); const char *file_label = readstat_get_file_label(metadata); const char *orig_encoding = readstat_get_file_encoding(metadata); long version = readstat_get_file_format_version(metadata); time_t timestamp = readstat_get_creation_time(metadata); readstat_compress_t compression = readstat_get_compression(metadata); readstat_endian_t endianness = readstat_get_endianness(metadata); if (table_name && table_name[0]) { if (*(rs_format_e *)ctx == RS_FORMAT_SAS_CATALOG) { printf("Catalog name: %s\n", table_name); } else { printf("Table name: %s\n", table_name); } } if (file_label && file_label[0]) { printf("Table label: %s\n", file_label); } if (version) { printf("Format version: %ld\n", version); } if (orig_encoding) { printf("Text encoding: %s\n", orig_encoding); } if (compression == READSTAT_COMPRESS_ROWS) { printf("Compression: rows\n"); } else if (compression == READSTAT_COMPRESS_BINARY) { printf("Compression: binary\n"); } if (endianness == READSTAT_ENDIAN_LITTLE) { printf("Byte order: little-endian\n"); } else if (endianness == READSTAT_ENDIAN_BIG) { printf("Byte order: big-endian\n"); } if (timestamp) { char buffer[128]; readstat_strftime(buffer, sizeof(buffer), "%d %b %Y %H:%M", timestamp); printf("Timestamp: %s\n", buffer); } return 0; } static int dump_file(const char *input_filename) { rs_format_e input_format = readstat_format(input_filename); readstat_parser_t *parser = readstat_parser_init(); readstat_error_t error = READSTAT_OK; printf("Format: %s\n", 
readstat_format_name(input_format)); readstat_set_error_handler(parser, &handle_error); readstat_set_metadata_handler(parser, &dump_metadata); error = parse_file(parser, input_filename, input_format, &input_format); readstat_parser_free(parser); if (error != READSTAT_OK) { fprintf(stderr, "Error processing %s: %s\n", input_filename, readstat_error_message(error)); return 1; } return 0; } int portable_main(int argc, char** argv) { char *input_filename = NULL; char *catalog_filename = NULL; char *output_filename = NULL; rs_module_t *modules = NULL; long modules_count = 2; long module_index = 0; int force = 0; #if HAVE_XLSXWRITER modules_count++; #endif modules = calloc(modules_count, sizeof(rs_module_t)); modules[module_index++] = rs_mod_readstat; modules[module_index++] = rs_mod_csv; #if HAVE_XLSXWRITER modules[module_index++] = rs_mod_xlsx; #endif if (argc == 2 && (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0)) { print_version(); return 0; } if (argc == 2 && (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0)) { print_usage(argv[0]); return 0; } if (argc > 1) { int argpos = 1; if (strcmp(argv[argpos], "-f") == 0) { force = 1; argpos++; } if (argpos + 1 == argc) { if (can_read(argv[argpos])) { input_filename = argv[argpos]; } } else if (argpos + 2 == argc) { if (can_read(argv[argpos]) && can_write(modules, modules_count, argv[argpos+1])) { input_filename = argv[argpos]; output_filename = argv[argpos+1]; } } else if (argpos + 3 == argc) { if (can_write(modules, modules_count, argv[argpos+2]) && (is_dictionary(argv[argpos+1]) || (can_read(argv[argpos]) && (is_json(argv[argpos+1]) || is_catalog(argv[argpos+1]))))) { input_filename = argv[argpos]; catalog_filename = argv[argpos+1]; output_filename = argv[argpos+2]; } } } int ret; if (output_filename) { ret = convert_file(input_filename, catalog_filename, output_filename, modules, modules_count, force); } else if (input_filename) { ret = dump_file(input_filename); } else { 
print_usage(argv[0]); ret = 1; } free(modules); return ret; } ReadStat-1.1.7/src/bin/util/000077500000000000000000000000001410722155500155455ustar00rootroot00000000000000ReadStat-1.1.7/src/bin/util/file_format.c000066400000000000000000000062551410722155500202100ustar00rootroot00000000000000#include #if defined(_MSC_VER) # define strncasecmp _strnicmp # define strcasecmp _stricmp #else # include #endif #include "file_format.h" #include "../../readstat.h" rs_format_e readstat_format(const char *filename) { if (filename == NULL) return RS_FORMAT_UNKNOWN; size_t len = strlen(filename); if (len < sizeof(".dta")-1) return RS_FORMAT_UNKNOWN; if (strncasecmp(filename + len - 4, ".dta", 4) == 0) return RS_FORMAT_DTA; if (strncasecmp(filename + len - 4, ".dct", 4) == 0) return RS_FORMAT_STATA_DICTIONARY; if (strncasecmp(filename + len - 4, ".por", 4) == 0) return RS_FORMAT_POR; if (strncasecmp(filename + len - 4, ".sas", 4) == 0) return RS_FORMAT_SAS_COMMANDS; if (strncasecmp(filename + len - 4, ".sps", 4) == 0) return RS_FORMAT_SPSS_COMMANDS; if (strncasecmp(filename + len - 4, ".sav", 4) == 0) return RS_FORMAT_SAV; #if HAVE_CSVREADER if (strncasecmp(filename + len - 4, ".csv", 4) == 0) return RS_FORMAT_CSV; #endif if (strncasecmp(filename + len - 4, ".xpt", 4) == 0) return RS_FORMAT_XPORT; if (len < sizeof(".json")-1) return RS_FORMAT_UNKNOWN; if (strncasecmp(filename + len - 5, ".json", 5) == 0) return RS_FORMAT_JSON; if (strncasecmp(filename + len - 5, ".zsav", 5) == 0) return RS_FORMAT_ZSAV; if (len < sizeof(".sas7bdat")-1) return RS_FORMAT_UNKNOWN; if (strncasecmp(filename + len - 9, ".sas7bdat", 9) == 0) return RS_FORMAT_SAS_DATA; if (strncasecmp(filename + len - 9, ".sas7bcat", 9) == 0) return RS_FORMAT_SAS_CATALOG; return RS_FORMAT_UNKNOWN; } const char *readstat_format_name(rs_format_e format) { if (format == RS_FORMAT_DTA) return "Stata binary file (DTA)"; if (format == RS_FORMAT_STATA_DICTIONARY) return "Stata dictionary file (DCT)"; if (format == RS_FORMAT_SAV) 
return "SPSS binary file (SAV)"; if (format == RS_FORMAT_ZSAV) return "SPSS compressed binary file (ZSAV)"; if (format == RS_FORMAT_POR) return "SPSS portable file (POR)"; if (format == RS_FORMAT_SPSS_COMMANDS) return "SPSS command file"; if (format == RS_FORMAT_SAS_DATA) return "SAS data file (SAS7BDAT)"; if (format == RS_FORMAT_SAS_CATALOG) return "SAS catalog file (SAS7BCAT)"; if (format == RS_FORMAT_SAS_COMMANDS) return "SAS command file"; if (format == RS_FORMAT_CSV) return "CSV"; if (format == RS_FORMAT_XPORT) return "SAS transport file (XPORT)"; return "Unknown"; } int is_catalog(const char *filename) { return (readstat_format(filename) == RS_FORMAT_SAS_CATALOG); } int is_json(const char *filename) { return (readstat_format(filename) == RS_FORMAT_JSON); } int is_dictionary(const char *filename) { return (readstat_format(filename) == RS_FORMAT_STATA_DICTIONARY || readstat_format(filename) == RS_FORMAT_SAS_COMMANDS || readstat_format(filename) == RS_FORMAT_SPSS_COMMANDS); } int can_read(const char *filename) { return (readstat_format(filename) != RS_FORMAT_UNKNOWN); } ReadStat-1.1.7/src/bin/util/file_format.h000066400000000000000000000011701410722155500202040ustar00rootroot00000000000000#ifndef __FORMAT_H #define __FORMAT_H typedef enum { RS_FORMAT_UNKNOWN, RS_FORMAT_DTA, RS_FORMAT_SAV, RS_FORMAT_ZSAV, RS_FORMAT_POR, RS_FORMAT_SAS_DATA, RS_FORMAT_SAS_CATALOG, RS_FORMAT_XPORT, RS_FORMAT_SAS_COMMANDS, RS_FORMAT_SPSS_COMMANDS, RS_FORMAT_STATA_DICTIONARY, RS_FORMAT_CSV, RS_FORMAT_JSON } rs_format_e; rs_format_e readstat_format(const char *filename); const char *readstat_format_name(rs_format_e format); int is_catalog(const char *filename); int is_json(const char *filename); int is_dictionary(const char *filename); int can_read(const char *filename); #endif ReadStat-1.1.7/src/bin/util/main.h000066400000000000000000000040451410722155500166450ustar00rootroot00000000000000#include #include // True main for all platforms int portable_main(int argc, char *argv[]); #if 
defined _WIN32 #include // Standard way of decoding wide-string command-line arguments on Windows. // Call portable_main with UTF-8 strings. int main(int unused_argc, char *unused_argv[]) { int argc; int ret = 1; wchar_t** utf16_argv = NULL; char** utf8_argv = NULL; // Manual standard argument decoding needed since wmain is not supported by MinGW by default. utf16_argv = CommandLineToArgvW(GetCommandLineW(), &argc); if(utf16_argv == NULL) { fprintf(stderr, "Fatal error: command line argument extraction failure\n"); goto cleanup; } utf8_argv = calloc(argc, sizeof(char*)); for (int i=0; i static int escape(const char *s, char* dest) { char c = s[0]; if (c == '\\') { if (dest) { dest[0] = '\\'; dest[1] = '\\'; } return 2 + escape(&s[1], dest ? &dest[2] : NULL); } else if (c == '"') { if (dest) { dest[0] = '\\'; dest[1] = '"'; } return 2 + escape(&s[1], dest ? &dest[2] : NULL); } else if (c) { if (dest) { dest[0] = c; } return 1 + escape(&s[1], dest ? &dest[1] : NULL); } else { if (dest) { dest[0] = '"'; dest[1] = 0; } return 1; } } char* quote_and_escape(const char *src) { int newlen = 2 + escape(src, NULL); char *dest = malloc(newlen); dest[0] = '"'; escape(src, &dest[1]); return dest; } ReadStat-1.1.7/src/bin/util/quote_and_escape.h000066400000000000000000000000521410722155500212120ustar00rootroot00000000000000 char* quote_and_escape(const char *src); ReadStat-1.1.7/src/bin/util/readstat_dta_days.c000066400000000000000000000054141410722155500213740ustar00rootroot00000000000000#include #include #include static inline int is_leap(int year) { return ((year % 4 == 0 && year % 100 != 0) || year % 400 ==0); } int readstat_dta_num_days(const char *s, char **dest) { int daysPerMonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; int daysPerMonthLeap[] = {31,29,31,30,31,30,31,31,30,31,30,31}; int year, month, day; if (strlen(s) == 0) { *dest = (char*) s; return 0; } int ret = sscanf(s, "%d-%d-%d", &year, &month, &day); month--; if (month < 0 || month > 11 || ret!=3) { *dest = 
(char*)s; return 0; } int maxdays = (is_leap(year) ? daysPerMonthLeap : daysPerMonth)[month]; if (day < 1 || day > maxdays) { *dest = (char*)s; return 0; } else { int days = 0; for (int i=year; i<1960; i++) { days -= is_leap(i) ? 366 : 365; } for (int i=1960; i 0) { int days_in_year = is_leap(yr) ? 366 : 365; if (days > days_in_year) { yr-=1; days-=days_in_year; continue; } int days_in_month = is_leap(yr) ? daysPerMonthLeap[month] : daysPerMonth[month]; if (days > days_in_month) { month-=1; days-=days_in_month; continue; } day = days_in_month-days + 1; days = 0; } } else { while (days > 0) { int days_in_year = is_leap(yr) ? 366 : 365; if (days >= days_in_year) { yr+=1; days-=days_in_year; continue; } int days_in_month = is_leap(yr) ? daysPerMonthLeap[month] : daysPerMonth[month]; if (days >= days_in_month) { month+=1; days-=days_in_month; continue; } day+= days; days = 0; } } snprintf(dest, size, "%04d-%02d-%02d", yr, month+1, day); return dest; } ReadStat-1.1.7/src/bin/util/readstat_dta_days.h000066400000000000000000000002741410722155500214000ustar00rootroot00000000000000#ifndef __READSTAT_DTA_DAYS_H #define __READSTAT_DTA_DAYS_H int readstat_dta_num_days(const char *s, char** dest); char* readstat_dta_days_string(int days, char* dest, int size); #endif ReadStat-1.1.7/src/bin/util/readstat_sav_date.c000066400000000000000000000050461410722155500213730ustar00rootroot00000000000000#include #include #include static inline int is_leap(int year) { return ((year % 4 == 0 && year % 100 != 0) || year % 400 ==0); } double readstat_sav_date_parse(const char *s, char **dest) { // A SPSS date stored as the number of seconds since the start of the Gregorian calendar (midnight, Oct 14, 1582) // Through the C interface in savReaderWriter I've verifed that leap seconds is ignored int daysPerMonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; int daysPerMonthLeap[] = {31,29,31,30,31,30,31,31,30,31,30,31}; int year, month, day; if (strlen(s) == 0) { *dest = (char*) s; return 0; } int 
ret = sscanf(s, "%d-%d-%d", &year, &month, &day); month--; if (month < 0 || month > 11 || ret!=3) { *dest = (char*)s; return 0; } int maxdays = (is_leap(year) ? daysPerMonthLeap : daysPerMonth)[month]; if (day < 1 || day > maxdays) { *dest =(char*)s; return 0; } else { int days = 0; for (int i=1582; i 0) { int days_in_year = is_leap(yr) ? 366 : 365; if (days >= days_in_year) { yr+=1; days-=days_in_year; continue; } int days_in_month = is_leap(yr) ? daysPerMonthLeap[month] : daysPerMonth[month]; if (days >= days_in_month) { month+=1; days-=days_in_month; continue; } day+= days; days = 0; } snprintf(dest, size, "%04d-%02d-%02d", yr, month+1, day); return dest; } ReadStat-1.1.7/src/bin/util/readstat_sav_date.h000066400000000000000000000003071410722155500213730ustar00rootroot00000000000000#ifndef __READSTAT_SAV_DATE_H #define __READSTAT_SAV_DATE_H double readstat_sav_date_parse(const char *s, char **dest); char* readstat_sav_date_string(double seconds, char* dest, int size); #endif ReadStat-1.1.7/src/bin/write/000077500000000000000000000000001410722155500157225ustar00rootroot00000000000000ReadStat-1.1.7/src/bin/write/double_decimals.c000066400000000000000000000010431410722155500211770ustar00rootroot00000000000000#include #include int double_decimals(double value) { char buf[255]; snprintf(buf, sizeof(buf), "%.14f", value); int len = strlen(buf); int dot_pos = 0; int relevant_decimal_pos = 0; for (int i=0; i #include #include #include #include #include "../../../readstat.h" #include "../../util/readstat_sav_date.h" #include "../../util/readstat_dta_days.h" #include "../../extract_metadata.h" #include "write_missing_values.h" static void handle_missing_discrete(struct context *ctx, readstat_variable_t *variable) { const char *format = readstat_variable_get_format(variable); int spss_date = format && (strcmp(format, "EDATE40") == 0) && variable->type == READSTAT_TYPE_DOUBLE; int missing_ranges_count = readstat_variable_get_missing_ranges_count(variable); fprintf(ctx->fp, 
", \"missing\": { \"type\": \"DISCRETE\", \"values\": ["); for (int i=0; i=1) { fprintf(ctx->fp, ", "); } if (readstat_value_type(lo_val) == READSTAT_TYPE_DOUBLE) { double lo = readstat_double_value(lo_val); double hi = readstat_double_value(hi_val); if (lo == hi && spss_date) { char buf[255]; char *s = readstat_sav_date_string(lo, buf, sizeof(buf)-1); if (!s) { fprintf(stderr, "Could not parse date %lf\n", lo); exit(EXIT_FAILURE); } fprintf(ctx->fp, "\"%s\"", s); } else if (lo == hi) { fprintf(ctx->fp, "%g", lo); } else { fprintf(stderr, "%s:%d column %s unsupported lo %lf hi %lf\n", __FILE__, __LINE__, variable->name, lo, hi); exit(EXIT_FAILURE); } } else { fprintf(stderr, "%s:%d unsupported missing type\n", __FILE__, __LINE__); exit(EXIT_FAILURE); } } fprintf(ctx->fp, "]} "); } static void handle_missing_range(struct context *ctx, readstat_variable_t *variable) { const char *format = readstat_variable_get_format(variable); int spss_date = format && (strcmp(format, "EDATE40") == 0) && variable->type == READSTAT_TYPE_DOUBLE; int missing_ranges_count = readstat_variable_get_missing_ranges_count(variable); fprintf(ctx->fp, ", \"missing\": { \"type\": \"RANGE\", "); for (int i=0; i=1) { fprintf(ctx->fp, ", "); } if (readstat_value_type(lo_val) == READSTAT_TYPE_DOUBLE) { double lo = readstat_double_value(lo_val); double hi = readstat_double_value(hi_val); if (spss_date) { char buf[255]; char buf2[255]; char *s = readstat_sav_date_string(lo, buf, sizeof(buf)-1); char *s2 = readstat_sav_date_string(hi, buf2, sizeof(buf2)-1); if (!s) { fprintf(stderr, "Could not parse date %lf\n", lo); exit(EXIT_FAILURE); } if (!s2) { fprintf(stderr, "Could not parse date %lf\n", hi); exit(EXIT_FAILURE); } if (lo == hi) { fprintf(ctx->fp, "\"discrete-value\": \"%s\"", s); } else { fprintf(ctx->fp, "\"low\": \"%s\", \"high\": \"%s\"", s, s2); } } else { if (lo == hi) { fprintf(ctx->fp, "\"discrete-value\": %lf", lo); } else { fprintf(ctx->fp, "\"low\": %lf, \"high\": %lf", lo, hi); } } } 
else { fprintf(stderr, "%s:%d unsupported missing type\n", __FILE__, __LINE__); exit(EXIT_FAILURE); } } fprintf(ctx->fp, "} "); } void add_missing_values(struct context *ctx, readstat_variable_t *variable) { int missing_ranges_count = readstat_variable_get_missing_ranges_count(variable); if (missing_ranges_count == 0) { return; } int is_range = 0; int discrete = 0; int only_double = 1; for (int i=0; i #include #include #include #include #include "../../../readstat.h" #include "../../../readstat_iconv.h" #include "../../../stata/readstat_dta.h" #include "../../util/readstat_sav_date.h" #include "../../util/readstat_dta_days.h" #include "../../util/quote_and_escape.h" #include "../../util/file_format.h" #include "../../extract_metadata.h" #include "write_value_labels.h" static readstat_label_set_t * get_label_set(const char *val_labels, struct context *ctx) { for (int i=0; ivariable_count; i++) { readstat_label_set_t * lbl = &ctx->label_set[i]; if (0 == strcmp(lbl->name, val_labels)) { return lbl; } } return NULL; } void add_val_labels(struct context *ctx, readstat_variable_t *variable, const char *val_labels) { if (!val_labels) { return; } else { fprintf(stdout, "extracting value labels for %s\n", val_labels); } const char *format = readstat_variable_get_format(variable); int sav_date = format && (strcmp(variable->format, "EDATE40") == 0) && variable->type == READSTAT_TYPE_DOUBLE; int dta_date = format && (strcmp(variable->format, "%td") == 0) && variable->type == READSTAT_TYPE_INT32; readstat_label_set_t * label_set = get_label_set(val_labels, ctx); if (!label_set) { fprintf(stderr, "Could not find label set %s!\n", val_labels); exit(EXIT_FAILURE); } fprintf(ctx->fp, ", \"categories\": ["); for (int i=0; ivalue_labels_count; i++) { readstat_value_label_t* value_label = &label_set->value_labels[i]; if (i>0) { fprintf(ctx->fp, ", "); } if (sav_date) { char* lbl = quote_and_escape(value_label->label); char buf[255]; char *s = 
readstat_sav_date_string(value_label->double_key, buf, sizeof(buf)-1); if (!s) { fprintf(stderr, "%s:%d could not parse double value %lf to date\n", __FILE__, __LINE__, value_label->double_key); exit(EXIT_FAILURE); } fprintf(ctx->fp, "{ \"code\": \"%s\", \"label\": %s} ", s, lbl); free(lbl); } else if (dta_date) { char* lbl = quote_and_escape(value_label->label); char buf[255]; int k = value_label->int32_key; char tag = 0; if (k >= DTA_113_MISSING_INT32_A) { tag = (k-DTA_113_MISSING_INT32_A)+'a'; } char *s = readstat_dta_days_string(value_label->int32_key, buf, sizeof(buf)-1); if (!s) { fprintf(stderr, "%s:%d could not parse int32 value %d to date\n", __FILE__, __LINE__, value_label->int32_key); exit(EXIT_FAILURE); } if (tag) { fprintf(ctx->fp, "{ \"code\": \".%c\", \"label\": %s} ", tag, lbl); } else { fprintf(ctx->fp, "{ \"code\": \"%s\", \"label\": %s} ", s, lbl); } free(lbl); } else if (readstat_variable_get_type_class(variable) == READSTAT_TYPE_CLASS_NUMERIC && ctx->input_format == RS_FORMAT_DTA) { char* lbl = quote_and_escape(value_label->label); int k = value_label->int32_key; char tag = 0; if (k >= DTA_113_MISSING_INT32_A) { tag = (k-DTA_113_MISSING_INT32_A)+'a'; } if (tag) { fprintf(ctx->fp, "{ \"code\": \".%c\", \"label\": %s} ", tag, lbl); } else { fprintf(ctx->fp, "{ \"code\": %d, \"label\": %s} ", k, lbl); } free(lbl); } else if (variable->type == READSTAT_TYPE_DOUBLE && ctx->input_format == RS_FORMAT_SAV) { char* lbl = quote_and_escape(value_label->label); fprintf(ctx->fp, "{ \"code\": %lf, \"label\": %s} ", value_label->double_key, lbl); free(lbl); } else if (variable->type == READSTAT_TYPE_STRING) { char* lbl = quote_and_escape(value_label->label); char* stringkey = quote_and_escape(value_label->string_key); fprintf(ctx->fp, "{ \"code\": %s, \"label\": %s} ", stringkey, lbl); free(lbl); free(stringkey); } else { fprintf(stderr, "%s:%d Unsupported type %d\n", __FILE__, __LINE__, variable->type); exit(EXIT_FAILURE); } } fprintf(ctx->fp, "] "); } 
ReadStat-1.1.7/src/bin/write/json/write_value_labels.h000066400000000000000000000003541410722155500227160ustar00rootroot00000000000000#ifndef __WRITE_VALUE_LABELS_H #define __WRITE_VALUE_LABELS_H #include "../../../readstat.h" #include "../../extract_metadata.h" void add_val_labels(struct context *ctx, readstat_variable_t *variable, const char *val_labels); #endif ReadStat-1.1.7/src/bin/write/mod_csv.c000066400000000000000000000125521410722155500175250ustar00rootroot00000000000000#include #include #include #include #include "../../readstat.h" #include "module_util.h" #include "module.h" #include "../util/readstat_dta_days.h" #include "../util/readstat_sav_date.h" #include "double_decimals.h" typedef struct mod_csv_ctx_s { FILE *out_file; long var_count; } mod_csv_ctx_t; static int accept_file(const char *filename); static void *ctx_init(const char *filename); static void finish_file(void *ctx); static int handle_metadata(readstat_metadata_t *metadata, void *ctx); static int handle_variable(int index, readstat_variable_t *variable, const char *val_labels, void *ctx); static int handle_value(int obs_index, readstat_variable_t *variable, readstat_value_t value, void *ctx); rs_module_t rs_mod_csv = { .accept = accept_file, .init = ctx_init, .finish = finish_file, .handle = { .metadata = handle_metadata, .variable = handle_variable, .value = handle_value } }; static int accept_file(const char *filename) { return strcmp(filename, "-") == 0 || rs_ends_with(filename, ".csv"); } static void *ctx_init(const char *filename) { mod_csv_ctx_t *mod_ctx = malloc(sizeof(mod_csv_ctx_t)); if (strcmp(filename, "-") == 0) { mod_ctx->out_file = stdout; } else { mod_ctx->out_file = fopen(filename, "w"); } if (mod_ctx->out_file == NULL) { fprintf(stderr, "Error opening %s for writing: %s\n", filename, strerror(errno)); return NULL; } return mod_ctx; } static void finish_file(void *ctx) { mod_csv_ctx_t *mod_ctx = (mod_csv_ctx_t *)ctx; if (mod_ctx) { if (mod_ctx->out_file == stdout) { 
fflush(mod_ctx->out_file); } else if (mod_ctx->out_file != NULL) { fclose(mod_ctx->out_file); } } } static int handle_metadata(readstat_metadata_t *metadata, void *ctx) { mod_csv_ctx_t *mod_ctx = (mod_csv_ctx_t *)ctx; mod_ctx->var_count = readstat_get_var_count(metadata); return mod_ctx->var_count == 0; } static void emit_escaped_string(mod_csv_ctx_t *mod_ctx, const char *string) { if (string == NULL) { fprintf(mod_ctx->out_file, "\"\""); } else { fprintf(mod_ctx->out_file, "\""); const char *p = NULL; while ((string = strchr(p = string, '"'))) { fwrite(p, string - p, 1, mod_ctx->out_file); fprintf(mod_ctx->out_file, "\"\""); string++; } fprintf(mod_ctx->out_file, "%s\"", p); } } static int handle_variable(int index, readstat_variable_t *variable, const char *val_labels, void *ctx) { mod_csv_ctx_t *mod_ctx = (mod_csv_ctx_t *)ctx; const char *name = readstat_variable_get_name(variable); if (index > 0) { fprintf(mod_ctx->out_file, ","); } emit_escaped_string(mod_ctx, name); if (index == mod_ctx->var_count - 1) { fprintf(mod_ctx->out_file, "\n"); } return 0; } static int handle_value(int obs_index, readstat_variable_t *variable, readstat_value_t value, void *ctx) { mod_csv_ctx_t *mod_ctx = (mod_csv_ctx_t *)ctx; readstat_type_t type = readstat_value_type(value); const char *format = readstat_variable_get_format(variable); int var_index = readstat_variable_get_index(variable); if (var_index > 0) { fprintf(mod_ctx->out_file, ","); } if (readstat_value_is_system_missing(value)) { /* void */ } else if (readstat_value_is_tagged_missing(value)) { /* void */ } else if (type == READSTAT_TYPE_STRING) { emit_escaped_string(mod_ctx, readstat_string_value(value)); } else if (type == READSTAT_TYPE_INT8) { #ifdef __MINGW32__ __mingw_fprintf(mod_ctx->out_file, "%hhd", readstat_int8_value(value)); #else fprintf(mod_ctx->out_file, "%hhd", readstat_int8_value(value)); #endif } else if (type == READSTAT_TYPE_INT16) { fprintf(mod_ctx->out_file, "%hd", readstat_int16_value(value)); } else 
if (type == READSTAT_TYPE_INT32 && format && 0 == strncmp("%td", format, strlen("%td"))) { int days = readstat_int32_value(value); char days_str[255]; readstat_dta_days_string(days, days_str, sizeof(days_str)-1); fprintf(mod_ctx->out_file, "%s", days_str); } else if (type == READSTAT_TYPE_DOUBLE && format && 0 == strncmp("EDATE40", format, strlen("EDATE40"))) { double v = readstat_double_value(value); char date_str[255]; char *s = readstat_sav_date_string(v, date_str, sizeof(date_str)-1); if (!s) { fprintf(stderr, "%s:%d Could not parse SPSS date double: %lf\n", __FILE__, __LINE__, v); exit(EXIT_FAILURE); } fprintf(mod_ctx->out_file, "%s", s); } else if (type == READSTAT_TYPE_INT32) { fprintf(mod_ctx->out_file, "%d", readstat_int32_value(value)); } else if (type == READSTAT_TYPE_FLOAT) { fprintf(mod_ctx->out_file, "%f", readstat_float_value(value)); } else if (type == READSTAT_TYPE_DOUBLE) { double v = readstat_double_value(value); int decimals = double_decimals(v); if (decimals <= 6) { fprintf(mod_ctx->out_file, "%lf", v); } else { fprintf(mod_ctx->out_file, "%.14f", v); } } if (var_index == mod_ctx->var_count - 1) { fprintf(mod_ctx->out_file, "\n"); } return 0; } ReadStat-1.1.7/src/bin/write/mod_csv.h000066400000000000000000000000401410722155500175170ustar00rootroot00000000000000 extern rs_module_t rs_mod_csv; ReadStat-1.1.7/src/bin/write/mod_readstat.c000066400000000000000000000274011410722155500205400ustar00rootroot00000000000000#include #include #include #include #include "../../readstat.h" #include "../../CKHashTable.h" #include "module_util.h" #include "module.h" typedef struct mod_readstat_ctx_s { readstat_writer_t *writer; ck_hash_table_t *label_set_dict; long fweight_index; long var_count; long row_count; FILE *out_file; unsigned int is_sav:1; unsigned int is_zsav:1; unsigned int is_dta:1; unsigned int is_por:1; unsigned int is_sas7bdat:1; unsigned int is_xport:1; } mod_readstat_ctx_t; static ssize_t write_data(const void *bytes, size_t len, void *ctx); 
static int accept_file(const char *filename); static void *ctx_init(const char *filename); static void finish_file(void *ctx); static int handle_fweight(readstat_variable_t *variable, void *ctx); static int handle_metadata(readstat_metadata_t *metadata, void *ctx); static int handle_note(int note_index, const char *note, void *ctx); static int handle_value_label(const char *val_labels, readstat_value_t value, const char *label, void *ctx); static int handle_variable(int index, readstat_variable_t *variable, const char *val_labels, void *ctx); static int handle_value(int obs_index, readstat_variable_t *variable, readstat_value_t value, void *ctx); rs_module_t rs_mod_readstat = { .accept = accept_file, .init = ctx_init, .finish = finish_file, .handle = { .metadata = handle_metadata, .note = handle_note, .variable = handle_variable, .fweight = handle_fweight, .value = handle_value, .value_label = handle_value_label } }; static ssize_t write_data(const void *bytes, size_t len, void *ctx) { mod_readstat_ctx_t *mod_ctx = (mod_readstat_ctx_t *)ctx; return fwrite(bytes, 1, len, mod_ctx->out_file); } static int accept_file(const char *filename) { return (rs_ends_with(filename, ".dta") || rs_ends_with(filename, ".sav") || rs_ends_with(filename, ".zsav") || rs_ends_with(filename, ".por") || rs_ends_with(filename, ".sas7bdat") || rs_ends_with(filename, ".xpt")); } static void *ctx_init(const char *filename) { mod_readstat_ctx_t *mod_ctx = malloc(sizeof(mod_readstat_ctx_t)); mod_ctx->label_set_dict = ck_hash_table_init(1024, 16); mod_ctx->is_sav = rs_ends_with(filename, ".sav"); mod_ctx->is_zsav = rs_ends_with(filename, ".zsav"); mod_ctx->is_dta = rs_ends_with(filename, ".dta"); mod_ctx->is_por = rs_ends_with(filename, ".por"); mod_ctx->is_sas7bdat = rs_ends_with(filename, ".sas7bdat"); mod_ctx->is_xport = rs_ends_with(filename, ".xpt"); mod_ctx->out_file = fopen(filename, "wb"); if (mod_ctx->out_file == NULL) { fprintf(stderr, "Error opening %s for writing: %s\n", filename, 
strerror(errno)); return NULL; } mod_ctx->writer = readstat_writer_init(); readstat_writer_set_file_label(mod_ctx->writer, "Created by ReadStat "); readstat_set_data_writer(mod_ctx->writer, &write_data); return mod_ctx; } void finish_file(void *ctx) { mod_readstat_ctx_t *mod_ctx = (mod_readstat_ctx_t *)ctx; if (mod_ctx) { if (mod_ctx->out_file) fclose(mod_ctx->out_file); if (mod_ctx->label_set_dict) ck_hash_table_free(mod_ctx->label_set_dict); if (mod_ctx->writer) readstat_writer_free(mod_ctx->writer); free(mod_ctx); } } static int handle_fweight(readstat_variable_t *variable, void *ctx) { mod_readstat_ctx_t *mod_ctx = (mod_readstat_ctx_t *)ctx; readstat_writer_t *writer = mod_ctx->writer; readstat_variable_t *new_variable = readstat_get_variable(writer, readstat_variable_get_index(variable)); readstat_writer_set_fweight_variable(writer, new_variable); return READSTAT_HANDLER_OK; } static int handle_metadata(readstat_metadata_t *metadata, void *ctx) { mod_readstat_ctx_t *mod_ctx = (mod_readstat_ctx_t *)ctx; readstat_writer_t *writer = mod_ctx->writer; mod_ctx->var_count = readstat_get_var_count(metadata); mod_ctx->row_count = readstat_get_row_count(metadata); if (mod_ctx->var_count == 0 || mod_ctx->row_count == 0) return READSTAT_HANDLER_ABORT; readstat_writer_set_file_label(writer, readstat_get_file_label(metadata)); return READSTAT_HANDLER_OK; } static int handle_note(int note_index, const char *note, void *ctx) { mod_readstat_ctx_t *mod_ctx = (mod_readstat_ctx_t *)ctx; readstat_writer_t *writer = mod_ctx->writer; readstat_add_note(writer, note); return READSTAT_HANDLER_OK; } static int handle_value_label(const char *val_labels, readstat_value_t value, const char *label, void *ctx) { mod_readstat_ctx_t *mod_ctx = (mod_readstat_ctx_t *)ctx; readstat_writer_t *writer = mod_ctx->writer; readstat_label_set_t *label_set = NULL; readstat_type_t type = readstat_value_type(value); label_set = (readstat_label_set_t *)ck_str_hash_lookup(val_labels, 
mod_ctx->label_set_dict); if (label_set == NULL) { label_set = readstat_add_label_set(writer, type, val_labels); ck_str_hash_insert(val_labels, label_set, mod_ctx->label_set_dict); } if (mod_ctx->is_dta && readstat_value_is_tagged_missing(value)) { readstat_label_tagged_value(label_set, readstat_value_tag(value), label); } else if (type == READSTAT_TYPE_INT32) { readstat_label_int32_value(label_set, readstat_int32_value(value), label); } else if (type == READSTAT_TYPE_DOUBLE) { readstat_label_double_value(label_set, readstat_double_value(value), label); } else if (type == READSTAT_TYPE_STRING) { readstat_label_string_value(label_set, readstat_string_value(value), label); } return READSTAT_HANDLER_OK; } static int handle_variable(int index, readstat_variable_t *variable, const char *val_labels, void *ctx) { mod_readstat_ctx_t *mod_ctx = (mod_readstat_ctx_t *)ctx; readstat_writer_t *writer = mod_ctx->writer; readstat_type_t type = readstat_variable_get_type(variable); const char *name = readstat_variable_get_name(variable); const char *label = readstat_variable_get_label(variable); size_t storage_width = readstat_variable_get_storage_width(variable); int display_width = readstat_variable_get_display_width(variable); int missing_ranges_count = readstat_variable_get_missing_ranges_count(variable); readstat_alignment_t alignment = readstat_variable_get_alignment(variable); readstat_measure_t measure = readstat_variable_get_measure(variable); // TODO format translation (readstat_variable_get_format + readstat_variable_set_format) readstat_variable_t *new_variable = readstat_add_variable(writer, name, type, storage_width); if (val_labels) { readstat_label_set_t *label_set = (readstat_label_set_t *)ck_str_hash_lookup(val_labels, mod_ctx->label_set_dict); readstat_variable_set_label_set(new_variable, label_set); if (mod_ctx->is_sas7bdat) { readstat_variable_set_format(new_variable, val_labels); } } int i; for (i=0; iwriter; int var_index = 
readstat_variable_get_index(old_variable); readstat_variable_t *variable = readstat_get_variable(writer, var_index); readstat_type_t type = readstat_value_type(value); readstat_error_t error = READSTAT_OK; if (var_index == 0) { if (obs_index == 0) { if (mod_ctx->is_sav) { readstat_writer_set_compression(writer, READSTAT_COMPRESS_ROWS); error = readstat_begin_writing_sav(writer, mod_ctx, mod_ctx->row_count); } else if (mod_ctx->is_zsav) { readstat_writer_set_compression(writer, READSTAT_COMPRESS_BINARY); error = readstat_begin_writing_sav(writer, mod_ctx, mod_ctx->row_count); } else if (mod_ctx->is_dta) { error = readstat_begin_writing_dta(writer, mod_ctx, mod_ctx->row_count); } else if (mod_ctx->is_por) { error = readstat_begin_writing_por(writer, mod_ctx, mod_ctx->row_count); } else if (mod_ctx->is_sas7bdat) { error = readstat_begin_writing_sas7bdat(writer, mod_ctx, mod_ctx->row_count); } else if (mod_ctx->is_xport) { error = readstat_begin_writing_xport(writer, mod_ctx, mod_ctx->row_count); } if (error != READSTAT_OK) { fprintf(stderr, "Error beginning file: %s\n", readstat_error_message(error)); goto cleanup; } } error = readstat_begin_row(writer); if (error != READSTAT_OK) { fprintf(stderr, "Error beginning row #%d: %s\n", obs_index, readstat_error_message(error)); goto cleanup; } } if (readstat_value_is_system_missing(value)) { error = readstat_insert_missing_value(writer, variable); } else if (mod_ctx->is_dta && readstat_value_is_tagged_missing(value)) { error = readstat_insert_tagged_missing_value(writer, variable, value.tag); } else if (type == READSTAT_TYPE_STRING) { error = readstat_insert_string_value(writer, variable, readstat_string_value(value)); } else if (type == READSTAT_TYPE_INT8) { error = readstat_insert_int8_value(writer, variable, readstat_int8_value(value)); } else if (type == READSTAT_TYPE_INT16) { error = readstat_insert_int16_value(writer, variable, readstat_int16_value(value)); } else if (type == READSTAT_TYPE_INT32) { error = 
/*
 * Per-output-file state for the XLSX writer module (mod_xlsx.c).
 * Created by ctx_init() and released by finish_file().
 */
typedef struct mod_xlsx_ctx_s {
    lxw_workbook *workbook;    /* workbook created with workbook_new(filename) */
    lxw_worksheet *worksheet;  /* the single "Data" sheet all cells are written to */
    lxw_format *label_fmt;     /* bold, centered format used for the header row */
    lxw_format *missing_fmt;   /* gray font format applied to defined-missing values */
    long row_count;            /* incremented once per observation (when var_index == 0);
                                  finish_file() freezes the header row if it exceeds MIN_ROWS_TO_SPLIT */
} mod_xlsx_ctx_t;
handle_variable, .value = handle_value } }; static int accept_file(const char *filename) { return rs_ends_with(filename, ".xlsx"); } static void *ctx_init(const char *filename) { mod_xlsx_ctx_t *mod_ctx = malloc(sizeof(mod_xlsx_ctx_t)); mod_ctx->workbook = workbook_new(filename); mod_ctx->worksheet = workbook_add_worksheet(mod_ctx->workbook, "Data"); mod_ctx->label_fmt = workbook_add_format(mod_ctx->workbook); format_set_bold(mod_ctx->label_fmt); format_set_align(mod_ctx->label_fmt, LXW_ALIGN_CENTER); mod_ctx->missing_fmt = workbook_add_format(mod_ctx->workbook); format_set_font_color(mod_ctx->missing_fmt, LXW_COLOR_GRAY); return mod_ctx; } static void finish_file(void *ctx) { mod_xlsx_ctx_t *mod_ctx = (mod_xlsx_ctx_t *)ctx; if (mod_ctx) { if (mod_ctx->row_count > MIN_ROWS_TO_SPLIT) { worksheet_freeze_panes(mod_ctx->worksheet, 1, 0); } workbook_close(mod_ctx->workbook); free(mod_ctx); } } static int handle_variable(int index, readstat_variable_t *variable, const char *val_labels, void *ctx) { mod_xlsx_ctx_t *mod_ctx = (mod_xlsx_ctx_t *)ctx; const char *name = readstat_variable_get_name(variable); worksheet_write_string(mod_ctx->worksheet, 0, index, name, mod_ctx->label_fmt); worksheet_set_column(mod_ctx->worksheet, index, index, 2 * LXW_DEF_COL_WIDTH, NULL); return 0; } static int handle_value(int obs_index, readstat_variable_t *variable, readstat_value_t value, void *ctx) { mod_xlsx_ctx_t *mod_ctx = (mod_xlsx_ctx_t *)ctx; lxw_format *value_fmt = readstat_value_is_defined_missing(value, variable) ? 
/*
 * Report whether `filename` ends with `ending`.
 *
 * Returns 1 only when `filename` is strictly longer than `ending` and its
 * final strlen(ending) bytes equal `ending`; returns 0 otherwise. A file
 * name that consists of nothing but the ending therefore does not match.
 */
int rs_ends_with(const char *filename, const char *ending) {
    const size_t name_len = strlen(filename);
    const size_t suffix_len = strlen(ending);

    /* The name must have at least one character in front of the suffix. */
    if (name_len <= suffix_len) {
        return 0;
    }

    const char *tail = filename + (name_len - suffix_len);
    return memcmp(tail, ending, suffix_len) == 0;
}
/*
 * Fuzz target: SAS RLE round trip.
 *
 * Compresses the input, checks the produced size against the size predicted
 * by sas_rle_compressed_len(), decompresses again and checks that the result
 * is byte-identical to the input. Any mismatch traps so the fuzzer records
 * it as a crash.
 *
 * Allocation failures are treated as "input not testable" rather than being
 * dereferenced, and sizes are cast to match the %ld conversions used in the
 * diagnostics.
 */
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
    ssize_t compressed_len = sas_rle_compressed_len(Data, Size);

    if (compressed_len <= 0 || Size == 0)
        return 0;

    uint8_t *compressed = malloc(compressed_len);
    uint8_t *decompressed = malloc(Size);
    if (compressed == NULL || decompressed == NULL) {
        /* Out of memory is not a finding; free(NULL) is a no-op. */
        free(compressed);
        free(decompressed);
        return 0;
    }

    ssize_t actual_len = sas_rle_compress(compressed, compressed_len, Data, Size);
    if (actual_len != compressed_len) {
        printf("Unexpected compressed size (Expected: %ld Got: %ld)\n",
                (long)compressed_len, (long)actual_len);
        __builtin_trap();
    }

    actual_len = sas_rle_decompress(decompressed, Size, compressed, compressed_len);
    /* Check the sign explicitly before comparing against the unsigned size. */
    if (actual_len < 0 || (size_t)actual_len != Size) {
        printf("Unexpected decompressed size (Expected: %ld Got: %ld)\n",
                (long)Size, (long)actual_len);
        __builtin_trap();
    }

    if (memcmp(Data, decompressed, Size) != 0) {
        printf("Decompressed data doesn't match original\n");
        __builtin_trap();
    }

    free(compressed);
    free(decompressed);

    return 0;
}
char *note, void *ctx) { return READSTAT_HANDLER_OK; } static int handle_fweight(readstat_variable_t *variable, void *ctx) { return READSTAT_HANDLER_OK; } static int handle_variable(int index, readstat_variable_t *variable, const char *val_labels, void *ctx) { return READSTAT_HANDLER_OK; } static int handle_value(int obs_index, readstat_variable_t *variable, readstat_value_t value, void *ctx) { return READSTAT_HANDLER_OK; } static int handle_value_label(const char *val_labels, readstat_value_t value, const char *label, void *ctx) { return READSTAT_HANDLER_OK; } readstat_parser_t *fuzzer_parser_init(const uint8_t *Data, size_t Size) { readstat_parser_t *parser = readstat_parser_init(); readstat_set_open_handler(parser, rt_open_handler); readstat_set_close_handler(parser, rt_close_handler); readstat_set_seek_handler(parser, rt_seek_handler); readstat_set_read_handler(parser, rt_read_handler); readstat_set_update_handler(parser, rt_update_handler); readstat_set_metadata_handler(parser, &handle_metadata); readstat_set_note_handler(parser, &handle_note); readstat_set_variable_handler(parser, &handle_variable); readstat_set_fweight_handler(parser, &handle_fweight); readstat_set_value_handler(parser, &handle_value); readstat_set_value_label_handler(parser, &handle_value_label); return parser; } ReadStat-1.1.7/src/fuzz/fuzz_format.h000066400000000000000000000001121410722155500175270ustar00rootroot00000000000000 readstat_parser_t *fuzzer_parser_init(const uint8_t *Data, size_t Size); ReadStat-1.1.7/src/fuzz/fuzz_format_dta.c000066400000000000000000000011031410722155500203530ustar00rootroot00000000000000#include #include "../readstat.h" #include "../test/test_types.h" #include "../test/test_buffer.h" #include "../test/test_buffer_io.h" #include "fuzz_format.h" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { rt_buffer_t buffer = { .bytes = (char *)Data, .size = Size, .used = Size }; rt_buffer_ctx_t buffer_ctx = { .buffer = &buffer }; readstat_parser_t *parser = 
fuzzer_parser_init(Data, Size); readstat_set_io_ctx(parser, &buffer_ctx); readstat_parse_dta(parser, NULL, NULL); readstat_parser_free(parser); return 0; } ReadStat-1.1.7/src/fuzz/fuzz_format_por.c000066400000000000000000000011031410722155500204030ustar00rootroot00000000000000#include #include "../readstat.h" #include "../test/test_types.h" #include "../test/test_buffer.h" #include "../test/test_buffer_io.h" #include "fuzz_format.h" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { rt_buffer_t buffer = { .bytes = (char *)Data, .size = Size, .used = Size }; rt_buffer_ctx_t buffer_ctx = { .buffer = &buffer }; readstat_parser_t *parser = fuzzer_parser_init(Data, Size); readstat_set_io_ctx(parser, &buffer_ctx); readstat_parse_por(parser, NULL, NULL); readstat_parser_free(parser); return 0; } ReadStat-1.1.7/src/fuzz/fuzz_format_sas7bcat.c000066400000000000000000000011101410722155500213100ustar00rootroot00000000000000#include #include "../readstat.h" #include "../test/test_types.h" #include "../test/test_buffer.h" #include "../test/test_buffer_io.h" #include "fuzz_format.h" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { rt_buffer_t buffer = { .bytes = (char *)Data, .size = Size, .used = Size }; rt_buffer_ctx_t buffer_ctx = { .buffer = &buffer }; readstat_parser_t *parser = fuzzer_parser_init(Data, Size); readstat_set_io_ctx(parser, &buffer_ctx); readstat_parse_sas7bcat(parser, NULL, NULL); readstat_parser_free(parser); return 0; } ReadStat-1.1.7/src/fuzz/fuzz_format_sas7bdat.c000066400000000000000000000011101410722155500213110ustar00rootroot00000000000000#include #include "../readstat.h" #include "../test/test_types.h" #include "../test/test_buffer.h" #include "../test/test_buffer_io.h" #include "fuzz_format.h" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { rt_buffer_t buffer = { .bytes = (char *)Data, .size = Size, .used = Size }; rt_buffer_ctx_t buffer_ctx = { .buffer = &buffer }; readstat_parser_t *parser = 
fuzzer_parser_init(Data, Size); readstat_set_io_ctx(parser, &buffer_ctx); readstat_parse_sas7bdat(parser, NULL, NULL); readstat_parser_free(parser); return 0; } ReadStat-1.1.7/src/fuzz/fuzz_format_sas_commands.c000066400000000000000000000012671410722155500222650ustar00rootroot00000000000000#include #include #include "../readstat.h" #include "../test/test_types.h" #include "../test/test_buffer.h" #include "../test/test_buffer_io.h" #include "fuzz_format.h" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { rt_buffer_t buffer = { .bytes = (char *)Data, .size = Size, .used = Size }; rt_buffer_ctx_t buffer_ctx = { .buffer = &buffer }; readstat_parser_t *parser = fuzzer_parser_init(Data, Size); readstat_set_io_ctx(parser, &buffer_ctx); readstat_schema_t *schema = readstat_parse_sas_commands(parser, NULL, NULL, NULL); if (schema) readstat_schema_free(schema); readstat_parser_free(parser); return 0; } ReadStat-1.1.7/src/fuzz/fuzz_format_sav.c000066400000000000000000000011031410722155500203740ustar00rootroot00000000000000#include #include "../readstat.h" #include "../test/test_types.h" #include "../test/test_buffer.h" #include "../test/test_buffer_io.h" #include "fuzz_format.h" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { rt_buffer_t buffer = { .bytes = (char *)Data, .size = Size, .used = Size }; rt_buffer_ctx_t buffer_ctx = { .buffer = &buffer }; readstat_parser_t *parser = fuzzer_parser_init(Data, Size); readstat_set_io_ctx(parser, &buffer_ctx); readstat_parse_sav(parser, NULL, NULL); readstat_parser_free(parser); return 0; } ReadStat-1.1.7/src/fuzz/fuzz_format_spss_commands.c000066400000000000000000000012701410722155500224610ustar00rootroot00000000000000#include #include #include "../readstat.h" #include "../test/test_types.h" #include "../test/test_buffer.h" #include "../test/test_buffer_io.h" #include "fuzz_format.h" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { rt_buffer_t buffer = { .bytes = (char *)Data, .size = Size, 
.used = Size }; rt_buffer_ctx_t buffer_ctx = { .buffer = &buffer }; readstat_parser_t *parser = fuzzer_parser_init(Data, Size); readstat_set_io_ctx(parser, &buffer_ctx); readstat_schema_t *schema = readstat_parse_spss_commands(parser, NULL, NULL, NULL); if (schema) readstat_schema_free(schema); readstat_parser_free(parser); return 0; } ReadStat-1.1.7/src/fuzz/fuzz_format_stata_dictionary.c000066400000000000000000000012731410722155500231540ustar00rootroot00000000000000#include #include #include "../readstat.h" #include "../test/test_types.h" #include "../test/test_buffer.h" #include "../test/test_buffer_io.h" #include "fuzz_format.h" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { rt_buffer_t buffer = { .bytes = (char *)Data, .size = Size, .used = Size }; rt_buffer_ctx_t buffer_ctx = { .buffer = &buffer }; readstat_parser_t *parser = fuzzer_parser_init(Data, Size); readstat_set_io_ctx(parser, &buffer_ctx); readstat_schema_t *schema = readstat_parse_stata_dictionary(parser, NULL, NULL, NULL); if (schema) readstat_schema_free(schema); readstat_parser_free(parser); return 0; } ReadStat-1.1.7/src/fuzz/fuzz_format_xport.c000066400000000000000000000011051410722155500207610ustar00rootroot00000000000000#include #include "../readstat.h" #include "../test/test_types.h" #include "../test/test_buffer.h" #include "../test/test_buffer_io.h" #include "fuzz_format.h" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { rt_buffer_t buffer = { .bytes = (char *)Data, .size = Size, .used = Size }; rt_buffer_ctx_t buffer_ctx = { .buffer = &buffer }; readstat_parser_t *parser = fuzzer_parser_init(Data, Size); readstat_set_io_ctx(parser, &buffer_ctx); readstat_parse_xport(parser, NULL, NULL); readstat_parser_free(parser); return 0; } ReadStat-1.1.7/src/fuzz/fuzz_grammar_dta_timestamp.c000066400000000000000000000004471410722155500226060ustar00rootroot00000000000000#include #include #include "../readstat.h" #include "../stata/readstat_dta_parse_timestamp.h" int 
/*
 * Fuzz target for the SAV date grammar: feeds the raw fuzzer bytes to
 * sav_parse_date(). The parsed fields land in a local struct tm that is
 * discarded; no error handler or user context is supplied.
 */
int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
    struct tm timestamp;
    /* The third argument is the address of the output struct (&timestamp). */
    sav_parse_date((const char *)Data, Size, &timestamp, NULL, NULL);
    return 0;
}
#include "../test/test_error.h" #include "../test/test_readstat.h" #include "../test/test_read.h" #include "../test/test_write.h" #include "../test/test_list.h" #if (defined(_WIN32) || defined(__WIN32__)) #define mkdir(A, B) mkdir(A) #endif #define CORPUS_DIR "fuzz/corpus" static void dump_buffer(rt_buffer_t *buffer, long format, int test_case) { char filename[128]; snprintf(filename, sizeof(filename), CORPUS_DIR "/%s/test-case-%03d", file_extension(format), test_case); FILE *file = fopen(filename, "wb"); if (!file) { perror(filename); } ssize_t bytes_written = fwrite(buffer->bytes, 1, buffer->used, file); if (bytes_written < 0) { perror(filename); } fclose(file); } int main(int argc, char *argv[]) { rt_buffer_t *buffer = buffer_init(); int g, t, f; int file_count = 0, test_count = 0; if (mkdir(CORPUS_DIR, 0755) == -1 && errno != EEXIST) perror(CORPUS_DIR); for (f=RT_FORMAT_DTA_104; ftest_formats & f)) continue; if (file->write_error != READSTAT_OK) continue; buffer_reset(buffer); error = write_file_to_buffer(file, buffer, f); if (error != READSTAT_OK) { printf("Error writing to file \"%s\": %s\n", file->label, readstat_error_message(error)); exit(1); } dump_buffer(buffer, f, test_count); file_count++; } test_count++; } } buffer_free(buffer); printf("Generated %d corpus files (%d test cases)\n", file_count, test_count); return 0; } ReadStat-1.1.7/src/readstat.h000066400000000000000000000702271410722155500160100ustar00rootroot00000000000000// // readstat.h - API and internal data structures for ReadStat // // Copyright Evan Miller and ReadStat authors (see LICENSE) // #ifndef INCLUDE_READSTAT_H #define INCLUDE_READSTAT_H #ifdef __cplusplus extern "C" { #endif #include #include #include #include #include enum { READSTAT_HANDLER_OK, READSTAT_HANDLER_ABORT, READSTAT_HANDLER_SKIP_VARIABLE }; typedef enum readstat_type_e { READSTAT_TYPE_STRING, READSTAT_TYPE_INT8, READSTAT_TYPE_INT16, READSTAT_TYPE_INT32, READSTAT_TYPE_FLOAT, READSTAT_TYPE_DOUBLE, 
/*
 * Compression mode of a data file. Reported through
 * readstat_get_compression() and selected on a writer with
 * readstat_writer_set_compression().
 */
typedef enum readstat_compress_e {
    READSTAT_COMPRESS_NONE,    /* first enumerator (value 0) */
    READSTAT_COMPRESS_ROWS,    /* the bundled writer module selects this for .sav output */
    READSTAT_COMPRESS_BINARY   /* the bundled writer module selects this for .zsav output */
} readstat_compress_t;
/*
 * File-level metadata handed to the metadata handler. Handlers should read
 * it through the readstat_get_*() accessors declared below rather than
 * touching the fields directly.
 */
typedef struct readstat_metadata_s {
    int64_t row_count;                 /* see readstat_get_row_count(); that accessor returns -1 when the count is unknown */
    int64_t var_count;                 /* see readstat_get_var_count() */
    time_t creation_time;              /* see readstat_get_creation_time() */
    time_t modified_time;              /* see readstat_get_modified_time() */
    int64_t file_format_version;       /* see readstat_get_file_format_version() */
    readstat_compress_t compression;   /* see readstat_get_compression() */
    readstat_endian_t endianness;      /* see readstat_get_endianness() */
    const char *table_name;            /* see readstat_get_table_name() */
    const char *file_label;            /* see readstat_get_file_label() */
    const char *file_encoding;         /* see readstat_get_file_encoding() */
    unsigned int is64bit:1;            /* see readstat_get_file_format_is_64bit() */
} readstat_metadata_t;
/*
 * Description of one variable (column). Listed under "Internal data
 * structures"; handlers are expected to use the readstat_variable_get_*()
 * accessors instead of reading fields directly.
 */
typedef struct readstat_variable_s {
    readstat_type_t type;                /* storage type; see readstat_variable_get_type() */
    int index;                           /* see readstat_variable_get_index() */
    char name[300];                      /* see readstat_variable_get_name() */
    char format[256];                    /* see readstat_variable_get_format() */
    char label[1024];                    /* see readstat_variable_get_label() */
    readstat_label_set_t *label_set;     /* attached value-label set, if any */
    off_t offset;                        /* NOTE(review): presumably the byte offset within a row - confirm */
    size_t storage_width;                /* see readstat_variable_get_storage_width() */
    size_t user_width;
    readstat_missingness_t missingness;  /* missing ranges; see readstat_variable_get_missing_ranges_count() */
    readstat_measure_t measure;          /* see readstat_variable_get_measure() */
    readstat_alignment_t alignment;      /* see readstat_variable_get_alignment() */
    int display_width;                   /* see readstat_variable_get_display_width() */
    int decimals;
    int skip;                            /* NOTE(review): presumably set when the variable handler returns
                                            READSTAT_HANDLER_SKIP_VARIABLE - confirm */
    int index_after_skipping;            /* see readstat_variable_get_index_after_skipping() */
} readstat_variable_t;
Occurs only in SPSS files. access the rules via: * * readstat_variable_get_missing_ranges_count() * readstat_variable_get_missing_range_lo() * readstat_variable_get_missing_range_hi() * * Note that "ranges" include individual values where lo == hi. * * readstat_value_is_missing() is equivalent to: * * (readstat_value_is_system_missing() * || readstat_value_is_tagged_missing() * || readstat_value_is_defined_missing()) */ int readstat_value_is_missing(readstat_value_t value, readstat_variable_t *variable); int readstat_value_is_system_missing(readstat_value_t value); int readstat_value_is_tagged_missing(readstat_value_t value); int readstat_value_is_defined_missing(readstat_value_t value, readstat_variable_t *variable); char readstat_value_tag(readstat_value_t value); char readstat_int8_value(readstat_value_t value); int16_t readstat_int16_value(readstat_value_t value); int32_t readstat_int32_value(readstat_value_t value); float readstat_float_value(readstat_value_t value); double readstat_double_value(readstat_value_t value); const char *readstat_string_value(readstat_value_t value); readstat_type_class_t readstat_type_class(readstat_type_t type); /* Accessor methods for use inside variable handlers */ int readstat_variable_get_index(const readstat_variable_t *variable); int readstat_variable_get_index_after_skipping(const readstat_variable_t *variable); const char *readstat_variable_get_name(const readstat_variable_t *variable); const char *readstat_variable_get_label(const readstat_variable_t *variable); const char *readstat_variable_get_format(const readstat_variable_t *variable); readstat_type_t readstat_variable_get_type(const readstat_variable_t *variable); readstat_type_class_t readstat_variable_get_type_class(const readstat_variable_t *variable); size_t readstat_variable_get_storage_width(const readstat_variable_t *variable); int readstat_variable_get_display_width(const readstat_variable_t *variable); readstat_measure_t readstat_variable_get_measure(const 
readstat_variable_t *variable); readstat_alignment_t readstat_variable_get_alignment(const readstat_variable_t *variable); int readstat_variable_get_missing_ranges_count(const readstat_variable_t *variable); readstat_value_t readstat_variable_get_missing_range_lo(const readstat_variable_t *variable, int i); readstat_value_t readstat_variable_get_missing_range_hi(const readstat_variable_t *variable, int i); /* Callbacks should return 0 (aka READSTAT_HANDLER_OK) on success and 1 (aka READSTAT_HANDLER_ABORT) to abort. */ /* If the variable handler returns READSTAT_HANDLER_SKIP_VARIABLE, the value handler will not be called on * the associated variable. (Note that subsequent variables will retain their original index values.) */ typedef int (*readstat_metadata_handler)(readstat_metadata_t *metadata, void *ctx); typedef int (*readstat_note_handler)(int note_index, const char *note, void *ctx); typedef int (*readstat_variable_handler)(int index, readstat_variable_t *variable, const char *val_labels, void *ctx); typedef int (*readstat_fweight_handler)(readstat_variable_t *variable, void *ctx); typedef int (*readstat_value_handler)(int obs_index, readstat_variable_t *variable, readstat_value_t value, void *ctx); typedef int (*readstat_value_label_handler)(const char *val_labels, readstat_value_t value, const char *label, void *ctx); typedef void (*readstat_error_handler)(const char *error_message, void *ctx); typedef int (*readstat_progress_handler)(double progress, void *ctx); #if defined(_MSC_VER) #include typedef SSIZE_T ssize_t; typedef __int64 readstat_off_t; #elif defined _WIN32 || defined __CYGWIN__ typedef _off64_t readstat_off_t; #elif defined _AIX typedef off64_t readstat_off_t; #else typedef off_t readstat_off_t; #endif typedef enum readstat_io_flags_e { READSTAT_SEEK_SET, READSTAT_SEEK_CUR, READSTAT_SEEK_END } readstat_io_flags_t; typedef int (*readstat_open_handler)(const char *path, void *io_ctx); typedef int (*readstat_close_handler)(void *io_ctx); typedef 
readstat_off_t (*readstat_seek_handler)(readstat_off_t offset, readstat_io_flags_t whence, void *io_ctx); typedef ssize_t (*readstat_read_handler)(void *buf, size_t nbyte, void *io_ctx); typedef readstat_error_t (*readstat_update_handler)(long file_size, readstat_progress_handler progress_handler, void *user_ctx, void *io_ctx); typedef struct readstat_io_s { readstat_open_handler open; readstat_close_handler close; readstat_seek_handler seek; readstat_read_handler read; readstat_update_handler update; void *io_ctx; int io_ctx_needs_free; } readstat_io_t; typedef struct readstat_callbacks_s { readstat_metadata_handler metadata; readstat_note_handler note; readstat_variable_handler variable; readstat_fweight_handler fweight; readstat_value_handler value; readstat_value_label_handler value_label; readstat_error_handler error; readstat_progress_handler progress; } readstat_callbacks_t; typedef struct readstat_parser_s { readstat_callbacks_t handlers; readstat_io_t *io; const char *input_encoding; const char *output_encoding; long row_limit; long row_offset; } readstat_parser_t; readstat_parser_t *readstat_parser_init(void); void readstat_parser_free(readstat_parser_t *parser); void readstat_io_free(readstat_io_t *io); readstat_error_t readstat_set_metadata_handler(readstat_parser_t *parser, readstat_metadata_handler metadata_handler); readstat_error_t readstat_set_note_handler(readstat_parser_t *parser, readstat_note_handler note_handler); readstat_error_t readstat_set_variable_handler(readstat_parser_t *parser, readstat_variable_handler variable_handler); readstat_error_t readstat_set_fweight_handler(readstat_parser_t *parser, readstat_fweight_handler fweight_handler); readstat_error_t readstat_set_value_handler(readstat_parser_t *parser, readstat_value_handler value_handler); readstat_error_t readstat_set_value_label_handler(readstat_parser_t *parser, readstat_value_label_handler value_label_handler); readstat_error_t readstat_set_error_handler(readstat_parser_t 
*parser, readstat_error_handler error_handler); readstat_error_t readstat_set_progress_handler(readstat_parser_t *parser, readstat_progress_handler progress_handler); readstat_error_t readstat_set_open_handler(readstat_parser_t *parser, readstat_open_handler open_handler); readstat_error_t readstat_set_close_handler(readstat_parser_t *parser, readstat_close_handler close_handler); readstat_error_t readstat_set_seek_handler(readstat_parser_t *parser, readstat_seek_handler seek_handler); readstat_error_t readstat_set_read_handler(readstat_parser_t *parser, readstat_read_handler read_handler); readstat_error_t readstat_set_update_handler(readstat_parser_t *parser, readstat_update_handler update_handler); readstat_error_t readstat_set_io_ctx(readstat_parser_t *parser, void *io_ctx); // Usually inferred from the file, but sometimes a manual override is desirable. // In particular, pre-14 Stata uses the system encoding, which is usually Win 1252 // but could be anything. `encoding' should be an iconv-compatible name. readstat_error_t readstat_set_file_character_encoding(readstat_parser_t *parser, const char *encoding); // Defaults to UTF-8. Pass in NULL to disable transliteration. 
readstat_error_t readstat_set_handler_character_encoding(readstat_parser_t *parser, const char *encoding); readstat_error_t readstat_set_row_limit(readstat_parser_t *parser, long row_limit); readstat_error_t readstat_set_row_offset(readstat_parser_t *parser, long row_offset); /* Parse binary / portable files */ readstat_error_t readstat_parse_dta(readstat_parser_t *parser, const char *path, void *user_ctx); readstat_error_t readstat_parse_sav(readstat_parser_t *parser, const char *path, void *user_ctx); readstat_error_t readstat_parse_por(readstat_parser_t *parser, const char *path, void *user_ctx); readstat_error_t readstat_parse_sas7bdat(readstat_parser_t *parser, const char *path, void *user_ctx); readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char *path, void *user_ctx); readstat_error_t readstat_parse_xport(readstat_parser_t *parser, const char *path, void *user_ctx); /* Parse a schema file... */ readstat_schema_t *readstat_parse_sas_commands(readstat_parser_t *parser, const char *filepath, void *user_ctx, readstat_error_t *outError); readstat_schema_t *readstat_parse_spss_commands(readstat_parser_t *parser, const char *filepath, void *user_ctx, readstat_error_t *outError); readstat_schema_t *readstat_parse_stata_dictionary(readstat_parser_t *parser, const char *filepath, void *user_ctx, readstat_error_t *outError); /* ... then pass the schema to the plain-text parser ... */ readstat_error_t readstat_parse_txt(readstat_parser_t *parser, const char *filename, readstat_schema_t *schema, void *user_ctx); /* ... 
and free the schema structure */ void readstat_schema_free(readstat_schema_t *schema); /* Internal module callbacks */ typedef struct readstat_string_ref_s { int64_t first_v; int64_t first_o; size_t len; char data[1]; // Flexible array; using [1] for C++98 compatibility } readstat_string_ref_t; typedef size_t (*readstat_variable_width_callback)(readstat_type_t type, size_t user_width); typedef readstat_error_t (*readstat_variable_ok_callback)(const readstat_variable_t *variable); typedef readstat_error_t (*readstat_write_int8_callback)(void *row_data, const readstat_variable_t *variable, int8_t value); typedef readstat_error_t (*readstat_write_int16_callback)(void *row_data, const readstat_variable_t *variable, int16_t value); typedef readstat_error_t (*readstat_write_int32_callback)(void *row_data, const readstat_variable_t *variable, int32_t value); typedef readstat_error_t (*readstat_write_float_callback)(void *row_data, const readstat_variable_t *variable, float value); typedef readstat_error_t (*readstat_write_double_callback)(void *row_data, const readstat_variable_t *variable, double value); typedef readstat_error_t (*readstat_write_string_callback)(void *row_data, const readstat_variable_t *variable, const char *value); typedef readstat_error_t (*readstat_write_string_ref_callback)(void *row_data, const readstat_variable_t *variable, readstat_string_ref_t *ref); typedef readstat_error_t (*readstat_write_missing_callback)(void *row_data, const readstat_variable_t *variable); typedef readstat_error_t (*readstat_write_tagged_callback)(void *row_data, const readstat_variable_t *variable, char tag); typedef readstat_error_t (*readstat_begin_data_callback)(void *writer); typedef readstat_error_t (*readstat_write_row_callback)(void *writer, void *row_data, size_t row_len); typedef readstat_error_t (*readstat_end_data_callback)(void *writer); typedef void (*readstat_module_ctx_free_callback)(void *module_ctx); typedef readstat_error_t 
(*readstat_metadata_ok_callback)(void *writer); typedef struct readstat_writer_callbacks_s { readstat_variable_width_callback variable_width; readstat_variable_ok_callback variable_ok; readstat_write_int8_callback write_int8; readstat_write_int16_callback write_int16; readstat_write_int32_callback write_int32; readstat_write_float_callback write_float; readstat_write_double_callback write_double; readstat_write_string_callback write_string; readstat_write_string_ref_callback write_string_ref; readstat_write_missing_callback write_missing_string; readstat_write_missing_callback write_missing_number; readstat_write_tagged_callback write_missing_tagged; readstat_begin_data_callback begin_data; readstat_write_row_callback write_row; readstat_end_data_callback end_data; readstat_module_ctx_free_callback module_ctx_free; readstat_metadata_ok_callback metadata_ok; } readstat_writer_callbacks_t; /* You'll need to define one of these to get going. Should return # bytes written, * or -1 on error, a la write(2) */ typedef ssize_t (*readstat_data_writer)(const void *data, size_t len, void *ctx); typedef struct readstat_writer_s { readstat_data_writer data_writer; size_t bytes_written; long version; int is_64bit; // SAS only readstat_compress_t compression; time_t timestamp; readstat_variable_t **variables; long variables_count; long variables_capacity; readstat_label_set_t **label_sets; long label_sets_count; long label_sets_capacity; char **notes; long notes_count; long notes_capacity; readstat_string_ref_t **string_refs; long string_refs_count; long string_refs_capacity; unsigned char *row; size_t row_len; int row_count; int current_row; char file_label[257]; char table_name[33]; const readstat_variable_t *fweight_variable; readstat_writer_callbacks_t callbacks; readstat_error_handler error_handler; void *module_ctx; void *user_ctx; int initialized; } readstat_writer_t; /* Writer API */ // First call this... 
readstat_writer_t *readstat_writer_init(void); // Then specify a function that will handle the output bytes... readstat_error_t readstat_set_data_writer(readstat_writer_t *writer, readstat_data_writer data_writer); // Next define your value labels, if any. Create as many named sets as you'd like. readstat_label_set_t *readstat_add_label_set(readstat_writer_t *writer, readstat_type_t type, const char *name); void readstat_label_double_value(readstat_label_set_t *label_set, double value, const char *label); void readstat_label_int32_value(readstat_label_set_t *label_set, int32_t value, const char *label); void readstat_label_string_value(readstat_label_set_t *label_set, const char *value, const char *label); void readstat_label_tagged_value(readstat_label_set_t *label_set, char tag, const char *label); // Now define your variables. Note that `storage_width' is used for: // * READSTAT_TYPE_STRING variables in all formats // * READSTAT_TYPE_DOUBLE variables, but only in the SAS XPORT format (valid values 3-8, defaults to 8) readstat_variable_t *readstat_add_variable(readstat_writer_t *writer, const char *name, readstat_type_t type, size_t storage_width); void readstat_variable_set_label(readstat_variable_t *variable, const char *label); void readstat_variable_set_format(readstat_variable_t *variable, const char *format); void readstat_variable_set_label_set(readstat_variable_t *variable, readstat_label_set_t *label_set); void readstat_variable_set_measure(readstat_variable_t *variable, readstat_measure_t measure); void readstat_variable_set_alignment(readstat_variable_t *variable, readstat_alignment_t alignment); void readstat_variable_set_display_width(readstat_variable_t *variable, int display_width); readstat_error_t readstat_variable_add_missing_double_value(readstat_variable_t *variable, double value); readstat_error_t readstat_variable_add_missing_double_range(readstat_variable_t *variable, double lo, double hi); readstat_error_t 
readstat_variable_add_missing_string_value(readstat_variable_t *variable, const char *value); readstat_error_t readstat_variable_add_missing_string_range(readstat_variable_t *variable, const char *lo, const char *hi); readstat_variable_t *readstat_get_variable(readstat_writer_t *writer, int index); // "Notes" appear in the file metadata. In SPSS these are stored as // lines in the Document Record; in Stata these are stored using // the "notes" feature. // // Note that the line length in SPSS is 80 characters; ReadStat will // produce a write error if a note is longer than this limit. void readstat_add_note(readstat_writer_t *writer, const char *note); // String refs are used for creating a READSTAT_TYPE_STRING_REF column, // which is only supported in Stata. String references can be shared // across columns, and inserted with readstat_insert_string_ref(). readstat_string_ref_t *readstat_add_string_ref(readstat_writer_t *writer, const char *string); readstat_string_ref_t *readstat_get_string_ref(readstat_writer_t *writer, int index); // Optional metadata readstat_error_t readstat_writer_set_file_label(readstat_writer_t *writer, const char *file_label); readstat_error_t readstat_writer_set_file_timestamp(readstat_writer_t *writer, time_t timestamp); readstat_error_t readstat_writer_set_fweight_variable(readstat_writer_t *writer, const readstat_variable_t *variable); readstat_error_t readstat_writer_set_file_format_version(readstat_writer_t *writer, uint8_t file_format_version); // e.g. 104-119 for DTA; 5 or 8 for SAS Transport. 
// SAV files support 2 or 3, where 3 is equivalent to setting // readstat_writer_set_compression(READSTAT_COMPRESS_BINARY) readstat_error_t readstat_writer_set_table_name(readstat_writer_t *writer, const char *table_name); // Only used in XPORT files at the moment (defaults to DATASET) readstat_error_t readstat_writer_set_file_format_is_64bit(readstat_writer_t *writer, int is_64bit); // applies only to SAS files; defaults to 1=true readstat_error_t readstat_writer_set_compression(readstat_writer_t *writer, readstat_compress_t compression); // READSTAT_COMPRESS_BINARY is supported only with SAV files (i.e. ZSAV files) // READSTAT_COMPRESS_ROWS is supported only with sas7bdat and SAV files // Optional error handler readstat_error_t readstat_writer_set_error_handler(readstat_writer_t *writer, readstat_error_handler error_handler); // Call one of these at any time before the first invocation of readstat_begin_row readstat_error_t readstat_begin_writing_dta(readstat_writer_t *writer, void *user_ctx, long row_count); readstat_error_t readstat_begin_writing_por(readstat_writer_t *writer, void *user_ctx, long row_count); readstat_error_t readstat_begin_writing_sas7bcat(readstat_writer_t *writer, void *user_ctx); readstat_error_t readstat_begin_writing_sas7bdat(readstat_writer_t *writer, void *user_ctx, long row_count); readstat_error_t readstat_begin_writing_sav(readstat_writer_t *writer, void *user_ctx, long row_count); readstat_error_t readstat_begin_writing_xport(readstat_writer_t *writer, void *user_ctx, long row_count); // Optional, file-specific validation routines, to be called AFTER readstat_begin_writing_XXX readstat_error_t readstat_validate_metadata(readstat_writer_t *writer); readstat_error_t readstat_validate_variable(readstat_writer_t *writer, const readstat_variable_t *variable); // Start a row of data (that is, a case or observation) readstat_error_t readstat_begin_row(readstat_writer_t *writer); // Then call one of these for each variable readstat_error_t 
readstat_insert_int8_value(readstat_writer_t *writer, const readstat_variable_t *variable, int8_t value); readstat_error_t readstat_insert_int16_value(readstat_writer_t *writer, const readstat_variable_t *variable, int16_t value); readstat_error_t readstat_insert_int32_value(readstat_writer_t *writer, const readstat_variable_t *variable, int32_t value); readstat_error_t readstat_insert_float_value(readstat_writer_t *writer, const readstat_variable_t *variable, float value); readstat_error_t readstat_insert_double_value(readstat_writer_t *writer, const readstat_variable_t *variable, double value); readstat_error_t readstat_insert_string_value(readstat_writer_t *writer, const readstat_variable_t *variable, const char *value); readstat_error_t readstat_insert_string_ref(readstat_writer_t *writer, const readstat_variable_t *variable, readstat_string_ref_t *ref); readstat_error_t readstat_insert_missing_value(readstat_writer_t *writer, const readstat_variable_t *variable); readstat_error_t readstat_insert_tagged_missing_value(readstat_writer_t *writer, const readstat_variable_t *variable, char tag); // Finally, close out the row readstat_error_t readstat_end_row(readstat_writer_t *writer); // Once you've written all the rows, clean up after yourself readstat_error_t readstat_end_writing(readstat_writer_t *writer); void readstat_writer_free(readstat_writer_t *writer); #ifdef __cplusplus } #endif #endif ReadStat-1.1.7/src/readstat_bits.c000066400000000000000000000031651410722155500170210ustar00rootroot00000000000000// // readstat_bits.c - Bit-twiddling utility functions // #include #include #include #include "readstat_bits.h" int machine_is_little_endian() { int test_byte_order = 1; return ((char *)&test_byte_order)[0]; } char ones_to_twos_complement1(char num) { return num < 0 ? num+1 : num; } int16_t ones_to_twos_complement2(int16_t num) { return num < 0 ? num+1 : num; } int32_t ones_to_twos_complement4(int32_t num) { return num < 0 ? 
num+1 : num; } char twos_to_ones_complement1(char num) { return num < 0 ? num-1 : num; } int16_t twos_to_ones_complement2(int16_t num) { return num < 0 ? num-1 : num; } int32_t twos_to_ones_complement4(int32_t num) { return num < 0 ? num-1 : num; } uint16_t byteswap2(uint16_t num) { return ((num & 0xFF00) >> 8) | ((num & 0x00FF) << 8); } uint32_t byteswap4(uint32_t num) { num = ((num & 0xFFFF0000) >> 16) | ((num & 0x0000FFFF) << 16); return ((num & 0xFF00FF00) >> 8) | ((num & 0x00FF00FF) << 8); } uint64_t byteswap8(uint64_t num) { num = ((num & 0xFFFFFFFF00000000) >> 32) | ((num & 0x00000000FFFFFFFF) << 32); num = ((num & 0xFFFF0000FFFF0000) >> 16) | ((num & 0x0000FFFF0000FFFF) << 16); return ((num & 0xFF00FF00FF00FF00) >> 8) | ((num & 0x00FF00FF00FF00FF) << 8); } float byteswap_float(float num) { uint32_t answer = 0; memcpy(&answer, &num, 4); answer = byteswap4(answer); memcpy(&num, &answer, 4); return num; } double byteswap_double(double num) { uint64_t answer = 0; memcpy(&answer, &num, 8); answer = byteswap8(answer); memcpy(&num, &answer, 8); return num; } ReadStat-1.1.7/src/readstat_bits.h000066400000000000000000000010601410722155500170160ustar00rootroot00000000000000// // readstat_bit.h - Bit-twiddling utility functions // #define READSTAT_MACHINE_IS_TWOS_COMPLEMENT ((char)0xFF == (char)-1) #undef READSTAT_MACHINE_IS_TWOS_COMPLEMENT #define READSTAT_MACHINE_IS_TWOS_COMPLEMENT 0 int machine_is_little_endian(); char ones_to_twos_complement1(char num); int16_t ones_to_twos_complement2(int16_t num); int32_t ones_to_twos_complement4(int32_t num); uint16_t byteswap2(uint16_t num); uint32_t byteswap4(uint32_t num); uint64_t byteswap8(uint64_t num); float byteswap_float(float num); double byteswap_double(double num); ReadStat-1.1.7/src/readstat_convert.c000066400000000000000000000024561410722155500175420ustar00rootroot00000000000000 #include #include "readstat.h" #include "readstat_iconv.h" #include "readstat_convert.h" readstat_error_t readstat_convert(char *dst, 
size_t dst_len, const char *src, size_t src_len, iconv_t converter) { /* strip off spaces from the input because the programs use ASCII space * padding even with non-ASCII encoding. */ while (src_len && src[src_len-1] == ' ') { src_len--; } if (dst_len == 0) { return READSTAT_ERROR_CONVERT_LONG_STRING; } else if (converter) { size_t dst_left = dst_len - 1; char *dst_end = dst; size_t status = iconv(converter, (readstat_iconv_inbuf_t)&src, &src_len, &dst_end, &dst_left); if (status == (size_t)-1) { if (errno == E2BIG) { return READSTAT_ERROR_CONVERT_LONG_STRING; } else if (errno == EILSEQ) { return READSTAT_ERROR_CONVERT_BAD_STRING; } else if (errno != EINVAL) { /* EINVAL indicates improper truncation; accept it */ return READSTAT_ERROR_CONVERT; } } dst[dst_len - dst_left - 1] = '\0'; } else if (src_len + 1 > dst_len) { return READSTAT_ERROR_CONVERT_LONG_STRING; } else { memcpy(dst, src, src_len); dst[src_len] = '\0'; } return READSTAT_OK; } ReadStat-1.1.7/src/readstat_convert.h000066400000000000000000000001631410722155500175400ustar00rootroot00000000000000 readstat_error_t readstat_convert(char *dst, size_t dst_len, const char *src, size_t src_len, iconv_t converter); ReadStat-1.1.7/src/readstat_error.c000066400000000000000000000121651410722155500172110ustar00rootroot00000000000000 #include "readstat.h" const char *readstat_error_message(readstat_error_t error_code) { if (error_code == READSTAT_OK) return NULL; if (error_code == READSTAT_ERROR_OPEN) return "Unable to open file"; if (error_code == READSTAT_ERROR_READ) return "Unable to read from file"; if (error_code == READSTAT_ERROR_MALLOC) return "Unable to allocate memory"; if (error_code == READSTAT_ERROR_USER_ABORT) return "The parsing was aborted (callback returned non-zero value)"; if (error_code == READSTAT_ERROR_PARSE) return "Invalid file, or file has unsupported features"; if (error_code == READSTAT_ERROR_UNSUPPORTED_COMPRESSION) return "File has unsupported compression scheme"; if (error_code == 
READSTAT_ERROR_UNSUPPORTED_CHARSET) return "File has an unsupported character set"; if (error_code == READSTAT_ERROR_COLUMN_COUNT_MISMATCH) return "File did not contain the expected number of columns"; if (error_code == READSTAT_ERROR_ROW_COUNT_MISMATCH) return "File did not contain the expected number of rows"; if (error_code == READSTAT_ERROR_ROW_WIDTH_MISMATCH) return "A row in the file was not the expected length"; if (error_code == READSTAT_ERROR_BAD_FORMAT_STRING) return "A provided format string could not be understood"; if (error_code == READSTAT_ERROR_VALUE_TYPE_MISMATCH) return "A provided value was incompatible with the variable's declared type"; if (error_code == READSTAT_ERROR_WRITE) return "Unable to write data"; if (error_code == READSTAT_ERROR_WRITER_NOT_INITIALIZED) return "The writer object was not properly initialized (call and check return value of readstat_begin_writing_XXX)"; if (error_code == READSTAT_ERROR_SEEK) return "Unable to seek within file"; if (error_code == READSTAT_ERROR_CONVERT) return "Unable to convert string to the requested encoding"; if (error_code == READSTAT_ERROR_CONVERT_BAD_STRING) return "Unable to convert string to the requested encoding (invalid byte sequence)"; if (error_code == READSTAT_ERROR_CONVERT_SHORT_STRING) return "Unable to convert string to the requested encoding (incomplete byte sequence)"; if (error_code == READSTAT_ERROR_CONVERT_LONG_STRING) return "Unable to convert string to the requested encoding (output buffer too small)"; if (error_code == READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE) return "A provided numeric value was outside the range of representable values in the specified file format"; if (error_code == READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE) return "A provided tag value was outside the range of allowed values in the specified file format"; if (error_code == READSTAT_ERROR_STRING_VALUE_IS_TOO_LONG) return "A provided string value was longer than the available storage size of the specified 
column"; if (error_code == READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED) return "The file format does not supported character tags for missing values"; if (error_code == READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION) return "This version of the file format is not supported"; if (error_code == READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER) return "A provided name begins with an illegal character"; if (error_code == READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER) return "A provided name contains an illegal character"; if (error_code == READSTAT_ERROR_NAME_IS_RESERVED_WORD) return "A provided name is a reserved word"; if (error_code == READSTAT_ERROR_NAME_IS_TOO_LONG) return "A provided name is too long for the file format"; if (error_code == READSTAT_ERROR_NAME_IS_ZERO_LENGTH) return "A provided name is blank or empty"; if (error_code == READSTAT_ERROR_BAD_TIMESTAMP_STRING) return "The file's timestamp string is invalid"; if (error_code == READSTAT_ERROR_BAD_FREQUENCY_WEIGHT) return "The provided variable can't be used as a frequency weight"; if (error_code == READSTAT_ERROR_TOO_MANY_MISSING_VALUE_DEFINITIONS) return "The number of defined missing values exceeds the format limit"; if (error_code == READSTAT_ERROR_NOTE_IS_TOO_LONG) return "The provided note is too long for the file format"; if (error_code == READSTAT_ERROR_STRING_REFS_NOT_SUPPORTED) return "This version of the file format does not support string references"; if (error_code == READSTAT_ERROR_STRING_REF_IS_REQUIRED) return "The provided value was not a valid string reference"; if (error_code == READSTAT_ERROR_ROW_IS_TOO_WIDE_FOR_PAGE) return "A row of data will not fit into the file format"; if (error_code == READSTAT_ERROR_TOO_FEW_COLUMNS) return "One or more columns must be provided"; if (error_code == READSTAT_ERROR_TOO_MANY_COLUMNS) return "Too many columns for this file format version"; if (error_code == READSTAT_ERROR_BAD_TIMESTAMP_VALUE) return "The provided file timestamp is invalid"; return 
"Unknown error"; } ReadStat-1.1.7/src/readstat_iconv.h000066400000000000000000000006171410722155500172020ustar00rootroot00000000000000#include /* ICONV_CONST defined by autotools during configure according * to the current platform. Some people copy-paste the source code, so * provide some fallback logic */ #ifndef ICONV_CONST #define ICONV_CONST #endif typedef ICONV_CONST char ** readstat_iconv_inbuf_t; typedef struct readstat_charset_entry_s { int code; char name[32]; } readstat_charset_entry_t; ReadStat-1.1.7/src/readstat_io_unistd.c000066400000000000000000000070441410722155500200550ustar00rootroot00000000000000 #include #include #include #if defined _WIN32 # include # include #endif #if !defined(_MSC_VER) # include #else #define open _open #define read _read #define close _close #endif #if defined _WIN32 || defined __CYGWIN__ #define UNISTD_OPEN_OPTIONS O_RDONLY | O_BINARY #elif defined _AIX #define UNISTD_OPEN_OPTIONS O_RDONLY | O_LARGEFILE #else #define UNISTD_OPEN_OPTIONS O_RDONLY #endif #if defined _WIN32 #define lseek _lseeki64 #elif defined _AIX #define lseek lseek64 #endif #include "readstat.h" #include "readstat_io_unistd.h" int open_with_unicode(const char *path, int options) { #if defined _WIN32 const int buffer_size = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0); if(buffer_size <= 0) return -1; wchar_t* wpath = malloc((buffer_size + 1) * sizeof(wchar_t)); const int res = MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath, buffer_size); wpath[buffer_size] = 0; if(res <= 0) { free(wpath); return -1; } int fd = _wopen(wpath, options); free(wpath); return fd; #else return open(path, options); #endif } int unistd_open_handler(const char *path, void *io_ctx) { int fd = open_with_unicode(path, UNISTD_OPEN_OPTIONS); ((unistd_io_ctx_t*) io_ctx)->fd = fd; return fd; } int unistd_close_handler(void *io_ctx) { int fd = ((unistd_io_ctx_t*) io_ctx)->fd; if (fd != -1) return close(fd); else return 0; } readstat_off_t unistd_seek_handler(readstat_off_t offset, 
readstat_io_flags_t whence, void *io_ctx) { int flag = 0; switch(whence) { case READSTAT_SEEK_SET: flag = SEEK_SET; break; case READSTAT_SEEK_CUR: flag = SEEK_CUR; break; case READSTAT_SEEK_END: flag = SEEK_END; break; default: return -1; } int fd = ((unistd_io_ctx_t*) io_ctx)->fd; return lseek(fd, offset, flag); } ssize_t unistd_read_handler(void *buf, size_t nbyte, void *io_ctx) { int fd = ((unistd_io_ctx_t*) io_ctx)->fd; ssize_t out = read(fd, buf, nbyte); return out; } readstat_error_t unistd_update_handler(long file_size, readstat_progress_handler progress_handler, void *user_ctx, void *io_ctx) { if (!progress_handler) return READSTAT_OK; int fd = ((unistd_io_ctx_t*) io_ctx)->fd; readstat_off_t current_offset = lseek(fd, 0, SEEK_CUR); if (current_offset == -1) return READSTAT_ERROR_SEEK; if (progress_handler(1.0 * current_offset / file_size, user_ctx)) return READSTAT_ERROR_USER_ABORT; return READSTAT_OK; } readstat_error_t unistd_io_init(readstat_parser_t *parser) { readstat_error_t retval = READSTAT_OK; unistd_io_ctx_t *io_ctx = NULL; if ((retval = readstat_set_open_handler(parser, unistd_open_handler)) != READSTAT_OK) return retval; if ((retval = readstat_set_close_handler(parser, unistd_close_handler)) != READSTAT_OK) return retval; if ((retval = readstat_set_seek_handler(parser, unistd_seek_handler)) != READSTAT_OK) return retval; if ((retval = readstat_set_read_handler(parser, unistd_read_handler)) != READSTAT_OK) return retval; if ((readstat_set_update_handler(parser, unistd_update_handler)) != READSTAT_OK) return retval; io_ctx = calloc(1, sizeof(unistd_io_ctx_t)); io_ctx->fd = -1; retval = readstat_set_io_ctx(parser, (void*) io_ctx); parser->io->io_ctx_needs_free = 1; return retval; } ReadStat-1.1.7/src/readstat_io_unistd.h000066400000000000000000000010311410722155500200500ustar00rootroot00000000000000 typedef struct unistd_io_ctx_s { int fd; } unistd_io_ctx_t; int unistd_open_handler(const char *path, void *io_ctx); int unistd_close_handler(void 
*io_ctx); readstat_off_t unistd_seek_handler(readstat_off_t offset, readstat_io_flags_t whence, void *io_ctx); ssize_t unistd_read_handler(void *buf, size_t nbytes, void *io_ctx); readstat_error_t unistd_update_handler(long file_size, readstat_progress_handler progress_handler, void *user_ctx, void *io_ctx); readstat_error_t unistd_io_init(readstat_parser_t *parser); ReadStat-1.1.7/src/readstat_malloc.c000066400000000000000000000015621410722155500173260ustar00rootroot00000000000000#include #define MAX_MALLOC_SIZE 0xFFF000 /* ~16 MB. Needs to be at least 0x3FF00, i.e. the default ~4MB block size used * in compressed SPSS (ZSAV) files. The purpose here is to prevent massive * allocations in the event of a malformed file or a bug in the library. */ void *readstat_malloc(size_t len) { if (len > MAX_MALLOC_SIZE || len == 0) { return NULL; } return malloc(len); } void *readstat_calloc(size_t count, size_t size) { if (count > MAX_MALLOC_SIZE || size > MAX_MALLOC_SIZE || count * size > MAX_MALLOC_SIZE) { return NULL; } if (count == 0 || size == 0) { return NULL; } return calloc(count, size); } void *readstat_realloc(void *ptr, size_t len) { if (len > MAX_MALLOC_SIZE || len == 0) { if (ptr) free(ptr); return NULL; } return realloc(ptr, len); } ReadStat-1.1.7/src/readstat_malloc.h000066400000000000000000000002061410722155500173250ustar00rootroot00000000000000 void *readstat_malloc(size_t size); void *readstat_calloc(size_t count, size_t size); void *readstat_realloc(void *ptr, size_t len); ReadStat-1.1.7/src/readstat_metadata.c000066400000000000000000000022461410722155500176370ustar00rootroot00000000000000#include "readstat.h" int readstat_get_row_count(readstat_metadata_t *metadata) { return metadata->row_count; } int readstat_get_var_count(readstat_metadata_t *metadata) { return metadata->var_count; } time_t readstat_get_creation_time(readstat_metadata_t *metadata) { return metadata->creation_time; } time_t readstat_get_modified_time(readstat_metadata_t *metadata) { return 
metadata->modified_time; } int readstat_get_file_format_version(readstat_metadata_t *metadata) { return metadata->file_format_version; } int readstat_get_file_format_is_64bit(readstat_metadata_t *metadata) { return metadata->is64bit; } readstat_compress_t readstat_get_compression(readstat_metadata_t *metadata) { return metadata->compression; } readstat_endian_t readstat_get_endianness(readstat_metadata_t *metadata) { return metadata->endianness; } const char *readstat_get_file_label(readstat_metadata_t *metadata) { return metadata->file_label; } const char *readstat_get_file_encoding(readstat_metadata_t *metadata) { return metadata->file_encoding; } const char *readstat_get_table_name(readstat_metadata_t *metadata) { return metadata->table_name; } ReadStat-1.1.7/src/readstat_parser.c000066400000000000000000000075411410722155500173560ustar00rootroot00000000000000 #include #include "readstat.h" #include "readstat_io_unistd.h" readstat_parser_t *readstat_parser_init() { readstat_parser_t *parser = calloc(1, sizeof(readstat_parser_t)); parser->io = calloc(1, sizeof(readstat_io_t)); if (unistd_io_init(parser) != READSTAT_OK) { readstat_parser_free(parser); return NULL; } parser->output_encoding = "UTF-8"; return parser; } void readstat_parser_free(readstat_parser_t *parser) { if (parser) { if (parser->io) { readstat_set_io_ctx(parser, NULL); free(parser->io); } free(parser); } } readstat_error_t readstat_set_metadata_handler(readstat_parser_t *parser, readstat_metadata_handler metadata_handler) { parser->handlers.metadata = metadata_handler; return READSTAT_OK; } readstat_error_t readstat_set_note_handler(readstat_parser_t *parser, readstat_note_handler note_handler) { parser->handlers.note = note_handler; return READSTAT_OK; } readstat_error_t readstat_set_variable_handler(readstat_parser_t *parser, readstat_variable_handler variable_handler) { parser->handlers.variable = variable_handler; return READSTAT_OK; } readstat_error_t 
readstat_set_value_handler(readstat_parser_t *parser, readstat_value_handler value_handler) { parser->handlers.value = value_handler; return READSTAT_OK; } readstat_error_t readstat_set_value_label_handler(readstat_parser_t *parser, readstat_value_label_handler label_handler) { parser->handlers.value_label = label_handler; return READSTAT_OK; } readstat_error_t readstat_set_error_handler(readstat_parser_t *parser, readstat_error_handler error_handler) { parser->handlers.error = error_handler; return READSTAT_OK; } readstat_error_t readstat_set_progress_handler(readstat_parser_t *parser, readstat_progress_handler progress_handler) { parser->handlers.progress = progress_handler; return READSTAT_OK; } readstat_error_t readstat_set_fweight_handler(readstat_parser_t *parser, readstat_fweight_handler fweight_handler) { parser->handlers.fweight = fweight_handler; return READSTAT_OK; } readstat_error_t readstat_set_open_handler(readstat_parser_t *parser, readstat_open_handler open_handler) { parser->io->open = open_handler; return READSTAT_OK; } readstat_error_t readstat_set_close_handler(readstat_parser_t *parser, readstat_close_handler close_handler) { parser->io->close = close_handler; return READSTAT_OK; } readstat_error_t readstat_set_seek_handler(readstat_parser_t *parser, readstat_seek_handler seek_handler) { parser->io->seek = seek_handler; return READSTAT_OK; } readstat_error_t readstat_set_read_handler(readstat_parser_t *parser, readstat_read_handler read_handler) { parser->io->read = read_handler; return READSTAT_OK; } readstat_error_t readstat_set_update_handler(readstat_parser_t *parser, readstat_update_handler update_handler) { parser->io->update = update_handler; return READSTAT_OK; } readstat_error_t readstat_set_io_ctx(readstat_parser_t *parser, void *io_ctx) { if (parser->io->io_ctx_needs_free) { free(parser->io->io_ctx); } parser->io->io_ctx = io_ctx; parser->io->io_ctx_needs_free = 0; return READSTAT_OK; } readstat_error_t 
readstat_set_file_character_encoding(readstat_parser_t *parser, const char *encoding) { parser->input_encoding = encoding; return READSTAT_OK; } readstat_error_t readstat_set_handler_character_encoding(readstat_parser_t *parser, const char *encoding) { parser->output_encoding = encoding; return READSTAT_OK; } readstat_error_t readstat_set_row_limit(readstat_parser_t *parser, long row_limit) { parser->row_limit = row_limit; return READSTAT_OK; } readstat_error_t readstat_set_row_offset(readstat_parser_t *parser, long row_offset) { parser->row_offset = row_offset; return READSTAT_OK; } ReadStat-1.1.7/src/readstat_strings.h000066400000000000000000000001731410722155500175520ustar00rootroot00000000000000#if defined(_MSC_VER) # define strncasecmp _strnicmp # define strcasecmp _stricmp #else # include #endif ReadStat-1.1.7/src/readstat_value.c000066400000000000000000000127031410722155500171720ustar00rootroot00000000000000 #include "readstat.h" readstat_type_class_t readstat_type_class(readstat_type_t type) { if (type == READSTAT_TYPE_STRING || type == READSTAT_TYPE_STRING_REF) return READSTAT_TYPE_CLASS_STRING; return READSTAT_TYPE_CLASS_NUMERIC; } readstat_type_t readstat_value_type(readstat_value_t value) { return value.type; } readstat_type_class_t readstat_value_type_class(readstat_value_t value) { return readstat_type_class(value.type); } char readstat_value_tag(readstat_value_t value) { return value.tag; } int readstat_value_is_missing(readstat_value_t value, readstat_variable_t *variable) { if (value.is_system_missing || value.is_tagged_missing) return 1; if (variable) return readstat_value_is_defined_missing(value, variable); return 0; } int readstat_value_is_system_missing(readstat_value_t value) { return (value.is_system_missing); } int readstat_value_is_tagged_missing(readstat_value_t value) { return (value.is_tagged_missing); } static int readstat_double_is_defined_missing(double fp_value, readstat_variable_t *variable) { int count = 
readstat_variable_get_missing_ranges_count(variable); int i; for (i=0; i= lo && fp_value <= hi) { return 1; } } return 0; } static int readstat_string_is_defined_missing(const char *string, readstat_variable_t *variable) { if (string == NULL) return 0; int count = readstat_variable_get_missing_ranges_count(variable); int i; for (i=0; i= 0 && strcmp(string, hi) <= 0) { return 1; } } return 0; } int readstat_value_is_defined_missing(readstat_value_t value, readstat_variable_t *variable) { if (readstat_value_type_class(value) != readstat_variable_get_type_class(variable)) return 0; if (readstat_value_type_class(value) == READSTAT_TYPE_CLASS_STRING) return readstat_string_is_defined_missing(readstat_string_value(value), variable); if (readstat_value_type_class(value) == READSTAT_TYPE_CLASS_NUMERIC) return readstat_double_is_defined_missing(readstat_double_value(value), variable); return 0; } char readstat_int8_value(readstat_value_t value) { if (readstat_value_is_system_missing(value)) return 0; if (value.type == READSTAT_TYPE_DOUBLE) return (char)value.v.double_value; if (value.type == READSTAT_TYPE_FLOAT) return (char)value.v.float_value; if (value.type == READSTAT_TYPE_INT32) return (char)value.v.i32_value; if (value.type == READSTAT_TYPE_INT16) return (char)value.v.i16_value; if (value.type == READSTAT_TYPE_INT8) return value.v.i8_value; return 0; } int16_t readstat_int16_value(readstat_value_t value) { if (readstat_value_is_system_missing(value)) return 0; if (value.type == READSTAT_TYPE_DOUBLE) return (int16_t)value.v.double_value; if (value.type == READSTAT_TYPE_FLOAT) return (int16_t)value.v.float_value; if (value.type == READSTAT_TYPE_INT32) return (int16_t)value.v.i32_value; if (value.type == READSTAT_TYPE_INT16) return value.v.i16_value; if (value.type == READSTAT_TYPE_INT8) return value.v.i8_value; return 0; } int32_t readstat_int32_value(readstat_value_t value) { if (readstat_value_is_system_missing(value)) return 0; if (value.type == READSTAT_TYPE_DOUBLE) 
return (int32_t)value.v.double_value; if (value.type == READSTAT_TYPE_FLOAT) return (int32_t)value.v.float_value; if (value.type == READSTAT_TYPE_INT32) return value.v.i32_value; if (value.type == READSTAT_TYPE_INT16) return value.v.i16_value; if (value.type == READSTAT_TYPE_INT8) return value.v.i8_value; return 0; } float readstat_float_value(readstat_value_t value) { if (readstat_value_is_system_missing(value)) return NAN; if (value.type == READSTAT_TYPE_DOUBLE) return (float)value.v.double_value; if (value.type == READSTAT_TYPE_FLOAT) return value.v.float_value; if (value.type == READSTAT_TYPE_INT32) return value.v.i32_value; if (value.type == READSTAT_TYPE_INT16) return value.v.i16_value; if (value.type == READSTAT_TYPE_INT8) return value.v.i8_value; return value.v.float_value; } double readstat_double_value(readstat_value_t value) { if (readstat_value_is_system_missing(value)) return NAN; if (value.type == READSTAT_TYPE_DOUBLE) return value.v.double_value; if (value.type == READSTAT_TYPE_FLOAT) return value.v.float_value; if (value.type == READSTAT_TYPE_INT32) return value.v.i32_value; if (value.type == READSTAT_TYPE_INT16) return value.v.i16_value; if (value.type == READSTAT_TYPE_INT8) return value.v.i8_value; return NAN; } const char *readstat_string_value(readstat_value_t value) { if (readstat_value_type(value) == READSTAT_TYPE_STRING) return value.v.string_value; return NULL; } ReadStat-1.1.7/src/readstat_variable.c000066400000000000000000000106751410722155500176510ustar00rootroot00000000000000 #include #include "readstat.h" static readstat_value_t make_blank_value(); static readstat_value_t make_double_value(double dval); static readstat_value_t make_blank_value() { readstat_value_t value = { .is_system_missing = 1, .v = { .double_value = NAN }, .type = READSTAT_TYPE_DOUBLE }; return value; } static readstat_value_t make_double_value(double dval) { readstat_value_t value = { .v = { .double_value = dval }, .type = READSTAT_TYPE_DOUBLE }; return value; } 
static readstat_value_t make_string_value(const char *string) { readstat_value_t value = { .v = { .string_value = string }, .type = READSTAT_TYPE_STRING }; return value; } const char *readstat_variable_get_name(const readstat_variable_t *variable) { if (variable->name[0]) return variable->name; return NULL; } const char *readstat_variable_get_label(const readstat_variable_t *variable) { if (variable->label[0]) return variable->label; return NULL; } const char *readstat_variable_get_format(const readstat_variable_t *variable) { if (variable->format[0]) return variable->format; return NULL; } readstat_type_t readstat_variable_get_type(const readstat_variable_t *variable) { return variable->type; } readstat_type_class_t readstat_variable_get_type_class(const readstat_variable_t *variable) { return readstat_type_class(variable->type); } int readstat_variable_get_index(const readstat_variable_t *variable) { return variable->index; } int readstat_variable_get_index_after_skipping(const readstat_variable_t *variable) { return variable->index_after_skipping; } size_t readstat_variable_get_storage_width(const readstat_variable_t *variable) { return variable->storage_width; } readstat_measure_t readstat_variable_get_measure(const readstat_variable_t *variable) { return variable->measure; } readstat_alignment_t readstat_variable_get_alignment(const readstat_variable_t *variable) { return variable->alignment; } int readstat_variable_get_display_width(const readstat_variable_t *variable) { return variable->display_width; } int readstat_variable_get_missing_ranges_count(const readstat_variable_t *variable) { return variable->missingness.missing_ranges_count; } readstat_value_t readstat_variable_get_missing_range_lo(const readstat_variable_t *variable, int i) { if (i < variable->missingness.missing_ranges_count && 2*i+1 < sizeof(variable->missingness.missing_ranges)/sizeof(variable->missingness.missing_ranges[0])) { return variable->missingness.missing_ranges[2*i]; } return 
make_blank_value(); } readstat_value_t readstat_variable_get_missing_range_hi(const readstat_variable_t *variable, int i) { if (i < variable->missingness.missing_ranges_count && 2*i+1 < sizeof(variable->missingness.missing_ranges)/sizeof(variable->missingness.missing_ranges[0])) { return variable->missingness.missing_ranges[2*i+1]; } return make_blank_value(); } static readstat_error_t readstat_variable_add_missing_value_range(readstat_variable_t *variable, readstat_value_t lo, readstat_value_t hi) { int i = readstat_variable_get_missing_ranges_count(variable); if (2*i < sizeof(variable->missingness.missing_ranges)/sizeof(variable->missingness.missing_ranges[0])) { variable->missingness.missing_ranges[2*i] = lo; variable->missingness.missing_ranges[2*i+1] = hi; variable->missingness.missing_ranges_count++; return READSTAT_OK; } return READSTAT_ERROR_TOO_MANY_MISSING_VALUE_DEFINITIONS; } readstat_error_t readstat_variable_add_missing_double_range(readstat_variable_t *variable, double lo, double hi) { return readstat_variable_add_missing_value_range(variable, make_double_value(lo), make_double_value(hi)); } readstat_error_t readstat_variable_add_missing_double_value(readstat_variable_t *variable, double value) { return readstat_variable_add_missing_value_range(variable, make_double_value(value), make_double_value(value)); } readstat_error_t readstat_variable_add_missing_string_range(readstat_variable_t *variable, const char *lo, const char *hi) { return readstat_variable_add_missing_value_range(variable, make_string_value(lo), make_string_value(hi)); } readstat_error_t readstat_variable_add_missing_string_value(readstat_variable_t *variable, const char *value) { return readstat_variable_add_missing_value_range(variable, make_string_value(value), make_string_value(value)); } ReadStat-1.1.7/src/readstat_writer.c000066400000000000000000000606651410722155500174040ustar00rootroot00000000000000 #include #include #include "readstat.h" #include "readstat_writer.h" #define 
VARIABLES_INITIAL_CAPACITY 50 #define LABEL_SETS_INITIAL_CAPACITY 50 #define NOTES_INITIAL_CAPACITY 50 #define VALUE_LABELS_INITIAL_CAPACITY 10 #define STRING_REFS_INITIAL_CAPACITY 100 #define LABEL_SET_VARIABLES_INITIAL_CAPACITY 2 static readstat_error_t readstat_write_row_default_callback(void *writer_ctx, void *bytes, size_t len) { return readstat_write_bytes((readstat_writer_t *)writer_ctx, bytes, len); } static int readstat_compare_string_refs(const void *elem1, const void *elem2) { readstat_string_ref_t *ref1 = *(readstat_string_ref_t **)elem1; readstat_string_ref_t *ref2 = *(readstat_string_ref_t **)elem2; if (ref1->first_o == ref2->first_o) return ref1->first_v - ref2->first_v; return ref1->first_o - ref2->first_o; } readstat_string_ref_t *readstat_string_ref_init(const char *string) { size_t len = strlen(string) + 1; readstat_string_ref_t *ref = calloc(1, sizeof(readstat_string_ref_t) + len); ref->first_o = -1; ref->first_v = -1; ref->len = len; memcpy(&ref->data[0], string, len); return ref; } readstat_writer_t *readstat_writer_init() { readstat_writer_t *writer = calloc(1, sizeof(readstat_writer_t)); writer->variables = calloc(VARIABLES_INITIAL_CAPACITY, sizeof(readstat_variable_t *)); writer->variables_capacity = VARIABLES_INITIAL_CAPACITY; writer->label_sets = calloc(LABEL_SETS_INITIAL_CAPACITY, sizeof(readstat_label_set_t *)); writer->label_sets_capacity = LABEL_SETS_INITIAL_CAPACITY; writer->notes = calloc(NOTES_INITIAL_CAPACITY, sizeof(char *)); writer->notes_capacity = NOTES_INITIAL_CAPACITY; writer->string_refs = calloc(STRING_REFS_INITIAL_CAPACITY, sizeof(readstat_string_ref_t *)); writer->string_refs_capacity = STRING_REFS_INITIAL_CAPACITY; writer->timestamp = time(NULL); writer->is_64bit = 1; writer->callbacks.write_row = &readstat_write_row_default_callback; return writer; } static void readstat_variable_free(readstat_variable_t *variable) { free(variable); } static void readstat_label_set_free(readstat_label_set_t *label_set) { int i; for 
(i=0; ivalue_labels_count; i++) { readstat_value_label_t *value_label = readstat_get_value_label(label_set, i); if (value_label->label) free(value_label->label); if (value_label->string_key) free(value_label->string_key); } free(label_set->value_labels); free(label_set->variables); free(label_set); } static void readstat_copy_label(readstat_value_label_t *value_label, const char *label) { if (label && strlen(label)) { value_label->label_len = strlen(label); value_label->label = malloc(value_label->label_len); memcpy(value_label->label, label, value_label->label_len); } } static readstat_value_label_t *readstat_add_value_label(readstat_label_set_t *label_set, const char *label) { if (label_set->value_labels_count == label_set->value_labels_capacity) { label_set->value_labels_capacity *= 2; label_set->value_labels = realloc(label_set->value_labels, label_set->value_labels_capacity * sizeof(readstat_value_label_t)); } readstat_value_label_t *new_value_label = &label_set->value_labels[label_set->value_labels_count++]; memset(new_value_label, 0, sizeof(readstat_value_label_t)); readstat_copy_label(new_value_label, label); return new_value_label; } readstat_error_t readstat_validate_variable(readstat_writer_t *writer, const readstat_variable_t *variable) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (writer->callbacks.variable_ok) return writer->callbacks.variable_ok(variable); return READSTAT_OK; } readstat_error_t readstat_validate_metadata(readstat_writer_t *writer) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (writer->callbacks.metadata_ok) return writer->callbacks.metadata_ok(writer); return READSTAT_OK; } static readstat_error_t readstat_begin_writing_data(readstat_writer_t *writer) { readstat_error_t retval = READSTAT_OK; size_t row_len = 0; int i; retval = readstat_validate_metadata(writer); if (retval != READSTAT_OK) goto cleanup; for (i=0; ivariables_count; i++) { readstat_variable_t *variable = 
readstat_get_variable(writer, i); variable->storage_width = writer->callbacks.variable_width(variable->type, variable->user_width); variable->offset = row_len; row_len += variable->storage_width; } if (writer->callbacks.variable_ok) { for (i=0; ivariables_count; i++) { retval = readstat_validate_variable(writer, readstat_get_variable(writer, i)); if (retval != READSTAT_OK) goto cleanup; } } writer->row_len = row_len; writer->row = malloc(writer->row_len); if (writer->callbacks.begin_data) { retval = writer->callbacks.begin_data(writer); } cleanup: return retval; } void readstat_writer_free(readstat_writer_t *writer) { int i; if (writer) { if (writer->callbacks.module_ctx_free && writer->module_ctx) { writer->callbacks.module_ctx_free(writer->module_ctx); } if (writer->variables) { for (i=0; ivariables_count; i++) { readstat_variable_free(writer->variables[i]); } free(writer->variables); } if (writer->label_sets) { for (i=0; ilabel_sets_count; i++) { readstat_label_set_free(writer->label_sets[i]); } free(writer->label_sets); } if (writer->notes) { for (i=0; inotes_count; i++) { free(writer->notes[i]); } free(writer->notes); } if (writer->string_refs) { for (i=0; istring_refs_count; i++) { free(writer->string_refs[i]); } free(writer->string_refs); } if (writer->row) { free(writer->row); } free(writer); } } readstat_error_t readstat_set_data_writer(readstat_writer_t *writer, readstat_data_writer data_writer) { writer->data_writer = data_writer; return READSTAT_OK; } readstat_error_t readstat_write_bytes(readstat_writer_t *writer, const void *bytes, size_t len) { size_t bytes_written = writer->data_writer(bytes, len, writer->user_ctx); if (bytes_written < len) { return READSTAT_ERROR_WRITE; } writer->bytes_written += bytes_written; return READSTAT_OK; } readstat_error_t readstat_write_bytes_as_lines(readstat_writer_t *writer, const void *bytes, size_t len, size_t line_len, const char *line_sep) { size_t line_sep_len = strlen(line_sep); readstat_error_t retval = 
READSTAT_OK; size_t bytes_written = 0; while (bytes_written < len) { ssize_t bytes_left_in_line = line_len - (writer->bytes_written % (line_len + line_sep_len)); if (len - bytes_written < bytes_left_in_line) { retval = readstat_write_bytes(writer, ((const char *)bytes) + bytes_written, len - bytes_written); bytes_written = len; } else { retval = readstat_write_bytes(writer, ((const char *)bytes) + bytes_written, bytes_left_in_line); bytes_written += bytes_left_in_line; } if (retval != READSTAT_OK) break; if (writer->bytes_written % (line_len + line_sep_len) == line_len) { if ((retval = readstat_write_bytes(writer, line_sep, line_sep_len)) != READSTAT_OK) break; } } return retval; } readstat_error_t readstat_write_line_padding(readstat_writer_t *writer, char pad, size_t line_len, const char *line_sep) { size_t line_sep_len = strlen(line_sep); if (writer->bytes_written % (line_len + line_sep_len) == 0) return READSTAT_OK; readstat_error_t error = READSTAT_OK; ssize_t bytes_left_in_line = line_len - (writer->bytes_written % (line_len + line_sep_len)); char *bytes = malloc(bytes_left_in_line); memset(bytes, pad, bytes_left_in_line); if ((error = readstat_write_bytes(writer, bytes, bytes_left_in_line)) != READSTAT_OK) goto cleanup; if ((error = readstat_write_bytes(writer, line_sep, line_sep_len)) != READSTAT_OK) goto cleanup; cleanup: if (bytes) free(bytes); return READSTAT_OK; } readstat_error_t readstat_write_string(readstat_writer_t *writer, const char *bytes) { return readstat_write_bytes(writer, bytes, strlen(bytes)); } static readstat_error_t readstat_write_repeated_byte(readstat_writer_t *writer, char byte, size_t len) { if (len == 0) return READSTAT_OK; char *zeros = malloc(len); memset(zeros, byte, len); readstat_error_t error = readstat_write_bytes(writer, zeros, len); free(zeros); return error; } readstat_error_t readstat_write_zeros(readstat_writer_t *writer, size_t len) { return readstat_write_repeated_byte(writer, '\0', len); } readstat_error_t 
readstat_write_spaces(readstat_writer_t *writer, size_t len) { return readstat_write_repeated_byte(writer, ' ', len); } readstat_error_t readstat_write_space_padded_string(readstat_writer_t *writer, const char *string, size_t max_len) { readstat_error_t retval = READSTAT_OK; if (string == NULL || string[0] == '\0') return readstat_write_spaces(writer, max_len); size_t len = strlen(string); if (len > max_len) len = max_len; if ((retval = readstat_write_bytes(writer, string, len)) != READSTAT_OK) return retval; return readstat_write_spaces(writer, max_len - len); } readstat_label_set_t *readstat_add_label_set(readstat_writer_t *writer, readstat_type_t type, const char *name) { if (writer->label_sets_count == writer->label_sets_capacity) { writer->label_sets_capacity *= 2; writer->label_sets = realloc(writer->label_sets, writer->label_sets_capacity * sizeof(readstat_label_set_t *)); } readstat_label_set_t *new_label_set = calloc(1, sizeof(readstat_label_set_t)); writer->label_sets[writer->label_sets_count++] = new_label_set; new_label_set->type = type; snprintf(new_label_set->name, sizeof(new_label_set->name), "%s", name); new_label_set->value_labels = calloc(VALUE_LABELS_INITIAL_CAPACITY, sizeof(readstat_value_label_t)); new_label_set->value_labels_capacity = VALUE_LABELS_INITIAL_CAPACITY; new_label_set->variables = calloc(LABEL_SET_VARIABLES_INITIAL_CAPACITY, sizeof(readstat_variable_t *)); new_label_set->variables_capacity = LABEL_SET_VARIABLES_INITIAL_CAPACITY; return new_label_set; } readstat_label_set_t *readstat_get_label_set(readstat_writer_t *writer, int index) { if (index < writer->label_sets_count) { return writer->label_sets[index]; } return NULL; } void readstat_sort_label_set(readstat_label_set_t *label_set, int (*compare)(const readstat_value_label_t *, const readstat_value_label_t *)) { qsort(label_set->value_labels, label_set->value_labels_count, sizeof(readstat_value_label_t), (int (*)(const void *, const void *))compare); } readstat_value_label_t 
*readstat_get_value_label(readstat_label_set_t *label_set, int index) { if (index < label_set->value_labels_count) { return &label_set->value_labels[index]; } return NULL; } readstat_variable_t *readstat_get_label_set_variable(readstat_label_set_t *label_set, int index) { if (index < label_set->variables_count) { return ((readstat_variable_t **)label_set->variables)[index]; } return NULL; } void readstat_label_double_value(readstat_label_set_t *label_set, double value, const char *label) { readstat_value_label_t *new_value_label = readstat_add_value_label(label_set, label); new_value_label->double_key = value; new_value_label->int32_key = value; } void readstat_label_int32_value(readstat_label_set_t *label_set, int32_t value, const char *label) { readstat_value_label_t *new_value_label = readstat_add_value_label(label_set, label); new_value_label->double_key = value; new_value_label->int32_key = value; } void readstat_label_string_value(readstat_label_set_t *label_set, const char *value, const char *label) { readstat_value_label_t *new_value_label = readstat_add_value_label(label_set, label); if (value && strlen(value)) { new_value_label->string_key_len = strlen(value); new_value_label->string_key = malloc(new_value_label->string_key_len); memcpy(new_value_label->string_key, value, new_value_label->string_key_len); } } void readstat_label_tagged_value(readstat_label_set_t *label_set, char tag, const char *label) { readstat_value_label_t *new_value_label = readstat_add_value_label(label_set, label); new_value_label->tag = tag; } readstat_variable_t *readstat_add_variable(readstat_writer_t *writer, const char *name, readstat_type_t type, size_t width) { if (writer->variables_count == writer->variables_capacity) { writer->variables_capacity *= 2; writer->variables = realloc(writer->variables, writer->variables_capacity * sizeof(readstat_variable_t *)); } readstat_variable_t *new_variable = calloc(1, sizeof(readstat_variable_t)); new_variable->index = 
writer->variables_count++; writer->variables[new_variable->index] = new_variable; new_variable->user_width = width; new_variable->type = type; if (readstat_variable_get_type_class(new_variable) == READSTAT_TYPE_CLASS_STRING) { new_variable->alignment = READSTAT_ALIGNMENT_LEFT; } else { new_variable->alignment = READSTAT_ALIGNMENT_RIGHT; } new_variable->measure = READSTAT_MEASURE_UNKNOWN; if (name) { snprintf(new_variable->name, sizeof(new_variable->name), "%s", name); } return new_variable; } static void readstat_append_string_ref(readstat_writer_t *writer, readstat_string_ref_t *ref) { if (writer->string_refs_count == writer->string_refs_capacity) { writer->string_refs_capacity *= 2; writer->string_refs = realloc(writer->string_refs, writer->string_refs_capacity * sizeof(readstat_string_ref_t *)); } writer->string_refs[writer->string_refs_count++] = ref; } readstat_string_ref_t *readstat_add_string_ref(readstat_writer_t *writer, const char *string) { readstat_string_ref_t *ref = readstat_string_ref_init(string); readstat_append_string_ref(writer, ref); return ref; } void readstat_add_note(readstat_writer_t *writer, const char *note) { if (writer->notes_count == writer->notes_capacity) { writer->notes_capacity *= 2; writer->notes = realloc(writer->notes, writer->notes_capacity * sizeof(const char *)); } char *note_copy = malloc(strlen(note) + 1); strcpy(note_copy, note); writer->notes[writer->notes_count++] = note_copy; } void readstat_variable_set_label(readstat_variable_t *variable, const char *label) { if (label) { snprintf(variable->label, sizeof(variable->label), "%s", label); } else { memset(variable->label, '\0', sizeof(variable->label)); } } void readstat_variable_set_format(readstat_variable_t *variable, const char *format) { if (format) { snprintf(variable->format, sizeof(variable->format), "%s", format); } else { memset(variable->format, '\0', sizeof(variable->format)); } } void readstat_variable_set_measure(readstat_variable_t *variable, 
readstat_measure_t measure) { variable->measure = measure; } void readstat_variable_set_alignment(readstat_variable_t *variable, readstat_alignment_t alignment) { variable->alignment = alignment; } void readstat_variable_set_display_width(readstat_variable_t *variable, int display_width) { variable->display_width = display_width; } void readstat_variable_set_label_set(readstat_variable_t *variable, readstat_label_set_t *label_set) { variable->label_set = label_set; if (label_set) { if (label_set->variables_count == label_set->variables_capacity) { label_set->variables_capacity *= 2; label_set->variables = realloc(label_set->variables, label_set->variables_capacity * sizeof(readstat_variable_t *)); } ((readstat_variable_t **)label_set->variables)[label_set->variables_count++] = variable; } } readstat_variable_t *readstat_get_variable(readstat_writer_t *writer, int index) { if (index < writer->variables_count) { return writer->variables[index]; } return NULL; } readstat_string_ref_t *readstat_get_string_ref(readstat_writer_t *writer, int index) { if (index < writer->string_refs_count) { return writer->string_refs[index]; } return NULL; } readstat_error_t readstat_writer_set_file_label(readstat_writer_t *writer, const char *file_label) { snprintf(writer->file_label, sizeof(writer->file_label), "%s", file_label); return READSTAT_OK; } readstat_error_t readstat_writer_set_file_timestamp(readstat_writer_t *writer, time_t timestamp) { writer->timestamp = timestamp; return READSTAT_OK; } readstat_error_t readstat_writer_set_table_name(readstat_writer_t *writer, const char *table_name) { snprintf(writer->table_name, sizeof(writer->table_name), "%s", table_name); return READSTAT_OK; } readstat_error_t readstat_writer_set_fweight_variable(readstat_writer_t *writer, const readstat_variable_t *variable) { if (readstat_variable_get_type_class(variable) == READSTAT_TYPE_CLASS_STRING) return READSTAT_ERROR_BAD_FREQUENCY_WEIGHT; writer->fweight_variable = variable; return 
READSTAT_OK; } readstat_error_t readstat_writer_set_file_format_version(readstat_writer_t *writer, uint8_t version) { writer->version = version; return READSTAT_OK; } readstat_error_t readstat_writer_set_file_format_is_64bit(readstat_writer_t *writer, int is_64bit) { writer->is_64bit = is_64bit; return READSTAT_OK; } readstat_error_t readstat_writer_set_compression(readstat_writer_t *writer, readstat_compress_t compression) { writer->compression = compression; return READSTAT_OK; } readstat_error_t readstat_writer_set_error_handler(readstat_writer_t *writer, readstat_error_handler error_handler) { writer->error_handler = error_handler; return READSTAT_OK; } readstat_error_t readstat_begin_writing_file(readstat_writer_t *writer, void *user_ctx, long row_count) { writer->row_count = row_count; writer->user_ctx = user_ctx; writer->initialized = 1; return readstat_validate_metadata(writer); } readstat_error_t readstat_begin_row(readstat_writer_t *writer) { readstat_error_t retval = READSTAT_OK; if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (writer->current_row == 0) retval = readstat_begin_writing_data(writer); memset(writer->row, '\0', writer->row_len); return retval; } // Then call one of these for each variable readstat_error_t readstat_insert_int8_value(readstat_writer_t *writer, const readstat_variable_t *variable, int8_t value) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (variable->type != READSTAT_TYPE_INT8) return READSTAT_ERROR_VALUE_TYPE_MISMATCH; return writer->callbacks.write_int8(&writer->row[variable->offset], variable, value); } readstat_error_t readstat_insert_int16_value(readstat_writer_t *writer, const readstat_variable_t *variable, int16_t value) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (variable->type != READSTAT_TYPE_INT16) return READSTAT_ERROR_VALUE_TYPE_MISMATCH; return writer->callbacks.write_int16(&writer->row[variable->offset], variable, value); 
} readstat_error_t readstat_insert_int32_value(readstat_writer_t *writer, const readstat_variable_t *variable, int32_t value) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (variable->type != READSTAT_TYPE_INT32) return READSTAT_ERROR_VALUE_TYPE_MISMATCH; return writer->callbacks.write_int32(&writer->row[variable->offset], variable, value); } readstat_error_t readstat_insert_float_value(readstat_writer_t *writer, const readstat_variable_t *variable, float value) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (variable->type != READSTAT_TYPE_FLOAT) return READSTAT_ERROR_VALUE_TYPE_MISMATCH; return writer->callbacks.write_float(&writer->row[variable->offset], variable, value); } readstat_error_t readstat_insert_double_value(readstat_writer_t *writer, const readstat_variable_t *variable, double value) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (variable->type != READSTAT_TYPE_DOUBLE) return READSTAT_ERROR_VALUE_TYPE_MISMATCH; return writer->callbacks.write_double(&writer->row[variable->offset], variable, value); } readstat_error_t readstat_insert_string_value(readstat_writer_t *writer, const readstat_variable_t *variable, const char *value) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (variable->type != READSTAT_TYPE_STRING) return READSTAT_ERROR_VALUE_TYPE_MISMATCH; return writer->callbacks.write_string(&writer->row[variable->offset], variable, value); } readstat_error_t readstat_insert_string_ref(readstat_writer_t *writer, const readstat_variable_t *variable, readstat_string_ref_t *ref) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (variable->type != READSTAT_TYPE_STRING_REF) return READSTAT_ERROR_VALUE_TYPE_MISMATCH; if (!writer->callbacks.write_string_ref) return READSTAT_ERROR_STRING_REFS_NOT_SUPPORTED; if (ref && ref->first_o == -1 && ref->first_v == -1) { ref->first_o = writer->current_row; ref->first_v = 
variable->index; } return writer->callbacks.write_string_ref(&writer->row[variable->offset], variable, ref); } readstat_error_t readstat_insert_missing_value(readstat_writer_t *writer, const readstat_variable_t *variable) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (variable->type == READSTAT_TYPE_STRING) { return writer->callbacks.write_missing_string(&writer->row[variable->offset], variable); } if (variable->type == READSTAT_TYPE_STRING_REF) { return readstat_insert_string_ref(writer, variable, NULL); } return writer->callbacks.write_missing_number(&writer->row[variable->offset], variable); } readstat_error_t readstat_insert_tagged_missing_value(readstat_writer_t *writer, const readstat_variable_t *variable, char tag) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (!writer->callbacks.write_missing_tagged) { /* Write out a missing number but return an error */ writer->callbacks.write_missing_number(&writer->row[variable->offset], variable); return READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED; } return writer->callbacks.write_missing_tagged(&writer->row[variable->offset], variable, tag); } readstat_error_t readstat_end_row(readstat_writer_t *writer) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; readstat_error_t error = writer->callbacks.write_row(writer, writer->row, writer->row_len); if (error == READSTAT_OK) writer->current_row++; return error; } readstat_error_t readstat_end_writing(readstat_writer_t *writer) { if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; if (writer->current_row != writer->row_count) return READSTAT_ERROR_ROW_COUNT_MISMATCH; if (writer->row_count == 0) { readstat_error_t retval = readstat_begin_writing_data(writer); if (retval != READSTAT_OK) return retval; } /* Sort if out of order */ int i; for (i=1; istring_refs_count; i++) { if (readstat_compare_string_refs(&writer->string_refs[i-1], &writer->string_refs[i]) > 0) { 
qsort(writer->string_refs, writer->string_refs_count, sizeof(readstat_string_ref_t *), &readstat_compare_string_refs); break; } } if (!writer->callbacks.end_data) return READSTAT_OK; return writer->callbacks.end_data(writer); } ReadStat-1.1.7/src/readstat_writer.h000066400000000000000000000025661410722155500174050ustar00rootroot00000000000000 #define READSTAT_PRODUCT_NAME "ReadStat" #define READSTAT_PRODUCT_URL "https://github.com/WizardMac/ReadStat" readstat_error_t readstat_begin_writing_file(readstat_writer_t *writer, void *user_ctx, long row_count); readstat_error_t readstat_write_bytes(readstat_writer_t *writer, const void *bytes, size_t len); readstat_error_t readstat_write_bytes_as_lines(readstat_writer_t *writer, const void *bytes, size_t len, size_t line_len, const char *line_sep); readstat_error_t readstat_write_line_padding(readstat_writer_t *writer, char pad, size_t line_len, const char *line_sep); readstat_error_t readstat_write_zeros(readstat_writer_t *writer, size_t len); readstat_error_t readstat_write_spaces(readstat_writer_t *writer, size_t len); readstat_error_t readstat_write_string(readstat_writer_t *writer, const char *bytes); readstat_error_t readstat_write_space_padded_string(readstat_writer_t *writer, const char *string, size_t max_len); readstat_value_label_t *readstat_get_value_label(readstat_label_set_t *label_set, int index); readstat_label_set_t *readstat_get_label_set(readstat_writer_t *writer, int index); readstat_variable_t *readstat_get_label_set_variable(readstat_label_set_t *label_set, int index); void readstat_sort_label_set(readstat_label_set_t *label_set, int (*compare)(const readstat_value_label_t *, const readstat_value_label_t *)); ReadStat-1.1.7/src/sas/000077500000000000000000000000001410722155500146065ustar00rootroot00000000000000ReadStat-1.1.7/src/sas/ieee.c000066400000000000000000000343371410722155500156730ustar00rootroot00000000000000#include #include #include "ieee.h" #include "../readstat_bits.h" /* These routines 
are modified versions of those found in SAS publication TS-140, * "RECORD LAYOUT OF A SAS VERSION 5 OR 6 DATA SET IN SAS TRANSPORT (XPORT) FORMAT" * https://support.sas.com/techsup/technote/ts140.pdf * * Modifications include using stdint.h and supporting infinite IEEE values. */ static void xpt2ieee(unsigned char *xport, unsigned char *ieee); static void ieee2xpt(unsigned char *ieee, unsigned char *xport); #ifndef FLOATREP #define FLOATREP get_native() int get_native(); #endif void memreverse(void *intp_void, int l) { if (!machine_is_little_endian()) return; int i,j; char save; char *intp = (char *)intp_void; j = l/2; for (i=0;i=0;i--) { temp[7-i] = from[i]; } from = temp; fromtype = CN_TYPE_IEEEB; /* Break intentionally omitted. */ case CN_TYPE_IEEEB : /* Break intentionally omitted. */ case CN_TYPE_XPORT : break; default: return(-1); } if (totype == CN_TYPE_NATIVE) { totype = FLOATREP; } switch(totype) { case CN_TYPE_XPORT : case CN_TYPE_IEEEB : case CN_TYPE_IEEEL : break; default: return(-2); } if (fromtype == totype) { memcpy(to,from,8); return(0); } switch(fromtype) { case CN_TYPE_IEEEB : if (totype == CN_TYPE_XPORT) ieee2xpt(from,to); else memcpy(to,from,8); break; case CN_TYPE_XPORT : xpt2ieee(from,to); break; } if (totype == CN_TYPE_IEEEL) { memcpy(temp,to,8); for (i=7;i>=0;i--) { to[7-i] = temp[i]; } } return(0); } int get_native() { static unsigned char float_reps[][8] = { {0x41,0x10,0x00,0x00,0x00,0x00,0x00,0x00}, {0x3f,0xf0,0x00,0x00,0x00,0x00,0x00,0x00}, {0x00,0x00,0x00,0x00,0x00,0x00,0xf0,0x3f} }; static double one = 1.00; int i,j; j = sizeof(float_reps)/8; for (i=0;i>= shift; ieee2 = (xport2 >> shift) | ((xport1 & 0x00000007) << (29 + (3 - shift))); } /* clear the 1 bit to the left of the binary point */ ieee1 &= 0xffefffff; /* set the exponent of the ieee number to be the actual */ /* exponent plus the shift count + 1023. Or this into the */ /* first half of the ieee number. 
The ibm exponent is excess */ /* 64 but is adjusted by 65 since during conversion to ibm */ /* format the exponent is incremented by 1 and the fraction */ /* bits left 4 positions to the right of the radix point. */ ieee1 |= (((((int32_t)(*temp & 0x7f) - 65) * 4) + shift + 1023) << 20) | (xport1 & 0x80000000); doret: memreverse(&ieee1,sizeof(uint32_t)); memcpy(ieee,&ieee1,sizeof(uint32_t)); memreverse(&ieee2,sizeof(uint32_t)); memcpy(ieee+4,&ieee2,sizeof(uint32_t)); return; } /*-------------------------------------------------------------*/ /* Name: ieee2xpt */ /* Purpose: converts IEEE to transport */ /* Usage: rc = ieee2xpt(to_ieee,p_data); */ /* Notes: this routine is an adaptation of the wzctdbl routine */ /* from the Apollo. */ /*-------------------------------------------------------------*/ void ieee2xpt(unsigned char *ieee, unsigned char *xport) { register int shift; unsigned char misschar; int ieee_exp; uint32_t xport1,xport2; uint32_t ieee1 = 0; uint32_t ieee2 = 0; char ieee8[8]; memcpy(ieee8,ieee,8); /*------get 2 longs for shifting------------------------------*/ memcpy(&ieee1,ieee8,sizeof(uint32_t)); memreverse(&ieee1,sizeof(uint32_t)); memcpy(&ieee2,ieee8+4,sizeof(uint32_t)); memreverse(&ieee2,sizeof(uint32_t)); memset(xport,0,8); /*-----if IEEE value is missing (1st 2 bytes are FFFF)-----*/ if (*ieee8 == (char)0xff && ieee8[1] == (char)0xff) { misschar = ~ieee8[2]; *xport = (misschar == 0xD2) ? 0x6D : misschar; return; } /**************************************************************/ /* Translate IEEE floating point number into IBM format float */ /* */ /* IEEE format: */ /* */ /* 6 5 0 */ /* 3 1 0 */ /* */ /* SEEEEEEEEEEEMMMM ........ MMMM */ /* */ /* Sign bit, 11 bit exponent, 52 fraction. Exponent is excess */ /* 1023. The fraction is multiplied by a power of 2 of the */ /* actual exponent. 
Normalized floating point numbers are */ /* represented with the binary point immediately to the left */ /* of the fraction with an implied "1" to the left of the */ /* binary point. */ /* */ /* IBM format: */ /* */ /* 6 5 0 */ /* 3 5 0 */ /* */ /* SEEEEEEEMMMM ......... MMMM */ /* */ /* Sign bit, 7 bit exponent, 56 bit fraction. Exponent is */ /* excess 64. The fraction is multiplied by a power of 16 of */ /* of the actual exponent. Normalized floating point numbers */ /* are presented with the radix point immediately to the left */ /* of the high order hex fraction digit. */ /* */ /* How do you translate from local to IBM format? */ /* */ /* The ieee format gives you a number that has a power of 2 */ /* exponent and a fraction of the form "1.". */ /* The first step is to get that "1" bit back into the */ /* fraction. Right shift it down 1 position, set the high */ /* order bit and reduce the binary exponent by 1. Now we have */ /* a fraction that looks like ".1" and it's */ /* ready to be shoved into ibm format. The ibm fraction has 4 */ /* more bits than the ieee, the ieee fraction must therefore */ /* be shifted left 4 positions before moving it in. We must */ /* also correct the fraction bits to account for the loss of 2*/ /* bits when converting from a binary exponent to a hex one */ /* (>> 2). We must shift the fraction left for 0, 1, 2, or 3 */ /* positions to maintain the proper magnitude. Doing */ /* conversion this way would tend to lose bits in the fraction*/ /* which is not desirable or necessary if we cheat a bit. */ /* First of all, we know that we are going to have to shift */ /* the ieee fraction left 4 places to put it in the right */ /* position; we won't do that, we'll just leave it where it is*/ /* and increment the ibm exponent by one, this will have the */ /* same effect and we won't have to do any shifting. Now, */ /* since we have 4 bits in front of the fraction to work with,*/ /* we won't lose any bits. 
We set the bit to the left of the */ /* fraction which is the implicit "1" in the ieee fraction. We*/ /* then adjust the fraction to account for the loss of bits */ /* when going to a hex exponent. This adjustment will never */ /* involve shifting by more than 3 positions so no bits are */ /* lost. */ /* Get ieee number less the exponent into the first half of */ /* the ibm number */ xport1 = ieee1 & 0x000fffff; /* get the second half of the number into the second half of */ /* the ibm number and see if both halves are 0. If so, ibm is */ /* also 0 and we just return */ if ((!(xport2 = ieee2)) && !ieee1) { ieee_exp = 0; goto doret; } /* get the actual exponent value out of the ieee number. The */ /* ibm fraction is a power of 16 and the ieee fraction a power*/ /* of 2 (16 ** n == 2 ** 4n). Save the low order 2 bits since */ /* they will get lost when we divide the exponent by 4 (right */ /* shift by 2) and we will have to shift the fraction by the */ /* appropriate number of bits to keep the proper magnitude. */ shift = (int) (ieee_exp = (int)(((ieee1 >> 16) & 0x7ff0) >> 4) - 1023) & 3; /* the ieee format has an implied "1" immdeiately to the left */ /* of the binary point. Show it in here. */ xport1 |= 0x00100000; if (shift) { /* set the first half of the ibm number by shifting it left */ /* the appropriate number of bits and oring in the bits */ /* from the lower half that would have been shifted in (if */ /* we could shift a double). The shift count can never */ /* exceed 3, so all we care about are the high order 3 */ /* bits. We don't want sign extention so make sure it's an */ /* unsigned char. We'll shift either5, 6, or 7 places to */ /* keep 3, 2, or 1 bits. After that, shift the second half */ /* of the number the right number of places. We always get */ /* zero fill on left shifts. */ xport1 = (xport1 << shift) | ((unsigned char) (((ieee2 >> 24) & 0xE0) >> (5 + (3 - shift)))); xport2 <<= shift; } /* Now set the ibm exponent and the sign of the fraction. 
The */ /* power of 2 ieee exponent must be divided by 4 and made */ /* excess 64 (we add 65 here because of the poisition of the */ /* fraction bits, essentially 4 positions lower than they */ /* should be so we incrment the ibm exponent). */ xport1 |= (((ieee_exp >>2) + 65) | ((ieee1 >> 24) & 0x80)) << 24; /* If the ieee exponent is greater than 248 or less than -260, */ /* then it cannot fit in the ibm exponent field. Send back the */ /* appropriate flag. */ doret: if (ieee_exp < -260) { memset(xport,0x00,8); } else if (ieee_exp > 248) { memset(xport+1,0xFF,7); *xport = 0x7F | ((ieee1 >> 24) & 0x80); } else { memreverse(&xport1,sizeof(uint32_t)); memcpy(xport,&xport1,sizeof(uint32_t)); memreverse(&xport2,sizeof(uint32_t)); memcpy(xport+4,&xport2,sizeof(uint32_t)); } return; } ReadStat-1.1.7/src/sas/ieee.h000066400000000000000000000002621410722155500156660ustar00rootroot00000000000000#define CN_TYPE_NATIVE 0 #define CN_TYPE_XPORT 1 #define CN_TYPE_IEEEB 2 #define CN_TYPE_IEEEL 3 int cnxptiee(const void *from_bytes, int fromtype, void *to_bytes, int totype); ReadStat-1.1.7/src/sas/readstat_sas.c000066400000000000000000000435141410722155500174360ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include "readstat_sas.h" #include "../readstat_iconv.h" #include "../readstat_convert.h" #include "../readstat_writer.h" #define SAS_FILE_HEADER_SIZE_32BIT 1024 #define SAS_FILE_HEADER_SIZE_64BIT 8192 #define SAS_DEFAULT_PAGE_SIZE 4096 #define SAS_DEFAULT_STRING_ENCODING "WINDOWS-1252" unsigned char sas7bdat_magic_number[32] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xea, 0x81, 0x60, 0xb3, 0x14, 0x11, 0xcf, 0xbd, 0x92, 0x08, 0x00, 0x09, 0xc7, 0x31, 0x8c, 0x18, 0x1f, 0x10, 0x11 }; unsigned char sas7bcat_magic_number[32] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xea, 0x81, 0x63, 0xb3, 0x14, 0x11, 0xcf, 0xbd, 0x92, 0x08, 0x00, 0x09, 0xc7, 0x31, 
0x8c, 0x18, 0x1f, 0x10, 0x11 }; /* This table is cobbled together from extant files and: * https://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm * https://support.sas.com/documentation/onlinedoc/dfdmstudio/2.6/dmpdmsug/Content/dfU_Encodings_SAS.html * * Discrepancies form the official documentation are noted with a comment. It * appears that in some instances that SAS software uses a newer encoding than * what's listed in the docs. In these cases the encoding used by ReadStat * represents the author's best guess. */ static readstat_charset_entry_t _charset_table[] = { { .code = 0, .name = SAS_DEFAULT_STRING_ENCODING }, { .code = 20, .name = "UTF-8" }, { .code = 28, .name = "US-ASCII" }, { .code = 29, .name = "ISO-8859-1" }, { .code = 30, .name = "ISO-8859-2" }, { .code = 31, .name = "ISO-8859-3" }, { .code = 32, .name = "ISO-8859-4" }, { .code = 33, .name = "ISO-8859-5" }, { .code = 34, .name = "ISO-8859-6" }, { .code = 35, .name = "ISO-8859-7" }, { .code = 36, .name = "ISO-8859-8" }, { .code = 37, .name = "ISO-8859-9" }, { .code = 39, .name = "ISO-8859-11" }, { .code = 40, .name = "ISO-8859-15" }, { .code = 41, .name = "CP437" }, { .code = 42, .name = "CP850" }, { .code = 43, .name = "CP852" }, { .code = 44, .name = "CP857" }, { .code = 45, .name = "CP858" }, { .code = 46, .name = "CP862" }, { .code = 47, .name = "CP864" }, { .code = 48, .name = "CP865" }, { .code = 49, .name = "CP866" }, { .code = 50, .name = "CP869" }, { .code = 51, .name = "CP874" }, { .code = 52, .name = "CP921" }, { .code = 53, .name = "CP922" }, { .code = 54, .name = "CP1129" }, { .code = 55, .name = "CP720" }, { .code = 56, .name = "CP737" }, { .code = 57, .name = "CP775" }, { .code = 58, .name = "CP860" }, { .code = 59, .name = "CP863" }, { .code = 60, .name = "WINDOWS-1250" }, { .code = 61, .name = "WINDOWS-1251" }, { .code = 62, .name = "WINDOWS-1252" }, { .code = 63, .name = "WINDOWS-1253" }, { .code = 64, .name = "WINDOWS-1254" }, { .code = 
65, .name = "WINDOWS-1255" }, { .code = 66, .name = "WINDOWS-1256" }, { .code = 67, .name = "WINDOWS-1257" }, { .code = 68, .name = "WINDOWS-1258" }, { .code = 69, .name = "MACROMAN" }, { .code = 70, .name = "MACARABIC" }, { .code = 71, .name = "MACHEBREW" }, { .code = 72, .name = "MACGREEK" }, { .code = 73, .name = "MACTHAI" }, { .code = 75, .name = "MACTURKISH" }, { .code = 76, .name = "MACUKRAINE" }, { .code = 118, .name = "CP950" }, { .code = 119, .name = "EUC-TW" }, { .code = 123, .name = "BIG-5" }, { .code = 125, .name = "GB18030" }, // "euc-cn" in SAS { .code = 126, .name = "WINDOWS-936" }, // "zwin" { .code = 128, .name = "CP1381" }, // "zpce" { .code = 134, .name = "EUC-JP" }, { .code = 136, .name = "CP949" }, { .code = 137, .name = "CP942" }, { .code = 138, .name = "CP932" }, // "shift-jis" in SAS { .code = 140, .name = "EUC-KR" }, { .code = 141, .name = "CP949" }, // "kpce" { .code = 142, .name = "CP949" }, // "kwin" { .code = 163, .name = "MACICELAND" }, { .code = 167, .name = "ISO-2022-JP" }, { .code = 168, .name = "ISO-2022-KR" }, { .code = 169, .name = "ISO-2022-CN" }, { .code = 172, .name = "ISO-2022-CN-EXT" }, { .code = 204, .name = SAS_DEFAULT_STRING_ENCODING }, // "any" in SAS { .code = 205, .name = "GB18030" }, { .code = 227, .name = "ISO-8859-14" }, { .code = 242, .name = "ISO-8859-13" }, { .code = 245, .name = "MACCROATIAN" }, { .code = 246, .name = "MACCYRILLIC" }, { .code = 247, .name = "MACROMANIA" }, { .code = 248, .name = "SHIFT_JISX0213" }, }; static time_t sas_epoch() { return - 3653 * 86400; // seconds between 01-01-1960 and 01-01-1970 } static time_t sas_convert_time(double time, time_t epoch) { time += epoch; if (isnan(time)) return 0; if (time > (double)LONG_MAX) return LONG_MAX; if (time < (double)LONG_MIN) return LONG_MIN; return time; } uint64_t sas_read8(const char *data, int bswap) { uint64_t tmp; memcpy(&tmp, data, 8); return bswap ? 
byteswap8(tmp) : tmp; } uint32_t sas_read4(const char *data, int bswap) { uint32_t tmp; memcpy(&tmp, data, 4); return bswap ? byteswap4(tmp) : tmp; } uint16_t sas_read2(const char *data, int bswap) { uint16_t tmp; memcpy(&tmp, data, 2); return bswap ? byteswap2(tmp) : tmp; } size_t sas_subheader_remainder(size_t len, size_t signature_len) { return len - (4+2*signature_len); } readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo, readstat_error_handler error_handler, void *user_ctx) { sas_header_start_t header_start; sas_header_end_t header_end; int retval = READSTAT_OK; char error_buf[1024]; time_t epoch = sas_epoch(); if (io->read(&header_start, sizeof(sas_header_start_t), io->io_ctx) < sizeof(sas_header_start_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 && memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) { hinfo->pad1 = 4; } if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) { hinfo->u64 = 1; } int bswap = 0; if (header_start.endian == SAS_ENDIAN_BIG) { bswap = machine_is_little_endian(); hinfo->little_endian = 0; } else if (header_start.endian == SAS_ENDIAN_LITTLE) { bswap = !machine_is_little_endian(); hinfo->little_endian = 1; } else { retval = READSTAT_ERROR_PARSE; goto cleanup; } int i; for (i=0; iencoding = _charset_table[i].name; break; } } if (hinfo->encoding == NULL) { if (error_handler) { snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d", header_start.encoding); error_handler(error_buf, user_ctx); } retval = READSTAT_ERROR_UNSUPPORTED_CHARSET; goto cleanup; } memcpy(hinfo->table_name, header_start.table_name, sizeof(header_start.table_name)); if (io->seek(hinfo->pad1, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } double creation_time, 
modification_time; if (io->read(&creation_time, sizeof(double), io->io_ctx) < sizeof(double)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (bswap) creation_time = byteswap_double(creation_time); if (io->read(&modification_time, sizeof(double), io->io_ctx) < sizeof(double)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (bswap) modification_time = byteswap_double(modification_time); hinfo->creation_time = sas_convert_time(creation_time, epoch); hinfo->modification_time = sas_convert_time(modification_time, epoch); if (io->seek(16, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } uint32_t header_size, page_size; if (io->read(&header_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (io->read(&page_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } hinfo->header_size = bswap ? byteswap4(header_size) : header_size; hinfo->page_size = bswap ? byteswap4(page_size) : page_size; if (hinfo->header_size < 1024 || hinfo->page_size < 1024) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (hinfo->header_size > (1<<24) || hinfo->page_size > (1<<24)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (hinfo->u64) { hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_64BIT; hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_64BIT; } else { hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_32BIT; hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_32BIT; } if (hinfo->u64) { uint64_t page_count; if (io->read(&page_count, sizeof(uint64_t), io->io_ctx) < sizeof(uint64_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } hinfo->page_count = bswap ? byteswap8(page_count) : page_count; } else { uint32_t page_count; if (io->read(&page_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } hinfo->page_count = bswap ? 
byteswap4(page_count) : page_count; } if (hinfo->page_count > (1<<24)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (io->seek(8, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; if (error_handler) { snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d", 8); error_handler(error_buf, user_ctx); } goto cleanup; } if (io->read(&header_end, sizeof(sas_header_end_t), io->io_ctx) < sizeof(sas_header_end_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } char major; int minor, revision; if (sscanf(header_end.release, "%c.%04dM%1d", &major, &minor, &revision) != 3) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (major >= '1' && major <= '9') { hinfo->major_version = major - '0'; } else if (major == 'V') { // It appears that SAS Visual Forecaster reports the major version as "V" // Treat it as version 9 for all intents and purposes hinfo->major_version = 9; } else { retval = READSTAT_ERROR_PARSE; goto cleanup; } hinfo->minor_version = minor; hinfo->revision = revision; if ((major == '8' || major == '9') && minor == 0 && revision == 0) { /* A bit of a hack, but most SAS installations are running a minor update */ hinfo->vendor = READSTAT_VENDOR_STAT_TRANSFER; } else { hinfo->vendor = READSTAT_VENDOR_SAS; } if (io->seek(hinfo->header_size, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; if (error_handler) { snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek to position %" PRId64, hinfo->header_size); error_handler(error_buf, user_ctx); } goto cleanup; } cleanup: return retval; } readstat_error_t sas_write_header(readstat_writer_t *writer, sas_header_info_t *hinfo, sas_header_start_t header_start) { readstat_error_t retval = READSTAT_OK; time_t epoch = sas_epoch(); memset(header_start.table_name, ' ', sizeof(header_start.table_name)); size_t table_name_len = strlen(writer->table_name); if (table_name_len > sizeof(header_start.table_name)) table_name_len = 
sizeof(header_start.table_name); if (table_name_len) { memcpy(header_start.table_name, writer->table_name, table_name_len); } else { memcpy(header_start.table_name, "DATASET", sizeof("DATASET")-1); } retval = readstat_write_bytes(writer, &header_start, sizeof(sas_header_start_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_zeros(writer, hinfo->pad1); if (retval != READSTAT_OK) goto cleanup; double creation_time = hinfo->creation_time - epoch; retval = readstat_write_bytes(writer, &creation_time, sizeof(double)); if (retval != READSTAT_OK) goto cleanup; double modification_time = hinfo->modification_time - epoch; retval = readstat_write_bytes(writer, &modification_time, sizeof(double)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_zeros(writer, 16); if (retval != READSTAT_OK) goto cleanup; uint32_t header_size = hinfo->header_size; uint32_t page_size = hinfo->page_size; retval = readstat_write_bytes(writer, &header_size, sizeof(uint32_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &page_size, sizeof(uint32_t)); if (retval != READSTAT_OK) goto cleanup; if (hinfo->u64) { uint64_t page_count = hinfo->page_count; retval = readstat_write_bytes(writer, &page_count, sizeof(uint64_t)); } else { uint32_t page_count = hinfo->page_count; retval = readstat_write_bytes(writer, &page_count, sizeof(uint32_t)); } if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_zeros(writer, 8); if (retval != READSTAT_OK) goto cleanup; sas_header_end_t header_end = { .host = "9.0401M6Linux" }; char release[sizeof(header_end.release)+1] = { 0 }; snprintf(release, sizeof(release), "%1d.%04dM0", (unsigned int)writer->version % 10, 101); memcpy(header_end.release, release, sizeof(header_end.release)); retval = readstat_write_bytes(writer, &header_end, sizeof(sas_header_end_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_zeros(writer, hinfo->header_size-writer->bytes_written); if (retval 
!= READSTAT_OK) goto cleanup; cleanup: return retval; } sas_header_info_t *sas_header_info_init(readstat_writer_t *writer, int is_64bit) { sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t)); hinfo->creation_time = writer->timestamp; hinfo->modification_time = writer->timestamp; hinfo->page_size = SAS_DEFAULT_PAGE_SIZE; hinfo->u64 = !!is_64bit; if (hinfo->u64) { hinfo->header_size = SAS_FILE_HEADER_SIZE_64BIT; hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_64BIT; hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_64BIT; } else { hinfo->header_size = SAS_FILE_HEADER_SIZE_32BIT; hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_32BIT; hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_32BIT; } return hinfo; } readstat_error_t sas_fill_page(readstat_writer_t *writer, sas_header_info_t *hinfo) { if ((writer->bytes_written - hinfo->header_size) % hinfo->page_size) { size_t num_zeros = (hinfo->page_size - (writer->bytes_written - hinfo->header_size) % hinfo->page_size); return readstat_write_zeros(writer, num_zeros); } return READSTAT_OK; } readstat_error_t sas_validate_name(const char *name, size_t max_len) { int j; for (j=0; name[j]; j++) { if (name[j] != '_' && !(name[j] >= 'a' && name[j] <= 'z') && !(name[j] >= 'A' && name[j] <= 'Z') && !(name[j] >= '0' && name[j] <= '9')) { return READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER; } } char first_char = name[0]; if (!first_char) return READSTAT_ERROR_NAME_IS_ZERO_LENGTH; if (first_char != '_' && !(first_char >= 'a' && first_char <= 'z') && !(first_char >= 'A' && first_char <= 'Z')) { return READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER; } if (strcmp(name, "_N_") == 0 || strcmp(name, "_ERROR_") == 0 || strcmp(name, "_NUMERIC_") == 0 || strcmp(name, "_CHARACTER_") == 0 || strcmp(name, "_ALL_") == 0) { return READSTAT_ERROR_NAME_IS_RESERVED_WORD; } if (strlen(name) > max_len) return READSTAT_ERROR_NAME_IS_TOO_LONG; return READSTAT_OK; } readstat_error_t sas_validate_variable(const 
readstat_variable_t *variable) { return sas_validate_name(readstat_variable_get_name(variable), 32); } readstat_error_t sas_validate_tag(char tag) { if (tag == '_' || (tag >= 'A' && tag <= 'Z')) return READSTAT_OK; return READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE; } void sas_assign_tag(readstat_value_t *value, uint8_t tag) { /* We accommodate two tag schemes. In the first, the tag is an ASCII code * given by uint8_t tag above. System missing is represented by an ASCII * period. In the second scheme, (tag-2) is an offset from 'A', except when * tag == 0, in which case it represents an underscore, or tag == 1, in * which case it represents system-missing. */ if (tag == 0) { tag = '_'; } else if (tag >= 2 && tag < 28) { tag = 'A' + (tag - 2); } if (sas_validate_tag(tag) == READSTAT_OK) { value->tag = tag; value->is_tagged_missing = 1; } else { value->tag = 0; value->is_system_missing = 1; } } ReadStat-1.1.7/src/sas/readstat_sas.h000066400000000000000000000077311410722155500174440ustar00rootroot00000000000000 #include "../readstat.h" #include "../readstat_bits.h" #pragma pack(push, 1) typedef struct sas_header_start_s { unsigned char magic[32]; unsigned char a2; unsigned char mystery1[2]; unsigned char a1; unsigned char mystery2[1]; unsigned char endian; unsigned char mystery3[1]; char file_format; unsigned char mystery4[30]; unsigned char encoding; unsigned char mystery5[13]; char file_type[8]; char table_name[32]; unsigned char mystery6[32]; char file_info[8]; } sas_header_start_t; typedef struct sas_header_end_s { char release[8]; char host[16]; char version[16]; char os_vendor[16]; char os_name[16]; char extra[48]; } sas_header_end_t; #pragma pack(pop) typedef struct sas_header_info_s { int little_endian; int u64; int vendor; int major_version; int minor_version; int revision; int pad1; int64_t page_size; int64_t page_header_size; int64_t subheader_pointer_size; int64_t page_count; int64_t header_size; time_t creation_time; time_t modification_time; char 
table_name[32]; char file_label[256]; char *encoding; } sas_header_info_t; enum { READSTAT_VENDOR_STAT_TRANSFER, READSTAT_VENDOR_SAS }; typedef struct sas_text_ref_s { uint16_t index; uint16_t offset; uint16_t length; } sas_text_ref_t; #define SAS_ENDIAN_BIG 0x00 #define SAS_ENDIAN_LITTLE 0x01 #define SAS_FILE_FORMAT_UNIX '1' #define SAS_FILE_FORMAT_WINDOWS '2' #define SAS_ALIGNMENT_OFFSET_0 0x22 #define SAS_ALIGNMENT_OFFSET_4 0x33 #define SAS_COLUMN_TYPE_NUM 0x01 #define SAS_COLUMN_TYPE_CHR 0x02 #define SAS_SUBHEADER_SIGNATURE_ROW_SIZE 0xF7F7F7F7 #define SAS_SUBHEADER_SIGNATURE_COLUMN_SIZE 0xF6F6F6F6 #define SAS_SUBHEADER_SIGNATURE_COUNTS 0xFFFFFC00 #define SAS_SUBHEADER_SIGNATURE_COLUMN_FORMAT 0xFFFFFBFE #define SAS_SUBHEADER_SIGNATURE_COLUMN_MASK 0xFFFFFFF8 /* Seen in the wild: FA (unknown), F8 (locale?) */ #define SAS_SUBHEADER_SIGNATURE_COLUMN_ATTRS 0xFFFFFFFC #define SAS_SUBHEADER_SIGNATURE_COLUMN_TEXT 0xFFFFFFFD #define SAS_SUBHEADER_SIGNATURE_COLUMN_LIST 0xFFFFFFFE #define SAS_SUBHEADER_SIGNATURE_COLUMN_NAME 0xFFFFFFFF #define SAS_PAGE_TYPE_META 0x0000 #define SAS_PAGE_TYPE_DATA 0x0100 #define SAS_PAGE_TYPE_MIX 0x0200 #define SAS_PAGE_TYPE_AMD 0x0400 #define SAS_PAGE_TYPE_MASK 0x0F00 #define SAS_PAGE_TYPE_META2 0x4000 #define SAS_PAGE_TYPE_COMP 0x9000 #define SAS_SUBHEADER_POINTER_SIZE_32BIT 12 #define SAS_SUBHEADER_POINTER_SIZE_64BIT 24 #define SAS_PAGE_HEADER_SIZE_32BIT 24 #define SAS_PAGE_HEADER_SIZE_64BIT 40 #define SAS_COMPRESSION_NONE 0x00 #define SAS_COMPRESSION_TRUNC 0x01 #define SAS_COMPRESSION_ROW 0x04 #define SAS_COMPRESSION_SIGNATURE_RLE "SASYZCRL" #define SAS_COMPRESSION_SIGNATURE_RDC "SASYZCR2" #define SAS_DEFAULT_FILE_VERSION 9 extern unsigned char sas7bdat_magic_number[32]; extern unsigned char sas7bcat_magic_number[32]; uint64_t sas_read8(const char *data, int bswap); uint32_t sas_read4(const char *data, int bswap); uint16_t sas_read2(const char *data, int bswap); readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *ctx, 
readstat_error_handler error_handler, void *user_ctx); size_t sas_subheader_remainder(size_t len, size_t signature_len); sas_header_info_t *sas_header_info_init(readstat_writer_t *writer, int is_64bit); readstat_error_t sas_write_header(readstat_writer_t *writer, sas_header_info_t *hinfo, sas_header_start_t header_start); readstat_error_t sas_fill_page(readstat_writer_t *writer, sas_header_info_t *hinfo); readstat_error_t sas_validate_variable(const readstat_variable_t *variable); readstat_error_t sas_validate_name(const char *name, size_t max_len); readstat_error_t sas_validate_tag(char tag); void sas_assign_tag(readstat_value_t *value, uint8_t tag); ReadStat-1.1.7/src/sas/readstat_sas7bcat_read.c000066400000000000000000000414551410722155500213540ustar00rootroot00000000000000#include #include #include #include #include #include "readstat_sas.h" #include "../readstat_iconv.h" #include "../readstat_convert.h" #include "../readstat_malloc.h" #define SAS_CATALOG_FIRST_INDEX_PAGE 1 #define SAS_CATALOG_USELESS_PAGES 3 typedef struct sas7bcat_ctx_s { readstat_metadata_handler metadata_handler; readstat_value_label_handler value_label_handler; void *user_ctx; readstat_io_t *io; int u64; int pad1; int bswap; int64_t xlsr_size; int64_t xlsr_offset; int64_t xlsr_O_offset; int64_t page_count; int64_t page_size; int64_t header_size; uint64_t *block_pointers; int block_pointers_used; int block_pointers_capacity; const char *input_encoding; const char *output_encoding; iconv_t converter; } sas7bcat_ctx_t; static void sas7bcat_ctx_free(sas7bcat_ctx_t *ctx) { if (ctx->converter) iconv_close(ctx->converter); if (ctx->block_pointers) free(ctx->block_pointers); free(ctx); } static readstat_error_t sas7bcat_parse_value_labels(const char *value_start, size_t value_labels_len, int label_count_used, int label_count_capacity, const char *name, sas7bcat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int i; const char *lbp1 = value_start; uint32_t *value_offset = 
readstat_calloc(label_count_used, sizeof(uint32_t)); /* Doubles appear to be stored as big-endian, always */ int bswap_doubles = machine_is_little_endian(); int is_string = (name[0] == '$'); char *label = NULL; if (value_offset == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } /* Pass 1 -- find out the offset of the labels */ for (i=0; i value_labels_len || lbp1[2] < 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (ipad1+4] - value_start > value_labels_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } uint32_t label_pos = sas_read4(&lbp1[10+ctx->pad1], ctx->bswap); if (label_pos >= label_count_used) { retval = READSTAT_ERROR_PARSE; goto cleanup; } value_offset[label_pos] = lbp1 - value_start; } lbp1 += 6 + lbp1[2]; } const char *lbp2 = lbp1; /* Pass 2 -- parse pairs of values & labels */ for (i=0; i value_labels_len || &lbp2[10] - value_start > value_labels_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } readstat_value_t value = { .type = is_string ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE }; char string_val[4*16+1]; if (is_string) { size_t value_entry_len = 6 + lbp1[2]; retval = readstat_convert(string_val, sizeof(string_val), &lbp1[value_entry_len-16], 16, ctx->converter); if (retval != READSTAT_OK) goto cleanup; value.v.string_value = string_val; } else { uint64_t val = sas_read8(&lbp1[22], bswap_doubles); double dval = NAN; if ((val | 0xFF0000000000) == 0xFFFFFFFFFFFF) { sas_assign_tag(&value, (val >> 40)); } else { memcpy(&dval, &val, 8); dval *= -1.0; } value.v.double_value = dval; } size_t label_len = sas_read2(&lbp2[8], ctx->bswap); if (&lbp2[10] + label_len - value_start > value_labels_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (ctx->value_label_handler) { label = realloc(label, 4 * label_len + 1); retval = readstat_convert(label, 4 * label_len + 1, &lbp2[10], label_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; if (ctx->value_label_handler(name, value, label, ctx->user_ctx) != 
READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } lbp2 += 8 + 2 + label_len + 1; } cleanup: free(label); free(value_offset); return retval; } static readstat_error_t sas7bcat_parse_block(const char *data, size_t data_size, sas7bcat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; size_t pad = 0; int label_count_capacity = 0; int label_count_used = 0; int payload_offset = 106; char name[4*32+1]; if (data_size < payload_offset) goto cleanup; pad = (data[2] & 0x08) ? 4 : 0; // might be 0x10, not sure if (ctx->u64) { label_count_capacity = sas_read4(&data[42+pad], ctx->bswap); label_count_used = sas_read4(&data[50+pad], ctx->bswap); payload_offset += 32; } else { label_count_capacity = sas_read4(&data[38+pad], ctx->bswap); label_count_used = sas_read4(&data[42+pad], ctx->bswap); } if ((retval = readstat_convert(name, sizeof(name), &data[8], 8, ctx->converter)) != READSTAT_OK) goto cleanup; if (pad) { pad += 16; } if ((data[2] & 0x80) && !ctx->u64) { // has long name if (data_size < payload_offset + pad + 32) goto cleanup; retval = readstat_convert(name, sizeof(name), &data[payload_offset+pad], 32, ctx->converter); if (retval != READSTAT_OK) goto cleanup; pad += 32; } if (data_size < payload_offset + pad) goto cleanup; if ((retval = sas7bcat_parse_value_labels(&data[payload_offset+pad], data_size - payload_offset - pad, label_count_used, label_count_capacity, name, ctx)) != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t sas7bcat_augment_index(const char *index, size_t len, sas7bcat_ctx_t *ctx) { const char *xlsr = index; readstat_error_t retval = READSTAT_OK; while (xlsr + ctx->xlsr_size <= index + len) { if (memcmp(xlsr, "XLSR", 4) != 0) // some block pointers seem to have 8 bytes of extra padding xlsr += 8; if (memcmp(xlsr, "XLSR", 4) != 0) break; if (xlsr[ctx->xlsr_O_offset] == 'O') { uint32_t page = 0, pos = 0; if (ctx->u64) { page = sas_read4(&xlsr[8], ctx->bswap); pos = sas_read4(&xlsr[16], ctx->bswap); 
} else { page = sas_read2(&xlsr[4], ctx->bswap); pos = sas_read2(&xlsr[8], ctx->bswap); } ctx->block_pointers[ctx->block_pointers_used++] = ((uint64_t)page << 32) + pos; } if (ctx->block_pointers_used == ctx->block_pointers_capacity) { ctx->block_pointers = readstat_realloc(ctx->block_pointers, (ctx->block_pointers_capacity *= 2) * sizeof(uint64_t)); if (ctx->block_pointers == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } } xlsr += ctx->xlsr_size; } cleanup: return retval; } static int compare_block_pointers(const void *elem1, const void *elem2) { uint64_t v1 = *(const uint64_t *)elem1; uint64_t v2 = *(const uint64_t *)elem2; return v1 - v2; } static void sas7bcat_sort_index(sas7bcat_ctx_t *ctx) { if (ctx->block_pointers_used == 0) return; int i; for (i=1; iblock_pointers_used; i++) { if (ctx->block_pointers[i] < ctx->block_pointers[i-1]) { qsort(ctx->block_pointers, ctx->block_pointers_used, sizeof(uint64_t), &compare_block_pointers); break; } } } static void sas7bcat_uniq_index(sas7bcat_ctx_t *ctx) { if (ctx->block_pointers_used == 0) return; int i; int out_i = 1; for (i=1; iblock_pointers_used; i++) { if (ctx->block_pointers[i] != ctx->block_pointers[i-1]) { if (out_i != i) { ctx->block_pointers[out_i] = ctx->block_pointers[i]; } out_i++; } } ctx->block_pointers_used = out_i; } static int sas7bcat_block_size(int start_page, int start_page_pos, sas7bcat_ctx_t *ctx, readstat_error_t *outError) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; int next_page = start_page; int next_page_pos = start_page_pos; int link_count = 0; int buffer_len = 0; int chain_link_len = 0; char chain_link[32]; int chain_link_header_len = 16; if (ctx->u64) { chain_link_header_len = 32; } // calculate buffer size needed while (next_page > 0 && next_page_pos > 0 && next_page <= ctx->page_count && link_count++ < ctx->page_count) { if (io->seek(ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = 
READSTAT_ERROR_SEEK; goto cleanup; } if (io->read(chain_link, chain_link_header_len, io->io_ctx) < chain_link_header_len) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->u64) { next_page = sas_read4(&chain_link[0], ctx->bswap); next_page_pos = sas_read2(&chain_link[8], ctx->bswap); chain_link_len = sas_read2(&chain_link[10], ctx->bswap); } else { next_page = sas_read4(&chain_link[0], ctx->bswap); next_page_pos = sas_read2(&chain_link[4], ctx->bswap); chain_link_len = sas_read2(&chain_link[6], ctx->bswap); } buffer_len += chain_link_len; } cleanup: if (outError) *outError = retval; return retval == READSTAT_OK ? buffer_len : -1; } static readstat_error_t sas7bcat_read_block(char *buffer, size_t buffer_len, int start_page, int start_page_pos, sas7bcat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; int next_page = start_page; int next_page_pos = start_page_pos; int link_count = 0; int chain_link_len = 0; int buffer_offset = 0; char chain_link[32]; int chain_link_header_len = 16; if (ctx->u64) { chain_link_header_len = 32; } while (next_page > 0 && next_page_pos > 0 && next_page <= ctx->page_count && link_count++ < ctx->page_count) { if (io->seek(ctx->header_size+(next_page-1)*ctx->page_size+next_page_pos, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->read(chain_link, chain_link_header_len, io->io_ctx) < chain_link_header_len) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->u64) { next_page = sas_read4(&chain_link[0], ctx->bswap); next_page_pos = sas_read2(&chain_link[8], ctx->bswap); chain_link_len = sas_read2(&chain_link[10], ctx->bswap); } else { next_page = sas_read4(&chain_link[0], ctx->bswap); next_page_pos = sas_read2(&chain_link[4], ctx->bswap); chain_link_len = sas_read2(&chain_link[6], ctx->bswap); } if (buffer_offset + chain_link_len > buffer_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (io->read(buffer + buffer_offset, chain_link_len, io->io_ctx) 
< chain_link_len) { retval = READSTAT_ERROR_READ; goto cleanup; } buffer_offset += chain_link_len; } cleanup: return retval; } readstat_error_t readstat_parse_sas7bcat(readstat_parser_t *parser, const char *path, void *user_ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = parser->io; int64_t i; char *page = NULL; char *buffer = NULL; sas7bcat_ctx_t *ctx = calloc(1, sizeof(sas7bcat_ctx_t)); sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t)); ctx->block_pointers = malloc((ctx->block_pointers_capacity = 200) * sizeof(uint64_t)); ctx->value_label_handler = parser->handlers.value_label; ctx->metadata_handler = parser->handlers.metadata; ctx->input_encoding = parser->input_encoding; ctx->output_encoding = parser->output_encoding; ctx->user_ctx = user_ctx; ctx->io = io; if (io->open(path, io->io_ctx) == -1) { retval = READSTAT_ERROR_OPEN; goto cleanup; } if ((retval = sas_read_header(io, hinfo, parser->handlers.error, user_ctx)) != READSTAT_OK) { goto cleanup; } ctx->u64 = hinfo->u64; ctx->pad1 = hinfo->pad1; ctx->bswap = machine_is_little_endian() ^ hinfo->little_endian; ctx->header_size = hinfo->header_size; ctx->page_count = hinfo->page_count; ctx->page_size = hinfo->page_size; if (ctx->input_encoding == NULL) { ctx->input_encoding = hinfo->encoding; } ctx->xlsr_size = 212 + ctx->pad1; ctx->xlsr_offset = 856 + 2 * ctx->pad1; ctx->xlsr_O_offset = 50 + ctx->pad1; if (ctx->u64) { ctx->xlsr_offset += 144; ctx->xlsr_size += 72; ctx->xlsr_O_offset += 24; } if (ctx->input_encoding && ctx->output_encoding && strcmp(ctx->input_encoding, ctx->output_encoding) != 0) { iconv_t converter = iconv_open(ctx->output_encoding, ctx->input_encoding); if (converter == (iconv_t)-1) { retval = READSTAT_ERROR_UNSUPPORTED_CHARSET; goto cleanup; } ctx->converter = converter; } if (ctx->metadata_handler) { char table_name[4*32+1]; readstat_metadata_t metadata = { .file_encoding = ctx->input_encoding, /* orig encoding? 
*/ .modified_time = hinfo->modification_time, .creation_time = hinfo->creation_time, .file_format_version = hinfo->major_version, .endianness = hinfo->little_endian ? READSTAT_ENDIAN_LITTLE : READSTAT_ENDIAN_BIG, .is64bit = ctx->u64 }; retval = readstat_convert(table_name, sizeof(table_name), hinfo->table_name, sizeof(hinfo->table_name), ctx->converter); if (retval != READSTAT_OK) goto cleanup; metadata.table_name = table_name; if (ctx->metadata_handler(&metadata, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } if ((page = readstat_malloc(ctx->page_size)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (io->seek(ctx->header_size+SAS_CATALOG_FIRST_INDEX_PAGE*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) { retval = READSTAT_ERROR_READ; goto cleanup; } retval = sas7bcat_augment_index(&page[ctx->xlsr_offset], ctx->page_size - ctx->xlsr_offset, ctx); if (retval != READSTAT_OK) goto cleanup; // Pass 1 -- find the XLSR entries for (i=SAS_CATALOG_USELESS_PAGES; ipage_count; i++) { if (io->seek(ctx->header_size+i*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->read(page, ctx->page_size, io->io_ctx) < ctx->page_size) { retval = READSTAT_ERROR_READ; goto cleanup; } if (memcmp(&page[16], "XLSR", sizeof("XLSR")-1) == 0) { retval = sas7bcat_augment_index(&page[16], ctx->page_size - 16, ctx); if (retval != READSTAT_OK) goto cleanup; } } sas7bcat_sort_index(ctx); sas7bcat_uniq_index(ctx); // Pass 2 -- look up the individual block pointers for (i=0; iblock_pointers_used; i++) { int start_page = ctx->block_pointers[i] >> 32; int start_page_pos = (ctx->block_pointers[i]) & 0xFFFF; int buffer_len = sas7bcat_block_size(start_page, start_page_pos, ctx, &retval); if (buffer_len == -1) { goto cleanup; } else if (buffer_len == 0) { continue; } if 
((buffer = readstat_realloc(buffer, buffer_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if ((retval = sas7bcat_read_block(buffer, buffer_len, start_page, start_page_pos, ctx)) != READSTAT_OK) goto cleanup; if ((retval = sas7bcat_parse_block(buffer, buffer_len, ctx)) != READSTAT_OK) goto cleanup; } cleanup: io->close(io->io_ctx); if (page) free(page); if (buffer) free(buffer); if (ctx) sas7bcat_ctx_free(ctx); if (hinfo) free(hinfo); return retval; } ReadStat-1.1.7/src/sas/readstat_sas7bcat_write.c000066400000000000000000000146151410722155500215710ustar00rootroot00000000000000 #include #include #include #include #include "../readstat.h" #include "../readstat_writer.h" #include "readstat_sas.h" #include "readstat_sas_rle.h" typedef struct sas7bcat_block_s { size_t len; char data[1]; // Flexible array; use [1] for C++-98 compatibility } sas7bcat_block_t; static sas7bcat_block_t *sas7bcat_block_for_label_set(readstat_label_set_t *r_label_set) { size_t len = 0; size_t name_len = strlen(r_label_set->name); int j; char name[32]; len += 106; if (name_len > 8) { len += 32; // long name if (name_len > 32) { name_len = 32; } } memcpy(&name[0], r_label_set->name, name_len); for (j=0; jvalue_labels_count; j++) { readstat_value_label_t *value_label = readstat_get_value_label(r_label_set, j); len += 30; // Value: 14-byte header + 16-byte padded value len += 8 + 2 + value_label->label_len + 1; } sas7bcat_block_t *block = calloc(1, sizeof(sas7bcat_block_t) + len); block->len = len; off_t begin = 106; int32_t count = r_label_set->value_labels_count; memcpy(&block->data[38], &count, sizeof(int32_t)); memcpy(&block->data[42], &count, sizeof(int32_t)); if (name_len > 8) { block->data[2] = (char)0x80; memcpy(&block->data[8], name, 8); memset(&block->data[106], ' ', 32); memcpy(&block->data[106], name, name_len); begin += 32; } else { memset(&block->data[8], ' ', 8); memcpy(&block->data[8], name, name_len); } char *lbp1 = &block->data[begin]; char *lbp2 = 
&block->data[begin+r_label_set->value_labels_count*30]; for (j=0; jvalue_labels_count; j++) { readstat_value_label_t *value_label = readstat_get_value_label(r_label_set, j); lbp1[2] = 24; // size - 6 int32_t index = j; memcpy(&lbp1[10], &index, sizeof(int32_t)); if (r_label_set->type == READSTAT_TYPE_STRING) { size_t string_len = value_label->string_key_len; if (string_len > 16) string_len = 16; memset(&lbp1[14], ' ', 16); memcpy(&lbp1[14], value_label->string_key, string_len); } else { uint64_t big_endian_value; double double_value = -1.0 * value_label->double_key; memcpy(&big_endian_value, &double_value, sizeof(double)); if (machine_is_little_endian()) { big_endian_value = byteswap8(big_endian_value); } memcpy(&lbp1[22], &big_endian_value, sizeof(uint64_t)); } int16_t label_len = value_label->label_len; memcpy(&lbp2[8], &label_len, sizeof(int16_t)); memcpy(&lbp2[10], value_label->label, label_len); lbp1 += 30; lbp2 += 8 + 2 + value_label->label_len + 1; } return block; } static readstat_error_t sas7bcat_emit_header(readstat_writer_t *writer, sas_header_info_t *hinfo) { sas_header_start_t header_start = { .a2 = hinfo->u64 ? SAS_ALIGNMENT_OFFSET_4 : SAS_ALIGNMENT_OFFSET_0, .a1 = SAS_ALIGNMENT_OFFSET_0, .endian = machine_is_little_endian() ? 
SAS_ENDIAN_LITTLE : SAS_ENDIAN_BIG, .file_format = SAS_FILE_FORMAT_UNIX, .encoding = 20, /* UTF-8 */ .file_type = "SAS FILE", .file_info = "CATALOG " }; memcpy(&header_start.magic, sas7bcat_magic_number, sizeof(header_start.magic)); return sas_write_header(writer, hinfo, header_start); } static readstat_error_t sas7bcat_begin_data(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; readstat_error_t retval = READSTAT_OK; int i; sas_header_info_t *hinfo = sas_header_info_init(writer, 0); sas7bcat_block_t **blocks = malloc(writer->label_sets_count * sizeof(sas7bcat_block_t)); char *page = malloc(hinfo->page_size); for (i=0; ilabel_sets_count; i++) { blocks[i] = sas7bcat_block_for_label_set(writer->label_sets[i]); } hinfo->page_count = 4; // Header retval = sas7bcat_emit_header(writer, hinfo); if (retval != READSTAT_OK) goto cleanup; // Page 0 retval = readstat_write_zeros(writer, hinfo->page_size); if (retval != READSTAT_OK) goto cleanup; memset(page, '\0', hinfo->page_size); // Page 1 char *xlsr = &page[856]; int16_t block_idx, block_off; block_idx = 4; block_off = 16; for (i=0; ilabel_sets_count; i++) { if (xlsr + 212 > page + hinfo->page_size) break; memcpy(&xlsr[0], "XLSR", 4); memcpy(&xlsr[4], &block_idx, sizeof(int16_t)); memcpy(&xlsr[8], &block_off, sizeof(int16_t)); xlsr[50] = 'O'; block_off += blocks[i]->len; xlsr += 212; } retval = readstat_write_bytes(writer, page, hinfo->page_size); if (retval != READSTAT_OK) goto cleanup; // Page 2 retval = readstat_write_zeros(writer, hinfo->page_size); if (retval != READSTAT_OK) goto cleanup; // Page 3 memset(page, '\0', hinfo->page_size); char block_header[16]; block_off = 16; for (i=0; ilabel_sets_count; i++) { if (block_off + sizeof(block_header) + blocks[i]->len > hinfo->page_size) break; memset(block_header, '\0', sizeof(block_header)); int32_t next_page = 0; int16_t next_off = 0; int16_t block_len = blocks[i]->len; memcpy(&block_header[0], &next_page, sizeof(int32_t)); 
memcpy(&block_header[4], &next_off, sizeof(int16_t)); memcpy(&block_header[6], &block_len, sizeof(int16_t)); memcpy(&page[block_off], block_header, sizeof(block_header)); block_off += sizeof(block_header); memcpy(&page[block_off], blocks[i]->data, blocks[i]->len); block_off += blocks[i]->len; } retval = readstat_write_bytes(writer, page, hinfo->page_size); if (retval != READSTAT_OK) goto cleanup; cleanup: for (i=0; ilabel_sets_count; i++) { free(blocks[i]); } free(blocks); free(hinfo); free(page); return retval; } readstat_error_t readstat_begin_writing_sas7bcat(readstat_writer_t *writer, void *user_ctx) { if (writer->version == 0) writer->version = SAS_DEFAULT_FILE_VERSION; writer->callbacks.begin_data = &sas7bcat_begin_data; return readstat_begin_writing_file(writer, user_ctx, 0); } ReadStat-1.1.7/src/sas/readstat_sas7bdat_read.c000066400000000000000000001325131410722155500213510ustar00rootroot00000000000000 #include #include #include #include #include #include #include "readstat_sas.h" #include "readstat_sas_rle.h" #include "../readstat_iconv.h" #include "../readstat_convert.h" #include "../readstat_malloc.h" typedef struct col_info_s { sas_text_ref_t name_ref; sas_text_ref_t format_ref; sas_text_ref_t label_ref; int index; uint64_t offset; uint32_t width; int type; int format_len; } col_info_t; typedef struct subheader_pointer_s { uint64_t offset; uint64_t len; unsigned char compression; unsigned char is_compressed_data; } subheader_pointer_t; typedef struct sas7bdat_ctx_s { readstat_callbacks_t handle; int64_t file_size; int little_endian; int u64; int vendor; void *user_ctx; readstat_io_t *io; int bswap; int did_submit_columns; uint32_t row_length; uint32_t page_row_count; uint32_t parsed_row_count; uint32_t column_count; uint32_t row_limit; uint32_t row_offset; uint64_t header_size; uint64_t page_count; uint64_t page_size; char *page; char *row; uint64_t page_header_size; uint64_t subheader_signature_size; uint64_t subheader_pointer_size; int 
text_blob_count; size_t *text_blob_lengths; char **text_blobs; int col_names_count; int col_attrs_count; int col_formats_count; size_t max_col_width; char *scratch_buffer; size_t scratch_buffer_len; int col_info_count; col_info_t *col_info; readstat_variable_t **variables; const char *input_encoding; const char *output_encoding; iconv_t converter; time_t ctime; time_t mtime; int version; char table_name[4*32+1]; char file_label[4*256+1]; char error_buf[2048]; unsigned int rdc_compression:1; } sas7bdat_ctx_t; static void sas7bdat_ctx_free(sas7bdat_ctx_t *ctx) { int i; if (ctx->text_blobs) { for (i=0; itext_blob_count; i++) { free(ctx->text_blobs[i]); } free(ctx->text_blobs); free(ctx->text_blob_lengths); } if (ctx->variables) { for (i=0; icolumn_count; i++) { if (ctx->variables[i]) free(ctx->variables[i]); } free(ctx->variables); } if (ctx->col_info) free(ctx->col_info); if (ctx->scratch_buffer) free(ctx->scratch_buffer); if (ctx->page) free(ctx->page); if (ctx->row) free(ctx->row); if (ctx->converter) iconv_close(ctx->converter); free(ctx); } static readstat_error_t sas7bdat_update_progress(sas7bdat_ctx_t *ctx) { readstat_io_t *io = ctx->io; return io->update(ctx->file_size, ctx->handle.progress, ctx->user_ctx, io->io_ctx); } static sas_text_ref_t sas7bdat_parse_text_ref(const char *data, sas7bdat_ctx_t *ctx) { sas_text_ref_t ref; ref.index = sas_read2(&data[0], ctx->bswap); ref.offset = sas_read2(&data[2], ctx->bswap); ref.length = sas_read2(&data[4], ctx->bswap); return ref; } static readstat_error_t sas7bdat_copy_text_ref(char *out_buffer, size_t out_buffer_len, sas_text_ref_t text_ref, sas7bdat_ctx_t *ctx) { if (text_ref.index >= ctx->text_blob_count) return READSTAT_ERROR_PARSE; if (text_ref.length == 0) { out_buffer[0] = '\0'; return READSTAT_OK; } char *blob = ctx->text_blobs[text_ref.index]; if (text_ref.offset + text_ref.length > ctx->text_blob_lengths[text_ref.index]) return READSTAT_ERROR_PARSE; return readstat_convert(out_buffer, out_buffer_len, 
&blob[text_ref.offset], text_ref.length, ctx->converter); } static readstat_error_t sas7bdat_parse_column_text_subheader(const char *subheader, size_t len, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; size_t signature_len = ctx->subheader_signature_size; uint16_t remainder = sas_read2(&subheader[signature_len], ctx->bswap); char *blob = NULL; if (remainder != sas_subheader_remainder(len, signature_len)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } ctx->text_blob_count++; ctx->text_blobs = readstat_realloc(ctx->text_blobs, ctx->text_blob_count * sizeof(char *)); ctx->text_blob_lengths = readstat_realloc(ctx->text_blob_lengths, ctx->text_blob_count * sizeof(ctx->text_blob_lengths[0])); if (ctx->text_blobs == NULL || ctx->text_blob_lengths == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if ((blob = readstat_malloc(len-signature_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } memcpy(blob, subheader+signature_len, len-signature_len); ctx->text_blob_lengths[ctx->text_blob_count-1] = len-signature_len; ctx->text_blobs[ctx->text_blob_count-1] = blob; cleanup: return retval; } static readstat_error_t sas7bdat_realloc_col_info(sas7bdat_ctx_t *ctx, size_t count) { if (ctx->col_info_count < count) { size_t old_count = ctx->col_info_count; ctx->col_info_count = count; ctx->col_info = readstat_realloc(ctx->col_info, ctx->col_info_count * sizeof(col_info_t)); if (ctx->col_info == NULL) { return READSTAT_ERROR_MALLOC; } memset(ctx->col_info + old_count, 0, (count - old_count) * sizeof(col_info_t)); } return READSTAT_OK; } static readstat_error_t sas7bdat_parse_column_size_subheader(const char *subheader, size_t len, sas7bdat_ctx_t *ctx) { uint64_t col_count; readstat_error_t retval = READSTAT_OK; if (ctx->column_count || ctx->did_submit_columns) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (len < (ctx->u64 ? 
16 : 8)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (ctx->u64) { col_count = sas_read8(&subheader[8], ctx->bswap); } else { col_count = sas_read4(&subheader[4], ctx->bswap); } ctx->column_count = col_count; retval = sas7bdat_realloc_col_info(ctx, ctx->column_count); cleanup: return retval; } static readstat_error_t sas7bdat_parse_row_size_subheader(const char *subheader, size_t len, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; uint64_t total_row_count; uint64_t row_length, page_row_count; if (len < (ctx->u64 ? 250: 190)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (ctx->u64) { row_length = sas_read8(&subheader[40], ctx->bswap); total_row_count = sas_read8(&subheader[48], ctx->bswap); page_row_count = sas_read8(&subheader[120], ctx->bswap); } else { row_length = sas_read4(&subheader[20], ctx->bswap); total_row_count = sas_read4(&subheader[24], ctx->bswap); page_row_count = sas_read4(&subheader[60], ctx->bswap); } sas_text_ref_t file_label_ref = sas7bdat_parse_text_ref(&subheader[len-130], ctx); if (file_label_ref.length) { if ((retval = sas7bdat_copy_text_ref(ctx->file_label, sizeof(ctx->file_label), file_label_ref, ctx)) != READSTAT_OK) { goto cleanup; } } sas_text_ref_t compression_ref = sas7bdat_parse_text_ref(&subheader[len-118], ctx); if (compression_ref.length) { char compression[9]; if ((retval = sas7bdat_copy_text_ref(compression, sizeof(compression), compression_ref, ctx)) != READSTAT_OK) { goto cleanup; } ctx->rdc_compression = (memcmp(compression, SAS_COMPRESSION_SIGNATURE_RDC, 8) == 0); } ctx->row_length = row_length; ctx->row = readstat_realloc(ctx->row, ctx->row_length); if (ctx->row == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } ctx->page_row_count = page_row_count; uint64_t total_row_count_after_skipping = total_row_count; if (total_row_count > ctx->row_offset) { total_row_count_after_skipping -= ctx->row_offset; } else { total_row_count_after_skipping = 0; ctx->row_offset = total_row_count; } if 
(ctx->row_limit == 0 || total_row_count_after_skipping < ctx->row_limit) ctx->row_limit = total_row_count_after_skipping; cleanup: return retval; } static readstat_error_t sas7bdat_parse_column_name_subheader(const char *subheader, size_t len, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; size_t signature_len = ctx->subheader_signature_size; int cmax = ctx->u64 ? (len-28)/8 : (len-20)/8; int i; const char *cnp = &subheader[signature_len+8]; uint16_t remainder = sas_read2(&subheader[signature_len], ctx->bswap); if (remainder != sas_subheader_remainder(len, signature_len)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } ctx->col_names_count += cmax; if ((retval = sas7bdat_realloc_col_info(ctx, ctx->col_names_count)) != READSTAT_OK) goto cleanup; for (i=ctx->col_names_count-cmax; icol_names_count; i++) { ctx->col_info[i].name_ref = sas7bdat_parse_text_ref(cnp, ctx); cnp += 8; } cleanup: return retval; } static readstat_error_t sas7bdat_parse_column_attributes_subheader(const char *subheader, size_t len, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; size_t signature_len = ctx->subheader_signature_size; int cmax = ctx->u64 ? 
(len-28)/16 : (len-20)/12; int i; const char *cap = &subheader[signature_len+8]; uint16_t remainder = sas_read2(&subheader[signature_len], ctx->bswap); if (remainder != sas_subheader_remainder(len, signature_len)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } ctx->col_attrs_count += cmax; if ((retval = sas7bdat_realloc_col_info(ctx, ctx->col_attrs_count)) != READSTAT_OK) goto cleanup; for (i=ctx->col_attrs_count-cmax; icol_attrs_count; i++) { if (ctx->u64) { ctx->col_info[i].offset = sas_read8(&cap[0], ctx->bswap); } else { ctx->col_info[i].offset = sas_read4(&cap[0], ctx->bswap); } readstat_off_t off=4; if (ctx->u64) off=8; ctx->col_info[i].width = sas_read4(&cap[off], ctx->bswap); if (ctx->col_info[i].width > ctx->max_col_width) ctx->max_col_width = ctx->col_info[i].width; if (cap[off+6] == SAS_COLUMN_TYPE_NUM) { ctx->col_info[i].type = READSTAT_TYPE_DOUBLE; } else if (cap[off+6] == SAS_COLUMN_TYPE_CHR) { ctx->col_info[i].type = READSTAT_TYPE_STRING; } else { retval = READSTAT_ERROR_PARSE; goto cleanup; } ctx->col_info[i].index = i; cap += off+8; } cleanup: return retval; } static readstat_error_t sas7bdat_parse_column_format_subheader(const char *subheader, size_t len, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; if (len < (ctx->u64 ? 58 : 46)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } ctx->col_formats_count++; if ((retval = sas7bdat_realloc_col_info(ctx, ctx->col_formats_count)) != READSTAT_OK) goto cleanup; if (ctx->u64) ctx->col_info[ctx->col_formats_count-1].format_len = sas_read2(&subheader[24], ctx->bswap); ctx->col_info[ctx->col_formats_count-1].format_ref = sas7bdat_parse_text_ref( ctx->u64 ? &subheader[46] : &subheader[34], ctx); ctx->col_info[ctx->col_formats_count-1].label_ref = sas7bdat_parse_text_ref( ctx->u64 ? 
&subheader[52] : &subheader[40], ctx); cleanup: return retval; } static readstat_error_t sas7bdat_handle_data_value(readstat_variable_t *variable, col_info_t *col_info, const char *col_data, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int cb_retval = 0; readstat_value_t value; memset(&value, 0, sizeof(readstat_value_t)); value.type = col_info->type; if (col_info->type == READSTAT_TYPE_STRING) { retval = readstat_convert(ctx->scratch_buffer, ctx->scratch_buffer_len, col_data, col_info->width, ctx->converter); if (retval != READSTAT_OK) { if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Error converting string (row=%u, col=%u) to specified encoding: %.*s", ctx->parsed_row_count+1, col_info->index+1, col_info->width, col_data); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } goto cleanup; } value.v.string_value = ctx->scratch_buffer; } else if (col_info->type == READSTAT_TYPE_DOUBLE) { uint64_t val = 0; double dval = NAN; if (ctx->little_endian) { int k; for (k=0; kwidth; k++) { val = (val << 8) | (unsigned char)col_data[col_info->width-1-k]; } } else { int k; for (k=0; kwidth; k++) { val = (val << 8) | (unsigned char)col_data[k]; } } val <<= (8-col_info->width)*8; memcpy(&dval, &val, 8); if (isnan(dval)) { value.v.double_value = NAN; sas_assign_tag(&value, ~((val >> 40) & 0xFF)); } else { value.v.double_value = dval; } } cb_retval = ctx->handle.value(ctx->parsed_row_count, variable, value, ctx->user_ctx); if (cb_retval != READSTAT_HANDLER_OK) retval = READSTAT_ERROR_USER_ABORT; cleanup: return retval; } static readstat_error_t sas7bdat_parse_single_row(const char *data, sas7bdat_ctx_t *ctx) { if (ctx->parsed_row_count == ctx->row_limit) return READSTAT_OK; if (ctx->row_offset) { ctx->row_offset--; return READSTAT_OK; } readstat_error_t retval = READSTAT_OK; int j; if (ctx->handle.value) { ctx->scratch_buffer_len = 4*ctx->max_col_width+1; ctx->scratch_buffer = readstat_realloc(ctx->scratch_buffer, 
ctx->scratch_buffer_len); if (ctx->scratch_buffer == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } for (j=0; jcolumn_count; j++) { col_info_t *col_info = &ctx->col_info[j]; readstat_variable_t *variable = ctx->variables[j]; if (variable->skip) continue; if (col_info->offset > ctx->row_length || col_info->offset + col_info->width > ctx->row_length) { retval = READSTAT_ERROR_PARSE; goto cleanup; } retval = sas7bdat_handle_data_value(variable, col_info, &data[col_info->offset], ctx); if (retval != READSTAT_OK) { goto cleanup; } } } ctx->parsed_row_count++; cleanup: return retval; } static readstat_error_t sas7bdat_parse_rows(const char *data, size_t len, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int i; size_t row_offset=0; for (i=0; ipage_row_count && ctx->parsed_row_count < ctx->row_limit; i++) { if (row_offset + ctx->row_length > len) { retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH; goto cleanup; } if ((retval = sas7bdat_parse_single_row(&data[row_offset], ctx)) != READSTAT_OK) goto cleanup; row_offset += ctx->row_length; } cleanup: return retval; } static readstat_error_t sas7bdat_parse_subheader_rdc(const char *subheader, size_t len, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; const unsigned char *input = (const unsigned char *)subheader; char *buffer = malloc(ctx->row_length); char *output = buffer; while (input + 2 <= (const unsigned char *)subheader + len) { int i; unsigned short prefix = (input[0] << 8) + input[1]; input += 2; for (i=0; i<16; i++) { if ((prefix & (1 << (15 - i))) == 0) { if (input + 1 > (const unsigned char *)subheader + len) { break; } if (output + 1 > buffer + ctx->row_length) { retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH; goto cleanup; } *output++ = *input++; continue; } if (input + 2 > (const unsigned char *)subheader + len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } unsigned char marker_byte = *input++; unsigned char next_byte = *input++; size_t insert_len = 0, copy_len = 0; unsigned char 
insert_byte = 0x00; size_t back_offset = 0; if (marker_byte <= 0x0F) { insert_len = 3 + marker_byte; insert_byte = next_byte; } else if ((marker_byte >> 4) == 1) { if (input + 1 > (const unsigned char *)subheader + len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } insert_len = 19 + (marker_byte & 0x0F) + next_byte * 16; insert_byte = *input++; } else if ((marker_byte >> 4) == 2) { if (input + 1 > (const unsigned char *)subheader + len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } copy_len = 16 + (*input++); back_offset = 3 + (marker_byte & 0x0F) + next_byte * 16; } else { copy_len = (marker_byte >> 4); back_offset = 3 + (marker_byte & 0x0F) + next_byte * 16; } if (insert_len) { if (output + insert_len > buffer + ctx->row_length) { retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH; goto cleanup; } memset(output, insert_byte, insert_len); output += insert_len; } else if (copy_len) { if (output - buffer < back_offset || copy_len > back_offset) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (output + copy_len > buffer + ctx->row_length) { retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH; goto cleanup; } memcpy(output, output - back_offset, copy_len); output += copy_len; } } } if (output - buffer != ctx->row_length) { retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH; goto cleanup; } retval = sas7bdat_parse_single_row(buffer, ctx); cleanup: free(buffer); return retval; } static readstat_error_t sas7bdat_parse_subheader_rle(const char *subheader, size_t len, sas7bdat_ctx_t *ctx) { if (ctx->row_limit == ctx->parsed_row_count) return READSTAT_OK; readstat_error_t retval = READSTAT_OK; ssize_t bytes_decompressed = 0; bytes_decompressed = sas_rle_decompress(ctx->row, ctx->row_length, subheader, len); if (bytes_decompressed != ctx->row_length) { retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH; if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Row #%d decompressed to %ld bytes (expected %d bytes)", ctx->parsed_row_count, (long)(bytes_decompressed), 
ctx->row_length); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } goto cleanup; } retval = sas7bdat_parse_single_row(ctx->row, ctx); cleanup: return retval; } static readstat_error_t sas7bdat_parse_subheader_compressed(const char *subheader, size_t len, sas7bdat_ctx_t *ctx) { if (ctx->rdc_compression) return sas7bdat_parse_subheader_rdc(subheader, len, ctx); return sas7bdat_parse_subheader_rle(subheader, len, ctx); } static readstat_error_t sas7bdat_parse_subheader(uint32_t signature, const char *subheader, size_t len, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; if (len < 2 + ctx->subheader_signature_size) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (signature == SAS_SUBHEADER_SIGNATURE_ROW_SIZE) { retval = sas7bdat_parse_row_size_subheader(subheader, len, ctx); } else if (signature == SAS_SUBHEADER_SIGNATURE_COLUMN_SIZE) { retval = sas7bdat_parse_column_size_subheader(subheader, len, ctx); } else if (signature == SAS_SUBHEADER_SIGNATURE_COUNTS) { /* void */ } else if (signature == SAS_SUBHEADER_SIGNATURE_COLUMN_TEXT) { retval = sas7bdat_parse_column_text_subheader(subheader, len, ctx); } else if (signature == SAS_SUBHEADER_SIGNATURE_COLUMN_NAME) { retval = sas7bdat_parse_column_name_subheader(subheader, len, ctx); } else if (signature == SAS_SUBHEADER_SIGNATURE_COLUMN_ATTRS) { retval = sas7bdat_parse_column_attributes_subheader(subheader, len, ctx); } else if (signature == SAS_SUBHEADER_SIGNATURE_COLUMN_FORMAT) { retval = sas7bdat_parse_column_format_subheader(subheader, len, ctx); } else if (signature == SAS_SUBHEADER_SIGNATURE_COLUMN_LIST) { /* void */ } else if ((signature & SAS_SUBHEADER_SIGNATURE_COLUMN_MASK) == SAS_SUBHEADER_SIGNATURE_COLUMN_MASK) { /* void */ } else { retval = READSTAT_ERROR_PARSE; } cleanup: return retval; } static readstat_error_t sas7bdat_validate_column(col_info_t *col_info) { if (col_info->type == READSTAT_TYPE_DOUBLE) { if (col_info->width > 8 || col_info->width < 3) { return READSTAT_ERROR_PARSE; } } if 
(col_info->type == READSTAT_TYPE_STRING) { if (col_info->width > INT16_MAX || col_info->width == 0) { return READSTAT_ERROR_PARSE; } } return READSTAT_OK; } static readstat_variable_t *sas7bdat_init_variable(sas7bdat_ctx_t *ctx, int i, int index_after_skipping, readstat_error_t *out_retval) { readstat_error_t retval = READSTAT_OK; readstat_variable_t *variable = readstat_calloc(1, sizeof(readstat_variable_t)); variable->index = i; variable->index_after_skipping = index_after_skipping; variable->type = ctx->col_info[i].type; variable->storage_width = ctx->col_info[i].width; if ((retval = sas7bdat_validate_column(&ctx->col_info[i])) != READSTAT_OK) { goto cleanup; } if ((retval = sas7bdat_copy_text_ref(variable->name, sizeof(variable->name), ctx->col_info[i].name_ref, ctx)) != READSTAT_OK) { goto cleanup; } if ((retval = sas7bdat_copy_text_ref(variable->format, sizeof(variable->format), ctx->col_info[i].format_ref, ctx)) != READSTAT_OK) { goto cleanup; } size_t len = strlen(variable->format); if (len && ctx->col_info[i].format_len) { snprintf(variable->format + len, sizeof(variable->format) - len, "%d", ctx->col_info[i].format_len); } if ((retval = sas7bdat_copy_text_ref(variable->label, sizeof(variable->label), ctx->col_info[i].label_ref, ctx)) != READSTAT_OK) { goto cleanup; } cleanup: if (retval != READSTAT_OK) { free(variable); if (out_retval) *out_retval = retval; if (retval == READSTAT_ERROR_CONVERT_BAD_STRING) { if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Error converting variable #%d info to specified encoding: %s %s (%s)", i, variable->name, variable->format, variable->label); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } } return NULL; } return variable; } static readstat_error_t sas7bdat_submit_columns(sas7bdat_ctx_t *ctx, int compressed) { readstat_error_t retval = READSTAT_OK; if (ctx->handle.metadata) { readstat_metadata_t metadata = { .row_count = ctx->row_limit, .var_count = ctx->column_count, .table_name 
= ctx->table_name, .file_label = ctx->file_label, .file_encoding = ctx->input_encoding, /* orig encoding? */ .creation_time = ctx->ctime, .modified_time = ctx->mtime, .file_format_version = ctx->version, .compression = READSTAT_COMPRESS_NONE, .endianness = ctx->little_endian ? READSTAT_ENDIAN_LITTLE : READSTAT_ENDIAN_BIG, .is64bit = ctx->u64 }; if (compressed) { if (ctx->rdc_compression) { metadata.compression = READSTAT_COMPRESS_BINARY; } else { metadata.compression = READSTAT_COMPRESS_ROWS; } } if (ctx->handle.metadata(&metadata, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } if (ctx->column_count == 0) goto cleanup; if ((ctx->variables = readstat_calloc(ctx->column_count, sizeof(readstat_variable_t *))) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } int i; int index_after_skipping = 0; for (i=0; icolumn_count; i++) { ctx->variables[i] = sas7bdat_init_variable(ctx, i, index_after_skipping, &retval); if (ctx->variables[i] == NULL) break; int cb_retval = READSTAT_HANDLER_OK; if (ctx->handle.variable) { cb_retval = ctx->handle.variable(i, ctx->variables[i], ctx->variables[i]->format, ctx->user_ctx); } if (cb_retval == READSTAT_HANDLER_ABORT) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } if (cb_retval == READSTAT_HANDLER_SKIP_VARIABLE) { ctx->variables[i]->skip = 1; } else { index_after_skipping++; } } cleanup: return retval; } static readstat_error_t sas7bdat_submit_columns_if_needed(sas7bdat_ctx_t *ctx, int compressed) { readstat_error_t retval = READSTAT_OK; if (!ctx->did_submit_columns) { if ((retval = sas7bdat_submit_columns(ctx, compressed)) != READSTAT_OK) { goto cleanup; } ctx->did_submit_columns = 1; } cleanup: return retval; } static int sas7bdat_signature_is_recognized(uint32_t signature) { return (signature == SAS_SUBHEADER_SIGNATURE_ROW_SIZE || signature == SAS_SUBHEADER_SIGNATURE_COLUMN_SIZE || signature == SAS_SUBHEADER_SIGNATURE_COUNTS || signature == 
SAS_SUBHEADER_SIGNATURE_COLUMN_FORMAT || (signature & SAS_SUBHEADER_SIGNATURE_COLUMN_MASK) == SAS_SUBHEADER_SIGNATURE_COLUMN_MASK); } static readstat_error_t sas7bdat_parse_subheader_pointer(const char *shp, size_t shp_size, subheader_pointer_t *info, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; if (ctx->u64) { if (shp_size <= 17) { retval = READSTAT_ERROR_PARSE; goto cleanup; } info->offset = sas_read8(&shp[0], ctx->bswap); info->len = sas_read8(&shp[8], ctx->bswap); info->compression = shp[16]; info->is_compressed_data = shp[17]; } else { if (shp_size <= 9) { retval = READSTAT_ERROR_PARSE; goto cleanup; } info->offset = sas_read4(&shp[0], ctx->bswap); info->len = sas_read4(&shp[4], ctx->bswap); info->compression = shp[8]; info->is_compressed_data = shp[9]; } cleanup: return retval; } static readstat_error_t sas7bdat_validate_subheader_pointer(subheader_pointer_t *shp_info, size_t page_size, uint16_t subheader_count, sas7bdat_ctx_t *ctx) { if (shp_info->offset > page_size) return READSTAT_ERROR_PARSE; if (shp_info->len > page_size) return READSTAT_ERROR_PARSE; if (shp_info->offset + shp_info->len > page_size) return READSTAT_ERROR_PARSE; if (shp_info->offset < ctx->page_header_size + subheader_count*ctx->subheader_pointer_size) return READSTAT_ERROR_PARSE; if (shp_info->compression == SAS_COMPRESSION_NONE) { if (shp_info->len < ctx->subheader_signature_size) return READSTAT_ERROR_PARSE; if (shp_info->offset + ctx->subheader_signature_size > page_size) return READSTAT_ERROR_PARSE; } return READSTAT_OK; } /* First, extract column text */ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_size, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; uint16_t subheader_count = sas_read2(&page[ctx->page_header_size-4], ctx->bswap); int i; const char *shp = &page[ctx->page_header_size]; int lshp = ctx->subheader_pointer_size; if (ctx->page_header_size + subheader_count*lshp > page_size) { retval = READSTAT_ERROR_PARSE; 
goto cleanup; } for (i=0; isubheader_signature_size; if ((retval = sas7bdat_parse_subheader_pointer(shp, page + page_size - shp, &shp_info, ctx)) != READSTAT_OK) { goto cleanup; } if (shp_info.len > 0 && shp_info.compression != SAS_COMPRESSION_TRUNC) { if ((retval = sas7bdat_validate_subheader_pointer(&shp_info, page_size, subheader_count, ctx)) != READSTAT_OK) { goto cleanup; } if (shp_info.compression == SAS_COMPRESSION_NONE) { signature = sas_read4(page + shp_info.offset, ctx->bswap); if (!ctx->little_endian && signature == -1 && signature_len == 8) { signature = sas_read4(page + shp_info.offset + 4, ctx->bswap); } if (signature == SAS_SUBHEADER_SIGNATURE_COLUMN_TEXT) { if ((retval = sas7bdat_parse_subheader(signature, page + shp_info.offset, shp_info.len, ctx)) != READSTAT_OK) { goto cleanup; } } } else if (shp_info.compression == SAS_COMPRESSION_ROW) { /* void */ } else { retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION; goto cleanup; } } shp += lshp; } cleanup: return retval; } static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_size, sas7bdat_ctx_t *ctx) { uint16_t page_type; readstat_error_t retval = READSTAT_OK; page_type = sas_read2(&page[ctx->page_header_size-8], ctx->bswap); const char *data = NULL; if ((page_type & SAS_PAGE_TYPE_MASK) == SAS_PAGE_TYPE_DATA) { ctx->page_row_count = sas_read2(&page[ctx->page_header_size-6], ctx->bswap); data = &page[ctx->page_header_size]; } else if (!(page_type & SAS_PAGE_TYPE_COMP)) { uint16_t subheader_count = sas_read2(&page[ctx->page_header_size-4], ctx->bswap); int i; const char *shp = &page[ctx->page_header_size]; int lshp = ctx->subheader_pointer_size; if (ctx->page_header_size + subheader_count*lshp > page_size) { retval = READSTAT_ERROR_PARSE; goto cleanup; } for (i=0; i 0 && shp_info.compression != SAS_COMPRESSION_TRUNC) { if ((retval = sas7bdat_validate_subheader_pointer(&shp_info, page_size, subheader_count, ctx)) != READSTAT_OK) { goto cleanup; } if (shp_info.compression == 
SAS_COMPRESSION_NONE) { signature = sas_read4(page + shp_info.offset, ctx->bswap); if (!ctx->little_endian && signature == -1 && ctx->u64) { signature = sas_read4(page + shp_info.offset + 4, ctx->bswap); } if (shp_info.is_compressed_data && !sas7bdat_signature_is_recognized(signature)) { if (shp_info.len != ctx->row_length) { retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH; goto cleanup; } if ((retval = sas7bdat_submit_columns_if_needed(ctx, 1)) != READSTAT_OK) { goto cleanup; } if ((retval = sas7bdat_parse_single_row(page + shp_info.offset, ctx)) != READSTAT_OK) { goto cleanup; } } else { if (signature != SAS_SUBHEADER_SIGNATURE_COLUMN_TEXT) { if ((retval = sas7bdat_parse_subheader(signature, page + shp_info.offset, shp_info.len, ctx)) != READSTAT_OK) { goto cleanup; } } } } else if (shp_info.compression == SAS_COMPRESSION_ROW) { if ((retval = sas7bdat_submit_columns_if_needed(ctx, 1)) != READSTAT_OK) { goto cleanup; } if ((retval = sas7bdat_parse_subheader_compressed(page + shp_info.offset, shp_info.len, ctx)) != READSTAT_OK) { goto cleanup; } } else { retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION; goto cleanup; } } shp += lshp; } if ((page_type & SAS_PAGE_TYPE_MASK) == SAS_PAGE_TYPE_MIX) { /* HACK - this is supposed to obey 8-byte boundaries but * some files created by Stat/Transfer don't. 
So verify that the * padding is { 0, 0, 0, 0 } or { ' ', ' ', ' ', ' ' } (or that * the file is not from Stat/Transfer) before skipping it */ if ((shp-page)%8 == 4 && shp + 4 <= page + page_size && (*(uint32_t *)shp == 0x00000000 || *(uint32_t *)shp == 0x20202020 || ctx->vendor != READSTAT_VENDOR_STAT_TRANSFER)) { data = shp + 4; } else { data = shp; } } } if (data) { if ((retval = sas7bdat_submit_columns_if_needed(ctx, 0)) != READSTAT_OK) { goto cleanup; } if (ctx->handle.value) { retval = sas7bdat_parse_rows(data, page + page_size - data, ctx); } } cleanup: return retval; } static readstat_error_t sas7bdat_parse_meta_pages_pass1(sas7bdat_ctx_t *ctx, int64_t *outLastExaminedPage) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; int64_t i; /* look for META and MIX pages at beginning... */ for (i=0; ipage_count; i++) { if (io->seek(ctx->header_size + i*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Failed to seek to position %" PRId64 " (= %" PRId64 " + %" PRId64 "*%" PRId64 ")", ctx->header_size + i*ctx->page_size, ctx->header_size, i, ctx->page_size); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } goto cleanup; } readstat_off_t off = 0; if (ctx->u64) off = 16; size_t head_len = off + 16 + 2; size_t tail_len = ctx->page_size - head_len; if (io->read(ctx->page, head_len, io->io_ctx) < head_len) { retval = READSTAT_ERROR_READ; goto cleanup; } uint16_t page_type = sas_read2(&ctx->page[off+16], ctx->bswap); if ((page_type & SAS_PAGE_TYPE_MASK) == SAS_PAGE_TYPE_DATA) break; if ((page_type & SAS_PAGE_TYPE_COMP)) continue; if (io->read(ctx->page + head_len, tail_len, io->io_ctx) < tail_len) { retval = READSTAT_ERROR_READ; goto cleanup; } if ((retval = sas7bdat_parse_page_pass1(ctx->page, ctx->page_size, ctx)) != READSTAT_OK) { if (ctx->handle.error && retval != READSTAT_ERROR_USER_ABORT) { int64_t pos = io->seek(0, 
READSTAT_SEEK_CUR, io->io_ctx); snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Error parsing page %" PRId64 ", bytes %" PRId64 "-%" PRId64, i, pos - ctx->page_size, pos-1); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } goto cleanup; } } cleanup: if (outLastExaminedPage) *outLastExaminedPage = i; return retval; } static readstat_error_t sas7bdat_parse_amd_pages_pass1(int64_t last_examined_page_pass1, sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; uint64_t i; uint64_t amd_page_count = 0; /* ...then AMD pages at the end */ for (i=ctx->page_count-1; i>last_examined_page_pass1; i--) { if (io->seek(ctx->header_size + i*ctx->page_size, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Failed to seek to position %" PRId64 " (= %" PRId64 " + %" PRId64 "*%" PRId64 ")", ctx->header_size + i*ctx->page_size, ctx->header_size, i, ctx->page_size); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } goto cleanup; } readstat_off_t off = 0; if (ctx->u64) off = 16; size_t head_len = off + 16 + 2; size_t tail_len = ctx->page_size - head_len; if (io->read(ctx->page, head_len, io->io_ctx) < head_len) { retval = READSTAT_ERROR_READ; goto cleanup; } uint16_t page_type = sas_read2(&ctx->page[off+16], ctx->bswap); if ((page_type & SAS_PAGE_TYPE_MASK) == SAS_PAGE_TYPE_DATA) { /* Usually AMD pages are at the end but sometimes data pages appear after them */ if (amd_page_count > 0) break; continue; } if ((page_type & SAS_PAGE_TYPE_COMP)) continue; if (io->read(ctx->page + head_len, tail_len, io->io_ctx) < tail_len) { retval = READSTAT_ERROR_READ; goto cleanup; } if ((retval = sas7bdat_parse_page_pass1(ctx->page, ctx->page_size, ctx)) != READSTAT_OK) { if (ctx->handle.error && retval != READSTAT_ERROR_USER_ABORT) { int64_t pos = io->seek(0, READSTAT_SEEK_CUR, io->io_ctx); snprintf(ctx->error_buf, sizeof(ctx->error_buf), 
"ReadStat: Error parsing page %" PRId64 ", bytes %" PRId64 "-%" PRId64, i, pos - ctx->page_size, pos-1); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } goto cleanup; } amd_page_count++; } cleanup: return retval; } static readstat_error_t sas7bdat_parse_all_pages_pass2(sas7bdat_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; int64_t i; for (i=0; ipage_count; i++) { if ((retval = sas7bdat_update_progress(ctx)) != READSTAT_OK) { goto cleanup; } if (io->read(ctx->page, ctx->page_size, io->io_ctx) < ctx->page_size) { retval = READSTAT_ERROR_READ; goto cleanup; } if ((retval = sas7bdat_parse_page_pass2(ctx->page, ctx->page_size, ctx)) != READSTAT_OK) { if (ctx->handle.error && retval != READSTAT_ERROR_USER_ABORT) { int64_t pos = io->seek(0, READSTAT_SEEK_CUR, io->io_ctx); snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Error parsing page %" PRId64 ", bytes %" PRId64 "-%" PRId64, i, pos - ctx->page_size, pos-1); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } goto cleanup; } if (ctx->parsed_row_count == ctx->row_limit) break; } cleanup: return retval; } readstat_error_t readstat_parse_sas7bdat(readstat_parser_t *parser, const char *path, void *user_ctx) { int64_t last_examined_page_pass1 = 0; readstat_error_t retval = READSTAT_OK; readstat_io_t *io = parser->io; sas7bdat_ctx_t *ctx = calloc(1, sizeof(sas7bdat_ctx_t)); sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t)); ctx->handle = parser->handlers; ctx->input_encoding = parser->input_encoding; ctx->output_encoding = parser->output_encoding; ctx->user_ctx = user_ctx; ctx->io = parser->io; ctx->row_limit = parser->row_limit; if (parser->row_offset > 0) ctx->row_offset = parser->row_offset; if (io->open(path, io->io_ctx) == -1) { retval = READSTAT_ERROR_OPEN; goto cleanup; } if ((ctx->file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx)) == -1) { retval = READSTAT_ERROR_SEEK; if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), 
"ReadStat: Failed to seek to end of file"); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } goto cleanup; } if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Failed to seek to beginning of file"); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } goto cleanup; } if ((retval = sas_read_header(io, hinfo, ctx->handle.error, user_ctx)) != READSTAT_OK) { goto cleanup; } ctx->u64 = hinfo->u64; ctx->little_endian = hinfo->little_endian; ctx->vendor = hinfo->vendor; ctx->bswap = machine_is_little_endian() ^ hinfo->little_endian; ctx->header_size = hinfo->header_size; ctx->page_count = hinfo->page_count; ctx->page_size = hinfo->page_size; ctx->page_header_size = hinfo->page_header_size; ctx->subheader_pointer_size = hinfo->subheader_pointer_size; ctx->subheader_signature_size = ctx->u64 ? 8 : 4; ctx->ctime = hinfo->creation_time; ctx->mtime = hinfo->modification_time; ctx->version = hinfo->major_version; if (ctx->input_encoding == NULL) { ctx->input_encoding = hinfo->encoding; } if ((ctx->page = readstat_malloc(ctx->page_size)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (ctx->input_encoding && ctx->output_encoding && strcmp(ctx->input_encoding, ctx->output_encoding) != 0) { iconv_t converter = iconv_open(ctx->output_encoding, ctx->input_encoding); if (converter == (iconv_t)-1) { retval = READSTAT_ERROR_UNSUPPORTED_CHARSET; goto cleanup; } ctx->converter = converter; } if ((retval = readstat_convert(ctx->table_name, sizeof(ctx->table_name), hinfo->table_name, sizeof(hinfo->table_name), ctx->converter)) != READSTAT_OK) { goto cleanup; } if ((retval = sas7bdat_parse_meta_pages_pass1(ctx, &last_examined_page_pass1)) != READSTAT_OK) { goto cleanup; } if ((retval = sas7bdat_parse_amd_pages_pass1(last_examined_page_pass1, ctx)) != READSTAT_OK) { goto cleanup; } if (io->seek(ctx->header_size, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval 
= READSTAT_ERROR_SEEK; if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Failed to seek to position %" PRId64, ctx->header_size); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } goto cleanup; } if ((retval = sas7bdat_parse_all_pages_pass2(ctx)) != READSTAT_OK) { goto cleanup; } if ((retval = sas7bdat_submit_columns_if_needed(ctx, 0)) != READSTAT_OK) { goto cleanup; } if (ctx->handle.value && ctx->parsed_row_count != ctx->row_limit) { retval = READSTAT_ERROR_ROW_COUNT_MISMATCH; if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: Expected %d rows in file, found %d", ctx->row_limit, ctx->parsed_row_count); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } goto cleanup; } if ((retval = sas7bdat_update_progress(ctx)) != READSTAT_OK) { goto cleanup; } cleanup: io->close(io->io_ctx); if (retval == READSTAT_ERROR_OPEN || retval == READSTAT_ERROR_READ || retval == READSTAT_ERROR_SEEK) { if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "ReadStat: %s (retval = %d): %s (errno = %d)", readstat_error_message(retval), retval, strerror(errno), errno); ctx->handle.error(ctx->error_buf, user_ctx); } } if (ctx) sas7bdat_ctx_free(ctx); if (hinfo) free(hinfo); return retval; } ReadStat-1.1.7/src/sas/readstat_sas7bdat_write.c000066400000000000000000000747561410722155500216060ustar00rootroot00000000000000 #include #include #include #include #include "../readstat.h" #include "../readstat_writer.h" #include "readstat_sas.h" #include "readstat_sas_rle.h" typedef struct sas7bdat_subheader_s { uint32_t signature; char *data; size_t len; int is_row_data; int is_row_data_compressed; } sas7bdat_subheader_t; typedef struct sas7bdat_subheader_array_s { int64_t count; int64_t capacity; sas7bdat_subheader_t **subheaders; } sas7bdat_subheader_array_t; typedef struct sas7bdat_column_text_s { char *data; size_t capacity; size_t used; int64_t index; } sas7bdat_column_text_t; typedef struct 
sas7bdat_column_text_array_s { int64_t count; sas7bdat_column_text_t **column_texts; } sas7bdat_column_text_array_t; typedef struct sas7bdat_write_ctx_s { sas_header_info_t *hinfo; sas7bdat_subheader_array_t *sarray; } sas7bdat_write_ctx_t; static size_t sas7bdat_variable_width(readstat_type_t type, size_t user_width); static int32_t sas7bdat_count_meta_pages(readstat_writer_t *writer) { sas7bdat_write_ctx_t *ctx = (sas7bdat_write_ctx_t *)writer->module_ctx; sas_header_info_t *hinfo = ctx->hinfo; sas7bdat_subheader_array_t *sarray = ctx->sarray; int i; int pages = 1; size_t bytes_left = hinfo->page_size - hinfo->page_header_size; size_t shp_ptr_size = hinfo->subheader_pointer_size; for (i=sarray->count-1; i>=0; i--) { sas7bdat_subheader_t *subheader = sarray->subheaders[i]; if (subheader->len + shp_ptr_size > bytes_left) { bytes_left = hinfo->page_size - hinfo->page_header_size; pages++; } bytes_left -= (subheader->len + shp_ptr_size); } return pages; } static size_t sas7bdat_row_length(readstat_writer_t *writer) { int i; size_t len = 0; for (i=0; ivariables_count; i++) { readstat_variable_t *variable = readstat_get_variable(writer, i); len += sas7bdat_variable_width(readstat_variable_get_type(variable), readstat_variable_get_storage_width(variable)); } return len; } static int32_t sas7bdat_rows_per_page(readstat_writer_t *writer, sas_header_info_t *hinfo) { size_t row_length = sas7bdat_row_length(writer); return (hinfo->page_size - hinfo->page_header_size) / row_length; } static int32_t sas7bdat_count_data_pages(readstat_writer_t *writer, sas_header_info_t *hinfo) { if (writer->compression == READSTAT_COMPRESS_ROWS) return 0; int32_t rows_per_page = sas7bdat_rows_per_page(writer, hinfo); return (writer->row_count + (rows_per_page - 1)) / rows_per_page; } static sas7bdat_column_text_t *sas7bdat_column_text_init(int64_t index, size_t len) { sas7bdat_column_text_t *column_text = calloc(1, sizeof(sas7bdat_column_text_t)); column_text->data = malloc(len); 
column_text->capacity = len; column_text->index = index; return column_text; } static void sas7bdat_column_text_free(sas7bdat_column_text_t *column_text) { free(column_text->data); free(column_text); } static void sas7bdat_column_text_array_free(sas7bdat_column_text_array_t *column_text_array) { int i; for (i=0; icount; i++) { sas7bdat_column_text_free(column_text_array->column_texts[i]); } free(column_text_array->column_texts); free(column_text_array); } static sas_text_ref_t sas7bdat_make_text_ref(sas7bdat_column_text_array_t *column_text_array, const char *string) { size_t len = strlen(string); size_t padded_len = (len + 3) / 4 * 4; sas7bdat_column_text_t *column_text = column_text_array->column_texts[ column_text_array->count-1]; if (column_text->used + padded_len > column_text->capacity) { column_text_array->count++; column_text_array->column_texts = realloc(column_text_array->column_texts, sizeof(sas7bdat_column_text_t *) * column_text_array->count); column_text = sas7bdat_column_text_init(column_text_array->count-1, column_text->capacity); column_text_array->column_texts[column_text_array->count-1] = column_text; } sas_text_ref_t text_ref = { .index = column_text->index, .offset = column_text->used + 28, .length = len }; strncpy(&column_text->data[column_text->used], string, padded_len); column_text->used += padded_len; return text_ref; } static readstat_error_t sas7bdat_emit_header(readstat_writer_t *writer, sas_header_info_t *hinfo) { sas_header_start_t header_start = { .a2 = hinfo->u64 ? SAS_ALIGNMENT_OFFSET_4 : SAS_ALIGNMENT_OFFSET_0, .a1 = SAS_ALIGNMENT_OFFSET_0, .endian = machine_is_little_endian() ? 
SAS_ENDIAN_LITTLE : SAS_ENDIAN_BIG, .file_format = SAS_FILE_FORMAT_UNIX, .encoding = 20, /* UTF-8 */ .file_type = "SAS FILE", .file_info = "DATA " }; memcpy(&header_start.magic, sas7bdat_magic_number, sizeof(header_start.magic)); return sas_write_header(writer, hinfo, header_start); } static sas7bdat_subheader_t *sas7bdat_subheader_init(uint32_t signature, size_t len) { sas7bdat_subheader_t *subheader = calloc(1, sizeof(sas7bdat_subheader_t)); subheader->signature = signature; subheader->len = len; subheader->data = calloc(1, len); return subheader; } static sas7bdat_subheader_t *sas7bdat_row_size_subheader_init(readstat_writer_t *writer, sas_header_info_t *hinfo, sas7bdat_column_text_array_t *column_text_array) { sas7bdat_subheader_t *subheader = sas7bdat_subheader_init( SAS_SUBHEADER_SIGNATURE_ROW_SIZE, hinfo->u64 ? 808 : 480); if (hinfo->u64) { int64_t row_length = sas7bdat_row_length(writer); int64_t row_count = writer->row_count; int64_t ncfl1 = writer->variables_count; int64_t page_size = hinfo->page_size; memcpy(&subheader->data[40], &row_length, sizeof(int64_t)); memcpy(&subheader->data[48], &row_count, sizeof(int64_t)); memcpy(&subheader->data[72], &ncfl1, sizeof(int64_t)); memcpy(&subheader->data[104], &page_size, sizeof(int64_t)); memset(&subheader->data[128], 0xFF, 16); } else { int32_t row_length = sas7bdat_row_length(writer); int32_t row_count = writer->row_count; int32_t ncfl1 = writer->variables_count; int32_t page_size = hinfo->page_size; memcpy(&subheader->data[20], &row_length, sizeof(int32_t)); memcpy(&subheader->data[24], &row_count, sizeof(int32_t)); memcpy(&subheader->data[36], &ncfl1, sizeof(int32_t)); memcpy(&subheader->data[52], &page_size, sizeof(int32_t)); memset(&subheader->data[64], 0xFF, 8); } sas_text_ref_t text_ref = { 0 }; if (writer->file_label[0]) { text_ref = sas7bdat_make_text_ref(column_text_array, writer->file_label); memcpy(&subheader->data[subheader->len-130], &text_ref, sizeof(sas_text_ref_t)); } if (writer->compression == 
READSTAT_COMPRESS_ROWS) { text_ref = sas7bdat_make_text_ref(column_text_array, SAS_COMPRESSION_SIGNATURE_RLE); memcpy(&subheader->data[subheader->len-118], &text_ref, sizeof(sas_text_ref_t)); } return subheader; } static sas7bdat_subheader_t *sas7bdat_col_size_subheader_init(readstat_writer_t *writer, sas_header_info_t *hinfo) { sas7bdat_subheader_t *subheader = sas7bdat_subheader_init( SAS_SUBHEADER_SIGNATURE_COLUMN_SIZE, hinfo->u64 ? 24 : 12); if (hinfo->u64) { int64_t col_count = writer->variables_count; memcpy(&subheader->data[8], &col_count, sizeof(int64_t)); } else { int32_t col_count = writer->variables_count; memcpy(&subheader->data[4], &col_count, sizeof(int32_t)); } return subheader; } static size_t sas7bdat_col_name_subheader_length(readstat_writer_t *writer, sas_header_info_t *hinfo) { return (hinfo->u64 ? 28+8*writer->variables_count : 20+8*writer->variables_count); } static sas7bdat_subheader_t *sas7bdat_col_name_subheader_init(readstat_writer_t *writer, sas_header_info_t *hinfo, sas7bdat_column_text_array_t *column_text_array) { size_t len = sas7bdat_col_name_subheader_length(writer, hinfo); size_t signature_len = hinfo->u64 ? 8 : 4; uint16_t remainder = sas_subheader_remainder(len, signature_len); sas7bdat_subheader_t *subheader = sas7bdat_subheader_init( SAS_SUBHEADER_SIGNATURE_COLUMN_NAME, len); memcpy(&subheader->data[signature_len], &remainder, sizeof(uint16_t)); int i; char *ptrs = &subheader->data[signature_len+8]; for (i=0; ivariables_count; i++) { readstat_variable_t *variable = readstat_get_variable(writer, i); const char *name = readstat_variable_get_name(variable); sas_text_ref_t text_ref = sas7bdat_make_text_ref(column_text_array, name); memcpy(ptrs, &text_ref, sizeof(sas_text_ref_t)); ptrs += 8; } return subheader; } static size_t sas7bdat_col_attrs_subheader_length(readstat_writer_t *writer, sas_header_info_t *hinfo) { return (hinfo->u64 ? 
28+16*writer->variables_count : 20+12*writer->variables_count); } static sas7bdat_subheader_t *sas7bdat_col_attrs_subheader_init(readstat_writer_t *writer, sas_header_info_t *hinfo) { size_t len = sas7bdat_col_attrs_subheader_length(writer, hinfo); size_t signature_len = hinfo->u64 ? 8 : 4; uint16_t remainder = sas_subheader_remainder(len, signature_len); sas7bdat_subheader_t *subheader = sas7bdat_subheader_init( SAS_SUBHEADER_SIGNATURE_COLUMN_ATTRS, len); memcpy(&subheader->data[signature_len], &remainder, sizeof(uint16_t)); char *ptrs = &subheader->data[signature_len+8]; uint64_t offset = 0; int i; for (i=0; ivariables_count; i++) { readstat_variable_t *variable = readstat_get_variable(writer, i); const char *name = readstat_variable_get_name(variable); readstat_type_t type = readstat_variable_get_type(variable); uint16_t name_length_flag = strlen(name) <= 8 ? 4 : 2048; uint32_t width = 0; if (hinfo->u64) { memcpy(&ptrs[0], &offset, sizeof(uint64_t)); ptrs += sizeof(uint64_t); } else { uint32_t offset32 = offset; memcpy(&ptrs[0], &offset32, sizeof(uint32_t)); ptrs += sizeof(uint32_t); } if (type == READSTAT_TYPE_STRING) { ptrs[6] = SAS_COLUMN_TYPE_CHR; width = readstat_variable_get_storage_width(variable); } else { ptrs[6] = SAS_COLUMN_TYPE_NUM; width = 8; } memcpy(&ptrs[0], &width, sizeof(uint32_t)); memcpy(&ptrs[4], &name_length_flag, sizeof(uint16_t)); offset += width; ptrs += 8; } return subheader; } static sas7bdat_subheader_t *sas7bdat_col_format_subheader_init(readstat_variable_t *variable, sas_header_info_t *hinfo, sas7bdat_column_text_array_t *column_text_array) { sas7bdat_subheader_t *subheader = sas7bdat_subheader_init( SAS_SUBHEADER_SIGNATURE_COLUMN_FORMAT, hinfo->u64 ? 64 : 52); const char *format = readstat_variable_get_format(variable); const char *label = readstat_variable_get_label(variable); off_t format_offset = hinfo->u64 ? 46 : 34; off_t label_offset = hinfo->u64 ? 
52 : 40; if (format) { sas_text_ref_t text_ref = sas7bdat_make_text_ref(column_text_array, format); memcpy(&subheader->data[format_offset+0], &text_ref.index, sizeof(uint16_t)); memcpy(&subheader->data[format_offset+2], &text_ref.offset, sizeof(uint16_t)); memcpy(&subheader->data[format_offset+4], &text_ref.length, sizeof(uint16_t)); } if (label) { sas_text_ref_t text_ref = sas7bdat_make_text_ref(column_text_array, label); memcpy(&subheader->data[label_offset+0], &text_ref.index, sizeof(uint16_t)); memcpy(&subheader->data[label_offset+2], &text_ref.offset, sizeof(uint16_t)); memcpy(&subheader->data[label_offset+4], &text_ref.length, sizeof(uint16_t)); } return subheader; } static size_t sas7bdat_col_text_subheader_length(sas_header_info_t *hinfo, sas7bdat_column_text_t *column_text) { size_t signature_len = hinfo->u64 ? 8 : 4; size_t text_len = column_text ? column_text->used : 0; return signature_len + 28 + text_len; } static sas7bdat_subheader_t *sas7bdat_col_text_subheader_init(readstat_writer_t *writer, sas_header_info_t *hinfo, sas7bdat_column_text_t *column_text) { size_t signature_len = hinfo->u64 ? 
8 : 4; size_t len = sas7bdat_col_text_subheader_length(hinfo, column_text); sas7bdat_subheader_t *subheader = sas7bdat_subheader_init( SAS_SUBHEADER_SIGNATURE_COLUMN_TEXT, len); uint16_t used = sas_subheader_remainder(len, signature_len); memcpy(&subheader->data[signature_len], &used, sizeof(uint16_t)); memset(&subheader->data[signature_len+12], ' ', 8); memcpy(&subheader->data[signature_len+28], column_text->data, column_text->used); return subheader; } static sas7bdat_subheader_array_t *sas7bdat_subheader_array_init(readstat_writer_t *writer, sas_header_info_t *hinfo) { sas7bdat_column_text_array_t *column_text_array = calloc(1, sizeof(sas7bdat_column_text_array_t)); column_text_array->count = 1; column_text_array->column_texts = malloc(sizeof(sas7bdat_column_text_t *)); column_text_array->column_texts[0] = sas7bdat_column_text_init(0, hinfo->page_size - hinfo->page_header_size - hinfo->subheader_pointer_size - sas7bdat_col_text_subheader_length(hinfo, NULL)); sas7bdat_subheader_array_t *sarray = calloc(1, sizeof(sas7bdat_subheader_array_t)); sarray->count = 4+writer->variables_count; sarray->subheaders = calloc(sarray->count, sizeof(sas7bdat_subheader_t *)); long idx = 0; int i; sas7bdat_subheader_t *col_name_subheader = NULL; sas7bdat_subheader_t *col_attrs_subheader = NULL; sas7bdat_subheader_t **col_format_subheaders = NULL; col_name_subheader = sas7bdat_col_name_subheader_init(writer, hinfo, column_text_array); col_attrs_subheader = sas7bdat_col_attrs_subheader_init(writer, hinfo); sarray->subheaders[idx++] = sas7bdat_row_size_subheader_init(writer, hinfo, column_text_array); sarray->subheaders[idx++] = sas7bdat_col_size_subheader_init(writer, hinfo); col_format_subheaders = calloc(writer->variables_count, sizeof(sas7bdat_subheader_t *)); for (i=0; ivariables_count; i++) { readstat_variable_t *variable = readstat_get_variable(writer, i); col_format_subheaders[i] = sas7bdat_col_format_subheader_init(variable, hinfo, column_text_array); } sarray->count += 
column_text_array->count; sarray->subheaders = realloc(sarray->subheaders, sarray->count * sizeof(sas7bdat_subheader_t *)); for (i=0; icount; i++) { sarray->subheaders[idx++] = sas7bdat_col_text_subheader_init(writer, hinfo, column_text_array->column_texts[i]); } sas7bdat_column_text_array_free(column_text_array); sarray->subheaders[idx++] = col_name_subheader; sarray->subheaders[idx++] = col_attrs_subheader; for (i=0; ivariables_count; i++) { sarray->subheaders[idx++] = col_format_subheaders[i]; } free(col_format_subheaders); sarray->capacity = sarray->count; if (writer->compression == READSTAT_COMPRESS_ROWS) { sarray->capacity = (sarray->count + writer->row_count); sarray->subheaders = realloc(sarray->subheaders, sarray->capacity * sizeof(sas7bdat_subheader_t *)); } return sarray; } static void sas7bdat_subheader_free(sas7bdat_subheader_t *subheader) { if (!subheader) return; if (subheader->data) free(subheader->data); free(subheader); } static void sas7bdat_subheader_array_free(sas7bdat_subheader_array_t *sarray) { int i; for (i=0; icount; i++) { sas7bdat_subheader_free(sarray->subheaders[i]); } free(sarray->subheaders); free(sarray); } static int sas7bdat_subheader_type(uint32_t signature) { return (signature == SAS_SUBHEADER_SIGNATURE_COLUMN_TEXT || signature == SAS_SUBHEADER_SIGNATURE_COLUMN_NAME || signature == SAS_SUBHEADER_SIGNATURE_COLUMN_ATTRS || signature == SAS_SUBHEADER_SIGNATURE_COLUMN_LIST); } static readstat_error_t sas7bdat_emit_meta_pages(readstat_writer_t *writer) { sas7bdat_write_ctx_t *ctx = (sas7bdat_write_ctx_t *)writer->module_ctx; sas_header_info_t *hinfo = ctx->hinfo; sas7bdat_subheader_array_t *sarray = ctx->sarray; readstat_error_t retval = READSTAT_OK; int16_t page_type = SAS_PAGE_TYPE_META; char *page = malloc(hinfo->page_size); int64_t shp_written = 0; while (sarray->count > shp_written) { memset(page, 0, hinfo->page_size); int16_t shp_count = 0; size_t shp_data_offset = hinfo->page_size; size_t shp_ptr_offset = 
hinfo->page_header_size; size_t shp_ptr_size = hinfo->subheader_pointer_size; memcpy(&page[hinfo->page_header_size-8], &page_type, sizeof(int16_t)); if (sarray->subheaders[shp_written]->len + shp_ptr_size > shp_data_offset - shp_ptr_offset) { retval = READSTAT_ERROR_ROW_IS_TOO_WIDE_FOR_PAGE; goto cleanup; } while (sarray->count > shp_written && sarray->subheaders[shp_written]->len + shp_ptr_size <= shp_data_offset - shp_ptr_offset) { sas7bdat_subheader_t *subheader = sarray->subheaders[shp_written]; uint32_t signature32 = subheader->signature; /* copy ptr */ if (hinfo->u64) { uint64_t offset = shp_data_offset - subheader->len; uint64_t len = subheader->len; memcpy(&page[shp_ptr_offset], &offset, sizeof(uint64_t)); memcpy(&page[shp_ptr_offset+8], &len, sizeof(uint64_t)); if (subheader->is_row_data) { if (subheader->is_row_data_compressed) { page[shp_ptr_offset+16] = SAS_COMPRESSION_ROW; } else { page[shp_ptr_offset+16] = SAS_COMPRESSION_NONE; } page[shp_ptr_offset+17] = 1; } else { page[shp_ptr_offset+17] = sas7bdat_subheader_type(subheader->signature); if (signature32 >= 0xFF000000) { int64_t signature64 = (int32_t)signature32; memcpy(&subheader->data[0], &signature64, sizeof(int64_t)); } else { memcpy(&subheader->data[0], &signature32, sizeof(int32_t)); } } } else { uint32_t offset = shp_data_offset - subheader->len; uint32_t len = subheader->len; memcpy(&page[shp_ptr_offset], &offset, sizeof(uint32_t)); memcpy(&page[shp_ptr_offset+4], &len, sizeof(uint32_t)); if (subheader->is_row_data) { if (subheader->is_row_data_compressed) { page[shp_ptr_offset+8] = SAS_COMPRESSION_ROW; } else { page[shp_ptr_offset+8] = SAS_COMPRESSION_NONE; } page[shp_ptr_offset+9] = 1; } else { page[shp_ptr_offset+9] = sas7bdat_subheader_type(subheader->signature); memcpy(&subheader->data[0], &signature32, sizeof(int32_t)); } } shp_ptr_offset += shp_ptr_size; /* copy data */ shp_data_offset -= subheader->len; memcpy(&page[shp_data_offset], subheader->data, subheader->len); shp_written++; 
shp_count++; } if (hinfo->u64) { memcpy(&page[34], &shp_count, sizeof(int16_t)); memcpy(&page[36], &shp_count, sizeof(int16_t)); } else { memcpy(&page[18], &shp_count, sizeof(int16_t)); memcpy(&page[20], &shp_count, sizeof(int16_t)); } retval = readstat_write_bytes(writer, page, hinfo->page_size); if (retval != READSTAT_OK) goto cleanup; } cleanup: free(page); return retval; } static int sas7bdat_page_is_too_small(readstat_writer_t *writer, sas_header_info_t *hinfo, size_t row_length) { size_t page_length = hinfo->page_size - hinfo->page_header_size; if (writer->compression == READSTAT_COMPRESS_NONE && page_length < row_length) return 1; if (writer->compression == READSTAT_COMPRESS_ROWS && page_length < row_length + hinfo->subheader_pointer_size) return 1; if (page_length < sas7bdat_col_name_subheader_length(writer, hinfo) + hinfo->subheader_pointer_size) return 1; if (page_length < sas7bdat_col_attrs_subheader_length(writer, hinfo) + hinfo->subheader_pointer_size) return 1; return 0; } static sas7bdat_write_ctx_t *sas7bdat_write_ctx_init(readstat_writer_t *writer) { sas7bdat_write_ctx_t *ctx = calloc(1, sizeof(sas7bdat_write_ctx_t)); sas_header_info_t *hinfo = sas_header_info_init(writer, writer->is_64bit); size_t row_length = sas7bdat_row_length(writer); while (sas7bdat_page_is_too_small(writer, hinfo, row_length)) { hinfo->page_size <<= 1; } ctx->hinfo = hinfo; ctx->sarray = sas7bdat_subheader_array_init(writer, hinfo); return ctx; } static void sas7bdat_write_ctx_free(sas7bdat_write_ctx_t *ctx) { free(ctx->hinfo); sas7bdat_subheader_array_free(ctx->sarray); free(ctx); } static readstat_error_t sas7bdat_emit_header_and_meta_pages(readstat_writer_t *writer) { sas7bdat_write_ctx_t *ctx = (sas7bdat_write_ctx_t *)writer->module_ctx; readstat_error_t retval = READSTAT_OK; if (sas7bdat_row_length(writer) == 0) { retval = READSTAT_ERROR_TOO_FEW_COLUMNS; goto cleanup; } if (writer->compression == READSTAT_COMPRESS_NONE && sas7bdat_rows_per_page(writer, ctx->hinfo) == 0) 
{ retval = READSTAT_ERROR_ROW_IS_TOO_WIDE_FOR_PAGE; goto cleanup; } ctx->hinfo->page_count = sas7bdat_count_meta_pages(writer) + sas7bdat_count_data_pages(writer, ctx->hinfo); retval = sas7bdat_emit_header(writer, ctx->hinfo); if (retval != READSTAT_OK) goto cleanup; retval = sas7bdat_emit_meta_pages(writer); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t sas7bdat_begin_data(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; readstat_error_t retval = READSTAT_OK; writer->module_ctx = sas7bdat_write_ctx_init(writer); if (writer->compression == READSTAT_COMPRESS_NONE) { retval = sas7bdat_emit_header_and_meta_pages(writer); if (retval != READSTAT_OK) goto cleanup; } cleanup: if (retval != READSTAT_OK) { if (writer->module_ctx) { sas7bdat_write_ctx_free(writer->module_ctx); writer->module_ctx = NULL; } } return retval; } static readstat_error_t sas7bdat_end_data(void *writer_ctx) { readstat_error_t retval = READSTAT_OK; readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; sas7bdat_write_ctx_t *ctx = (sas7bdat_write_ctx_t *)writer->module_ctx; if (writer->compression == READSTAT_COMPRESS_ROWS) { retval = sas7bdat_emit_header_and_meta_pages(writer); } else { retval = sas_fill_page(writer, ctx->hinfo); } return retval; } static void sas7bdat_module_ctx_free(void *module_ctx) { sas7bdat_write_ctx_free(module_ctx); } static readstat_error_t sas7bdat_write_double(void *row, const readstat_variable_t *var, double value) { memcpy(row, &value, sizeof(double)); return READSTAT_OK; } static readstat_error_t sas7bdat_write_float(void *row, const readstat_variable_t *var, float value) { return sas7bdat_write_double(row, var, value); } static readstat_error_t sas7bdat_write_int32(void *row, const readstat_variable_t *var, int32_t value) { return sas7bdat_write_double(row, var, value); } static readstat_error_t sas7bdat_write_int16(void *row, const readstat_variable_t *var, int16_t value) { return 
sas7bdat_write_double(row, var, value); } static readstat_error_t sas7bdat_write_int8(void *row, const readstat_variable_t *var, int8_t value) { return sas7bdat_write_double(row, var, value); } static readstat_error_t sas7bdat_write_missing_tagged_raw(void *row, const readstat_variable_t *var, char tag) { union { double dval; char chars[8]; } nan_value; nan_value.dval = NAN; nan_value.chars[machine_is_little_endian() ? 5 : 2] = ~tag; return sas7bdat_write_double(row, var, nan_value.dval); } static readstat_error_t sas7bdat_write_missing_tagged(void *row, const readstat_variable_t *var, char tag) { readstat_error_t error = sas_validate_tag(tag); if (error == READSTAT_OK) return sas7bdat_write_missing_tagged_raw(row, var, tag); return error; } static readstat_error_t sas7bdat_write_missing_numeric(void *row, const readstat_variable_t *var) { return sas7bdat_write_missing_tagged_raw(row, var, '.'); } static readstat_error_t sas7bdat_write_string(void *row, const readstat_variable_t *var, const char *value) { size_t max_len = readstat_variable_get_storage_width(var); if (value == NULL || value[0] == '\0') { memset(row, '\0', max_len); } else { size_t value_len = strlen(value); if (value_len > max_len) return READSTAT_ERROR_STRING_VALUE_IS_TOO_LONG; strncpy((char *)row, value, max_len); } return READSTAT_OK; } static readstat_error_t sas7bdat_write_missing_string(void *row, const readstat_variable_t *var) { return sas7bdat_write_string(row, var, NULL); } static size_t sas7bdat_variable_width(readstat_type_t type, size_t user_width) { if (type == READSTAT_TYPE_STRING) { return user_width; } return 8; } static readstat_error_t sas7bdat_write_row_uncompressed(readstat_writer_t *writer, sas7bdat_write_ctx_t *ctx, void *bytes, size_t len) { readstat_error_t retval = READSTAT_OK; sas_header_info_t *hinfo = ctx->hinfo; int32_t rows_per_page = sas7bdat_rows_per_page(writer, hinfo); if (writer->current_row % rows_per_page == 0) { retval = sas_fill_page(writer, ctx->hinfo); if 
(retval != READSTAT_OK) goto cleanup; int16_t page_type = SAS_PAGE_TYPE_DATA; int16_t page_row_count = (writer->row_count - writer->current_row < rows_per_page ? writer->row_count - writer->current_row : rows_per_page); char *header = calloc(hinfo->page_header_size, 1); memcpy(&header[hinfo->page_header_size-6], &page_row_count, sizeof(int16_t)); memcpy(&header[hinfo->page_header_size-8], &page_type, sizeof(int16_t)); retval = readstat_write_bytes(writer, header, hinfo->page_header_size); free(header); if (retval != READSTAT_OK) goto cleanup; } retval = readstat_write_bytes(writer, bytes, len); cleanup: return retval; } /* We don't actually write compressed data out at this point; the file header * requires a page count, so instead we collect the compressed subheaders in * memory and write the entire file at the end, once the page count can be * determined. */ static readstat_error_t sas7bdat_write_row_compressed(readstat_writer_t *writer, sas7bdat_write_ctx_t *ctx, void *bytes, size_t len) { readstat_error_t retval = READSTAT_OK; size_t compressed_len = sas_rle_compressed_len(bytes, len); sas7bdat_subheader_t *subheader = NULL; if (compressed_len < len) { subheader = sas7bdat_subheader_init(0, compressed_len); subheader->is_row_data = 1; subheader->is_row_data_compressed = 1; size_t actual_len = sas_rle_compress(subheader->data, subheader->len, bytes, len); if (actual_len != compressed_len) { retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH; goto cleanup; } } else { subheader = sas7bdat_subheader_init(0, len); subheader->is_row_data = 1; memcpy(subheader->data, bytes, len); } ctx->sarray->subheaders[ctx->sarray->count++] = subheader; cleanup: if (retval != READSTAT_OK) sas7bdat_subheader_free(subheader); return retval; } static readstat_error_t sas7bdat_write_row(void *writer_ctx, void *bytes, size_t len) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; sas7bdat_write_ctx_t *ctx = (sas7bdat_write_ctx_t *)writer->module_ctx; readstat_error_t retval = 
READSTAT_OK; if (writer->compression == READSTAT_COMPRESS_NONE) { retval = sas7bdat_write_row_uncompressed(writer, ctx, bytes, len); } else if (writer->compression == READSTAT_COMPRESS_ROWS) { retval = sas7bdat_write_row_compressed(writer, ctx, bytes, len); } return retval; } static readstat_error_t sas7bdat_metadata_ok(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; if (writer->compression != READSTAT_COMPRESS_NONE && writer->compression != READSTAT_COMPRESS_ROWS) return READSTAT_ERROR_UNSUPPORTED_COMPRESSION; return READSTAT_OK; } readstat_error_t readstat_begin_writing_sas7bdat(readstat_writer_t *writer, void *user_ctx, long row_count) { if (writer->version == 0) writer->version = SAS_DEFAULT_FILE_VERSION; writer->callbacks.metadata_ok = &sas7bdat_metadata_ok; writer->callbacks.write_int8 = &sas7bdat_write_int8; writer->callbacks.write_int16 = &sas7bdat_write_int16; writer->callbacks.write_int32 = &sas7bdat_write_int32; writer->callbacks.write_float = &sas7bdat_write_float; writer->callbacks.write_double = &sas7bdat_write_double; writer->callbacks.write_string = &sas7bdat_write_string; writer->callbacks.write_missing_string = &sas7bdat_write_missing_string; writer->callbacks.write_missing_number = &sas7bdat_write_missing_numeric; writer->callbacks.write_missing_tagged = &sas7bdat_write_missing_tagged; writer->callbacks.variable_width = &sas7bdat_variable_width; writer->callbacks.variable_ok = &sas_validate_variable; writer->callbacks.begin_data = &sas7bdat_begin_data; writer->callbacks.end_data = &sas7bdat_end_data; writer->callbacks.module_ctx_free = &sas7bdat_module_ctx_free; writer->callbacks.write_row = &sas7bdat_write_row; return readstat_begin_writing_file(writer, user_ctx, row_count); } ReadStat-1.1.7/src/sas/readstat_sas_rle.c000066400000000000000000000231541410722155500202760ustar00rootroot00000000000000 #include #include #include #if defined(_MSC_VER) #include typedef SSIZE_T ssize_t; #endif #include 
"readstat_sas_rle.h" #define SAS_RLE_COMMAND_COPY64 0 #define SAS_RLE_COMMAND_INSERT_BYTE18 4 #define SAS_RLE_COMMAND_INSERT_AT17 5 #define SAS_RLE_COMMAND_INSERT_BLANK17 6 #define SAS_RLE_COMMAND_INSERT_ZERO17 7 #define SAS_RLE_COMMAND_COPY1 8 #define SAS_RLE_COMMAND_COPY17 9 #define SAS_RLE_COMMAND_COPY33 10 #define SAS_RLE_COMMAND_COPY49 11 #define SAS_RLE_COMMAND_INSERT_BYTE3 12 #define SAS_RLE_COMMAND_INSERT_AT2 13 #define SAS_RLE_COMMAND_INSERT_BLANK2 14 #define SAS_RLE_COMMAND_INSERT_ZERO2 15 #define MAX_INSERT_RUN 4112 // 4095 + 17 #define MAX_COPY_RUN 4159 // 4095 + 64 static size_t command_lengths[16] = { [SAS_RLE_COMMAND_COPY64] = 1, [SAS_RLE_COMMAND_INSERT_BYTE18] = 2, [SAS_RLE_COMMAND_INSERT_AT17] = 1, [SAS_RLE_COMMAND_INSERT_BLANK17] = 1, [SAS_RLE_COMMAND_INSERT_ZERO17] = 1, [SAS_RLE_COMMAND_INSERT_BYTE3] = 1 }; ssize_t sas_rle_decompressed_len(const void *input_buf, size_t input_len) { return sas_rle_decompress(NULL, 0, input_buf, input_len); } ssize_t sas_rle_decompress(void *output_buf, size_t output_len, const void *input_buf, size_t input_len) { unsigned char *buffer = (unsigned char *)output_buf; unsigned char *output = buffer; size_t output_written = 0; const unsigned char *input = (const unsigned char *)input_buf; while (input < (const unsigned char *)input_buf + input_len) { unsigned char control = *input++; unsigned char command = (control & 0xF0) >> 4; unsigned char length = (control & 0x0F); int copy_len = 0; int insert_len = 0; unsigned char insert_byte = '\0'; if (input + command_lengths[command] > (const unsigned char *)input_buf + input_len) { return -1; } switch (command) { case SAS_RLE_COMMAND_COPY64: copy_len = (*input++) + 64 + length * 256; break; case SAS_RLE_COMMAND_INSERT_BYTE18: insert_len = (*input++) + 18 + length * 256; insert_byte = *input++; break; case SAS_RLE_COMMAND_INSERT_AT17: insert_len = (*input++) + 17 + length * 256; insert_byte = '@'; break; case SAS_RLE_COMMAND_INSERT_BLANK17: insert_len = (*input++) + 17 + 
length * 256; insert_byte = ' '; break; case SAS_RLE_COMMAND_INSERT_ZERO17: insert_len = (*input++) + 17 + length * 256; insert_byte = '\0'; break; case SAS_RLE_COMMAND_COPY1: copy_len = length + 1; break; case SAS_RLE_COMMAND_COPY17: copy_len = length + 17; break; case SAS_RLE_COMMAND_COPY33: copy_len = length + 33; break; case SAS_RLE_COMMAND_COPY49: copy_len = length + 49; break; case SAS_RLE_COMMAND_INSERT_BYTE3: insert_byte = *input++; insert_len = length + 3; break; case SAS_RLE_COMMAND_INSERT_AT2: insert_byte = '@'; insert_len = length + 2; break; case SAS_RLE_COMMAND_INSERT_BLANK2: insert_byte = ' '; insert_len = length + 2; break; case SAS_RLE_COMMAND_INSERT_ZERO2: insert_byte = '\0'; insert_len = length + 2; break; default: /* error out here? */ break; } if (copy_len) { if (output_written + copy_len > output_len) { return -1; } if (input + copy_len > (const unsigned char *)input_buf + input_len) { return -1; } if (output) { memcpy(&output[output_written], input, copy_len); } input += copy_len; output_written += copy_len; } if (insert_len) { if (output_written + insert_len > output_len) { return -1; } if (output) { memset(&output[output_written], insert_byte, insert_len); } output_written += insert_len; } } return output_written; } static size_t sas_rle_measure_copy_run(size_t copy_run) { size_t len = 0; while (copy_run >= MAX_COPY_RUN) { len += 2 + MAX_COPY_RUN; copy_run -= MAX_COPY_RUN; } return len + (copy_run > 64) + (copy_run > 0) + copy_run; } static size_t sas_rle_copy_run(unsigned char *output_buf, size_t offset, const unsigned char *copy, size_t copy_run) { unsigned char *out = output_buf + offset; if (output_buf == NULL) return sas_rle_measure_copy_run(copy_run); while (copy_run >= MAX_COPY_RUN) { *out++ = (SAS_RLE_COMMAND_COPY64 << 4) + 0x0F; *out++ = 0xFF; memcpy(out, copy, MAX_COPY_RUN); out += MAX_COPY_RUN; copy += MAX_COPY_RUN; copy_run -= MAX_COPY_RUN; } if (copy_run > 64) { int length = (copy_run - 64) / 256; unsigned char rem = (copy_run 
- 64) % 256; *out++ = (SAS_RLE_COMMAND_COPY64 << 4) + (length & 0x0F); *out++ = rem; } else if (copy_run >= 49) { *out++ = (SAS_RLE_COMMAND_COPY49 << 4) + (copy_run - 49); } else if (copy_run >= 33) { *out++ = (SAS_RLE_COMMAND_COPY33 << 4) + (copy_run - 33); } else if (copy_run >= 17) { *out++ = (SAS_RLE_COMMAND_COPY17 << 4) + (copy_run - 17); } else if (copy_run >= 1) { *out++ = (SAS_RLE_COMMAND_COPY1 << 4) + (copy_run - 1); } memcpy(out, copy, copy_run); out += copy_run; return out - (output_buf + offset); } static int sas_rle_is_special_byte(unsigned char last_byte) { return (last_byte == '@' || last_byte == ' ' || last_byte == '\0'); } static size_t sas_rle_measure_insert_run(unsigned char last_byte, size_t insert_run) { if (sas_rle_is_special_byte(last_byte)) return insert_run > 17 ? 2 : 1; return insert_run > 18 ? 3 : 2; } static size_t sas_rle_insert_run(unsigned char *output_buf, size_t offset, unsigned char last_byte, size_t insert_run) { unsigned char *out = output_buf + offset; if (output_buf == NULL) return sas_rle_measure_insert_run(last_byte, insert_run); if (sas_rle_is_special_byte(last_byte)) { if (insert_run > 17) { int length = (insert_run - 17) / 256; unsigned char rem = (insert_run - 17) % 256; if (last_byte == '@') { *out++ = (SAS_RLE_COMMAND_INSERT_AT17 << 4) + (length & 0x0F); } else if (last_byte == ' ') { *out++ = (SAS_RLE_COMMAND_INSERT_BLANK17 << 4) + (length & 0x0F); } else if (last_byte == '\0') { *out++ = (SAS_RLE_COMMAND_INSERT_ZERO17 << 4) + (length & 0x0F); } *out++ = rem; } else if (insert_run >= 2) { if (last_byte == '@') { *out++ = (SAS_RLE_COMMAND_INSERT_AT2 << 4) + (insert_run - 2); } else if (last_byte == ' ') { *out++ = (SAS_RLE_COMMAND_INSERT_BLANK2 << 4) + (insert_run - 2); } else if (last_byte == '\0') { *out++ = (SAS_RLE_COMMAND_INSERT_ZERO2 << 4) + (insert_run - 2); } } } else if (insert_run > 18) { int length = (insert_run - 18) / 256; unsigned char rem = (insert_run - 18) % 256; *out++ = (SAS_RLE_COMMAND_INSERT_BYTE18 
<< 4) + (length & 0x0F); *out++ = rem; *out++ = last_byte; } else if (insert_run >= 3) { *out++ = (SAS_RLE_COMMAND_INSERT_BYTE3 << 4) + (insert_run - 3); *out++ = last_byte; } return out - (output_buf + offset); } static int sas_rle_is_insert_run(unsigned char last_byte, size_t insert_run) { if (sas_rle_is_special_byte(last_byte)) return (insert_run > 1); return (insert_run > 2); } ssize_t sas_rle_compressed_len(const void *bytes, size_t len) { return sas_rle_compress(NULL, 0, bytes, len); } ssize_t sas_rle_compress(void *output_buf, size_t output_len, const void *input_buf, size_t input_len) { /* TODO bounds check */ const unsigned char *p = (const unsigned char *)input_buf; const unsigned char *pe = p + input_len; const unsigned char *copy = p; unsigned char *out = (unsigned char *)output_buf; size_t insert_run = 0; size_t copy_run = 0; size_t out_written = 0; unsigned char last_byte = 0; while (p < pe) { unsigned char c = *p; if (insert_run == 0) { insert_run = 1; } else if (c == last_byte && insert_run < MAX_INSERT_RUN) { insert_run++; } else { if (sas_rle_is_insert_run(last_byte, insert_run)) { out_written += sas_rle_copy_run(out, out_written, copy, copy_run); out_written += sas_rle_insert_run(out, out_written, last_byte, insert_run); copy_run = 0; copy = p; } else { copy_run += insert_run; } insert_run = 1; } last_byte = c; p++; } if (sas_rle_is_insert_run(last_byte, insert_run)) { out_written += sas_rle_copy_run(out, out_written, copy, copy_run); out_written += sas_rle_insert_run(out, out_written, last_byte, insert_run); } else { out_written += sas_rle_copy_run(out, out_written, copy, copy_run + insert_run); } return out_written; } ReadStat-1.1.7/src/sas/readstat_sas_rle.h000066400000000000000000000005561410722155500203040ustar00rootroot00000000000000 ssize_t sas_rle_decompress(void *output_buf, size_t output_len, const void *input_buf, size_t input_len); ssize_t sas_rle_compress(void *output_buf, size_t output_len, const void *input_buf, size_t input_len); 
ssize_t sas_rle_decompressed_len(const void *input_buf, size_t input_len); ssize_t sas_rle_compressed_len(const void *bytes, size_t len); ReadStat-1.1.7/src/sas/readstat_xport.c000066400000000000000000000014741410722155500200230ustar00rootroot00000000000000#include #include "readstat_xport.h" #include "../readstat_bits.h" char _xport_months[12][4] = { "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC" }; void xport_namestr_bswap(xport_namestr_t *namestr) { if (!machine_is_little_endian()) return; namestr->ntype = byteswap2(namestr->ntype); namestr->nhfun = byteswap2(namestr->nhfun); namestr->nlng = byteswap2(namestr->nlng); namestr->nvar0 = byteswap2(namestr->nvar0); namestr->nfl = byteswap2(namestr->nfl); namestr->nfd = byteswap2(namestr->nfd); namestr->nfj = byteswap2(namestr->nfj); namestr->nifl = byteswap2(namestr->nifl); namestr->nifd = byteswap2(namestr->nifd); namestr->npos = byteswap4(namestr->npos); namestr->labeln = byteswap2(namestr->labeln); } ReadStat-1.1.7/src/sas/readstat_xport.h000066400000000000000000000015561410722155500200310ustar00rootroot00000000000000 typedef struct xport_header_record_s { char name[9]; int num1; int num2; int num3; int num4; int num5; int num6; } xport_header_record_t; extern char _xport_months[12][4]; #pragma pack(push, 1) typedef struct xport_namestr_s { uint16_t ntype; uint16_t nhfun; uint16_t nlng; uint16_t nvar0; char nname[8]; char nlabel[40]; char nform[8]; uint16_t nfl; uint16_t nfd; uint16_t nfj; char nfill[2]; char niform[8]; uint16_t nifl; uint16_t nifd; uint32_t npos; char longname[32]; uint16_t labeln; char rest[18]; } xport_namestr_t; #pragma pack(pop) #define XPORT_MIN_DOUBLE_SIZE 3 #define XPORT_MAX_DOUBLE_SIZE 8 void xport_namestr_bswap(xport_namestr_t *namestr); ReadStat-1.1.7/src/sas/readstat_xport_read.c000066400000000000000000000552011410722155500210130ustar00rootroot00000000000000#include #include #include #include #include #include #include "../readstat.h" #include 
"../readstat_iconv.h" #include "../readstat_convert.h" #include "../readstat_malloc.h" #include "readstat_sas.h" #include "readstat_xport.h" #include "ieee.h" #define LINE_LEN 80 typedef struct xport_ctx_s { readstat_callbacks_t handle; size_t file_size; void *user_ctx; const char *input_encoding; const char *output_encoding; iconv_t converter; readstat_io_t *io; time_t timestamp; int obs_count; int var_count; int row_limit; int row_offset; size_t row_length; int parsed_row_count; char file_label[256*4+1]; char table_name[32*4+1]; readstat_variable_t **variables; int version; } xport_ctx_t; static readstat_error_t xport_update_progress(xport_ctx_t *ctx) { readstat_io_t *io = ctx->io; return io->update(ctx->file_size, ctx->handle.progress, ctx->user_ctx, io->io_ctx); } static xport_ctx_t *xport_ctx_init() { xport_ctx_t *ctx = calloc(1, sizeof(xport_ctx_t)); return ctx; } static void xport_ctx_free(xport_ctx_t *ctx) { if (ctx->variables) { int i; for (i=0; ivar_count; i++) { if (ctx->variables[i]) free(ctx->variables[i]); } free(ctx->variables); } if (ctx->converter) { iconv_close(ctx->converter); } free(ctx); } static ssize_t read_bytes(xport_ctx_t *ctx, void *dst, size_t dst_len) { readstat_io_t *io = (readstat_io_t *)ctx->io; return io->read(dst, dst_len, io->io_ctx); } static readstat_error_t xport_skip_record(xport_ctx_t *ctx) { readstat_io_t *io = (readstat_io_t *)ctx->io; if (io->seek(LINE_LEN, READSTAT_SEEK_CUR, io->io_ctx) == -1) return READSTAT_ERROR_SEEK; return READSTAT_OK; } static readstat_error_t xport_skip_rest_of_record(xport_ctx_t *ctx) { readstat_io_t *io = (readstat_io_t *)ctx->io; off_t pos = io->seek(0, READSTAT_SEEK_CUR, io->io_ctx); if (pos == -1) return READSTAT_ERROR_SEEK; if (pos % LINE_LEN) { if (io->seek(LINE_LEN - (pos % LINE_LEN), READSTAT_SEEK_CUR, io->io_ctx) == -1) return READSTAT_ERROR_SEEK; } return READSTAT_OK; } static readstat_error_t xport_read_record(xport_ctx_t *ctx, char *record) { ssize_t bytes_read = read_bytes(ctx, 
record, LINE_LEN); if (bytes_read < LINE_LEN) return READSTAT_ERROR_READ; record[LINE_LEN] = '\0'; return READSTAT_OK; } static readstat_error_t xport_read_header_record(xport_ctx_t *ctx, xport_header_record_t *xrecord) { char line[LINE_LEN+1]; readstat_error_t retval = READSTAT_OK; retval = xport_read_record(ctx, line); if (retval != READSTAT_OK) return retval; memset(xrecord, 0, sizeof(xport_header_record_t)); int matches = sscanf(line, "HEADER RECORD*******%8s HEADER RECORD!!!!!!!" "%05d%05d%05d" "%05d%05d%05d", xrecord->name, &xrecord->num1, &xrecord->num2, &xrecord->num3, &xrecord->num4, &xrecord->num5, &xrecord->num6); if (matches < 2) { return READSTAT_ERROR_PARSE; } return READSTAT_OK; } static readstat_error_t xport_expect_header_record(xport_ctx_t *ctx, const char *v5_name, const char *v8_name) { readstat_error_t retval = READSTAT_OK; xport_header_record_t xrecord; retval = xport_read_header_record(ctx, &xrecord); if (retval != READSTAT_OK) goto cleanup; if (ctx->version == 5 && strcmp(xrecord.name, v5_name) != 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } else if (ctx->version == 8 && strcmp(xrecord.name, v8_name) != 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } cleanup: return retval; } static readstat_error_t xport_read_table_name_record(xport_ctx_t *ctx) { char line[LINE_LEN+1]; readstat_error_t retval = READSTAT_OK; retval = xport_read_record(ctx, line); if (retval != READSTAT_OK) goto cleanup; retval = readstat_convert(ctx->table_name, sizeof(ctx->table_name), &line[8], ctx->version == 5 ? 
8 : 32, ctx->converter); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t xport_read_file_label_record(xport_ctx_t *ctx) { char line[LINE_LEN+1]; readstat_error_t retval = READSTAT_OK; retval = xport_read_record(ctx, line); if (retval != READSTAT_OK) goto cleanup; retval = readstat_convert(ctx->file_label, sizeof(ctx->file_label), &line[32], 40, ctx->converter); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t xport_read_library_record(xport_ctx_t *ctx) { xport_header_record_t xrecord; readstat_error_t retval = xport_read_header_record(ctx, &xrecord); if (retval != READSTAT_OK) goto cleanup; if (strcmp(xrecord.name, "LIBRARY") == 0) { ctx->version = 5; } else if (strcmp(xrecord.name, "LIBV8") == 0) { ctx->version = 8; } else { retval = READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION; goto cleanup; } cleanup: return retval; } static readstat_error_t xport_read_timestamp_record(xport_ctx_t *ctx) { char line[LINE_LEN+1]; readstat_error_t retval = READSTAT_OK; struct tm ts = { .tm_isdst = -1 }; char month[4]; int i; retval = xport_read_record(ctx, line); if (retval != READSTAT_OK) goto cleanup; sscanf(line, "%02d%3s%02d:%02d:%02d:%02d", &ts.tm_mday, month, &ts.tm_year, &ts.tm_hour, &ts.tm_min, &ts.tm_sec); for (i=0; itimestamp = mktime(&ts); cleanup: return retval; } static readstat_error_t xport_read_namestr_header_record(xport_ctx_t *ctx) { xport_header_record_t xrecord; readstat_error_t retval = READSTAT_OK; retval = xport_read_header_record(ctx, &xrecord); if (retval != READSTAT_OK) goto cleanup; if (ctx->version == 5 && strcmp(xrecord.name, "NAMESTR") != 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } else if (ctx->version == 8 && strcmp(xrecord.name, "NAMSTV8") != 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } ctx->var_count = xrecord.num2; ctx->variables = readstat_calloc(ctx->var_count, sizeof(readstat_variable_t *)); if (ctx->variables == NULL) { retval = 
READSTAT_ERROR_MALLOC; goto cleanup; } if (ctx->handle.metadata) { readstat_metadata_t metadata = { .row_count = -1, .var_count = ctx->var_count, .file_label = ctx->file_label, .table_name = ctx->table_name, .creation_time = ctx->timestamp, .modified_time = ctx->timestamp, .file_format_version = ctx->version }; if (ctx->handle.metadata(&metadata, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } cleanup: return retval; } static readstat_error_t xport_read_obs_header_record(xport_ctx_t *ctx) { return xport_expect_header_record(ctx, "OBS", "OBSV8"); } static readstat_error_t xport_construct_format(char *dst, size_t dst_len, const char *src, size_t src_len, int width, int decimals) { char *format = malloc(4 * src_len + 1); readstat_error_t retval = readstat_convert(format, 4 * src_len + 1, src, src_len, NULL); if (retval != READSTAT_OK) { free(format); return retval; } if (!format[0]) { *dst = '\0'; } else if (decimals) { snprintf(dst, dst_len, "%s%d.%d", format, width, decimals); } else if (width) { snprintf(dst, dst_len, "%s%d", format, width); } else { snprintf(dst, dst_len, "%s", format); } free(format); return retval; } static readstat_error_t xport_read_labels_v8(xport_ctx_t *ctx, int label_count) { readstat_error_t retval = READSTAT_OK; uint16_t labeldef[3]; char *name = NULL; char *label = NULL; int i; for (i=0; i ctx->var_count || index == 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } name = realloc(name, name_len + 1); label = realloc(label, label_len + 1); readstat_variable_t *variable = ctx->variables[index-1]; if (read_bytes(ctx, name, name_len) != name_len || read_bytes(ctx, label, label_len) != label_len) { retval = READSTAT_ERROR_READ; goto cleanup; } retval = readstat_convert(variable->name, sizeof(variable->name), name, name_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; retval = readstat_convert(variable->label, sizeof(variable->label), label, label_len, ctx->converter); if (retval 
!= READSTAT_OK) goto cleanup; } retval = xport_skip_rest_of_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_read_obs_header_record(ctx); if (retval != READSTAT_OK) goto cleanup; cleanup: free(name); free(label); return retval; } static readstat_error_t xport_read_labels_v9(xport_ctx_t *ctx, int label_count) { readstat_error_t retval = READSTAT_OK; uint16_t labeldef[5]; int i; char *name = NULL; char *format = NULL; char *informat = NULL; char *label = NULL; for (i=0; i ctx->var_count || index == 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } name = realloc(name, name_len + 1); format = realloc(format, format_len + 1); informat = realloc(informat, informat_len + 1); label = realloc(label, label_len + 1); readstat_variable_t *variable = ctx->variables[index-1]; if (read_bytes(ctx, name, name_len) != name_len || read_bytes(ctx, format, format_len) != format_len || read_bytes(ctx, informat, informat_len) != informat_len || read_bytes(ctx, label, label_len) != label_len) { retval = READSTAT_ERROR_READ; goto cleanup; } retval = readstat_convert(variable->name, sizeof(variable->name), name, name_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; retval = readstat_convert(variable->label, sizeof(variable->label), label, label_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; retval = xport_construct_format(variable->format, sizeof(variable->format), format, format_len, variable->display_width, variable->decimals); if (retval != READSTAT_OK) goto cleanup; } retval = xport_skip_rest_of_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_read_obs_header_record(ctx); if (retval != READSTAT_OK) goto cleanup; cleanup: free(name); free(format); free(informat); free(label); return retval; } static readstat_error_t xport_read_variables(xport_ctx_t *ctx) { int i; readstat_error_t retval = READSTAT_OK; for (i=0; ivar_count; i++) { xport_namestr_t namestr; ssize_t bytes_read = read_bytes(ctx, &namestr, 
sizeof(xport_namestr_t)); if (bytes_read < sizeof(xport_namestr_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } xport_namestr_bswap(&namestr); readstat_variable_t *variable = calloc(1, sizeof(readstat_variable_t)); variable->index = i; variable->type = namestr.ntype == SAS_COLUMN_TYPE_CHR ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE; variable->storage_width = namestr.nlng; variable->display_width = namestr.nfl; variable->decimals = namestr.nfd; variable->alignment = namestr.nfj ? READSTAT_ALIGNMENT_RIGHT : READSTAT_ALIGNMENT_LEFT; if (ctx->version == 5) { retval = readstat_convert(variable->name, sizeof(variable->name), namestr.nname, sizeof(namestr.nname), ctx->converter); } else { retval = readstat_convert(variable->name, sizeof(variable->name), namestr.longname, sizeof(namestr.longname), ctx->converter); } if (retval != READSTAT_OK) goto cleanup; retval = readstat_convert(variable->label, sizeof(variable->label), namestr.nlabel, sizeof(namestr.nlabel), ctx->converter); if (retval != READSTAT_OK) goto cleanup; retval = xport_construct_format(variable->format, sizeof(variable->format), namestr.nform, sizeof(namestr.nform), variable->display_width, variable->decimals); if (retval != READSTAT_OK) goto cleanup; ctx->variables[i] = variable; } retval = xport_skip_rest_of_record(ctx); if (retval != READSTAT_OK) goto cleanup; if (ctx->version == 5) { retval = xport_read_obs_header_record(ctx); if (retval != READSTAT_OK) goto cleanup; } else { xport_header_record_t xrecord; retval = xport_read_header_record(ctx, &xrecord); if (retval != READSTAT_OK) goto cleanup; if (strcmp(xrecord.name, "OBSV8") == 0) { /* void */ } else if (strcmp(xrecord.name, "LABELV8") == 0) { retval = xport_read_labels_v8(ctx, xrecord.num1); } else if (strcmp(xrecord.name, "LABELV9") == 0) { retval = xport_read_labels_v9(ctx, xrecord.num1); } if (retval != READSTAT_OK) goto cleanup; } ctx->row_length = 0; int index_after_skipping = 0; for (i=0; ivar_count; i++) { readstat_variable_t *variable 
= ctx->variables[i]; variable->index_after_skipping = index_after_skipping; int cb_retval = READSTAT_HANDLER_OK; if (ctx->handle.variable) { cb_retval = ctx->handle.variable(i, variable, variable->format, ctx->user_ctx); } if (cb_retval == READSTAT_HANDLER_ABORT) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } if (cb_retval == READSTAT_HANDLER_SKIP_VARIABLE) { variable->skip = 1; } else { index_after_skipping++; } ctx->row_length += variable->storage_width; } cleanup: return retval; } static readstat_error_t xport_process_row(xport_ctx_t *ctx, const char *row, size_t row_length) { readstat_error_t retval = READSTAT_OK; int i; off_t pos = 0; char *string = NULL; for (i=0; ivar_count; i++) { readstat_variable_t *variable = ctx->variables[i]; readstat_value_t value = { .type = variable->type }; if (variable->type == READSTAT_TYPE_STRING) { string = readstat_realloc(string, 4*variable->storage_width+1); if (string == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } retval = readstat_convert(string, 4*variable->storage_width+1, &row[pos], variable->storage_width, ctx->converter); if (retval != READSTAT_OK) goto cleanup; value.v.string_value = string; } else { double dval = NAN; if (variable->storage_width <= XPORT_MAX_DOUBLE_SIZE && variable->storage_width >= XPORT_MIN_DOUBLE_SIZE) { char full_value[8] = { 0 }; if (memcmp(&full_value[1], &row[pos+1], variable->storage_width - 1) == 0 && (row[pos] == '.' 
|| sas_validate_tag(row[pos]) == READSTAT_OK)) { if (row[pos] == '.') { value.is_system_missing = 1; } else { value.tag = row[pos]; value.is_tagged_missing = 1; } } else { memcpy(full_value, &row[pos], variable->storage_width); int rc = cnxptiee(full_value, CN_TYPE_XPORT, &dval, CN_TYPE_NATIVE); if (rc != 0) { retval = READSTAT_ERROR_CONVERT; goto cleanup; } } } value.v.double_value = dval; } pos += variable->storage_width; if (ctx->handle.value && !ctx->variables[i]->skip && !ctx->row_offset) { if (ctx->handle.value(ctx->parsed_row_count, variable, value, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } } if (ctx->row_offset) { ctx->row_offset--; } else { ctx->parsed_row_count++; } cleanup: free(string); return retval; } static readstat_error_t xport_read_data(xport_ctx_t *ctx) { if (!ctx->row_length) return READSTAT_OK; if (!ctx->handle.value) return READSTAT_OK; readstat_error_t retval = READSTAT_OK; char *row = readstat_malloc(ctx->row_length); char *blank_row = readstat_malloc(ctx->row_length); int num_blank_rows = 0; if (row == NULL || blank_row == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } memset(blank_row, ' ', ctx->row_length); while (1) { ssize_t bytes_read = read_bytes(ctx, row, ctx->row_length); if (bytes_read == -1) { retval = READSTAT_ERROR_READ; goto cleanup; } else if (bytes_read < ctx->row_length) { break; } off_t pos = 0; int row_is_blank = 1; for (pos=0; posrow_length; pos++) { if (row[pos] != ' ') { row_is_blank = 0; break; } } if (row_is_blank) { num_blank_rows++; continue; } while (num_blank_rows) { retval = xport_process_row(ctx, blank_row, ctx->row_length); if (retval != READSTAT_OK) goto cleanup; if (ctx->row_limit > 0 && ctx->parsed_row_count == ctx->row_limit) goto cleanup; num_blank_rows--; } retval = xport_process_row(ctx, row, ctx->row_length); if (retval != READSTAT_OK) goto cleanup; retval = xport_update_progress(ctx); if (retval != READSTAT_OK) goto cleanup; if 
(ctx->row_limit > 0 && ctx->parsed_row_count == ctx->row_limit) break; } cleanup: if (row) free(row); if (blank_row) free(blank_row); return retval; } readstat_error_t readstat_parse_xport(readstat_parser_t *parser, const char *path, void *user_ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = parser->io; xport_ctx_t *ctx = xport_ctx_init(); ctx->handle = parser->handlers; ctx->input_encoding = parser->input_encoding; ctx->output_encoding = parser->output_encoding; ctx->user_ctx = user_ctx; ctx->io = io; ctx->row_limit = parser->row_limit; if (parser->row_offset > 0) ctx->row_offset = parser->row_offset; if (io->open(path, io->io_ctx) == -1) { retval = READSTAT_ERROR_OPEN; goto cleanup; } if ((ctx->file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx)) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (ctx->input_encoding && ctx->output_encoding && strcmp(ctx->input_encoding, ctx->output_encoding) != 0) { iconv_t converter = iconv_open(ctx->output_encoding, ctx->input_encoding); if (converter == (iconv_t)-1) { retval = READSTAT_ERROR_UNSUPPORTED_CHARSET; goto cleanup; } ctx->converter = converter; } retval = xport_read_library_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_skip_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_read_timestamp_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_expect_header_record(ctx, "MEMBER", "MEMBV8"); if (retval != READSTAT_OK) goto cleanup; retval = xport_expect_header_record(ctx, "DSCRPTR", "DSCPTV8"); if (retval != READSTAT_OK) goto cleanup; retval = xport_read_table_name_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_read_file_label_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_read_namestr_header_record(ctx); if (retval != READSTAT_OK) goto cleanup; retval = xport_read_variables(ctx); if (retval 
!= READSTAT_OK) goto cleanup; if (ctx->row_length) { retval = xport_read_data(ctx); if (retval != READSTAT_OK) goto cleanup; } cleanup: io->close(io->io_ctx); xport_ctx_free(ctx); return retval; } ReadStat-1.1.7/src/sas/readstat_xport_write.c000066400000000000000000000435401410722155500212350ustar00rootroot00000000000000 #include #include #include #include "../readstat.h" #include "../readstat_writer.h" #include "readstat_sas.h" #include "readstat_xport.h" #include "ieee.h" #define XPORT_DEFAULT_VERISON 8 #define RECORD_LEN 80 #if defined _MSC_VER #define restrict __restrict #endif static void copypad(char * restrict dst, size_t dst_len, const char * restrict src) { char *dst_end = dst + dst_len; while (dst < dst_end && *src) *dst++ = *src++; while (dst < dst_end) *dst++ = ' '; } static readstat_error_t xport_write_bytes(readstat_writer_t *writer, const void *bytes, size_t len) { return readstat_write_bytes_as_lines(writer, bytes, len, RECORD_LEN, ""); } static readstat_error_t xport_finish_record(readstat_writer_t *writer) { return readstat_write_line_padding(writer, ' ', RECORD_LEN, ""); } static readstat_error_t xport_write_record(readstat_writer_t *writer, const char *record) { size_t len = strlen(record); readstat_error_t retval = READSTAT_OK; retval = xport_write_bytes(writer, record, len); if (retval != READSTAT_OK) goto cleanup; retval = xport_finish_record(writer); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t xport_write_header_record_v8(readstat_writer_t *writer, xport_header_record_t *xrecord) { char record[RECORD_LEN+1]; snprintf(record, sizeof(record), "HEADER RECORD*******%-8sHEADER RECORD!!!!!!!%-30d", xrecord->name, xrecord->num1); return xport_write_record(writer, record); } static readstat_error_t xport_write_header_record(readstat_writer_t *writer, xport_header_record_t *xrecord) { char record[RECORD_LEN+1]; snprintf(record, sizeof(record), "HEADER RECORD*******%-8sHEADER RECORD!!!!!!!" 
"%05d%05d%05d" "%05d%05d%05d", xrecord->name, xrecord->num1, xrecord->num2, xrecord->num3, xrecord->num4, xrecord->num5, xrecord->num6); return xport_write_record(writer, record); } static size_t xport_variable_width(readstat_type_t type, size_t user_width) { if (type == READSTAT_TYPE_STRING) return user_width; if (user_width >= XPORT_MAX_DOUBLE_SIZE || user_width == 0) return XPORT_MAX_DOUBLE_SIZE; if (user_width <= XPORT_MIN_DOUBLE_SIZE) return XPORT_MIN_DOUBLE_SIZE; return user_width; } static readstat_error_t xport_write_variables(readstat_writer_t *writer) { readstat_error_t retval = READSTAT_OK; int i; long offset = 0; int num_long_labels = 0; int any_has_long_format = 0; for (i=0; ivariables_count; i++) { int needs_long_record = 0; readstat_variable_t *variable = readstat_get_variable(writer, i); size_t width = xport_variable_width(variable->type, variable->user_width); xport_namestr_t namestr = { .nvar0 = i+1, .nlng = width, .npos = offset, .niform = " ", .nform = " " }; if (readstat_variable_get_type_class(variable) == READSTAT_TYPE_CLASS_STRING) { namestr.ntype = SAS_COLUMN_TYPE_CHR; } else { namestr.ntype = SAS_COLUMN_TYPE_NUM; } copypad(namestr.nname, sizeof(namestr.nname), variable->name); copypad(namestr.nlabel, sizeof(namestr.nlabel), variable->label); if (variable->format[0]) { int decimals = 0; int width = 0; char name[24]; sscanf(variable->format, "%s%d.%d", name, &width, &decimals); copypad(namestr.nform, sizeof(namestr.nform), name); namestr.nfl = width; namestr.nfd = decimals; copypad(namestr.niform, sizeof(namestr.niform), name); namestr.nifl = width; namestr.nifd = decimals; if (strlen(name) > 8) { any_has_long_format = 1; needs_long_record = 1; } } else if (variable->display_width) { namestr.nfl = variable->display_width; } namestr.nfj = (variable->alignment == READSTAT_ALIGNMENT_RIGHT); if (writer->version == 8) { copypad(namestr.longname, sizeof(namestr.longname), variable->name); size_t label_len = strlen(variable->label); if (label_len > 
40) { needs_long_record = 1; } namestr.labeln = label_len; } if (needs_long_record) { num_long_labels++; } offset += width; xport_namestr_bswap(&namestr); retval = xport_write_bytes(writer, &namestr, sizeof(xport_namestr_t)); if (retval != READSTAT_OK) goto cleanup; } retval = xport_finish_record(writer); if (retval != READSTAT_OK) goto cleanup; if (writer->version == 8 && num_long_labels) { xport_header_record_t header = { .name = "LABELV8", .num1 = num_long_labels }; if (any_has_long_format) { strcpy(header.name, "LABELV9"); } retval = xport_write_header_record_v8(writer, &header); if (retval != READSTAT_OK) goto cleanup; for (i=0; ivariables_count; i++) { readstat_variable_t *variable = readstat_get_variable(writer, i); size_t label_len = strlen(variable->label); size_t name_len = strlen(variable->name); int has_long_label = 0; int has_long_format = 0; int format_len = 0; char format_name[24]; memset(format_name, 0, sizeof(format_name)); has_long_label = (label_len > 40); if (variable->format[0]) { int decimals = 2; int width = 8; int matches = sscanf(variable->format, "%s%d.%d", format_name, &width, &decimals); if (matches < 1) { retval = READSTAT_ERROR_BAD_FORMAT_STRING; goto cleanup; } format_len = strlen(format_name); if (format_len > 8) { has_long_format = 1; } } if (has_long_format) { uint16_t labeldef[5] = { i+1, name_len, format_len, format_len, label_len }; if (machine_is_little_endian()) { labeldef[0] = byteswap2(labeldef[0]); labeldef[1] = byteswap2(labeldef[1]); labeldef[2] = byteswap2(labeldef[2]); labeldef[3] = byteswap2(labeldef[3]); labeldef[4] = byteswap2(labeldef[4]); } retval = readstat_write_bytes(writer, labeldef, sizeof(labeldef)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_string(writer, variable->name); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_string(writer, format_name); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_string(writer, format_name); if (retval != READSTAT_OK) 
goto cleanup; retval = readstat_write_string(writer, variable->label); if (retval != READSTAT_OK) goto cleanup; } else if (has_long_label) { uint16_t labeldef[3] = { i+1, name_len, label_len }; if (machine_is_little_endian()) { labeldef[0] = byteswap2(labeldef[0]); labeldef[1] = byteswap2(labeldef[1]); labeldef[2] = byteswap2(labeldef[2]); } retval = readstat_write_bytes(writer, labeldef, sizeof(labeldef)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_string(writer, variable->name); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_string(writer, variable->label); if (retval != READSTAT_OK) goto cleanup; } } retval = xport_finish_record(writer); if (retval != READSTAT_OK) goto cleanup; } cleanup: return retval; } static readstat_error_t xport_write_first_header_record(readstat_writer_t *writer) { xport_header_record_t xrecord = { .name = "LIBRARY" }; if (writer->version == 8) { strcpy(xrecord.name, "LIBV8"); } return xport_write_header_record(writer, &xrecord); } static readstat_error_t xport_write_first_real_header_record(readstat_writer_t *writer, const char *timestamp) { char real_record[RECORD_LEN+1]; snprintf(real_record, sizeof(real_record), "%-8.8s" "%-8.8s" "%-8.8s" "%-8.8s" "%-8.8s" "%-24.24s" "%16.16s", "SAS", "SAS", "SASLIB", "6.06", "bsd4.2", "", timestamp); return xport_write_record(writer, real_record); } static readstat_error_t xport_write_member_header_record(readstat_writer_t *writer) { xport_header_record_t xrecord = { .name = "MEMBER", .num4 = 160, .num6 = 140 }; if (writer->version == 8) { strcpy(xrecord.name, "MEMBV8"); } return xport_write_header_record(writer, &xrecord); } static readstat_error_t xport_write_descriptor_header_record(readstat_writer_t *writer) { xport_header_record_t xrecord = { .name = "DSCRPTR" }; if (writer->version == 8) { strcpy(xrecord.name, "DSCPTV8"); } return xport_write_header_record(writer, &xrecord); } static readstat_error_t xport_write_member_record_v8(readstat_writer_t *writer, 
char *timestamp) { readstat_error_t retval = READSTAT_OK; char member_header[RECORD_LEN+1]; char *ds_name = "DATASET"; if (writer->table_name[0]) ds_name = writer->table_name; snprintf(member_header, sizeof(member_header), "%-8.8s" "%-32.32s" "%-8.8s" "%-8.8s" "%-8.8s" "%16.16s", "SAS", ds_name, "SASDATA", "6.06", "bsd4.2", timestamp); retval = xport_write_record(writer, member_header); return retval; } static readstat_error_t xport_write_member_record(readstat_writer_t *writer, char *timestamp) { if (writer->version == 8) return xport_write_member_record_v8(writer, timestamp); readstat_error_t retval = READSTAT_OK; char member_header[RECORD_LEN+1]; char *ds_name = "DATASET"; if (writer->table_name[0]) ds_name = writer->table_name; snprintf(member_header, sizeof(member_header), "%-8.8s" "%-8.8s" "%-8.8s" "%-8.8s" "%-8.8s" "%-24.24s" "%16.16s", "SAS", ds_name, "SASDATA", "6.06", "bsd4.2", "", timestamp); retval = xport_write_record(writer, member_header); return retval; } static readstat_error_t xport_write_file_label_record(readstat_writer_t *writer, char *timestamp) { char member_header[RECORD_LEN+1]; snprintf(member_header, sizeof(member_header), "%16.16s" "%16.16s" "%-40.40s" "%-8.8s", timestamp, "", writer->file_label, "" /* dstype? 
*/); return xport_write_record(writer, member_header); } static readstat_error_t xport_write_namestr_header_record(readstat_writer_t *writer) { xport_header_record_t xrecord = { .name = "NAMESTR", .num2 = writer->variables_count }; if (writer->version == 8) { strcpy(xrecord.name, "NAMSTV8"); } return xport_write_header_record(writer, &xrecord); } static readstat_error_t xport_write_obs_header_record(readstat_writer_t *writer) { xport_header_record_t xrecord = { .name = "OBS" }; if (writer->version == 8) { strcpy(xrecord.name, "OBSV8"); } return xport_write_header_record(writer, &xrecord); } static readstat_error_t xport_format_timestamp(char *output, size_t output_len, time_t timestamp) { struct tm *ts = localtime(×tamp); if (!ts) return READSTAT_ERROR_BAD_TIMESTAMP_VALUE; snprintf(output, output_len, "%02d%3.3s%02d:%02d:%02d:%02d", (unsigned int)ts->tm_mday % 100, _xport_months[ts->tm_mon], (unsigned int)ts->tm_year % 100, (unsigned int)ts->tm_hour % 100, (unsigned int)ts->tm_min % 100, (unsigned int)ts->tm_sec % 100 ); return READSTAT_OK; } static readstat_error_t xport_begin_data(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; readstat_error_t retval = READSTAT_OK; char timestamp[17]; retval = xport_format_timestamp(timestamp, sizeof(timestamp), writer->timestamp); if (retval != READSTAT_OK) goto cleanup; retval = xport_write_first_header_record(writer); if (retval != READSTAT_OK) goto cleanup; retval = xport_write_first_real_header_record(writer, timestamp); if (retval != READSTAT_OK) goto cleanup; retval = xport_write_record(writer, timestamp); if (retval != READSTAT_OK) goto cleanup; retval = xport_write_member_header_record(writer); if (retval != READSTAT_OK) goto cleanup; retval = xport_write_descriptor_header_record(writer); if (retval != READSTAT_OK) goto cleanup; retval = xport_write_member_record(writer, timestamp); if (retval != READSTAT_OK) goto cleanup; retval = xport_write_file_label_record(writer, timestamp); if 
(retval != READSTAT_OK) goto cleanup; retval = xport_write_namestr_header_record(writer); if (retval != READSTAT_OK) goto cleanup; retval = xport_write_variables(writer); if (retval != READSTAT_OK) goto cleanup; retval = xport_write_obs_header_record(writer); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t xport_end_data(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; readstat_error_t retval = READSTAT_OK; retval = xport_finish_record(writer); return retval; } static readstat_error_t xport_write_row(void *writer_ctx, void *row, size_t row_len) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; return xport_write_bytes(writer, row, row_len); } static readstat_error_t xport_write_double(void *row, const readstat_variable_t *var, double value) { char full_value[8]; int rc = cnxptiee(&value, CN_TYPE_NATIVE, full_value, CN_TYPE_XPORT); if (rc) return READSTAT_ERROR_CONVERT; memcpy(row, full_value, var->storage_width); return READSTAT_OK; } static readstat_error_t xport_write_float(void *row, const readstat_variable_t *var, float value) { return xport_write_double(row, var, value); } static readstat_error_t xport_write_int32(void *row, const readstat_variable_t *var, int32_t value) { return xport_write_double(row, var, value); } static readstat_error_t xport_write_int16(void *row, const readstat_variable_t *var, int16_t value) { return xport_write_double(row, var, value); } static readstat_error_t xport_write_int8(void *row, const readstat_variable_t *var, int8_t value) { return xport_write_double(row, var, value); } static readstat_error_t xport_write_string(void *row, const readstat_variable_t *var, const char *string) { memset(row, ' ', var->storage_width); if (string != NULL && string[0]) { size_t value_len = strlen(string); if (value_len > var->storage_width) return READSTAT_ERROR_STRING_VALUE_IS_TOO_LONG; memcpy(row, string, value_len); } return READSTAT_OK; } static 
readstat_error_t xport_write_missing_numeric(void *row, const readstat_variable_t *var) { char *row_bytes = (char *)row; row_bytes[0] = 0x2e; return READSTAT_OK; } static readstat_error_t xport_write_missing_string(void *row, const readstat_variable_t *var) { return xport_write_string(row, var, NULL); } static readstat_error_t xport_write_missing_tagged(void *row, const readstat_variable_t *var, char tag) { char *row_bytes = (char *)row; readstat_error_t error = sas_validate_tag(tag); if (error == READSTAT_OK) { row_bytes[0] = tag; } return error; } static readstat_error_t xport_metadata_ok(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; if (writer->version != 5 && writer->version != 8) return READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION; if (writer->table_name[0]) { if (writer->version == 8) { return sas_validate_name(writer->table_name, 32); } if (writer->version == 5) { return sas_validate_name(writer->table_name, 8); } } return READSTAT_OK; } readstat_error_t readstat_begin_writing_xport(readstat_writer_t *writer, void *user_ctx, long row_count) { if (writer->version == 0) writer->version = XPORT_DEFAULT_VERISON; writer->callbacks.metadata_ok = &xport_metadata_ok; writer->callbacks.write_int8 = &xport_write_int8; writer->callbacks.write_int16 = &xport_write_int16; writer->callbacks.write_int32 = &xport_write_int32; writer->callbacks.write_float = &xport_write_float; writer->callbacks.write_double = &xport_write_double; writer->callbacks.write_string = &xport_write_string; writer->callbacks.write_missing_string = &xport_write_missing_string; writer->callbacks.write_missing_number = &xport_write_missing_numeric; writer->callbacks.write_missing_tagged = &xport_write_missing_tagged; writer->callbacks.variable_width = &xport_variable_width; writer->callbacks.variable_ok = &sas_validate_variable; writer->callbacks.begin_data = &xport_begin_data; writer->callbacks.end_data = &xport_end_data; writer->callbacks.write_row = 
&xport_write_row; return readstat_begin_writing_file(writer, user_ctx, row_count); } ReadStat-1.1.7/src/spss/000077500000000000000000000000001410722155500150105ustar00rootroot00000000000000ReadStat-1.1.7/src/spss/readstat_por.c000066400000000000000000000130261410722155500176450ustar00rootroot00000000000000#include #include #include "../readstat.h" #include "../CKHashTable.h" #include "../readstat_convert.h" #include "readstat_spss.h" #include "readstat_por.h" int8_t por_ascii_lookup[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', '.', '<', '(', '+', '|', '&', '[', ']', '!', '$', '*', ')', ';', '^', '-', '/', '|', ',', '%', '_', '>', '?', '`', ':', '#', '@', '\'', '=', '"', 0, 0, 0, 0, 0, 0, '~', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '{', '}', '\\', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; uint16_t por_unicode_lookup[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 
' ', '.', '<', '(', '+', '|', '&', '[', ']', '!', '$', '*', ')', ';', '^', '-', '/', 0x00A3, ',', '%', '_', '>', '?', 0x2018, ':', 0x00A6, '@', 0x2019, '=', '"', 0x2264, 0x25A1, 0x00B1, 0x25A0, 0x00B0, 0x2020, '~', 0x2013, 0x2514, 0x250C, 0x2265, 0x2070, 0x2071, 0x00B2, 0x00B3, 0x2074, 0x2075, 0x2076, 0x2077, 0x2078, 0x2079, 0x2518, 0x2510, 0x2260, 0x2014, 0x207D, 0x207E, 0x2E38, '{', '}', '\\', 0x00A2, 0x2022, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; por_ctx_t *por_ctx_init() { por_ctx_t *ctx = calloc(1, sizeof(por_ctx_t)); ctx->space = ' '; ctx->base30_precision = 20; ctx->var_dict = ck_hash_table_init(1024, 8); return ctx; } void por_ctx_free(por_ctx_t *ctx) { if (ctx->string_buffer) free(ctx->string_buffer); if (ctx->varinfo) { int i; for (i=0; ivar_count; i++) { if (ctx->varinfo[i].label) free(ctx->varinfo[i].label); } free(ctx->varinfo); } if (ctx->variables) { int i; for (i=0; ivar_count; i++) { if (ctx->variables[i]) free(ctx->variables[i]); } free(ctx->variables); } if (ctx->var_dict) ck_hash_table_free(ctx->var_dict); if (ctx->converter) iconv_close(ctx->converter); free(ctx); } ssize_t por_utf8_encode(const unsigned char *input, size_t input_len, char *output, size_t output_len, uint16_t lookup[256]) { int offset = 0; int i; for (i=0; i output_len) return offset; output[offset++] = codepoint; } else { if (codepoint <= 0x07FF) { if (offset + 2 > output_len) return offset; } else /* if (codepoint <= 0xFFFF) */{ if (offset + 3 > output_len) return offset; } /* TODO - For some reason that replacement character isn't recognized * by some systems, so be prepared to insert an ASCII space instead */ int printed = sprintf(output + offset, "%lc", codepoint); if (printed > 0) { offset += printed; } else { output[offset++] = ' '; } } } return offset; } ssize_t por_utf8_decode( const char *input, size_t 
input_len, char *output, size_t output_len, uint8_t *lookup, size_t lookup_len) { int offset = 0; wchar_t codepoint = 0; while (1) { int char_len = 0; if (offset + 1 > output_len) return offset; unsigned char val = *input; if (val >= 0x20 && val < 0x7F) { if (!lookup[val]) return -1; output[offset++] = lookup[val]; input++; } else { int conversions = sscanf(input, "%lc%n", &codepoint, &char_len); if (conversions == 0 || codepoint >= lookup_len || lookup[codepoint] == 0) { return -1; } output[offset++] = lookup[codepoint]; input += char_len; } } return offset; } ReadStat-1.1.7/src/spss/readstat_por.h000066400000000000000000000024031410722155500176470ustar00rootroot00000000000000 extern int8_t por_ascii_lookup[256]; extern uint16_t por_unicode_lookup[256]; typedef struct por_ctx_s { readstat_callbacks_t handle; size_t file_size; void *user_ctx; int pos; readstat_io_t *io; char space; long num_spaces; time_t timestamp; long version; char fweight_name[9]; char file_label[21]; uint16_t byte2unicode[256]; size_t base30_precision; iconv_t converter; unsigned char *string_buffer; size_t string_buffer_len; int labels_offset; int obs_count; int var_count; int var_offset; int row_limit; int row_offset; readstat_variable_t **variables; spss_varinfo_t *varinfo; ck_hash_table_t *var_dict; } por_ctx_t; por_ctx_t *por_ctx_init(); void por_ctx_free(por_ctx_t *ctx); ssize_t por_utf8_encode(const unsigned char *input, size_t input_len, char *output, size_t output_len, uint16_t lookup[256]); ssize_t por_utf8_decode( const char *input, size_t input_len, char *output, size_t output_len, uint8_t *lookup, size_t lookup_len); ReadStat-1.1.7/src/spss/readstat_por_parse.c000066400000000000000000000160401410722155500210360ustar00rootroot00000000000000#line 1 "src/spss/readstat_por_parse.rl" #include #include "../readstat.h" #include "readstat_por_parse.h" #line 9 "src/spss/readstat_por_parse.c" static const signed char _por_field_parse_actions[] = { 0, 1, 0, 1, 1, 1, 5, 1, 8, 1, 9, 1, 10, 2, 
2, 0, 2, 3, 1, 2, 5, 10, 2, 7, 10, 3, 4, 2, 0, 3, 6, 2, 0, 0 }; static const signed char _por_field_parse_key_offsets[] = { 0, 0, 8, 9, 14, 18, 23, 31, 35, 40, 44, 48, 55, 0 }; static const char _por_field_parse_trans_keys[] = { 32, 42, 45, 46, 48, 57, 65, 84, 46, 46, 48, 57, 65, 84, 48, 57, 65, 84, 47, 48, 57, 65, 84, 43, 45, 46, 47, 48, 57, 65, 84, 48, 57, 65, 84, 47, 48, 57, 65, 84, 48, 57, 65, 84, 48, 57, 65, 84, 43, 45, 47, 48, 57, 65, 84, 0 }; static const signed char _por_field_parse_single_lengths[] = { 0, 4, 1, 1, 0, 1, 4, 0, 1, 0, 0, 3, 0, 0 }; static const signed char _por_field_parse_range_lengths[] = { 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0 }; static const signed char _por_field_parse_index_offsets[] = { 0, 0, 7, 9, 13, 16, 20, 27, 30, 34, 37, 40, 46, 0 }; static const signed char _por_field_parse_cond_targs[] = { 1, 2, 3, 4, 6, 6, 0, 12, 0, 4, 6, 6, 0, 5, 5, 0, 12, 5, 5, 0, 7, 9, 10, 12, 6, 6, 0, 8, 8, 0, 12, 8, 8, 0, 8, 8, 0, 11, 11, 0, 7, 9, 12, 11, 11, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0 }; static const signed char _por_field_parse_cond_actions[] = { 0, 9, 0, 0, 13, 13, 0, 11, 0, 7, 25, 25, 0, 16, 16, 0, 11, 3, 3, 0, 5, 5, 5, 19, 1, 1, 0, 13, 13, 0, 22, 1, 1, 0, 29, 29, 0, 16, 16, 0, 0, 0, 11, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static const int por_field_parse_start = 1; static const int por_field_parse_en_main = 1; #line 9 "src/spss/readstat_por_parse.rl" ssize_t readstat_por_parse_double(const char *data, size_t len, double *result, readstat_error_handler error_cb, void *user_ctx) { ssize_t retval = 0; double val = 0.0; double denom = 30.0; double temp_frac = 0.0; double num = 0.0; double exp = 0.0; double temp_val = 0.0; const unsigned char *p = (const unsigned char *)data; const unsigned char *pe = p + len; int cs; int is_negative = 0, exp_is_negative = 0; int success = 0; #line 97 "src/spss/readstat_por_parse.c" { cs = (int)por_field_parse_start; } #line 102 "src/spss/readstat_por_parse.c" { int _klen; 
unsigned int _trans = 0; const char * _keys; const signed char * _acts; unsigned int _nacts; _resume: {} if ( p == pe ) goto _out; _keys = ( _por_field_parse_trans_keys + (_por_field_parse_key_offsets[cs])); _trans = (unsigned int)_por_field_parse_index_offsets[cs]; _klen = (int)_por_field_parse_single_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + _klen - 1; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _keys += _klen; _trans += (unsigned int)_klen; break; } _mid = _lower + ((_upper-_lower) >> 1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 1; else if ( ( (*( p))) > (*( _mid)) ) _lower = _mid + 1; else { _trans += (unsigned int)(_mid - _keys); goto _match; } } } _klen = (int)_por_field_parse_range_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + (_klen<<1) - 2; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _trans += (unsigned int)_klen; break; } _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 2; else if ( ( (*( p))) > (*( _mid + 1)) ) _lower = _mid + 2; else { _trans += (unsigned int)((_mid - _keys)>>1); break; } } } _match: {} cs = (int)_por_field_parse_cond_targs[_trans]; if ( _por_field_parse_cond_actions[_trans] != 0 ) { _acts = ( _por_field_parse_actions + (_por_field_parse_cond_actions[_trans])); _nacts = (unsigned int)(*( _acts)); _acts += 1; while ( _nacts > 0 ) { switch ( (*( _acts)) ) { case 0: { { #line 30 "src/spss/readstat_por_parse.rl" if ((( (*( p)))) >= '0' && (( (*( p)))) <= '9') { temp_val = 30 * temp_val + ((( (*( p)))) - '0'); } else if ((( (*( p)))) >= 'A' && (( (*( p)))) <= 'T') { temp_val = 30 * temp_val + (10 + (( (*( p)))) - 'A'); } } #line 184 "src/spss/readstat_por_parse.c" break; } case 1: { { #line 38 "src/spss/readstat_por_parse.rl" if ((( (*( p)))) >= '0' && (( (*( p)))) <= '9') { temp_frac += ((( (*( p)))) - '0') / denom; } else if ((( (*( p)))) >= 'A' && (( (*( p)))) <= 'T') { 
temp_frac += (10 + (( (*( p)))) - 'A') / denom; } denom *= 30.0; } #line 200 "src/spss/readstat_por_parse.c" break; } case 2: { { #line 47 "src/spss/readstat_por_parse.rl" temp_val = 0; } #line 209 "src/spss/readstat_por_parse.c" break; } case 3: { { #line 49 "src/spss/readstat_por_parse.rl" temp_frac = 0.0; } #line 218 "src/spss/readstat_por_parse.c" break; } case 4: { { #line 53 "src/spss/readstat_por_parse.rl" is_negative = 1; } #line 227 "src/spss/readstat_por_parse.c" break; } case 5: { { #line 53 "src/spss/readstat_por_parse.rl" num = temp_val; } #line 236 "src/spss/readstat_por_parse.c" break; } case 6: { { #line 54 "src/spss/readstat_por_parse.rl" exp_is_negative = 1; } #line 245 "src/spss/readstat_por_parse.c" break; } case 7: { { #line 54 "src/spss/readstat_por_parse.rl" exp = temp_val; } #line 254 "src/spss/readstat_por_parse.c" break; } case 8: { { #line 56 "src/spss/readstat_por_parse.rl" is_negative = 1; } #line 263 "src/spss/readstat_por_parse.c" break; } case 9: { { #line 58 "src/spss/readstat_por_parse.rl" val = NAN; } #line 272 "src/spss/readstat_por_parse.c" break; } case 10: { { #line 60 "src/spss/readstat_por_parse.rl" success = 1; {p += 1; goto _out; } } #line 281 "src/spss/readstat_por_parse.c" break; } } _nacts -= 1; _acts += 1; } } if ( cs != 0 ) { p += 1; goto _resume; } _out: {} } #line 64 "src/spss/readstat_por_parse.rl" if (!isnan(val)) { val = 1.0 * num + temp_frac; if (exp_is_negative) exp *= -1; if (exp) { val *= pow(30.0, exp); } if (is_negative) val *= -1; } if (!success) { retval = -1; if (error_cb) { char error_buf[1024]; snprintf(error_buf, sizeof(error_buf), "Read bytes: %ld String: %.*s Ending state: %d", (long)(p - (const unsigned char *)data), (int)len, data, cs); error_cb(error_buf, user_ctx); } } if (retval == 0) { if (result) *result = val; retval = (p - (const unsigned char *)data); } /* suppress warning */ (void)por_field_parse_en_main; return retval; } 
ReadStat-1.1.7/src/spss/readstat_por_parse.h000066400000000000000000000002531410722155500210420ustar00rootroot00000000000000// // readstat_por_parse.h // ssize_t readstat_por_parse_double(const char *data, size_t len, double *result, readstat_error_handler error_cb, void *user_ctx); ReadStat-1.1.7/src/spss/readstat_por_parse.rl000066400000000000000000000051571410722155500212400ustar00rootroot00000000000000#include #include "../readstat.h" #include "readstat_por_parse.h" %%{ machine por_field_parse; write data nofinal noerror; }%% ssize_t readstat_por_parse_double(const char *data, size_t len, double *result, readstat_error_handler error_cb, void *user_ctx) { ssize_t retval = 0; double val = 0.0; double denom = 30.0; double temp_frac = 0.0; double num = 0.0; double exp = 0.0; double temp_val = 0.0; const unsigned char *p = (const unsigned char *)data; const unsigned char *pe = p + len; int cs; int is_negative = 0, exp_is_negative = 0; int success = 0; %%{ action incr_val { if (fc >= '0' && fc <= '9') { temp_val = 30 * temp_val + (fc - '0'); } else if (fc >= 'A' && fc <= 'T') { temp_val = 30 * temp_val + (10 + fc - 'A'); } } action incr_frac { if (fc >= '0' && fc <= '9') { temp_frac += (fc - '0') / denom; } else if (fc >= 'A' && fc <= 'T') { temp_frac += (10 + fc - 'A') / denom; } denom *= 30.0; } value = [0-9A-T]+ >{ temp_val = 0; } $incr_val; frac_value = [0-9A-T]+ >{ temp_frac = 0.0; } $incr_frac; fraction = "." frac_value; nonmissing_value = (("-" %{ is_negative = 1; })? value %{ num = temp_val; } fraction? ( ("+" | "-" %{ exp_is_negative = 1; }) value %{ exp = temp_val; })?) "/"; nonmissing_fraction = ("-" %{ is_negative = 1; })? fraction "/"; missing_value = "*." 
>{ val = NAN; }; main := " "* (missing_value | nonmissing_value | nonmissing_fraction ) @{ success = 1; fbreak; }; write init; write exec; }%% if (!isnan(val)) { val = 1.0 * num + temp_frac; if (exp_is_negative) exp *= -1; if (exp) { val *= pow(30.0, exp); } if (is_negative) val *= -1; } if (!success) { retval = -1; if (error_cb) { char error_buf[1024]; snprintf(error_buf, sizeof(error_buf), "Read bytes: %ld String: %.*s Ending state: %d", (long)(p - (const unsigned char *)data), (int)len, data, cs); error_cb(error_buf, user_ctx); } } if (retval == 0) { if (result) *result = val; retval = (p - (const unsigned char *)data); } /* suppress warning */ (void)por_field_parse_en_main; return retval; } ReadStat-1.1.7/src/spss/readstat_por_read.c000066400000000000000000000715271410722155500206520ustar00rootroot00000000000000// // readstat_por.c // #include #include #include #include #include #include #include #include #include "../readstat.h" #include "../readstat_iconv.h" #include "../readstat_convert.h" #include "../readstat_malloc.h" #include "../CKHashTable.h" #include "readstat_por_parse.h" #include "readstat_spss.h" #include "readstat_por.h" #define POR_LINE_LENGTH 80 #define POR_LABEL_NAME_PREFIX "labels" #define POR_FORMAT_SHIFT 82 #define MAX_FORMAT_TYPE (POR_FORMAT_SHIFT+SPSS_FORMAT_TYPE_YMDHMS) #define MAX_FORMAT_WIDTH 20000 #define MAX_FORMAT_DECIMALS 100 #define MAX_STRING_LENGTH 20000 #define MAX_VARS 1000000 #define MAX_WIDTH 1000000 #define MAX_LINES 1000000 #define MAX_STRINGS 1000000 #define MAX_LABELS 1000000 static ssize_t read_bytes(por_ctx_t *ctx, void *dst, size_t len); static readstat_error_t read_string(por_ctx_t *ctx, char *data, size_t len); static readstat_error_t por_update_progress(por_ctx_t *ctx) { readstat_io_t *io = ctx->io; return io->update(ctx->file_size, ctx->handle.progress, ctx->user_ctx, io->io_ctx); } static ssize_t read_bytes(por_ctx_t *ctx, void *dst, size_t len) { char *dst_pos = (char *)dst; readstat_io_t *io = ctx->io; char 
byte; while (dst_pos < (char *)dst + len) { if (ctx->num_spaces) { *dst_pos++ = ctx->space; ctx->num_spaces--; continue; } ssize_t bytes_read = io->read(&byte, 1, io->io_ctx); if (bytes_read == 0) { break; } if (bytes_read == -1) { return -1; } if (byte == '\r' || byte == '\n') { if (byte == '\r') { bytes_read = io->read(&byte, 1, io->io_ctx); if (bytes_read == 0 || bytes_read == -1 || byte != '\n') return -1; } ctx->num_spaces = POR_LINE_LENGTH - ctx->pos; ctx->pos = 0; continue; } else if (ctx->pos == POR_LINE_LENGTH) { return -1; } *dst_pos++ = byte; ctx->pos++; } return (int)(dst_pos - (char *)dst); } static uint16_t read_tag(por_ctx_t *ctx) { unsigned char tag; if (read_bytes(ctx, &tag, 1) != 1) { return -1; } return ctx->byte2unicode[tag]; } static readstat_error_t read_double_with_peek(por_ctx_t *ctx, double *out_double, unsigned char peek) { readstat_error_t retval = READSTAT_OK; double value = NAN; unsigned char buffer[100]; char utf8_buffer[300]; char error_buf[1024]; int64_t len = 0; ssize_t bytes_read = 0; buffer[0] = peek; bytes_read = read_bytes(ctx, &buffer[1], 1); if (bytes_read != 1) return READSTAT_ERROR_PARSE; if (ctx->byte2unicode[buffer[0]] == '*' && ctx->byte2unicode[buffer[1]] == '.') { if (out_double) *out_double = NAN; return READSTAT_OK; } int64_t i=2; while (ibyte2unicode[buffer[i-1]] != '/') { bytes_read = read_bytes(ctx, &buffer[i], 1); if (bytes_read != 1) return READSTAT_ERROR_PARSE; i++; } if (i == sizeof(buffer)) { return READSTAT_ERROR_PARSE; } len = por_utf8_encode(buffer, i, utf8_buffer, sizeof(utf8_buffer), ctx->byte2unicode); if (len == -1) { if (ctx->handle.error) { snprintf(error_buf, sizeof(error_buf), "Error converting double string (length=%" PRId64 "): %.*s", i, (int)i, buffer); ctx->handle.error(error_buf, ctx->user_ctx); } retval = READSTAT_ERROR_CONVERT; goto cleanup; } bytes_read = readstat_por_parse_double(utf8_buffer, len, &value, ctx->handle.error, ctx->user_ctx); if (bytes_read == -1) { if (ctx->handle.error) { 
snprintf(error_buf, sizeof(error_buf), "Error parsing double string (length=%" PRId64 "): %.*s [%s]", len, (int)len, utf8_buffer, buffer); ctx->handle.error(error_buf, ctx->user_ctx); } retval = READSTAT_ERROR_PARSE; goto cleanup; } cleanup: if (out_double) *out_double = value; return retval; } static readstat_error_t read_double(por_ctx_t *ctx, double *out_double) { unsigned char peek; size_t bytes_read = read_bytes(ctx, &peek, 1); if (bytes_read != 1) return READSTAT_ERROR_PARSE; return read_double_with_peek(ctx, out_double, peek); } static readstat_error_t read_integer_in_range(por_ctx_t *ctx, int min, int max, int *out_integer) { double dval = NAN; readstat_error_t retval = read_double(ctx, &dval); if (retval != READSTAT_OK) return retval; if (isnan(dval) || dval < min || dval > max) return READSTAT_ERROR_PARSE; if (out_integer) *out_integer = (int)dval; return READSTAT_OK; } static readstat_error_t maybe_read_double(por_ctx_t *ctx, double *out_double, int *out_finished) { unsigned char peek; size_t bytes_read = read_bytes(ctx, &peek, 1); if (bytes_read != 1) return READSTAT_ERROR_PARSE; if (ctx->byte2unicode[peek] == 'Z') { if (out_double) *out_double = NAN; if (out_finished) *out_finished = 1; return READSTAT_OK; } if (out_finished) *out_finished = 0; return read_double_with_peek(ctx, out_double, peek); } static readstat_error_t maybe_read_string(por_ctx_t *ctx, char *data, size_t len, int *out_finished) { readstat_error_t retval = READSTAT_OK; double value; int finished = 0; char error_buf[1024]; size_t string_length = 0; retval = maybe_read_double(ctx, &value, &finished); if (retval != READSTAT_OK || finished) { if (out_finished) *out_finished = finished; return retval; } if (value < 0 || value > MAX_STRING_LENGTH || isnan(value)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } string_length = (size_t)value; if (string_length > ctx->string_buffer_len) { ctx->string_buffer_len = string_length; ctx->string_buffer = realloc(ctx->string_buffer, 
ctx->string_buffer_len); memset(ctx->string_buffer, 0, ctx->string_buffer_len); } if (read_bytes(ctx, ctx->string_buffer, string_length) == -1) { retval = READSTAT_ERROR_READ; goto cleanup; } size_t bytes_encoded = por_utf8_encode(ctx->string_buffer, string_length, data, len - 1, ctx->byte2unicode); if (bytes_encoded == -1) { if (ctx->handle.error) { snprintf(error_buf, sizeof(error_buf), "Error converting string: %.*s", (int)string_length, ctx->string_buffer); ctx->handle.error(error_buf, ctx->user_ctx); } retval = READSTAT_ERROR_CONVERT; goto cleanup; } data[bytes_encoded] = '\0'; if (out_finished) *out_finished = 0; cleanup: return retval; } static readstat_error_t read_string(por_ctx_t *ctx, char *data, size_t len) { int finished = 0; readstat_error_t retval = maybe_read_string(ctx, data, len, &finished); if (retval == READSTAT_OK && finished) { return READSTAT_ERROR_PARSE; } return retval; } static readstat_error_t read_variable_count_record(por_ctx_t *ctx) { int value; readstat_error_t retval = READSTAT_OK; if (ctx->var_count) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if ((retval = read_integer_in_range(ctx, 0, MAX_VARS, &value)) != READSTAT_OK) { goto cleanup; } ctx->var_count = value; ctx->variables = readstat_calloc(ctx->var_count, sizeof(readstat_variable_t *)); ctx->varinfo = readstat_calloc(ctx->var_count, sizeof(spss_varinfo_t)); if (ctx->variables == NULL || ctx->varinfo == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (ctx->handle.metadata) { readstat_metadata_t metadata = { .row_count = -1, .var_count = ctx->var_count, .creation_time = ctx->timestamp, .modified_time = ctx->timestamp, .file_format_version = ctx->version, .file_label = ctx->file_label }; if (ctx->handle.metadata(&metadata, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } cleanup: return retval; } static readstat_error_t read_precision_record(por_ctx_t *ctx) { int precision = 0; readstat_error_t error = 
read_integer_in_range(ctx, 0, 100, &precision); if (error == READSTAT_OK) ctx->base30_precision = precision; return error; } static readstat_error_t read_case_weight_record(por_ctx_t *ctx) { return read_string(ctx, ctx->fweight_name, sizeof(ctx->fweight_name)); } static readstat_error_t read_variable_record(por_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int value; int i; spss_varinfo_t *varinfo = NULL; spss_format_t *formats[2]; ctx->var_offset++; if (ctx->var_offset == ctx->var_count) { retval = READSTAT_ERROR_PARSE; goto cleanup; } varinfo = &ctx->varinfo[ctx->var_offset]; formats[0] = &varinfo->print_format; formats[1] = &varinfo->write_format; varinfo->labels_index = -1; if ((retval = read_integer_in_range(ctx, 0, MAX_WIDTH, &value)) != READSTAT_OK) { goto cleanup; } varinfo->width = value; if (varinfo->width == 0) { varinfo->type = READSTAT_TYPE_DOUBLE; } else { varinfo->type = READSTAT_TYPE_STRING; } if ((retval = read_string(ctx, varinfo->name, sizeof(varinfo->name))) != READSTAT_OK) { goto cleanup; } ck_str_hash_insert(varinfo->name, varinfo, ctx->var_dict); for (i=0; i POR_FORMAT_SHIFT) { // Some files in the wild have their format types shifted by 82 for date/time values // I have no idea why, but see test files linked from: // https://github.com/WizardMac/ReadStat/issues/158 format->type = value - POR_FORMAT_SHIFT; } else { format->type = value; } if ((retval = read_integer_in_range(ctx, 0, MAX_FORMAT_WIDTH, &value)) != READSTAT_OK) { goto cleanup; } format->width = value; if ((retval = read_integer_in_range(ctx, 0, MAX_FORMAT_DECIMALS, &value)) != READSTAT_OK) { goto cleanup; } format->decimal_places = value; } cleanup: return retval; } static readstat_error_t read_missing_value_record(por_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; spss_varinfo_t *varinfo = NULL; if (ctx->var_offset < 0 || ctx->var_offset >= ctx->var_count) { retval = READSTAT_ERROR_PARSE; goto cleanup; } varinfo = &ctx->varinfo[ctx->var_offset]; if (varinfo->type == 
READSTAT_TYPE_DOUBLE) { if ((retval = read_double(ctx, &varinfo->missing_double_values[varinfo->n_missing_values])) != READSTAT_OK) { goto cleanup; } } else { if ((retval = read_string(ctx, varinfo->missing_string_values[varinfo->n_missing_values], sizeof(varinfo->missing_string_values[varinfo->n_missing_values]))) != READSTAT_OK) { goto cleanup; } } if (varinfo->n_missing_values > 2) { retval = READSTAT_ERROR_PARSE; goto cleanup; } varinfo->n_missing_values++; cleanup: return retval; } static readstat_error_t read_missing_value_range_record(por_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; spss_varinfo_t *varinfo = NULL; if (ctx->var_offset < 0 || ctx->var_offset == ctx->var_count) { retval = READSTAT_ERROR_PARSE; goto cleanup; } varinfo = &ctx->varinfo[ctx->var_offset]; varinfo->missing_range = 1; varinfo->n_missing_values = 2; if (varinfo->type == READSTAT_TYPE_DOUBLE) { if ((retval = read_double(ctx, &varinfo->missing_double_values[0])) != READSTAT_OK) { goto cleanup; } if ((retval = read_double(ctx, &varinfo->missing_double_values[1])) != READSTAT_OK) { goto cleanup; } } else { if ((retval = read_string(ctx, varinfo->missing_string_values[0], sizeof(varinfo->missing_string_values[0]))) != READSTAT_OK) { goto cleanup; } if ((retval = read_string(ctx, varinfo->missing_string_values[1], sizeof(varinfo->missing_string_values[1]))) != READSTAT_OK) { goto cleanup; } } cleanup: return retval; } static readstat_error_t read_missing_value_lo_range_record(por_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; spss_varinfo_t *varinfo = NULL; if (ctx->var_offset < 0 || ctx->var_offset == ctx->var_count) { retval = READSTAT_ERROR_PARSE; goto cleanup; } varinfo = &ctx->varinfo[ctx->var_offset]; varinfo->missing_range = 1; varinfo->n_missing_values = 2; if (varinfo->type == READSTAT_TYPE_DOUBLE) { varinfo->missing_double_values[0] = -HUGE_VAL; if ((retval = read_double(ctx, &varinfo->missing_double_values[1])) != READSTAT_OK) { goto cleanup; } } else { 
varinfo->missing_string_values[0][0] = '\0'; if ((retval = read_string(ctx, varinfo->missing_string_values[1], sizeof(varinfo->missing_string_values[1]))) != READSTAT_OK) { goto cleanup; } } cleanup: return retval; } static readstat_error_t read_missing_value_hi_range_record(por_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; spss_varinfo_t *varinfo = NULL; if (ctx->var_offset < 0 || ctx->var_offset == ctx->var_count) { retval = READSTAT_ERROR_PARSE; goto cleanup; } varinfo = &ctx->varinfo[ctx->var_offset]; varinfo->missing_range = 1; varinfo->n_missing_values = 2; if (varinfo->type == READSTAT_TYPE_DOUBLE) { if ((retval = read_double(ctx, &varinfo->missing_double_values[0])) != READSTAT_OK) { goto cleanup; } varinfo->missing_double_values[1] = HUGE_VAL; } else { if ((retval = read_string(ctx, varinfo->missing_string_values[0], sizeof(varinfo->missing_string_values[0]))) != READSTAT_OK) { goto cleanup; } varinfo->missing_string_values[1][0] = '\0'; } cleanup: return retval; } static readstat_error_t read_document_record(por_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; char string[256]; int i; int line_count = 0; if ((retval = read_integer_in_range(ctx, 0, MAX_LINES, &line_count)) != READSTAT_OK) { goto cleanup; } for (i=0; ihandle.note) { if (ctx->handle.note(i, string, ctx->user_ctx) != READSTAT_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } } cleanup: return retval; } static readstat_error_t read_variable_label_record(por_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; char string[256]; spss_varinfo_t *varinfo = NULL; if (ctx->var_offset < 0 || ctx->var_offset == ctx->var_count) { retval = READSTAT_ERROR_PARSE; goto cleanup; } varinfo = &ctx->varinfo[ctx->var_offset]; if ((retval = read_string(ctx, string, sizeof(string))) != READSTAT_OK) { goto cleanup; } varinfo->label = realloc(varinfo->label, 4*strlen(string) + 1); retval = readstat_convert(varinfo->label, 4*strlen(string) + 1, string, strlen(string), ctx->converter); cleanup: 
return retval; } static readstat_error_t read_value_label_record(por_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; double dval; int i; char string[256]; int count = 0, label_count = 0; char label_name_buf[256]; char label_buf[256]; snprintf(label_name_buf, sizeof(label_name_buf), POR_LABEL_NAME_PREFIX "%d", ctx->labels_offset); readstat_type_t value_type = READSTAT_TYPE_DOUBLE; if ((retval = read_integer_in_range(ctx, 0, MAX_STRINGS, &count)) != READSTAT_OK) { goto cleanup; } for (i=0; ivar_dict); if (info) { value_type = info->type; info->labels_index = ctx->labels_offset; } } if ((retval = read_integer_in_range(ctx, 0, MAX_LABELS, &label_count)) != READSTAT_OK) { goto cleanup; } for (i=0; ihandle.value_label) { if (ctx->handle.value_label(label_name_buf, value, label_buf, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } } ctx->labels_offset++; cleanup: return retval; } static readstat_error_t read_por_file_data(por_ctx_t *ctx) { int i; char input_string[256]; char output_string[4*256+1]; char error_buf[1024]; readstat_error_t rs_retval = READSTAT_OK; if (ctx->var_count == 0) return READSTAT_OK; while (1) { int finished = 0; for (i=0; ivar_count; i++) { spss_varinfo_t *info = &ctx->varinfo[i]; readstat_value_t value = { .type = info->type }; if (info->type == READSTAT_TYPE_STRING) { rs_retval = maybe_read_string(ctx, input_string, sizeof(input_string), &finished); if (rs_retval != READSTAT_OK) { if (ctx->handle.error) { snprintf(error_buf, sizeof(error_buf), "Error in %s (row=%d)", info->name, ctx->obs_count+1); ctx->handle.error(error_buf, ctx->user_ctx); } goto cleanup; } else if (finished) { if (i != 0) rs_retval = READSTAT_ERROR_PARSE; goto cleanup; } rs_retval = readstat_convert(output_string, sizeof(output_string), input_string, strlen(input_string), ctx->converter); if (rs_retval != READSTAT_OK) { goto cleanup; } value.v.string_value = output_string; } else if (info->type == READSTAT_TYPE_DOUBLE) { 
rs_retval = maybe_read_double(ctx, &value.v.double_value, &finished); if (rs_retval != READSTAT_OK) { if (ctx->handle.error) { snprintf(error_buf, sizeof(error_buf), "Error in %s (row=%d)", info->name, ctx->obs_count+1); ctx->handle.error(error_buf, ctx->user_ctx); } goto cleanup; } else if (finished) { if (i != 0) rs_retval = READSTAT_ERROR_PARSE; goto cleanup; } value.is_system_missing = isnan(value.v.double_value); } if (ctx->handle.value && !ctx->variables[i]->skip && !ctx->row_offset) { if (ctx->handle.value(ctx->obs_count, ctx->variables[i], value, ctx->user_ctx) != READSTAT_HANDLER_OK) { rs_retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } } if (ctx->row_offset) { ctx->row_offset--; } else { ctx->obs_count++; } rs_retval = por_update_progress(ctx); if (rs_retval != READSTAT_OK) break; if (ctx->row_limit > 0 && ctx->obs_count == ctx->row_limit) break; } cleanup: return rs_retval; } readstat_error_t read_version_and_timestamp(por_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; char string[256]; struct tm timestamp = { .tm_isdst = -1 }; unsigned char version; if (read_bytes(ctx, &version, sizeof(version)) != sizeof(version)) { retval = READSTAT_ERROR_READ; goto cleanup; } if ((retval = read_string(ctx, string, sizeof(string))) != READSTAT_OK) { /* creation date */ goto cleanup; } if (sscanf(string, "%04d%02d%02d", ×tamp.tm_year, ×tamp.tm_mon, ×tamp.tm_mday) != 3) { retval = READSTAT_ERROR_BAD_TIMESTAMP_STRING; goto cleanup; } if ((retval = read_string(ctx, string, sizeof(string))) != READSTAT_OK) { /* creation time */ goto cleanup; } if (sscanf(string, "%02d%02d%02d", ×tamp.tm_hour, ×tamp.tm_min, ×tamp.tm_sec) != 3) { retval = READSTAT_ERROR_BAD_TIMESTAMP_STRING; goto cleanup; } timestamp.tm_year -= 1900; timestamp.tm_mon--; ctx->timestamp = mktime(×tamp); ctx->version = ctx->byte2unicode[version] - 'A'; cleanup: return retval; } readstat_error_t handle_variables(por_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int i; int index_after_skipping = 
0; for (i=0; ivar_count; i++) { char label_name_buf[256]; spss_varinfo_t *info = &ctx->varinfo[i]; info->index = i; ctx->variables[i] = spss_init_variable_for_info(info, index_after_skipping, ctx->converter); snprintf(label_name_buf, sizeof(label_name_buf), POR_LABEL_NAME_PREFIX "%d", info->labels_index); int cb_retval = READSTAT_HANDLER_OK; if (ctx->handle.variable) { cb_retval = ctx->handle.variable(i, ctx->variables[i], info->labels_index == -1 ? NULL : label_name_buf, ctx->user_ctx); } if (cb_retval == READSTAT_HANDLER_ABORT) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } if (cb_retval == READSTAT_HANDLER_SKIP_VARIABLE) { ctx->variables[i]->skip = 1; } else { index_after_skipping++; } } if (ctx->handle.fweight && ctx->fweight_name[0]) { for (i=0; ivar_count; i++) { spss_varinfo_t *info = &ctx->varinfo[i]; if (strcmp(info->name, ctx->fweight_name) == 0) { if (ctx->handle.fweight(ctx->variables[i], ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } break; } } } cleanup: return retval; } readstat_error_t readstat_parse_por(readstat_parser_t *parser, const char *path, void *user_ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = parser->io; unsigned char reverse_lookup[256]; char vanity[5][40]; char error_buf[1024]; por_ctx_t *ctx = por_ctx_init(); ctx->handle = parser->handlers; ctx->user_ctx = user_ctx; ctx->io = io; ctx->row_limit = parser->row_limit; if (parser->row_offset > 0) ctx->row_offset = parser->row_offset; if (parser->output_encoding) { if (strcmp(parser->output_encoding, "UTF-8") != 0) ctx->converter = iconv_open(parser->output_encoding, "UTF-8"); if (ctx->converter == (iconv_t)-1) { ctx->converter = NULL; retval = READSTAT_ERROR_UNSUPPORTED_CHARSET; goto cleanup; } } if (io->open(path, io->io_ctx) == -1) { retval = READSTAT_ERROR_OPEN; goto cleanup; } if ((ctx->file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx)) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->seek(0, 
READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (read_bytes(ctx, vanity, sizeof(vanity)) != sizeof(vanity)) { retval = READSTAT_ERROR_READ; goto cleanup; } retval = readstat_convert(ctx->file_label, sizeof(ctx->file_label), vanity[1] + 20, 20, NULL); if (retval != READSTAT_OK) goto cleanup; if (read_bytes(ctx, reverse_lookup, sizeof(reverse_lookup)) != sizeof(reverse_lookup)) { retval = READSTAT_ERROR_READ; goto cleanup; } ctx->space = reverse_lookup[126]; int i; for (i=0; i<256; i++) { if (por_ascii_lookup[i]) { ctx->byte2unicode[reverse_lookup[i]] = por_ascii_lookup[i]; } else if (por_unicode_lookup[i]) { ctx->byte2unicode[reverse_lookup[i]] = por_unicode_lookup[i]; } } ctx->byte2unicode[reverse_lookup[64]] = por_unicode_lookup[64]; unsigned char check[8]; char tr_check[8]; if (read_bytes(ctx, check, sizeof(check)) != sizeof(check)) { retval = READSTAT_ERROR_READ; goto cleanup; } ssize_t encoded_len; if ((encoded_len = por_utf8_encode(check, sizeof(check), tr_check, sizeof(tr_check), ctx->byte2unicode)) == -1) { if (ctx->handle.error) { snprintf(error_buf, sizeof(error_buf), "Error converting check string: %.*s", (int)sizeof(check), check); ctx->handle.error(error_buf, ctx->user_ctx); } retval = READSTAT_ERROR_CONVERT; goto cleanup; } if (strncmp("SPSSPORT", tr_check, encoded_len) != 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } ctx->var_offset = -1; char string[256]; retval = read_version_and_timestamp(ctx); if (retval != READSTAT_OK) goto cleanup; while (1) { uint16_t tr_tag = read_tag(ctx); switch (tr_tag) { case '1': /* product ID */ case '2': /* author ID */ case '3': /* sub-product ID */ retval = read_string(ctx, string, sizeof(string)); break; case '4': /* variable count */ retval = read_variable_count_record(ctx); break; case '5': /* precision */ retval = read_precision_record(ctx); break; case '6': /* case weight */ retval = read_case_weight_record(ctx); break; case '7': /* variable */ retval = 
read_variable_record(ctx); break; case '8': /* missing value */ retval = read_missing_value_record(ctx); break; case 'B': /* missing value range */ retval = read_missing_value_range_record(ctx); break; case '9': /* LO THRU x */ retval = read_missing_value_lo_range_record(ctx); break; case 'A': /* x THRU HI */ retval = read_missing_value_hi_range_record(ctx); break; case 'C': /* variable label */ retval = read_variable_label_record(ctx); break; case 'D': /* value label */ retval = read_value_label_record(ctx); break; case 'E': /* document record */ retval = read_document_record(ctx); break; case 'F': /* file data */ if (ctx->var_offset != ctx->var_count - 1) { retval = READSTAT_ERROR_COLUMN_COUNT_MISMATCH; goto cleanup; } retval = handle_variables(ctx); if (retval != READSTAT_OK) goto cleanup; if (ctx->handle.value) { retval = read_por_file_data(ctx); } goto cleanup; default: retval = READSTAT_ERROR_PARSE; goto cleanup; } if (retval != READSTAT_OK) break; } cleanup: io->close(io->io_ctx); por_ctx_free(ctx); return retval; } ReadStat-1.1.7/src/spss/readstat_por_write.c000066400000000000000000000646111410722155500210650ustar00rootroot00000000000000 #include #include #include #include #include #include "../readstat.h" #include "../CKHashTable.h" #include "../readstat_writer.h" #include "readstat_spss.h" #include "readstat_por.h" #define POR_BASE30_PRECISION 50 typedef struct por_write_ctx_s { unsigned char *unicode2byte; size_t unicode2byte_len; } por_write_ctx_t; static inline char por_encode_base30_digit(uint64_t digit) { if (digit < 10) return '0' + digit; return 'A' + (digit - 10); } static int por_write_base30_integer(char *string, size_t string_len, uint64_t integer) { int start = 0; int end = 0; int offset = 0; while (integer) { string[offset++] = por_encode_base30_digit(integer % 30); integer /= 30; } end = offset; offset--; while (offset > start) { char tmp = string[start]; string[start] = string[offset]; string[offset] = tmp; offset--; start++; } return end; 
} static readstat_error_t por_finish(readstat_writer_t *writer) { return readstat_write_line_padding(writer, 'Z', 80, "\r\n"); } static readstat_error_t por_write_bytes(readstat_writer_t *writer, const void *bytes, size_t len) { return readstat_write_bytes_as_lines(writer, bytes, len, 80, "\r\n"); } static readstat_error_t por_write_string_n(readstat_writer_t *writer, por_write_ctx_t *ctx, const char *string, size_t input_len) { char error_buf[1024]; readstat_error_t retval = READSTAT_OK; char *por_string = malloc(input_len); ssize_t output_len = por_utf8_decode(string, input_len, por_string, input_len, ctx->unicode2byte, ctx->unicode2byte_len); if (output_len == -1) { if (writer->error_handler) { snprintf(error_buf, sizeof(error_buf), "Error converting string (length=%" PRId64 "): %.*s", (int64_t)input_len, (int)input_len, string); writer->error_handler(error_buf, writer->user_ctx); } retval = READSTAT_ERROR_CONVERT; goto cleanup; } retval = por_write_bytes(writer, por_string, output_len); cleanup: if (por_string) free(por_string); return retval; } static readstat_error_t por_write_tag(readstat_writer_t *writer, por_write_ctx_t *ctx, char tag) { char string[2]; string[0] = tag; string[1] = '\0'; return por_write_string_n(writer, ctx, string, 1); } static ssize_t por_write_double_to_buffer(char *string, size_t buffer_len, double value, long precision) { int offset = 0; if (isnan(value)) { string[offset++] = '*'; string[offset++] = '.'; } else if (isinf(value)) { if (value < 0.0) { string[offset++] = '-'; } string[offset++] = '1'; string[offset++] = '+'; string[offset++] = 'T'; string[offset++] = 'T'; string[offset++] = '/'; } else { long integers_printed = 0; double integer_part; double fraction = modf(fabs(value), &integer_part); int64_t integer = integer_part; int64_t exponent = 0; if (value < 0.0) { string[offset++] = '-'; } if (integer == 0) { string[offset++] = '0'; } else { while (fraction == 0 && integer != 0 && (integer % 30) == 0) { integer /= 30; 
exponent++; } integers_printed = por_write_base30_integer(&string[offset], buffer_len - offset, integer); offset += integers_printed; } /* should use exponents for efficiency, but this works */ if (fraction) { string[offset++] = '.'; } while (fraction && integers_printed < precision) { fraction = modf(fraction * 30, &integer_part); integer = integer_part; if (integer < 0) { return -1; } else { string[offset++] = por_encode_base30_digit(integer); } integers_printed++; } if (exponent) { string[offset++] = '+'; offset += por_write_base30_integer(&string[offset], buffer_len - offset, exponent); } string[offset++] = '/'; } string[offset] = '\0'; return offset; } static readstat_error_t por_write_double(readstat_writer_t *writer, por_write_ctx_t *ctx, double value) { char error_buf[1024]; char string[256]; ssize_t bytes_written = por_write_double_to_buffer(string, sizeof(string), value, POR_BASE30_PRECISION); if (bytes_written == -1) { if (writer->error_handler) { snprintf(error_buf, sizeof(error_buf), "Unable to encode number: %lf", value); writer->error_handler(error_buf, writer->user_ctx); } return READSTAT_ERROR_WRITE; } return por_write_string_n(writer, ctx, string, bytes_written); } static readstat_error_t por_write_string_field_n(readstat_writer_t *writer, por_write_ctx_t *ctx, const char *string, size_t len) { readstat_error_t error = por_write_double(writer, ctx, len); if (error != READSTAT_OK) return error; return por_write_string_n(writer, ctx, string, len); } static readstat_error_t por_write_string_field(readstat_writer_t *writer, por_write_ctx_t *ctx, const char *string) { return por_write_string_field_n(writer, ctx, string, strlen(string)); } static por_write_ctx_t *por_write_ctx_init() { por_write_ctx_t *ctx = calloc(1, sizeof(por_write_ctx_t)); uint16_t max_unicode = 0; int i; for (i=0; i max_unicode) max_unicode = por_unicode_lookup[i]; } ctx->unicode2byte = malloc(max_unicode+1); ctx->unicode2byte_len = max_unicode+1; for (i=0; 
iunicode2byte[por_unicode_lookup[i]] = por_ascii_lookup[i]; } if (por_ascii_lookup[i]) { ctx->unicode2byte[por_ascii_lookup[i]] = por_ascii_lookup[i]; } } return ctx; } static void por_write_ctx_free(por_write_ctx_t *ctx) { if (ctx->unicode2byte) free(ctx->unicode2byte); free(ctx); } static readstat_error_t por_emit_header(readstat_writer_t *writer, por_write_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; size_t file_label_len = strlen(writer->file_label); char vanity[5][40]; memset(vanity, '0', sizeof(vanity)); memcpy(vanity[1], "ASCII SPSS PORT FILE", 20); strncpy(vanity[1] + 20, writer->file_label, 20); if (file_label_len < 20) memset(vanity[1] + 20 + file_label_len, ' ', 20 - file_label_len); por_write_bytes(writer, vanity, sizeof(vanity)); char lookup[256]; int i; memset(lookup, '0', sizeof(lookup)); for (i=0; itimestamp); if (!timestamp) { retval = READSTAT_ERROR_BAD_TIMESTAMP_VALUE; goto cleanup; } if ((retval = por_write_tag(writer, ctx, 'A')) != READSTAT_OK) goto cleanup; char date[9]; snprintf(date, sizeof(date), "%04d%02d%02d", (unsigned int)(timestamp->tm_year + 1900) % 10000, (unsigned int)(timestamp->tm_mon + 1) % 100, (unsigned int)(timestamp->tm_mday) % 100); if ((retval = por_write_string_field(writer, ctx, date)) != READSTAT_OK) goto cleanup; char time[7]; snprintf(time, sizeof(time), "%02d%02d%02d", (unsigned int)timestamp->tm_hour % 100, (unsigned int)timestamp->tm_min % 100, (unsigned int)timestamp->tm_sec % 100); if ((retval = por_write_string_field(writer, ctx, time)) != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t por_emit_identification_records(readstat_writer_t *writer, por_write_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; if ((retval = por_write_tag(writer, ctx, '1')) != READSTAT_OK) goto cleanup; if ((retval = por_write_string_field(writer, ctx, READSTAT_PRODUCT_NAME)) != READSTAT_OK) goto cleanup; if ((retval = por_write_tag(writer, ctx, '3')) != READSTAT_OK) goto cleanup; if ((retval = 
por_write_string_field(writer, ctx, READSTAT_PRODUCT_URL)) != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t por_emit_variable_count_record(readstat_writer_t *writer, por_write_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; if ((retval = por_write_tag(writer, ctx, '4')) != READSTAT_OK) goto cleanup; if ((retval = por_write_double(writer, ctx, writer->variables_count)) != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t por_emit_precision_record(readstat_writer_t *writer, por_write_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; if ((retval = por_write_tag(writer, ctx, '5')) != READSTAT_OK) goto cleanup; if ((retval = por_write_double(writer, ctx, POR_BASE30_PRECISION)) != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t por_emit_case_weight_variable_record(readstat_writer_t *writer, por_write_ctx_t *ctx) { if (!writer->fweight_variable) return READSTAT_OK; readstat_error_t retval = READSTAT_OK; if ((retval = por_write_tag(writer, ctx, '6')) != READSTAT_OK) goto cleanup; if ((retval = por_write_string_field(writer, ctx, readstat_variable_get_name(writer->fweight_variable))) != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t por_emit_format(readstat_writer_t *writer, por_write_ctx_t *ctx, spss_format_t *format) { readstat_error_t error = READSTAT_OK; if ((error = por_write_double(writer, ctx, format->type)) != READSTAT_OK) goto cleanup; if ((error = por_write_double(writer, ctx, format->width)) != READSTAT_OK) goto cleanup; if ((error = por_write_double(writer, ctx, format->decimal_places)) != READSTAT_OK) goto cleanup; cleanup: return error; } static readstat_error_t validate_variable_name(const char *name) { size_t len = strlen(name); if (len < 1 || len > 8) return READSTAT_ERROR_NAME_IS_TOO_LONG; int i; for (i=0; name[i]; i++) { if (name[i] >= 'A' && name[i] <= 'Z') continue; if (name[i] >= '0' && name[i] <= '9') continue; if (name[i] == 
'@' || name[i] == '#' || name[i] == '$') continue; if (name[i] == '_' || name[i] == '.') continue; return READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER; } if (!(name[0] >= 'A' && name[0] <= 'Z') && name[0] != '@') return READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER; return READSTAT_OK; } static readstat_error_t por_emit_variable_label_record(readstat_writer_t *writer, por_write_ctx_t *ctx, readstat_variable_t *r_variable) { const char *label = readstat_variable_get_label(r_variable); readstat_error_t retval = READSTAT_OK; if (!label) return READSTAT_OK; if ((retval = por_write_tag(writer, ctx, 'C')) != READSTAT_OK) goto cleanup; if ((retval = por_write_string_field(writer, ctx, label)) != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t por_emit_missing_string_values_records(readstat_writer_t *writer, por_write_ctx_t *ctx, readstat_variable_t *r_variable) { readstat_error_t retval = READSTAT_OK; int n_missing_values = 0; int n_missing_ranges = readstat_variable_get_missing_ranges_count(r_variable); /* ranges */ int j; for (j=0; j 3) retval = READSTAT_ERROR_TOO_MANY_MISSING_VALUE_DEFINITIONS; cleanup: return retval; } static readstat_error_t por_emit_missing_double_values_records(readstat_writer_t *writer, por_write_ctx_t *ctx, readstat_variable_t *r_variable) { readstat_error_t retval = READSTAT_OK; int n_missing_values = 0; int n_missing_ranges = readstat_variable_get_missing_ranges_count(r_variable); /* ranges */ int j; for (j=0; j 3) retval = READSTAT_ERROR_TOO_MANY_MISSING_VALUE_DEFINITIONS; cleanup: return retval; } static readstat_error_t por_emit_missing_values_records(readstat_writer_t *writer, por_write_ctx_t *ctx, readstat_variable_t *r_variable) { if (r_variable->type == READSTAT_TYPE_DOUBLE) { return por_emit_missing_double_values_records(writer, ctx, r_variable); } return por_emit_missing_string_values_records(writer, ctx, r_variable); } static readstat_error_t por_emit_variable_records(readstat_writer_t *writer, 
por_write_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int i; for (i=0; ivariables_count; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); const char *variable_name = readstat_variable_get_name(r_variable); spss_format_t print_format; if ((retval = por_write_tag(writer, ctx, '7')) != READSTAT_OK) break; retval = por_write_double(writer, ctx, (r_variable->type == READSTAT_TYPE_STRING) ? r_variable->user_width : 0); if (retval != READSTAT_OK) break; if ((retval = por_write_string_field(writer, ctx, variable_name)) != READSTAT_OK) break; if ((retval = spss_format_for_variable(r_variable, &print_format)) != READSTAT_OK) break; if ((retval = por_emit_format(writer, ctx, &print_format)) != READSTAT_OK) break; if ((retval = por_emit_format(writer, ctx, &print_format)) != READSTAT_OK) break; if ((retval = por_emit_missing_values_records(writer, ctx, r_variable)) != READSTAT_OK) break; if ((retval = por_emit_variable_label_record(writer, ctx, r_variable)) != READSTAT_OK) break; } return retval; } static readstat_error_t por_emit_value_label_records(readstat_writer_t *writer, por_write_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int i, j; for (i=0; ilabel_sets_count; i++) { readstat_label_set_t *r_label_set = readstat_get_label_set(writer, i); readstat_type_t user_type = r_label_set->type; if (r_label_set->value_labels_count == 0 || r_label_set->variables_count == 0) continue; if ((retval = por_write_tag(writer, ctx, 'D')) != READSTAT_OK) goto cleanup; if ((retval = por_write_double(writer, ctx, r_label_set->variables_count)) != READSTAT_OK) goto cleanup; for (j=0; jvariables_count; j++) { readstat_variable_t *r_variable = readstat_get_label_set_variable(r_label_set, j); if ((retval = por_write_string_field(writer, ctx, readstat_variable_get_name(r_variable))) != READSTAT_OK) goto cleanup; } if ((retval = por_write_double(writer, ctx, r_label_set->value_labels_count)) != READSTAT_OK) goto cleanup; for (j=0; jvalue_labels_count; j++) 
{ readstat_value_label_t *r_value_label = readstat_get_value_label(r_label_set, j); if (user_type == READSTAT_TYPE_STRING) { retval = por_write_string_field_n(writer, ctx, r_value_label->string_key, r_value_label->string_key_len); } else if (user_type == READSTAT_TYPE_DOUBLE) { retval = por_write_double(writer, ctx, r_value_label->double_key); } else if (user_type == READSTAT_TYPE_INT32) { retval = por_write_double(writer, ctx, r_value_label->int32_key); } if (retval != READSTAT_OK) goto cleanup; if ((retval = por_write_string_field_n(writer, ctx, r_value_label->label, r_value_label->label_len)) != READSTAT_OK) goto cleanup; } } cleanup: return retval; } static readstat_error_t por_emit_document_record(readstat_writer_t *writer, por_write_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; if ((retval = por_write_tag(writer, ctx, 'E')) != READSTAT_OK) goto cleanup; if ((retval = por_write_double(writer, ctx, writer->notes_count)) != READSTAT_OK) goto cleanup; int i; for (i=0; inotes_count; i++) { size_t len = strlen(writer->notes[i]); if (len > SPSS_DOC_LINE_SIZE) { retval = READSTAT_ERROR_NOTE_IS_TOO_LONG; goto cleanup; } if ((retval = por_write_string_field_n(writer, ctx, writer->notes[i], len)) != READSTAT_OK) goto cleanup; } cleanup: return retval; } static readstat_error_t por_emit_data_tag(readstat_writer_t *writer, por_write_ctx_t *ctx) { return por_write_tag(writer, ctx, 'F'); } static readstat_error_t por_begin_data(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; por_write_ctx_t *ctx = por_write_ctx_init(); readstat_error_t retval = READSTAT_OK; if ((retval = por_emit_header(writer, ctx)) != READSTAT_OK) goto cleanup; if ((retval = por_emit_version_and_timestamp(writer, ctx)) != READSTAT_OK) goto cleanup; if ((retval = por_emit_identification_records(writer, ctx)) != READSTAT_OK) goto cleanup; if ((retval = por_emit_variable_count_record(writer, ctx)) != READSTAT_OK) goto cleanup; if ((retval = 
por_emit_precision_record(writer, ctx)) != READSTAT_OK) goto cleanup; if ((retval = por_emit_case_weight_variable_record(writer, ctx)) != READSTAT_OK) goto cleanup; if ((retval = por_emit_variable_records(writer, ctx)) != READSTAT_OK) goto cleanup; if ((retval = por_emit_value_label_records(writer, ctx)) != READSTAT_OK) goto cleanup; if ((retval = por_emit_document_record(writer, ctx)) != READSTAT_OK) goto cleanup; if ((retval = por_emit_data_tag(writer, ctx)) != READSTAT_OK) goto cleanup; cleanup: if (retval != READSTAT_OK) { por_write_ctx_free(ctx); } else { writer->module_ctx = ctx; } return retval; } static readstat_error_t por_end_data(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; readstat_error_t error = READSTAT_OK; if ((error = por_write_tag(writer, writer->module_ctx, 'Z')) != READSTAT_OK) goto cleanup; if ((error = por_finish(writer)) != READSTAT_OK) goto cleanup; cleanup: por_write_ctx_free(writer->module_ctx); return error; } static size_t por_variable_width(readstat_type_t type, size_t user_width) { if (type == READSTAT_TYPE_STRING) { return POR_BASE30_PRECISION + 4 + user_width; } return POR_BASE30_PRECISION + 4; // minus sign + period + plus/minus + slash } static readstat_error_t por_variable_ok(const readstat_variable_t *variable) { return validate_variable_name(readstat_variable_get_name(variable)); } static readstat_error_t por_write_double_value(void *row, const readstat_variable_t *var, double value) { if (por_write_double_to_buffer(row, POR_BASE30_PRECISION + 4, value, POR_BASE30_PRECISION) == -1) { return READSTAT_ERROR_WRITE; } return READSTAT_OK; } static readstat_error_t por_write_int8_value(void *row, const readstat_variable_t *var, int8_t value) { return por_write_double_value(row, var, value); } static readstat_error_t por_write_int16_value(void *row, const readstat_variable_t *var, int16_t value) { return por_write_double_value(row, var, value); } static readstat_error_t por_write_int32_value(void 
*row, const readstat_variable_t *var, int32_t value) { return por_write_double_value(row, var, value); } static readstat_error_t por_write_float_value(void *row, const readstat_variable_t *var, float value) { return por_write_double_value(row, var, value); } static readstat_error_t por_write_missing_number(void *row, const readstat_variable_t *var) { return por_write_double_value(row, var, NAN); } static readstat_error_t por_write_missing_string(void *row, const readstat_variable_t *var) { return por_write_double_value(row, var, 0); } static readstat_error_t por_write_string_value(void *row, const readstat_variable_t *var, const char *string) { size_t len = strlen(string); if (len == 0) { string = " "; len = 1; } size_t storage_width = readstat_variable_get_storage_width(var); if (len > storage_width) { len = storage_width; } ssize_t bytes_written = por_write_double_to_buffer(row, POR_BASE30_PRECISION + 4, len, POR_BASE30_PRECISION); if (bytes_written == -1) { return READSTAT_ERROR_WRITE; } strncpy(((char *)row) + bytes_written, string, len); return READSTAT_OK; } static readstat_error_t por_write_row(void *writer_ctx, void *row, size_t row_len) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; char *row_chars = (char *)row; int offset = 0, output = 0; for (offset=0; offsetmodule_ctx, row_chars, output); } static readstat_error_t por_metadata_ok(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; if (writer->compression != READSTAT_COMPRESS_NONE) return READSTAT_ERROR_UNSUPPORTED_COMPRESSION; return READSTAT_OK; } readstat_error_t readstat_begin_writing_por(readstat_writer_t *writer, void *user_ctx, long row_count) { writer->callbacks.metadata_ok = &por_metadata_ok; writer->callbacks.variable_width = &por_variable_width; writer->callbacks.variable_ok = &por_variable_ok; writer->callbacks.write_int8 = &por_write_int8_value; writer->callbacks.write_int16 = &por_write_int16_value; writer->callbacks.write_int32 = 
&por_write_int32_value; writer->callbacks.write_float = &por_write_float_value; writer->callbacks.write_double = &por_write_double_value; writer->callbacks.write_string = &por_write_string_value; writer->callbacks.write_missing_string = &por_write_missing_string; writer->callbacks.write_missing_number = &por_write_missing_number; writer->callbacks.begin_data = &por_begin_data; writer->callbacks.write_row = &por_write_row; writer->callbacks.end_data = &por_end_data; return readstat_begin_writing_file(writer, user_ctx, row_count); } ReadStat-1.1.7/src/spss/readstat_sav.c000066400000000000000000000050701410722155500176360ustar00rootroot00000000000000// // sav.c // #include #include #include #include #include #include #include #include #include "../readstat.h" #include "../readstat_bits.h" #include "../readstat_iconv.h" #include "../readstat_malloc.h" #include "readstat_sav.h" #define SAV_VARINFO_INITIAL_CAPACITY 512 sav_ctx_t *sav_ctx_init(sav_file_header_record_t *header, readstat_io_t *io) { sav_ctx_t *ctx = readstat_calloc(1, sizeof(sav_ctx_t)); if (ctx == NULL) { return NULL; } if (memcmp(&header->rec_type, "$FL2", 4) == 0) { ctx->format_version = 2; } else if (memcmp(&header->rec_type, "$FL3", 4) == 0) { ctx->format_version = 3; } else { sav_ctx_free(ctx); return NULL; } ctx->bswap = !(header->layout_code == 2 || header->layout_code == 3); ctx->endianness = (machine_is_little_endian() ^ ctx->bswap) ? READSTAT_ENDIAN_LITTLE : READSTAT_ENDIAN_BIG; if (header->compression == 1 || byteswap4(header->compression) == 1) { ctx->compression = READSTAT_COMPRESS_ROWS; } else if (header->compression == 2 || byteswap4(header->compression) == 2) { ctx->compression = READSTAT_COMPRESS_BINARY; } ctx->record_count = ctx->bswap ? byteswap4(header->ncases) : header->ncases; ctx->fweight_index = ctx->bswap ? 
byteswap4(header->weight_index) : header->weight_index; ctx->missing_double = SAV_MISSING_DOUBLE; ctx->lowest_double = SAV_LOWEST_DOUBLE; ctx->highest_double = SAV_HIGHEST_DOUBLE; ctx->bias = ctx->bswap ? byteswap_double(header->bias) : header->bias; ctx->varinfo_capacity = SAV_VARINFO_INITIAL_CAPACITY; if ((ctx->varinfo = readstat_calloc(ctx->varinfo_capacity, sizeof(spss_varinfo_t *))) == NULL) { sav_ctx_free(ctx); return NULL; } ctx->io = io; return ctx; } void sav_ctx_free(sav_ctx_t *ctx) { if (ctx->varinfo) { int i; for (i=0; ivar_index; i++) { spss_varinfo_free(ctx->varinfo[i]); } free(ctx->varinfo); } if (ctx->variables) { int i; for (i=0; ivar_count; i++) { if (ctx->variables[i]) free(ctx->variables[i]); } free(ctx->variables); } if (ctx->raw_string) free(ctx->raw_string); if (ctx->utf8_string) free(ctx->utf8_string); if (ctx->converter) iconv_close(ctx->converter); if (ctx->variable_display_values) { free(ctx->variable_display_values); } free(ctx); } ReadStat-1.1.7/src/spss/readstat_sav.h000066400000000000000000000077121410722155500176500ustar00rootroot00000000000000// // readstat_sav.h // #include "readstat_spss.h" #pragma pack(push, 1) // SAV files typedef struct sav_file_header_record_s { char rec_type[4]; char prod_name[60]; int32_t layout_code; int32_t nominal_case_size; int32_t compression; int32_t weight_index; int32_t ncases; double bias; /* TODO is this portable? 
*/ char creation_date[9]; char creation_time[8]; char file_label[64]; char padding[3]; } sav_file_header_record_t; typedef struct sav_variable_record_s { int32_t type; int32_t has_var_label; int32_t n_missing_values; int32_t print; int32_t write; char name[8]; } sav_variable_record_t; typedef struct sav_info_record_header_s { int32_t rec_type; int32_t subtype; int32_t size; int32_t count; } sav_info_record_t; typedef struct sav_machine_integer_info_record_s { int32_t version_major; int32_t version_minor; int32_t version_revision; int32_t machine_code; int32_t floating_point_rep; int32_t compression_code; int32_t endianness; int32_t character_code; } sav_machine_integer_info_record_t; typedef struct sav_machine_floating_point_info_record_s { uint64_t sysmis; uint64_t highest; uint64_t lowest; } sav_machine_floating_point_info_record_t; typedef struct sav_dictionary_termination_record_s { int32_t rec_type; int32_t filler; } sav_dictionary_termination_record_t; #pragma pack(pop) typedef struct sav_ctx_s { readstat_callbacks_t handle; size_t file_size; readstat_io_t *io; void *user_ctx; spss_varinfo_t **varinfo; size_t varinfo_capacity; readstat_variable_t **variables; const char *input_encoding; const char *output_encoding; char file_label[4*64+1]; time_t timestamp; uint32_t *variable_display_values; size_t variable_display_values_count; iconv_t converter; int var_index; int var_offset; int var_count; int record_count; int row_limit; int row_offset; int current_row; int value_labels_count; int fweight_index; char *raw_string; size_t raw_string_len; char *utf8_string; size_t utf8_string_len; uint64_t missing_double; uint64_t lowest_double; uint64_t highest_double; double bias; int format_version; readstat_compress_t compression; readstat_endian_t endianness; unsigned int bswap:1; } sav_ctx_t; #define SAV_RECORD_TYPE_VARIABLE 2 #define SAV_RECORD_TYPE_VALUE_LABEL 3 #define SAV_RECORD_TYPE_VALUE_LABEL_VARIABLES 4 #define SAV_RECORD_TYPE_DOCUMENT 6 #define 
SAV_RECORD_TYPE_HAS_DATA 7 #define SAV_RECORD_TYPE_DICT_TERMINATION 999 #define SAV_RECORD_SUBTYPE_INTEGER_INFO 3 #define SAV_RECORD_SUBTYPE_FP_INFO 4 #define SAV_RECORD_SUBTYPE_PRODUCT_INFO 10 #define SAV_RECORD_SUBTYPE_VAR_DISPLAY 11 #define SAV_RECORD_SUBTYPE_LONG_VAR_NAME 13 #define SAV_RECORD_SUBTYPE_VERY_LONG_STR 14 #define SAV_RECORD_SUBTYPE_NUMBER_OF_CASES 16 #define SAV_RECORD_SUBTYPE_DATA_FILE_ATTRS 17 #define SAV_RECORD_SUBTYPE_VARIABLE_ATTRS 18 #define SAV_RECORD_SUBTYPE_CHAR_ENCODING 20 #define SAV_RECORD_SUBTYPE_LONG_STRING_VALUE_LABELS 21 #define SAV_RECORD_SUBTYPE_LONG_STRING_MISSING_VALUES 22 #define SAV_FLOATING_POINT_REP_IEEE 1 #define SAV_FLOATING_POINT_REP_IBM 2 #define SAV_FLOATING_POINT_REP_VAX 3 #define SAV_ENDIANNESS_BIG 1 #define SAV_ENDIANNESS_LITTLE 2 #define SAV_EIGHT_SPACES " " sav_ctx_t *sav_ctx_init(sav_file_header_record_t *header, readstat_io_t *io); void sav_ctx_free(sav_ctx_t *ctx); ReadStat-1.1.7/src/spss/readstat_sav_compress.c000066400000000000000000000113231410722155500215470ustar00rootroot00000000000000#include #include #include "../readstat.h" #include "../readstat_bits.h" #include "../readstat_iconv.h" #include "readstat_sav.h" #include "readstat_sav_compress.h" size_t sav_compressed_row_bound(size_t uncompressed_length) { return uncompressed_length + (uncompressed_length/8 + 8)/8*8; } size_t sav_compress_row(void *output_row, void *input_row, size_t input_len, readstat_writer_t *writer) { unsigned char *output = output_row; unsigned char *input = input_row; off_t input_offset = 0; off_t output_offset = 8; off_t control_offset = 0; int i; memset(&output[control_offset], 0, 8); for (i=0; ivariables_count; i++) { readstat_variable_t *variable = readstat_get_variable(writer, i); if (variable->type == READSTAT_TYPE_STRING) { size_t width = variable->storage_width; while (width > 0) { if (memcmp(&input[input_offset], SAV_EIGHT_SPACES, 8) == 0) { output[control_offset++] = 254; } else { output[control_offset++] = 253; 
memcpy(&output[output_offset], &input[input_offset], 8); output_offset += 8; } if (control_offset % 8 == 0) { control_offset = output_offset; memset(&output[control_offset], 0, 8); output_offset += 8; } input_offset += 8; width -= 8; } } else { uint64_t int_value; memcpy(&int_value, &input[input_offset], 8); if (int_value == SAV_MISSING_DOUBLE) { output[control_offset++] = 255; } else { double fp_value; memcpy(&fp_value, &input[input_offset], 8); if (fp_value > -100 && fp_value < 152 && (int)fp_value == fp_value) { output[control_offset++] = (int)fp_value + 100; } else { output[control_offset++] = 253; memcpy(&output[output_offset], &input[input_offset], 8); output_offset += 8; } } if (control_offset % 8 == 0) { control_offset = output_offset; memset(&output[control_offset], 0, 8); output_offset += 8; } input_offset += 8; } } if (writer->current_row + 1 == writer->row_count) output[control_offset] = 252; return output_offset; } void sav_decompress_row(struct sav_row_stream_s *state) { double fp_value; uint64_t missing_value = state->bswap ? byteswap8(state->missing_value) : state->missing_value; int i = 8 - state->i; while (1) { if (i == 8) { if (state->avail_in < 8) { state->status = SAV_ROW_STREAM_NEED_DATA; goto done; } memcpy(state->chunk, state->next_in, 8); state->next_in += 8; state->avail_in -= 8; i = 0; } while (i<8) { switch (state->chunk[i]) { case 0: break; case 252: state->status = SAV_ROW_STREAM_FINISHED_ALL; goto done; case 253: if (state->avail_in < 8) { state->status = SAV_ROW_STREAM_NEED_DATA; goto done; } memcpy(state->next_out, state->next_in, 8); state->next_out += 8; state->avail_out -= 8; state->next_in += 8; state->avail_in -= 8; break; case 254: memset(state->next_out, ' ', 8); state->next_out += 8; state->avail_out -= 8; break; case 255: memcpy(state->next_out, &missing_value, sizeof(uint64_t)); state->next_out += 8; state->avail_out -= 8; break; default: fp_value = state->chunk[i] - state->bias; fp_value = state->bswap ? 
byteswap_double(fp_value) : fp_value; memcpy(state->next_out, &fp_value, sizeof(double)); state->next_out += 8; state->avail_out -= 8; break; } i++; if (state->avail_out < 8) { state->status = SAV_ROW_STREAM_FINISHED_ROW; goto done; } } } done: state->i = 8 - i; } ReadStat-1.1.7/src/spss/readstat_sav_compress.h000066400000000000000000000014101410722155500215500ustar00rootroot00000000000000enum sav_row_stream_status { SAV_ROW_STREAM_NEED_DATA, SAV_ROW_STREAM_HAVE_DATA, SAV_ROW_STREAM_FINISHED_ROW, SAV_ROW_STREAM_FINISHED_ALL }; struct sav_row_stream_s { const unsigned char *next_in; size_t avail_in; unsigned char *next_out; size_t avail_out; uint64_t missing_value; double bias; unsigned char chunk[8]; int i; int bswap; enum sav_row_stream_status status; }; size_t sav_compressed_row_bound(size_t uncompressed_length); size_t sav_compress_row(void *output_row, void *input_row, size_t input_len, readstat_writer_t *writer); void sav_decompress_row(struct sav_row_stream_s *state); ReadStat-1.1.7/src/spss/readstat_sav_parse.c000066400000000000000000000532301410722155500210310ustar00rootroot00000000000000#line 1 "src/spss/readstat_sav_parse.rl" #include #include #include "../readstat.h" #include "../readstat_malloc.h" #include "../readstat_strings.h" #include "readstat_sav.h" #include "readstat_sav_parse.h" #line 21 "src/spss/readstat_sav_parse.rl" typedef struct varlookup { char name[8*4+1]; int index; } varlookup_t; static int compare_key_varlookup(const void *elem1, const void *elem2) { const char *key = (const char *)elem1; const varlookup_t *v = (const varlookup_t *)elem2; return strcasecmp(key, v->name); } static int compare_varlookups(const void *elem1, const void *elem2) { const varlookup_t *v1 = (const varlookup_t *)elem1; const varlookup_t *v2 = (const varlookup_t *)elem2; return strcasecmp(v1->name, v2->name); } static int count_vars(sav_ctx_t *ctx) { int i; spss_varinfo_t *last_info = NULL; int var_count = 0; for (i=0; ivar_index; i++) { spss_varinfo_t *info = 
ctx->varinfo[i]; if (last_info == NULL || strcmp(info->name, last_info->name) != 0) { var_count++; } last_info = info; } return var_count; } static varlookup_t *build_lookup_table(int var_count, sav_ctx_t *ctx) { varlookup_t *table = readstat_malloc(var_count * sizeof(varlookup_t)); int offset = 0; int i; spss_varinfo_t *last_info = NULL; for (i=0; ivar_index; i++) { spss_varinfo_t *info = ctx->varinfo[i]; if (last_info == NULL || strcmp(info->name, last_info->name) != 0) { varlookup_t *entry = &table[offset++]; memcpy(entry->name, info->name, sizeof(info->name)); entry->index = info->index; } last_info = info; } qsort(table, var_count, sizeof(varlookup_t), &compare_varlookups); return table; } #line 68 "src/spss/readstat_sav_parse.c" static const signed char _sav_long_variable_parse_actions[] = { 0, 1, 1, 1, 5, 2, 2, 0, 3, 6, 4, 3, 0 }; static const short _sav_long_variable_parse_key_offsets[] = { 0, 0, 5, 19, 33, 47, 61, 75, 89, 103, 104, 108, 113, 118, 123, 128, 133, 138, 143, 148, 153, 158, 163, 168, 173, 178, 183, 188, 193, 198, 203, 208, 213, 218, 223, 228, 233, 238, 243, 248, 253, 258, 263, 268, 273, 278, 283, 288, 293, 298, 303, 308, 313, 318, 323, 328, 333, 338, 343, 348, 353, 358, 363, 368, 373, 378, 383, 388, 393, 398, 403, 408, 413, 418, 423, 428, 0 }; static const unsigned char _sav_long_variable_parse_trans_keys[] = { 255u, 0u, 63u, 91u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 61u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 255u, 0u, 63u, 91u, 127u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 
255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 127u, 255u, 0u, 31u, 9u, 0u }; static const signed char _sav_long_variable_parse_single_lengths[] = { 0, 1, 4, 4, 4, 4, 4, 4, 4, 1, 2, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 0 }; static const signed char _sav_long_variable_parse_range_lengths[] = { 0, 2, 5, 5, 5, 5, 5, 5, 5, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0 }; static const short _sav_long_variable_parse_index_offsets[] = { 0, 0, 4, 14, 24, 34, 44, 54, 64, 74, 76, 80, 85, 89, 94, 99, 104, 109, 114, 119, 124, 129, 134, 139, 144, 149, 154, 159, 164, 169, 174, 179, 184, 189, 194, 199, 204, 209, 214, 219, 224, 229, 234, 239, 244, 249, 254, 259, 264, 269, 274, 279, 284, 289, 294, 299, 304, 309, 314, 319, 324, 329, 334, 339, 344, 349, 354, 359, 364, 369, 374, 379, 384, 389, 394, 399, 0 }; static const signed char _sav_long_variable_parse_cond_targs[] = { 0, 0, 0, 2, 0, 10, 0, 0, 0, 0, 0, 0, 0, 3, 0, 10, 0, 0, 0, 0, 0, 0, 0, 4, 0, 10, 0, 0, 0, 0, 0, 0, 0, 5, 0, 10, 0, 0, 0, 0, 0, 0, 0, 6, 0, 10, 0, 0, 0, 0, 0, 0, 0, 7, 0, 10, 0, 0, 0, 0, 0, 0, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 9, 10, 0, 0, 0, 0, 11, 12, 0, 0, 0, 13, 0, 0, 0, 2, 12, 0, 0, 0, 14, 12, 0, 0, 0, 15, 12, 0, 0, 0, 16, 12, 0, 0, 0, 17, 12, 0, 0, 0, 18, 12, 0, 0, 0, 19, 12, 0, 0, 0, 20, 12, 0, 0, 0, 21, 12, 0, 0, 0, 22, 12, 0, 0, 0, 23, 12, 0, 0, 0, 24, 12, 0, 0, 0, 25, 12, 0, 0, 0, 26, 12, 0, 0, 0, 27, 12, 0, 0, 0, 28, 12, 0, 0, 0, 29, 12, 0, 0, 0, 30, 12, 0, 0, 0, 31, 12, 0, 0, 0, 32, 12, 0, 0, 0, 33, 12, 0, 0, 0, 34, 12, 0, 0, 0, 35, 12, 0, 0, 0, 36, 12, 0, 0, 0, 37, 12, 0, 0, 0, 38, 12, 0, 0, 0, 39, 12, 0, 0, 0, 40, 12, 0, 0, 0, 41, 12, 0, 0, 0, 42, 12, 0, 0, 0, 43, 12, 0, 0, 0, 44, 12, 0, 0, 0, 45, 12, 0, 0, 0, 46, 12, 0, 0, 0, 47, 12, 0, 0, 0, 48, 12, 0, 0, 0, 49, 12, 0, 0, 0, 50, 12, 0, 0, 0, 51, 12, 0, 0, 0, 52, 12, 0, 0, 0, 53, 12, 0, 0, 0, 54, 12, 0, 0, 0, 55, 12, 0, 0, 0, 56, 12, 0, 0, 0, 57, 12, 0, 0, 0, 58, 12, 0, 0, 0, 59, 12, 0, 0, 0, 60, 12, 0, 0, 0, 61, 12, 0, 0, 0, 62, 12, 0, 0, 0, 63, 12, 0, 0, 0, 64, 12, 0, 0, 0, 65, 12, 0, 0, 0, 66, 12, 0, 0, 0, 67, 12, 0, 0, 0, 68, 12, 0, 0, 0, 69, 12, 0, 0, 0, 70, 12, 0, 0, 0, 71, 12, 0, 0, 0, 72, 12, 0, 0, 0, 73, 12, 0, 0, 0, 74, 12, 0, 0, 0, 75, 12, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0 }; static const signed char _sav_long_variable_parse_cond_actions[] = { 0, 0, 0, 1, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 3, 8, 0, 0, 0, 0, 0, 0, 0, 1, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 0 }; static const short _sav_long_variable_parse_eof_trans[] = { 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 
435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 0 }; static const int sav_long_variable_parse_start = 1; static const int sav_long_variable_parse_en_main = 1; #line 79 "src/spss/readstat_sav_parse.rl" readstat_error_t sav_parse_long_variable_names_record(void *data, int count, sav_ctx_t *ctx) { unsigned char *c_data = (unsigned char *)data; int var_count = count_vars(ctx); readstat_error_t retval = READSTAT_OK; char temp_key[8+1]; char temp_val[64+1]; unsigned char *str_start = NULL; size_t str_len = 0; char error_buf[8192]; unsigned char *p = c_data; unsigned char *pe = c_data + count; varlookup_t *table = build_lookup_table(var_count, ctx); unsigned char *eof = pe; int cs; #line 351 "src/spss/readstat_sav_parse.c" { cs = (int)sav_long_variable_parse_start; } #line 356 "src/spss/readstat_sav_parse.c" { int _klen; unsigned int _trans = 0; const unsigned char * _keys; const signed char * _acts; unsigned int _nacts; _resume: {} if ( p == pe && p != eof ) goto _out; if ( p == eof ) { if ( _sav_long_variable_parse_eof_trans[cs] > 0 ) { _trans = (unsigned int)_sav_long_variable_parse_eof_trans[cs] - 1; } } else { _keys = ( _sav_long_variable_parse_trans_keys + (_sav_long_variable_parse_key_offsets[cs])); _trans = (unsigned int)_sav_long_variable_parse_index_offsets[cs]; _klen = (int)_sav_long_variable_parse_single_lengths[cs]; if ( _klen > 0 ) { const unsigned char *_lower = _keys; const unsigned char *_upper = _keys + _klen - 1; const unsigned char *_mid; while ( 1 ) { if ( _upper < _lower ) { _keys += _klen; _trans += (unsigned int)_klen; break; } _mid = _lower + ((_upper-_lower) >> 1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 1; else if ( ( (*( p))) > (*( _mid)) ) _lower = _mid + 1; else { _trans += (unsigned int)(_mid - _keys); goto _match; } } } _klen = 
(int)_sav_long_variable_parse_range_lengths[cs]; if ( _klen > 0 ) { const unsigned char *_lower = _keys; const unsigned char *_upper = _keys + (_klen<<1) - 2; const unsigned char *_mid; while ( 1 ) { if ( _upper < _lower ) { _trans += (unsigned int)_klen; break; } _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 2; else if ( ( (*( p))) > (*( _mid + 1)) ) _lower = _mid + 2; else { _trans += (unsigned int)((_mid - _keys)>>1); break; } } } _match: {} } cs = (int)_sav_long_variable_parse_cond_targs[_trans]; if ( _sav_long_variable_parse_cond_actions[_trans] != 0 ) { _acts = ( _sav_long_variable_parse_actions + (_sav_long_variable_parse_cond_actions[_trans])); _nacts = (unsigned int)(*( _acts)); _acts += 1; while ( _nacts > 0 ) { switch ( (*( _acts)) ) { case 0: { { #line 13 "src/spss/readstat_sav_parse.rl" memcpy(temp_key, str_start, str_len); temp_key[str_len] = '\0'; } #line 442 "src/spss/readstat_sav_parse.c" break; } case 1: { { #line 20 "src/spss/readstat_sav_parse.rl" str_start = p; } #line 451 "src/spss/readstat_sav_parse.c" break; } case 2: { { #line 20 "src/spss/readstat_sav_parse.rl" str_len = p - str_start; } #line 460 "src/spss/readstat_sav_parse.c" break; } case 3: { { #line 102 "src/spss/readstat_sav_parse.rl" varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup); if (found) { spss_varinfo_t *info = ctx->varinfo[found->index]; memcpy(info->longname, temp_val, str_len); info->longname[str_len] = '\0'; } else if (ctx->handle.error) { snprintf(error_buf, sizeof(error_buf), "Failed to find %s", temp_key); ctx->handle.error(error_buf, ctx->user_ctx); } } #line 479 "src/spss/readstat_sav_parse.c" break; } case 4: { { #line 114 "src/spss/readstat_sav_parse.rl" memcpy(temp_val, str_start, str_len); temp_val[str_len] = '\0'; } #line 491 "src/spss/readstat_sav_parse.c" break; } case 5: { { #line 119 "src/spss/readstat_sav_parse.rl" str_start = p; } #line 500 
"src/spss/readstat_sav_parse.c" break; } case 6: { { #line 119 "src/spss/readstat_sav_parse.rl" str_len = p - str_start; } #line 509 "src/spss/readstat_sav_parse.c" break; } } _nacts -= 1; _acts += 1; } } if ( p == eof ) { if ( cs >= 11 ) goto _out; } else { if ( cs != 0 ) { p += 1; goto _resume; } } _out: {} } #line 127 "src/spss/readstat_sav_parse.rl" if (cs < #line 537 "src/spss/readstat_sav_parse.c" 11 #line 129 "src/spss/readstat_sav_parse.rl" || p != pe) { if (ctx->handle.error) { snprintf(error_buf, sizeof(error_buf), "Error parsing string \"%.*s\" around byte #%ld/%d, character %c", count, (char *)data, (long)(p - c_data), count, *p); ctx->handle.error(error_buf, ctx->user_ctx); } retval = READSTAT_ERROR_PARSE; } if (table) free(table); /* suppress warning */ (void)sav_long_variable_parse_en_main; return retval; } #line 560 "src/spss/readstat_sav_parse.c" static const signed char _sav_very_long_string_parse_actions[] = { 0, 1, 1, 1, 3, 1, 4, 2, 2, 0, 2, 5, 4, 0 }; static const signed char _sav_very_long_string_parse_key_offsets[] = { 0, 0, 5, 19, 33, 47, 61, 75, 89, 103, 104, 106, 109, 111, 0 }; static const unsigned char _sav_very_long_string_parse_trans_keys[] = { 255u, 0u, 63u, 91u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 47u, 61u, 96u, 255u, 0u, 34u, 37u, 45u, 58u, 63u, 91u, 94u, 123u, 127u, 61u, 48u, 57u, 0u, 48u, 57u, 0u, 9u, 255u, 0u, 63u, 91u, 127u, 0u }; static const signed char _sav_very_long_string_parse_single_lengths[] = { 0, 1, 4, 4, 4, 4, 4, 4, 4, 1, 0, 1, 2, 1, 0 }; static const signed char _sav_very_long_string_parse_range_lengths[] = { 0, 2, 5, 5, 
5, 5, 5, 5, 5, 0, 1, 1, 0, 2, 0 }; static const signed char _sav_very_long_string_parse_index_offsets[] = { 0, 0, 4, 14, 24, 34, 44, 54, 64, 74, 76, 78, 81, 84, 0 }; static const signed char _sav_very_long_string_parse_cond_targs[] = { 0, 0, 0, 2, 0, 10, 0, 0, 0, 0, 0, 0, 0, 3, 0, 10, 0, 0, 0, 0, 0, 0, 0, 4, 0, 10, 0, 0, 0, 0, 0, 0, 0, 5, 0, 10, 0, 0, 0, 0, 0, 0, 0, 6, 0, 10, 0, 0, 0, 0, 0, 0, 0, 7, 0, 10, 0, 0, 0, 0, 0, 0, 0, 8, 0, 10, 0, 0, 0, 0, 0, 0, 0, 9, 10, 0, 11, 0, 12, 11, 0, 12, 13, 0, 0, 0, 0, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0 }; static const signed char _sav_very_long_string_parse_cond_actions[] = { 0, 0, 0, 1, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 10, 0, 3, 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static const int sav_very_long_string_parse_start = 1; static const int sav_very_long_string_parse_en_main = 1; #line 153 "src/spss/readstat_sav_parse.rl" readstat_error_t sav_parse_very_long_string_record(void *data, int count, sav_ctx_t *ctx) { unsigned char *c_data = (unsigned char *)data; int var_count = count_vars(ctx); readstat_error_t retval = READSTAT_OK; char temp_key[8*4+1]; unsigned int temp_val = 0; unsigned char *str_start = NULL; size_t str_len = 0; size_t error_buf_len = 1024 + count; char *error_buf = NULL; unsigned char *p = c_data; unsigned char *pe = c_data + count; varlookup_t *table = NULL; int cs; error_buf = readstat_malloc(error_buf_len); table = build_lookup_table(var_count, ctx); #line 666 "src/spss/readstat_sav_parse.c" { cs = (int)sav_very_long_string_parse_start; } #line 671 "src/spss/readstat_sav_parse.c" { int _klen; unsigned int _trans = 0; const unsigned char * _keys; const signed char * _acts; unsigned int _nacts; _resume: {} if ( p == pe ) goto _out; _keys = ( _sav_very_long_string_parse_trans_keys + 
(_sav_very_long_string_parse_key_offsets[cs])); _trans = (unsigned int)_sav_very_long_string_parse_index_offsets[cs]; _klen = (int)_sav_very_long_string_parse_single_lengths[cs]; if ( _klen > 0 ) { const unsigned char *_lower = _keys; const unsigned char *_upper = _keys + _klen - 1; const unsigned char *_mid; while ( 1 ) { if ( _upper < _lower ) { _keys += _klen; _trans += (unsigned int)_klen; break; } _mid = _lower + ((_upper-_lower) >> 1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 1; else if ( ( (*( p))) > (*( _mid)) ) _lower = _mid + 1; else { _trans += (unsigned int)(_mid - _keys); goto _match; } } } _klen = (int)_sav_very_long_string_parse_range_lengths[cs]; if ( _klen > 0 ) { const unsigned char *_lower = _keys; const unsigned char *_upper = _keys + (_klen<<1) - 2; const unsigned char *_mid; while ( 1 ) { if ( _upper < _lower ) { _trans += (unsigned int)_klen; break; } _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 2; else if ( ( (*( p))) > (*( _mid + 1)) ) _lower = _mid + 2; else { _trans += (unsigned int)((_mid - _keys)>>1); break; } } } _match: {} cs = (int)_sav_very_long_string_parse_cond_targs[_trans]; if ( _sav_very_long_string_parse_cond_actions[_trans] != 0 ) { _acts = ( _sav_very_long_string_parse_actions + (_sav_very_long_string_parse_cond_actions[_trans])); _nacts = (unsigned int)(*( _acts)); _acts += 1; while ( _nacts > 0 ) { switch ( (*( _acts)) ) { case 0: { { #line 13 "src/spss/readstat_sav_parse.rl" memcpy(temp_key, str_start, str_len); temp_key[str_len] = '\0'; } #line 750 "src/spss/readstat_sav_parse.c" break; } case 1: { { #line 20 "src/spss/readstat_sav_parse.rl" str_start = p; } #line 759 "src/spss/readstat_sav_parse.c" break; } case 2: { { #line 20 "src/spss/readstat_sav_parse.rl" str_len = p - str_start; } #line 768 "src/spss/readstat_sav_parse.c" break; } case 3: { { #line 177 "src/spss/readstat_sav_parse.rl" varlookup_t *found = bsearch(temp_key, table, var_count, 
sizeof(varlookup_t), &compare_key_varlookup); if (found) { ctx->varinfo[found->index]->string_length = temp_val; ctx->varinfo[found->index]->write_format.width = temp_val; ctx->varinfo[found->index]->print_format.width = temp_val; } } #line 784 "src/spss/readstat_sav_parse.c" break; } case 4: { { #line 186 "src/spss/readstat_sav_parse.rl" if ((( (*( p)))) != '\0') { unsigned char digit = (( (*( p)))) - '0'; if (temp_val <= (UINT_MAX - digit) / 10) { temp_val = 10 * temp_val + digit; } else { {p += 1; goto _out; } } } } #line 802 "src/spss/readstat_sav_parse.c" break; } case 5: { { #line 197 "src/spss/readstat_sav_parse.rl" temp_val = 0; } #line 811 "src/spss/readstat_sav_parse.c" break; } } _nacts -= 1; _acts += 1; } } if ( cs != 0 ) { p += 1; goto _resume; } _out: {} } #line 205 "src/spss/readstat_sav_parse.rl" if (cs < #line 833 "src/spss/readstat_sav_parse.c" 12 #line 207 "src/spss/readstat_sav_parse.rl" || p != pe) { if (ctx->handle.error) { snprintf(error_buf, error_buf_len, "Parsed %ld of %ld bytes. 
Remaining bytes: %.*s", (long)(p - c_data), (long)(pe - c_data), (int)(pe - p), p); ctx->handle.error(error_buf, ctx->user_ctx); } retval = READSTAT_ERROR_PARSE; } if (table) free(table); if (error_buf) free(error_buf); /* suppress warning */ (void)sav_very_long_string_parse_en_main; return retval; } ReadStat-1.1.7/src/spss/readstat_sav_parse.h000066400000000000000000000003211410722155500210270ustar00rootroot00000000000000// // sav_parse.h // readstat_error_t sav_parse_long_variable_names_record(void *data, int count, sav_ctx_t *ctx); readstat_error_t sav_parse_very_long_string_record(void *data, int count, sav_ctx_t *ctx); ReadStat-1.1.7/src/spss/readstat_sav_parse.rl000066400000000000000000000147461410722155500212350ustar00rootroot00000000000000#include #include #include "../readstat.h" #include "../readstat_malloc.h" #include "../readstat_strings.h" #include "readstat_sav.h" #include "readstat_sav_parse.h" %%{ machine key_defs; action copy_key { memcpy(temp_key, str_start, str_len); temp_key[str_len] = '\0'; } non_ascii_byte = (0x80 .. 0xFE); # multi-byte sequence might be incomplete key = ( ( non_ascii_byte | [A-Z@] ) ( non_ascii_byte | [A-Za-z0-9@#$_\.] 
){0,7} ) >{ str_start = fpc; } %{ str_len = fpc - str_start; };
}%%

/* One entry of the name -> varinfo index table used for bsearch(). */
typedef struct varlookup {
    char      name[8*4+1];
    int       index;
} varlookup_t;

/* bsearch() comparator: elem1 is a NUL-terminated key, elem2 a table entry.
 * Comparison is case-insensitive. */
static int compare_key_varlookup(const void *elem1, const void *elem2) {
    const char *key = (const char *)elem1;
    const varlookup_t *v = (const varlookup_t *)elem2;
    return strcasecmp(key, v->name);
}

/* qsort() comparator ordering table entries by name, case-insensitively, so
 * that the resulting order matches what compare_key_varlookup expects. */
static int compare_varlookups(const void *elem1, const void *elem2) {
    const varlookup_t *v1 = (const varlookup_t *)elem1;
    const varlookup_t *v2 = (const varlookup_t *)elem2;
    return strcasecmp(v1->name, v2->name);
}

/* Count the entries of ctx->varinfo[0 .. var_index) whose name differs from
 * the name of the entry immediately before them (runs of equal names count
 * once). */
static int count_vars(sav_ctx_t *ctx) {
    int i;
    spss_varinfo_t *last_info = NULL;
    int var_count = 0;
    for (i=0; i<ctx->var_index; i++) {
        spss_varinfo_t *info = ctx->varinfo[i];
        if (last_info == NULL || strcmp(info->name, last_info->name) != 0) {
            var_count++;
        }
        last_info = info;
    }
    return var_count;
}

/* Build a table with one entry per run of equally named varinfo records
 * (same walk as count_vars, which supplies var_count), sorted for bsearch().
 * The caller owns the returned allocation and releases it with free(). */
static varlookup_t *build_lookup_table(int var_count, sav_ctx_t *ctx) {
    varlookup_t *table = readstat_malloc(var_count * sizeof(varlookup_t));
    int offset = 0;
    int i;
    spss_varinfo_t *last_info = NULL;
    for (i=0; i<ctx->var_index; i++) {
        spss_varinfo_t *info = ctx->varinfo[i];

        if (last_info == NULL || strcmp(info->name, last_info->name) != 0) {
            varlookup_t *entry = &table[offset++];

            memcpy(entry->name, info->name, sizeof(info->name));
            entry->index = info->index;
        }
        last_info = info;
    }
    qsort(table, var_count, sizeof(varlookup_t), &compare_varlookups);
    return table;
}

%%{
    machine sav_long_variable_parse;
    include key_defs;
    write data nofinal noerror;
    alphtype unsigned char;
}%%

/*
 * Parse a "long variable names" record: `count` bytes at `data` holding
 * KEY=VALUE pairs separated by tabs (optional trailing tab). The value of
 * each known key becomes that variable's longname; unknown keys are reported
 * through ctx->handle.error (if set) without aborting the parse.
 *
 * Returns READSTAT_OK on a complete match, READSTAT_ERROR_PARSE otherwise.
 */
readstat_error_t sav_parse_long_variable_names_record(void *data, int count, sav_ctx_t *ctx) {
    unsigned char *c_data = (unsigned char *)data;
    int var_count = count_vars(ctx);
    readstat_error_t retval = READSTAT_OK;

    /* Sized to the grammar limits: key <= 8 bytes, value <= 64 bytes. */
    char temp_key[8+1];
    char temp_val[64+1];
    unsigned char *str_start = NULL;
    size_t str_len = 0;

    char error_buf[8192];
    unsigned char *p = c_data;
    unsigned char *pe = c_data + count;

    varlookup_t *table = build_lookup_table(var_count, ctx);

    unsigned char *eof = pe;

    int cs;

    %%{
        action set_long_name {
            varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup);
            if (found) {
                spss_varinfo_t *info = ctx->varinfo[found->index];
                memcpy(info->longname, temp_val, str_len);
                info->longname[str_len] = '\0';
            } else if (ctx->handle.error) {
                snprintf(error_buf, sizeof(error_buf), "Failed to find %s", temp_key);
                ctx->handle.error(error_buf, ctx->user_ctx);
            }
        }

        action copy_value {
            memcpy(temp_val, str_start, str_len);
            temp_val[str_len] = '\0';
        }

        value = ( non_ascii_byte | print ){1,64} >{ str_start = fpc; } %{ str_len = fpc - str_start; };

        keyval = ( key %copy_key "=" value %copy_value ) %set_long_name;

        main := keyval ("\t" keyval)* "\t"?;

        write init;
        write exec;
    }%%

    if (cs < %%{ write first_final; }%%|| p != pe) {
        if (ctx->handle.error) {
            if (p < pe) {
                snprintf(error_buf, sizeof(error_buf), "Error parsing string \"%.*s\" around byte #%ld/%d, character %c", 
                        count, (char *)data, (long)(p - c_data), count, *p);
            } else {
                /* Input ended in a non-accepting state: p == pe, so *p would
                 * read one byte past the count-byte buffer. */
                snprintf(error_buf, sizeof(error_buf), "Error parsing string \"%.*s\" around byte #%ld/%d, unexpected end of input", 
                        count, (char *)data, (long)(p - c_data), count);
            }
            ctx->handle.error(error_buf, ctx->user_ctx);
        }
        retval = READSTAT_ERROR_PARSE;
    }

    if (table)
        free(table);

    /* suppress warning */
    (void)sav_long_variable_parse_en_main;

    return retval;
}

%%{
    machine sav_very_long_string_parse;
    include key_defs;
    write data nofinal noerror;
    alphtype unsigned char;
}%%

/*
 * Parse a "very long string" record: `count` bytes at `data` holding
 * KEY=DIGITS pairs, each followed by one or more NUL bytes and separated by
 * a tab. The number parsed for each known key is stored as the variable's
 * string_length and as the width of its write and print formats.
 *
 * Returns READSTAT_OK on a complete match, READSTAT_ERROR_PARSE otherwise.
 */
readstat_error_t sav_parse_very_long_string_record(void *data, int count, sav_ctx_t *ctx) {
    unsigned char *c_data = (unsigned char *)data;
    int var_count = count_vars(ctx);
    readstat_error_t retval = READSTAT_OK;

    char temp_key[8*4+1];
    unsigned int temp_val = 0;
    unsigned char *str_start = NULL;
    size_t str_len = 0;

    size_t error_buf_len = 1024 + count;
    char *error_buf = NULL;
    unsigned char *p = c_data;
    unsigned char *pe = c_data + count;

    varlookup_t *table = NULL;
    int cs;

    error_buf = readstat_malloc(error_buf_len);
    table = build_lookup_table(var_count, ctx);

    %%{
        action set_width {
            varlookup_t *found = bsearch(temp_key, table, var_count, sizeof(varlookup_t), &compare_key_varlookup);
            if (found) {
                ctx->varinfo[found->index]->string_length = temp_val;
                ctx->varinfo[found->index]->write_format.width = temp_val;
                ctx->varinfo[found->index]->print_format.width = temp_val;
            }
        }

        action incr_val {
            if (fc != '\0') {
                unsigned char digit = fc - '0';
                if (temp_val <= (UINT_MAX - digit) / 10) {
                    temp_val = 10 * temp_val + digit;
                } else {
                    fbreak;
                }
            }
        }

        value = [0-9]+ >{ temp_val = 0; } $incr_val;

        keyval = ( key %copy_key "=" value ) %set_width;

        main := keyval ("\0"+ "\t" keyval)* "\0"+ "\t"?;

        write init;
        write exec;
    }%%

    if (cs < %%{ write first_final; }%% || p != pe) {
        /* error_buf may be NULL if the allocation failed; never hand a NULL
         * buffer with a non-zero size to snprintf. */
        if (ctx->handle.error && error_buf) {
            snprintf(error_buf, error_buf_len, "Parsed %ld of %ld bytes. Remaining bytes: %.*s",
                    (long)(p - c_data), (long)(pe - c_data), (int)(pe - p), p);
            ctx->handle.error(error_buf, ctx->user_ctx);
        }
        retval = READSTAT_ERROR_PARSE;
    }

    if (table)
        free(table);
    if (error_buf)
        free(error_buf);

    /* suppress warning */
    (void)sav_very_long_string_parse_en_main;

    return retval;
}
ReadStat-1.1.7/src/spss/readstat_sav_parse_timestamp.c000066400000000000000000000365551410722155500231270ustar00rootroot00000000000000#line 1 "src/spss/readstat_sav_parse_timestamp.rl"
#include <time.h>
#include "../readstat.h"
#include "../readstat_iconv.h"

#include "readstat_sav.h"
#include "readstat_sav_parse_timestamp.h"


#line 12 "src/spss/readstat_sav_parse_timestamp.c"
static const signed char _sav_time_parse_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 4, 1, 5, 2, 1, 0, 0 };
static const signed char _sav_time_parse_key_offsets[] = { 0, 0, 3, 5, 6, 9, 11, 12, 15, 17, 19, 21, 23, 0 };
static const char _sav_time_parse_trans_keys[] = { 32, 48, 57, 48, 57, 58, 32, 48, 57, 48, 57, 58, 32, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 0 };
static const signed char _sav_time_parse_single_lengths[] = { 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0 };
static const signed char _sav_time_parse_range_lengths[] = { 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0 };
static const signed char _sav_time_parse_index_offsets[] = { 0, 0, 3, 5, 7, 10, 12, 14, 17, 19, 21, 23,
25, 0 };
static const signed char _sav_time_parse_cond_targs[] = { 2, 11, 0, 3, 0, 4, 0, 5, 10, 0, 6, 0, 7, 0, 8, 9, 0, 12, 0, 12, 0, 6, 0, 3, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0 };
static const signed char _sav_time_parse_cond_actions[] = { 0, 3, 0, 11, 0, 5, 0, 0, 3, 0, 11, 0, 7, 0, 0, 3, 0, 11, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0 };
static const signed char _sav_time_parse_eof_trans[] = { 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 0 };
static const int sav_time_parse_start = 1;
static const int sav_time_parse_en_main = 1;

#line 12 "src/spss/readstat_sav_parse_timestamp.rl"

/*
 * Parse a time string of `len` bytes at `data` and store the result in
 * timestamp->tm_hour, tm_min and tm_sec. The transition keys of the machine
 * are the space character, the digits '0'-'9' and ':'.
 *
 * On a failed parse the message "Invalid time string ..." is passed to
 * error_cb (when non-NULL) together with user_ctx, and
 * READSTAT_ERROR_BAD_TIMESTAMP_STRING is returned; otherwise READSTAT_OK.
 *
 * NOTE: Ragel output. The grammar itself lives in
 * readstat_sav_parse_timestamp.rl and is not reproduced here.
 */
readstat_error_t sav_parse_time(const char *data, size_t len, struct tm *timestamp, readstat_error_handler error_cb, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    char error_buf[8192];
    const char *p = data;
    const char *pe = p + len;
    /* eof == pe: the buffer is the complete input, so EOF actions fire at pe. */
    const char *eof = pe;

    int cs;
    /* Accumulator for the numeric field currently being read. */
    int temp_val = 0;

#line 83 "src/spss/readstat_sav_parse_timestamp.c"
    {
        cs = (int)sav_time_parse_start;
    }

#line 88 "src/spss/readstat_sav_parse_timestamp.c"
    {
        int _klen;
        unsigned int _trans = 0;
        const char * _keys;
        const signed char * _acts;
        unsigned int _nacts;
        _resume: {}
        if ( p == pe && p != eof )
            goto _out;
        if ( p == eof ) {
            if ( _sav_time_parse_eof_trans[cs] > 0 ) {
                _trans = (unsigned int)_sav_time_parse_eof_trans[cs] - 1;
            }
        }
        else {
            _keys = ( _sav_time_parse_trans_keys + (_sav_time_parse_key_offsets[cs]));
            _trans = (unsigned int)_sav_time_parse_index_offsets[cs];

            /* Binary search over the single-character keys of this state. */
            _klen = (int)_sav_time_parse_single_lengths[cs];
            if ( _klen > 0 ) {
                const char *_lower = _keys;
                const char *_upper = _keys + _klen - 1;
                const char *_mid;
                while ( 1 ) {
                    if ( _upper < _lower ) {
                        _keys += _klen;
                        _trans += (unsigned int)_klen;
                        break;
                    }

                    _mid = _lower + ((_upper-_lower) >> 1);
                    if ( ( (*( p))) < (*( _mid)) )
                        _upper = _mid - 1;
                    else if ( ( (*( p))) > (*( _mid)) )
                        _lower = _mid + 1;
                    else {
                        _trans += (unsigned int)(_mid - _keys);
                        goto _match;
                    }
                }
            }

            /* Binary search over the (low, high) range keys of this state. */
            _klen = (int)_sav_time_parse_range_lengths[cs];
            if ( _klen > 0 ) {
                const char *_lower = _keys;
                const char *_upper = _keys + (_klen<<1) - 2;
                const char *_mid;
                while ( 1 ) {
                    if ( _upper < _lower ) {
                        _trans += (unsigned int)_klen;
                        break;
                    }

                    _mid = _lower + (((_upper-_lower) >> 1) & ~1);
                    if ( ( (*( p))) < (*( _mid)) )
                        _upper = _mid - 2;
                    else if ( ( (*( p))) > (*( _mid + 1)) )
                        _lower = _mid + 2;
                    else {
                        _trans += (unsigned int)((_mid - _keys)>>1);
                        break;
                    }
                }
            }

            _match: {}
        }
        cs = (int)_sav_time_parse_cond_targs[_trans];

        if ( _sav_time_parse_cond_actions[_trans] != 0 ) {

            _acts = ( _sav_time_parse_actions + (_sav_time_parse_cond_actions[_trans]));
            _nacts = (unsigned int)(*( _acts));
            _acts += 1;
            while ( _nacts > 0 ) {
                switch ( (*( _acts)) )
                {
                    /* Append the current digit to the accumulator. */
                    case 0: {
                        {
#line 24 "src/spss/readstat_sav_parse_timestamp.rl"

                            temp_val = 10 * temp_val + ((( (*( p)))) - '0');
                        }

#line 173 "src/spss/readstat_sav_parse_timestamp.c"

                        break;
                    }
                    /* Reset the accumulator. */
                    case 1: {
                        {
#line 28 "src/spss/readstat_sav_parse_timestamp.rl"
                            temp_val = 0; }

#line 182 "src/spss/readstat_sav_parse_timestamp.c"

                        break;
                    }
                    /* Set the accumulator to the current digit's value. */
                    case 2: {
                        {
#line 28 "src/spss/readstat_sav_parse_timestamp.rl"
                            temp_val = (( (*( p)))) - '0'; }

#line 191 "src/spss/readstat_sav_parse_timestamp.c"

                        break;
                    }
                    /* Commit the accumulated number as the hour. */
                    case 3: {
                        {
#line 30 "src/spss/readstat_sav_parse_timestamp.rl"
                            timestamp->tm_hour = temp_val; }

#line 200 "src/spss/readstat_sav_parse_timestamp.c"

                        break;
                    }
                    /* Commit the accumulated number as the minute. */
                    case 4: {
                        {
#line 32 "src/spss/readstat_sav_parse_timestamp.rl"
                            timestamp->tm_min = temp_val; }

#line 209 "src/spss/readstat_sav_parse_timestamp.c"

                        break;
                    }
                    /* Commit the accumulated number as the second. */
                    case 5: {
                        {
#line 34 "src/spss/readstat_sav_parse_timestamp.rl"
                            timestamp->tm_sec = temp_val; }

#line 218 "src/spss/readstat_sav_parse_timestamp.c"

                        break;
                    }
                }
                _nacts -= 1;
                _acts += 1;
            }

        }

        if ( p == eof ) {
            if ( cs >= 12 )
                goto _out;
        }
        else {
            if ( cs != 0 ) {
                p += 1;
                goto _resume;
            }
        }
        _out: {}
    }

#line 40 "src/spss/readstat_sav_parse_timestamp.rl"


    /* 12 is the first accepting state of this machine. */
    if (cs < 
#line 246 "src/spss/readstat_sav_parse_timestamp.c"
    12
#line 42 "src/spss/readstat_sav_parse_timestamp.rl"
    || p != pe) {
        if (error_cb) {
            snprintf(error_buf, sizeof(error_buf),
                    "Invalid time string (length=%d): %.*s", (int)len, (int)len, data);
            error_cb(error_buf, user_ctx);
        }
        retval = READSTAT_ERROR_BAD_TIMESTAMP_STRING;
    }

    /* Reference the otherwise unused generated constant. */
    (void)sav_time_parse_en_main;

    return retval;
}


#line 264 "src/spss/readstat_sav_parse_timestamp.c"
static const signed char _sav_date_parse_actions[] = { 0, 1, 0, 1, 1, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, 8, 1, 9, 1, 10, 1, 11, 1, 12, 1, 13, 1, 14, 1, 15, 2, 2, 0, 0 };
static const signed char _sav_date_parse_key_offsets[] = { 0, 0, 3, 6, 8, 16, 20, 21, 23, 26, 29, 30, 32, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46, 50, 51, 53, 55, 57, 59, 60, 62, 64, 66, 68, 70, 72, 74, 75, 77, 78, 80, 81, 83, 84, 86, 87, 89, 90, 0 };
static const char _sav_date_parse_trans_keys[] = { 32, 48, 57, 32, 48, 57, 32, 45, 65, 68, 70, 74, 77, 78, 79, 83, 80, 85, 112, 117, 82, 32, 45, 32, 48, 57, 32, 48, 57, 71, 32, 45, 114, 103, 69, 101, 67, 32, 45, 99, 69, 101, 66, 32, 45, 98, 65, 85, 97, 117, 78, 32, 45, 76, 78, 32, 45, 32, 45, 110, 108, 110, 65, 97, 82, 89, 32, 45, 32, 45, 114, 121, 79, 111, 86, 32, 45, 118, 67, 99, 84, 32, 45, 116, 69, 101, 80, 32, 45, 112, 0 };
static const signed char _sav_date_parse_single_lengths[] = { 0, 1, 1, 2, 8, 4, 1, 2, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 1, 4, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 0, 0 };
static const signed char _sav_date_parse_range_lengths[] = { 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
static const short _sav_date_parse_index_offsets[] = { 0, 0, 3, 6, 9, 18, 23, 25, 28, 31, 34, 36, 39, 41, 43, 46, 48, 51, 53, 56, 58, 61, 63, 68, 70, 73, 76, 79, 82, 84, 87, 90, 93, 96, 99, 102, 105, 107, 110, 112, 115, 117, 120, 122, 125, 127, 130, 132, 0 };
static const signed char _sav_date_parse_cond_targs[] = { 2, 2, 0, 3, 3, 0, 4, 4, 0, 5, 14, 18, 22, 30, 35, 39, 43, 0, 6, 10, 12, 13, 0, 7, 0, 8, 8, 0, 9, 9, 0, 47, 47, 0, 11, 0, 8, 8, 0, 7, 0, 11,
0, 15, 17, 0, 16, 0, 8, 8, 0, 16, 0, 19, 21, 0, 20, 0, 8, 8, 0, 20, 0, 23, 25, 28, 29, 0, 24, 0, 8, 8, 0, 26, 27, 0, 8, 8, 0, 8, 8, 0, 24, 0, 26, 27, 0, 31, 34, 0, 32, 33, 0, 8, 8, 0, 8, 8, 0, 32, 33, 0, 36, 38, 0, 37, 0, 8, 8, 0, 37, 0, 40, 42, 0, 41, 0, 8, 8, 0, 41, 0, 44, 46, 0, 45, 0, 8, 8, 0, 45, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 0 }; static const signed char _sav_date_parse_cond_actions[] = { 31, 31, 0, 1, 1, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 13, 0, 31, 31, 0, 1, 1, 0, 0, 0, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 29, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7, 0, 0, 0, 0, 19, 19, 0, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 0, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 27, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0 }; static const short _sav_date_parse_eof_trans[] = { 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 0 }; static const int sav_date_parse_start = 1; static const int sav_date_parse_en_main = 1; #line 59 "src/spss/readstat_sav_parse_timestamp.rl" readstat_error_t sav_parse_date(const char *data, size_t len, struct tm *timestamp, readstat_error_handler error_cb, void *user_ctx) { readstat_error_t retval = READSTAT_OK; char error_buf[8192]; const char *p = data; const char *pe = p + len; const char *eof = pe; int cs; int temp_val = 0; #line 408 "src/spss/readstat_sav_parse_timestamp.c" { cs = (int)sav_date_parse_start; } #line 413 "src/spss/readstat_sav_parse_timestamp.c" { 
int _klen; unsigned int _trans = 0; const char * _keys; const signed char * _acts; unsigned int _nacts; _resume: {} if ( p == pe && p != eof ) goto _out; if ( p == eof ) { if ( _sav_date_parse_eof_trans[cs] > 0 ) { _trans = (unsigned int)_sav_date_parse_eof_trans[cs] - 1; } } else { _keys = ( _sav_date_parse_trans_keys + (_sav_date_parse_key_offsets[cs])); _trans = (unsigned int)_sav_date_parse_index_offsets[cs]; _klen = (int)_sav_date_parse_single_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + _klen - 1; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _keys += _klen; _trans += (unsigned int)_klen; break; } _mid = _lower + ((_upper-_lower) >> 1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 1; else if ( ( (*( p))) > (*( _mid)) ) _lower = _mid + 1; else { _trans += (unsigned int)(_mid - _keys); goto _match; } } } _klen = (int)_sav_date_parse_range_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + (_klen<<1) - 2; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _trans += (unsigned int)_klen; break; } _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 2; else if ( ( (*( p))) > (*( _mid + 1)) ) _lower = _mid + 2; else { _trans += (unsigned int)((_mid - _keys)>>1); break; } } } _match: {} } cs = (int)_sav_date_parse_cond_targs[_trans]; if ( _sav_date_parse_cond_actions[_trans] != 0 ) { _acts = ( _sav_date_parse_actions + (_sav_date_parse_cond_actions[_trans])); _nacts = (unsigned int)(*( _acts)); _acts += 1; while ( _nacts > 0 ) { switch ( (*( _acts)) ) { case 0: { { #line 71 "src/spss/readstat_sav_parse_timestamp.rl" char digit = ((( (*( p)))) - '0'); if (digit >= 0 && digit <= 9) { temp_val = 10 * temp_val + digit; } } #line 501 "src/spss/readstat_sav_parse_timestamp.c" break; } case 1: { { #line 78 "src/spss/readstat_sav_parse_timestamp.rl" if (temp_val < 70) { timestamp->tm_year = 100 + temp_val; } else { timestamp->tm_year = 
temp_val; } } #line 516 "src/spss/readstat_sav_parse_timestamp.c" break; } case 2: { { #line 87 "src/spss/readstat_sav_parse_timestamp.rl" temp_val = 0; } #line 525 "src/spss/readstat_sav_parse_timestamp.c" break; } case 3: { { #line 89 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mday = temp_val; } #line 534 "src/spss/readstat_sav_parse_timestamp.c" break; } case 4: { { #line 94 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 0; } #line 543 "src/spss/readstat_sav_parse_timestamp.c" break; } case 5: { { #line 95 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 1; } #line 552 "src/spss/readstat_sav_parse_timestamp.c" break; } case 6: { { #line 96 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 2; } #line 561 "src/spss/readstat_sav_parse_timestamp.c" break; } case 7: { { #line 97 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 3; } #line 570 "src/spss/readstat_sav_parse_timestamp.c" break; } case 8: { { #line 98 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 4; } #line 579 "src/spss/readstat_sav_parse_timestamp.c" break; } case 9: { { #line 99 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 5; } #line 588 "src/spss/readstat_sav_parse_timestamp.c" break; } case 10: { { #line 100 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 6; } #line 597 "src/spss/readstat_sav_parse_timestamp.c" break; } case 11: { { #line 101 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 7; } #line 606 "src/spss/readstat_sav_parse_timestamp.c" break; } case 12: { { #line 102 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 8; } #line 615 "src/spss/readstat_sav_parse_timestamp.c" break; } case 13: { { #line 103 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 9; } #line 624 "src/spss/readstat_sav_parse_timestamp.c" break; } case 14: { { #line 104 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 10; } #line 633 
"src/spss/readstat_sav_parse_timestamp.c" break; } case 15: { { #line 105 "src/spss/readstat_sav_parse_timestamp.rl" timestamp->tm_mon = 11; } #line 642 "src/spss/readstat_sav_parse_timestamp.c" break; } } _nacts -= 1; _acts += 1; } } if ( p == eof ) { if ( cs >= 47 ) goto _out; } else { if ( cs != 0 ) { p += 1; goto _resume; } } _out: {} } #line 112 "src/spss/readstat_sav_parse_timestamp.rl" if (cs < #line 670 "src/spss/readstat_sav_parse_timestamp.c" 47 #line 114 "src/spss/readstat_sav_parse_timestamp.rl" || p != pe) { if (error_cb) { snprintf(error_buf, sizeof(error_buf), "Invalid date string (length=%d): %.*s", (int)len, (int)len, data); error_cb(error_buf, user_ctx); } retval = READSTAT_ERROR_BAD_TIMESTAMP_STRING; } (void)sav_date_parse_en_main; return retval; } ReadStat-1.1.7/src/spss/readstat_sav_parse_timestamp.h000066400000000000000000000004351410722155500231200ustar00rootroot00000000000000 readstat_error_t sav_parse_time(const char *data, size_t len, struct tm *timestamp, readstat_error_handler error_cb, void *user_ctx); readstat_error_t sav_parse_date(const char *data, size_t len, struct tm *timestamp, readstat_error_handler error_cb, void *user_ctx); ReadStat-1.1.7/src/spss/readstat_sav_parse_timestamp.rl000066400000000000000000000071201410722155500233040ustar00rootroot00000000000000 #include #include "../readstat.h" #include "../readstat_iconv.h" #include "readstat_sav.h" #include "readstat_sav_parse_timestamp.h" %%{ machine sav_time_parse; write data nofinal noerror; }%% readstat_error_t sav_parse_time(const char *data, size_t len, struct tm *timestamp, readstat_error_handler error_cb, void *user_ctx) { readstat_error_t retval = READSTAT_OK; char error_buf[8192]; const char *p = data; const char *pe = p + len; const char *eof = pe; int cs; int temp_val = 0; %%{ action incr_val { temp_val = 10 * temp_val + (fc - '0'); } integer2 = ( " " %{ temp_val = 0; } | [0-9] ${ temp_val = fc - '0'; } ) [0-9] $incr_val; hour = integer2 %{ timestamp->tm_hour = 
temp_val; }; minute = integer2 %{ timestamp->tm_min = temp_val; }; second = integer2 %{ timestamp->tm_sec = temp_val; }; main := hour ":" minute ":" second; write init; write exec; }%% if (cs < %%{ write first_final; }%%|| p != pe) { if (error_cb) { snprintf(error_buf, sizeof(error_buf), "Invalid time string (length=%d): %.*s", (int)len, (int)len, data); error_cb(error_buf, user_ctx); } retval = READSTAT_ERROR_BAD_TIMESTAMP_STRING; } (void)sav_time_parse_en_main; return retval; } %%{ machine sav_date_parse; write data nofinal noerror; }%% readstat_error_t sav_parse_date(const char *data, size_t len, struct tm *timestamp, readstat_error_handler error_cb, void *user_ctx) { readstat_error_t retval = READSTAT_OK; char error_buf[8192]; const char *p = data; const char *pe = p + len; const char *eof = pe; int cs; int temp_val = 0; %%{ action incr_val { char digit = (fc - '0'); if (digit >= 0 && digit <= 9) { temp_val = 10 * temp_val + digit; } } action save_year { if (temp_val < 70) { timestamp->tm_year = 100 + temp_val; } else { timestamp->tm_year = temp_val; } } # some files in the wild use space padding instead of 0 padding integer2 = [0-9 ]{2} >{ temp_val = 0; } $incr_val; day = integer2 %{ timestamp->tm_mday = temp_val; }; year = integer2 %save_year; month = ("Jan" | "JAN") %{ timestamp->tm_mon = 0; } | ("Feb" | "FEB") %{ timestamp->tm_mon = 1; } | ("Mar" | "MAR") %{ timestamp->tm_mon = 2; } | ("Apr" | "APR") %{ timestamp->tm_mon = 3; } | ("May" | "MAY") %{ timestamp->tm_mon = 4; } | ("Jun" | "JUN") %{ timestamp->tm_mon = 5; } | ("Jul" | "JUL") %{ timestamp->tm_mon = 6; } | ("Aug" | "AUG") %{ timestamp->tm_mon = 7; } | ("Sep" | "SEP") %{ timestamp->tm_mon = 8; } | ("Oct" | "OCT") %{ timestamp->tm_mon = 9; } | ("Nov" | "NOV") %{ timestamp->tm_mon = 10; } | ("Dec" | "DEC") %{ timestamp->tm_mon = 11; }; # somebody is outputting dash separators main := day [ \-] month [ \-] year; write init; write exec; }%% if (cs < %%{ write first_final; }%%|| p != pe) { if (error_cb) 
{ snprintf(error_buf, sizeof(error_buf), "Invalid date string (length=%d): %.*s", (int)len, (int)len, data); error_cb(error_buf, user_ctx); } retval = READSTAT_ERROR_BAD_TIMESTAMP_STRING; } (void)sav_date_parse_en_main; return retval; } ReadStat-1.1.7/src/spss/readstat_sav_read.c000066400000000000000000001621271410722155500206400ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include "../readstat.h" #include "../readstat_bits.h" #include "../readstat_iconv.h" #include "../readstat_convert.h" #include "../readstat_malloc.h" #include "readstat_sav.h" #include "readstat_sav_compress.h" #include "readstat_sav_parse.h" #include "readstat_sav_parse_timestamp.h" #if HAVE_ZLIB #include "readstat_zsav_read.h" #endif #define DATA_BUFFER_SIZE 65536 #define VERY_LONG_STRING_MAX_LENGTH INT_MAX /* Others defined in table below */ /* See http://msdn.microsoft.com/en-us/library/dd317756(VS.85).aspx */ static readstat_charset_entry_t _charset_table[] = { { .code = 1, .name = "EBCDIC-US" }, { .code = 2, .name = "WINDOWS-1252" }, /* supposed to be ASCII, but some files are miscoded */ { .code = 3, .name = "WINDOWS-1252" }, { .code = 4, .name = "DEC-KANJI" }, { .code = 437, .name = "CP437" }, { .code = 708, .name = "ASMO-708" }, { .code = 737, .name = "CP737" }, { .code = 775, .name = "CP775" }, { .code = 850, .name = "CP850" }, { .code = 852, .name = "CP852" }, { .code = 855, .name = "CP855" }, { .code = 857, .name = "CP857" }, { .code = 858, .name = "CP858" }, { .code = 860, .name = "CP860" }, { .code = 861, .name = "CP861" }, { .code = 862, .name = "CP862" }, { .code = 863, .name = "CP863" }, { .code = 864, .name = "CP864" }, { .code = 865, .name = "CP865" }, { .code = 866, .name = "CP866" }, { .code = 869, .name = "CP869" }, { .code = 874, .name = "CP874" }, { .code = 932, .name = "SHIFT-JIS" }, { .code = 936, .name = "ISO-IR-58" }, { .code = 949, .name = "ISO-IR-149" }, { .code = 950, .name = "BIG-5" }, { .code = 
1200, .name = "UTF-16LE" }, { .code = 1201, .name = "UTF-16BE" }, { .code = 1250, .name = "WINDOWS-1250" }, { .code = 1251, .name = "WINDOWS-1251" }, { .code = 1252, .name = "WINDOWS-1252" }, { .code = 1253, .name = "WINDOWS-1253" }, { .code = 1254, .name = "WINDOWS-1254" }, { .code = 1255, .name = "WINDOWS-1255" }, { .code = 1256, .name = "WINDOWS-1256" }, { .code = 1257, .name = "WINDOWS-1257" }, { .code = 1258, .name = "WINDOWS-1258" }, { .code = 1361, .name = "CP1361" }, { .code = 10000, .name = "MACROMAN" }, { .code = 10004, .name = "MACARABIC" }, { .code = 10005, .name = "MACHEBREW" }, { .code = 10006, .name = "MACGREEK" }, { .code = 10007, .name = "MACCYRILLIC" }, { .code = 10010, .name = "MACROMANIA" }, { .code = 10017, .name = "MACUKRAINE" }, { .code = 10021, .name = "MACTHAI" }, { .code = 10029, .name = "MACCENTRALEUROPE" }, { .code = 10079, .name = "MACICELAND" }, { .code = 10081, .name = "MACTURKISH" }, { .code = 10082, .name = "MACCROATIAN" }, { .code = 12000, .name = "UTF-32LE" }, { .code = 12001, .name = "UTF-32BE" }, { .code = 20127, .name = "US-ASCII" }, { .code = 20866, .name = "KOI8-R" }, { .code = 20932, .name = "EUC-JP" }, { .code = 21866, .name = "KOI8-U" }, { .code = 28591, .name = "ISO-8859-1" }, { .code = 28592, .name = "ISO-8859-2" }, { .code = 28593, .name = "ISO-8859-3" }, { .code = 28594, .name = "ISO-8859-4" }, { .code = 28595, .name = "ISO-8859-5" }, { .code = 28596, .name = "ISO-8859-6" }, { .code = 28597, .name = "ISO-8859-7" }, { .code = 28598, .name = "ISO-8859-8" }, { .code = 28599, .name = "ISO-8859-9" }, { .code = 28603, .name = "ISO-8859-13" }, { .code = 28605, .name = "ISO-8859-15" }, { .code = 50220, .name = "ISO-2022-JP" }, { .code = 50221, .name = "ISO-2022-JP" }, // same as above? { .code = 50222, .name = "ISO-2022-JP" }, // same as above? 
{ .code = 50225, .name = "ISO-2022-KR" }, { .code = 50229, .name = "ISO-2022-CN" }, { .code = 51932, .name = "EUC-JP" }, { .code = 51936, .name = "GBK" }, { .code = 51949, .name = "EUC-KR" }, { .code = 52936, .name = "HZ-GB-2312" }, { .code = 54936, .name = "GB18030" }, { .code = 65000, .name = "UTF-7" }, { .code = 65001, .name = "UTF-8" } }; #define SAV_LABEL_NAME_PREFIX "labels" typedef struct value_label_s { char raw_value[8]; char utf8_string_value[8*4+1]; readstat_value_t final_value; char *label; } value_label_t; static readstat_error_t sav_update_progress(sav_ctx_t *ctx); static readstat_error_t sav_read_data(sav_ctx_t *ctx); static readstat_error_t sav_read_compressed_data(sav_ctx_t *ctx, readstat_error_t (*row_handler)(unsigned char *, size_t, sav_ctx_t *)); static readstat_error_t sav_read_uncompressed_data(sav_ctx_t *ctx, readstat_error_t (*row_handler)(unsigned char *, size_t, sav_ctx_t *)); static readstat_error_t sav_skip_variable_record(sav_ctx_t *ctx); static readstat_error_t sav_read_variable_record(sav_ctx_t *ctx); static readstat_error_t sav_skip_document_record(sav_ctx_t *ctx); static readstat_error_t sav_read_document_record(sav_ctx_t *ctx); static readstat_error_t sav_skip_value_label_record(sav_ctx_t *ctx); static readstat_error_t sav_read_value_label_record(sav_ctx_t *ctx); static readstat_error_t sav_read_dictionary_termination_record(sav_ctx_t *ctx); static readstat_error_t sav_parse_machine_floating_point_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx); static readstat_error_t sav_store_variable_display_parameter_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx); static readstat_error_t sav_parse_variable_display_parameter_record(sav_ctx_t *ctx); static readstat_error_t sav_parse_machine_integer_info_record(const void *data, size_t data_len, sav_ctx_t *ctx); static readstat_error_t sav_parse_long_string_value_labels_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx); static 
readstat_error_t sav_parse_long_string_missing_values_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx); static void sav_tag_missing_double(readstat_value_t *value, sav_ctx_t *ctx) { double fp_value = value->v.double_value; uint64_t long_value = 0; memcpy(&long_value, &fp_value, 8); if (long_value == ctx->missing_double) value->is_system_missing = 1; if (long_value == ctx->lowest_double) value->is_system_missing = 1; if (long_value == ctx->highest_double) value->is_system_missing = 1; if (isnan(fp_value)) value->is_system_missing = 1; } static readstat_error_t sav_update_progress(sav_ctx_t *ctx) { readstat_io_t *io = ctx->io; return io->update(ctx->file_size, ctx->handle.progress, ctx->user_ctx, io->io_ctx); } static readstat_error_t sav_skip_variable_record(sav_ctx_t *ctx) { sav_variable_record_t variable; readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; if (io->read(&variable, sizeof(sav_variable_record_t), io->io_ctx) < sizeof(sav_variable_record_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (variable.has_var_label) { uint32_t label_len; if (io->read(&label_len, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } label_len = ctx->bswap ? byteswap4(label_len) : label_len; uint32_t label_capacity = (label_len + 3) / 4 * 4; if (io->seek(label_capacity, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } } if (variable.n_missing_values) { int n_missing_values = ctx->bswap ? 
byteswap4(variable.n_missing_values) : variable.n_missing_values; if (io->seek(abs(n_missing_values) * sizeof(double), READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } } cleanup: return retval; } static readstat_error_t sav_read_variable_label(spss_varinfo_t *info, sav_ctx_t *ctx) { readstat_io_t *io = ctx->io; readstat_error_t retval = READSTAT_OK; uint32_t label_len, label_capacity; size_t out_label_len; char *label_buf = NULL; if (io->read(&label_len, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } label_len = ctx->bswap ? byteswap4(label_len) : label_len; if (label_len == 0) goto cleanup; label_capacity = (label_len + 3) / 4 * 4; if ((label_buf = readstat_malloc(label_capacity)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } out_label_len = (size_t)label_len*4+1; if ((info->label = readstat_malloc(out_label_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (io->read(label_buf, label_capacity, io->io_ctx) < label_capacity) { retval = READSTAT_ERROR_READ; goto cleanup; } retval = readstat_convert(info->label, out_label_len, label_buf, label_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; cleanup: if (label_buf) free(label_buf); if (retval != READSTAT_OK) { if (info->label) { free(info->label); info->label = NULL; } } return retval; } static readstat_error_t sav_read_variable_missing_double_values(spss_varinfo_t *info, sav_ctx_t *ctx) { readstat_io_t *io = ctx->io; int i; readstat_error_t retval = READSTAT_OK; if (io->read(info->missing_double_values, info->n_missing_values * sizeof(double), io->io_ctx) < info->n_missing_values * sizeof(double)) { retval = READSTAT_ERROR_READ; goto cleanup; } for (i=0; in_missing_values; i++) { if (ctx->bswap) { info->missing_double_values[i] = byteswap_double(info->missing_double_values[i]); } uint64_t long_value = 0; memcpy(&long_value, &info->missing_double_values[i], 8); if (long_value == 
ctx->missing_double) info->missing_double_values[i] = NAN; if (long_value == ctx->lowest_double) info->missing_double_values[i] = -HUGE_VAL; if (long_value == ctx->highest_double) info->missing_double_values[i] = HUGE_VAL; } cleanup: return retval; } static readstat_error_t sav_read_variable_missing_string_values(spss_varinfo_t *info, sav_ctx_t *ctx) { readstat_io_t *io = ctx->io; int i; readstat_error_t retval = READSTAT_OK; for (i=0; in_missing_values; i++) { char missing_value[8]; if (io->read(missing_value, sizeof(missing_value), io->io_ctx) < sizeof(missing_value)) { retval = READSTAT_ERROR_READ; goto cleanup; } retval = readstat_convert(info->missing_string_values[i], sizeof(info->missing_string_values[0]), missing_value, sizeof(missing_value), ctx->converter); if (retval != READSTAT_OK) goto cleanup; } cleanup: return retval; } static readstat_error_t sav_read_variable_missing_values(spss_varinfo_t *info, sav_ctx_t *ctx) { if (info->n_missing_values > 3 || info->n_missing_values < -3) { return READSTAT_ERROR_PARSE; } if (info->n_missing_values < 0) { info->missing_range = 1; info->n_missing_values = abs(info->n_missing_values); } else { info->missing_range = 0; } if (info->type == READSTAT_TYPE_DOUBLE) { return sav_read_variable_missing_double_values(info, ctx); } return sav_read_variable_missing_string_values(info, ctx); } static readstat_error_t sav_read_variable_record(sav_ctx_t *ctx) { readstat_io_t *io = ctx->io; sav_variable_record_t variable = { 0 }; spss_varinfo_t *info = NULL; readstat_error_t retval = READSTAT_OK; if (ctx->var_index == ctx->varinfo_capacity) { if ((ctx->varinfo = readstat_realloc(ctx->varinfo, (ctx->varinfo_capacity *= 2) * sizeof(spss_varinfo_t *))) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } } if (io->read(&variable, sizeof(sav_variable_record_t), io->io_ctx) < sizeof(sav_variable_record_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } variable.print = ctx->bswap ? 
byteswap4(variable.print) : variable.print; variable.write = ctx->bswap ? byteswap4(variable.write) : variable.write; int32_t type = ctx->bswap ? byteswap4(variable.type) : variable.type; if (type < 0) { if (ctx->var_index == 0) { return READSTAT_ERROR_PARSE; } ctx->var_offset++; ctx->varinfo[ctx->var_index-1]->width++; return 0; } if ((info = readstat_calloc(1, sizeof(spss_varinfo_t))) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } info->width = 1; info->n_segments = 1; info->index = ctx->var_index; info->offset = ctx->var_offset; info->labels_index = -1; retval = readstat_convert(info->name, sizeof(info->name), variable.name, sizeof(variable.name), NULL); if (retval != READSTAT_OK) goto cleanup; retval = readstat_convert(info->longname, sizeof(info->longname), variable.name, sizeof(variable.name), NULL); if (retval != READSTAT_OK) goto cleanup; info->print_format.decimal_places = (variable.print & 0x000000FF); info->print_format.width = (variable.print & 0x0000FF00) >> 8; info->print_format.type = (variable.print & 0x00FF0000) >> 16; info->write_format.decimal_places = (variable.write & 0x000000FF); info->write_format.width = (variable.write & 0x0000FF00) >> 8; info->write_format.type = (variable.write & 0x00FF0000) >> 16; if (type > 0 || info->print_format.type == SPSS_FORMAT_TYPE_A || info->write_format.type == SPSS_FORMAT_TYPE_A) { info->type = READSTAT_TYPE_STRING; } else { info->type = READSTAT_TYPE_DOUBLE; } if (variable.has_var_label) { if ((retval = sav_read_variable_label(info, ctx)) != READSTAT_OK) { goto cleanup; } } if (variable.n_missing_values) { info->n_missing_values = ctx->bswap ? 
byteswap4(variable.n_missing_values) : variable.n_missing_values; if ((retval = sav_read_variable_missing_values(info, ctx)) != READSTAT_OK) { goto cleanup; } } ctx->varinfo[ctx->var_index] = info; ctx->var_index++; ctx->var_offset++; cleanup: if (retval != READSTAT_OK) { spss_varinfo_free(info); } return retval; } static readstat_error_t sav_skip_value_label_record(sav_ctx_t *ctx) { uint32_t label_count; uint32_t rec_type; uint32_t var_count; readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; if (io->read(&label_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) label_count = byteswap4(label_count); int i; for (i=0; iseek(8, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->read(&unpadded_len, 1, io->io_ctx) < 1) { retval = READSTAT_ERROR_READ; goto cleanup; } padded_len = (unpadded_len + 8) / 8 * 8 - 1; if (io->seek(padded_len, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } } if (io->read(&rec_type, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) rec_type = byteswap4(rec_type); if (rec_type != 4) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (io->read(&var_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) var_count = byteswap4(var_count); if (io->seek(var_count * sizeof(uint32_t), READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } cleanup: return retval; } static readstat_error_t sav_submit_value_labels(value_label_t *value_labels, int32_t label_count, readstat_type_t value_type, sav_ctx_t *ctx) { char label_name_buf[256]; readstat_error_t retval = READSTAT_OK; int32_t i; snprintf(label_name_buf, sizeof(label_name_buf), SAV_LABEL_NAME_PREFIX "%d", ctx->value_labels_count); for (i=0; ihandle.value_label(label_name_buf, 
vlabel->final_value, vlabel->label, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } cleanup: return retval; } static readstat_error_t sav_read_value_label_record(sav_ctx_t *ctx) { uint32_t label_count; readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; uint32_t *vars = NULL; uint32_t var_count; int32_t rec_type; readstat_type_t value_type = READSTAT_TYPE_STRING; char label_buf[256]; value_label_t *value_labels = NULL; if (io->read(&label_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) label_count = byteswap4(label_count); if (label_count && (value_labels = readstat_calloc(label_count, sizeof(value_label_t))) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } int i; for (i=0; iread(vlabel->raw_value, 8, io->io_ctx) < 8) { retval = READSTAT_ERROR_READ; goto cleanup; } if (io->read(&unpadded_label_len, 1, io->io_ctx) < 1) { retval = READSTAT_ERROR_READ; goto cleanup; } padded_label_len = (unpadded_label_len + 8) / 8 * 8 - 1; if (io->read(label_buf, padded_label_len, io->io_ctx) < padded_label_len) { retval = READSTAT_ERROR_READ; goto cleanup; } utf8_label_len = padded_label_len*4+1; if ((vlabel->label = readstat_malloc(utf8_label_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } retval = readstat_convert(vlabel->label, utf8_label_len, label_buf, padded_label_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; } if (io->read(&rec_type, sizeof(int32_t), io->io_ctx) < sizeof(int32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) rec_type = byteswap4(rec_type); if (rec_type != 4) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (io->read(&var_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) var_count = byteswap4(var_count); if (var_count && (vars = readstat_malloc(var_count * sizeof(uint32_t))) == NULL) { retval 
= READSTAT_ERROR_MALLOC; goto cleanup; } if (io->read(vars, var_count * sizeof(uint32_t), io->io_ctx) < var_count * sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } for (i=0; ibswap) var_offset = byteswap4(var_offset); var_offset--; // Why subtract 1???? spss_varinfo_t **var = bsearch(&var_offset, ctx->varinfo, ctx->var_index, sizeof(spss_varinfo_t *), &spss_varinfo_compare); if (var) { (*var)->labels_index = ctx->value_labels_count; value_type = (*var)->type; } } for (i=0; ifinal_value.type = value_type; if (value_type == READSTAT_TYPE_DOUBLE) { memcpy(&val_d, vlabel->raw_value, 8); if (ctx->bswap) val_d = byteswap_double(val_d); vlabel->final_value.v.double_value = val_d; sav_tag_missing_double(&vlabel->final_value, ctx); } else { retval = readstat_convert(vlabel->utf8_string_value, sizeof(vlabel->utf8_string_value), vlabel->raw_value, 8, ctx->converter); if (retval != READSTAT_OK) break; vlabel->final_value.v.string_value = vlabel->utf8_string_value; } } if (ctx->handle.value_label) { sav_submit_value_labels(value_labels, label_count, value_type, ctx); } ctx->value_labels_count++; cleanup: if (vars) free(vars); if (value_labels) { for (i=0; ilabel) free(vlabel->label); } free(value_labels); } return retval; } static readstat_error_t sav_skip_document_record(sav_ctx_t *ctx) { uint32_t n_lines; readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; if (io->read(&n_lines, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) n_lines = byteswap4(n_lines); if (io->seek(n_lines * SPSS_DOC_LINE_SIZE, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } cleanup: return retval; } static readstat_error_t sav_read_document_record(sav_ctx_t *ctx) { if (!ctx->handle.note) return sav_skip_document_record(ctx); uint32_t n_lines; readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; if (io->read(&n_lines, sizeof(uint32_t), io->io_ctx) < 
sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) n_lines = byteswap4(n_lines); char raw_buffer[SPSS_DOC_LINE_SIZE]; char utf8_buffer[4*SPSS_DOC_LINE_SIZE+1]; int i; for (i=0; iread(raw_buffer, SPSS_DOC_LINE_SIZE, io->io_ctx) < SPSS_DOC_LINE_SIZE) { retval = READSTAT_ERROR_READ; goto cleanup; } retval = readstat_convert(utf8_buffer, sizeof(utf8_buffer), raw_buffer, sizeof(raw_buffer), ctx->converter); if (retval != READSTAT_OK) goto cleanup; if (ctx->handle.note(i, utf8_buffer, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } cleanup: return retval; } static readstat_error_t sav_read_dictionary_termination_record(sav_ctx_t *ctx) { int32_t filler; readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; if (io->read(&filler, sizeof(int32_t), io->io_ctx) < sizeof(int32_t)) { retval = READSTAT_ERROR_READ; } return retval; } static readstat_error_t sav_process_row(unsigned char *buffer, size_t buffer_len, sav_ctx_t *ctx) { if (ctx->row_offset) { ctx->row_offset--; return READSTAT_OK; } readstat_error_t retval = READSTAT_OK; double fp_value; int offset = 0; readstat_off_t data_offset = 0; size_t raw_str_used = 0; int segment_offset = 0; int var_index = 0, col = 0; int raw_str_is_utf8 = ctx->input_encoding && !strcmp(ctx->input_encoding, "UTF-8"); while (data_offset < buffer_len && col < ctx->var_index && var_index < ctx->var_index) { spss_varinfo_t *col_info = ctx->varinfo[col]; spss_varinfo_t *var_info = ctx->varinfo[var_index]; readstat_value_t value = { .type = var_info->type }; if (offset > 31) { retval = READSTAT_ERROR_PARSE; goto done; } if (var_info->type == READSTAT_TYPE_STRING) { if (raw_str_used + 8 <= ctx->raw_string_len) { if (raw_str_is_utf8) { /* Skip null bytes, see https://github.com/tidyverse/haven/issues/560 */ char c; for (int i=0; i<8; i++) if ((c = buffer[data_offset+i])) ctx->raw_string[raw_str_used++] = c; } else { memcpy(ctx->raw_string + raw_str_used, 
&buffer[data_offset], 8); raw_str_used += 8; } } if (++offset == col_info->width) { if (++segment_offset < var_info->n_segments) { raw_str_used--; } offset = 0; col++; } if (segment_offset == var_info->n_segments) { if (!ctx->variables[var_info->index]->skip) { retval = readstat_convert(ctx->utf8_string, ctx->utf8_string_len, ctx->raw_string, raw_str_used, ctx->converter); if (retval != READSTAT_OK) goto done; value.v.string_value = ctx->utf8_string; if (ctx->handle.value(ctx->current_row, ctx->variables[var_info->index], value, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto done; } } raw_str_used = 0; segment_offset = 0; var_index += var_info->n_segments; } } else if (var_info->type == READSTAT_TYPE_DOUBLE) { if (!ctx->variables[var_info->index]->skip) { memcpy(&fp_value, &buffer[data_offset], 8); if (ctx->bswap) { fp_value = byteswap_double(fp_value); } value.v.double_value = fp_value; sav_tag_missing_double(&value, ctx); if (ctx->handle.value(ctx->current_row, ctx->variables[var_info->index], value, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto done; } } var_index += var_info->n_segments; col++; } data_offset += 8; } ctx->current_row++; done: return retval; } static readstat_error_t sav_read_data(sav_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; size_t longest_string = 256; int i; for (i=0; ivar_index;) { spss_varinfo_t *info = ctx->varinfo[i]; if (info->string_length > longest_string) { longest_string = info->string_length; } i += info->n_segments; } ctx->raw_string_len = longest_string + sizeof(SAV_EIGHT_SPACES)-2; ctx->raw_string = readstat_malloc(ctx->raw_string_len); ctx->utf8_string_len = 4*longest_string+1 + sizeof(SAV_EIGHT_SPACES)-2; ctx->utf8_string = readstat_malloc(ctx->utf8_string_len); if (ctx->raw_string == NULL || ctx->utf8_string == NULL) { retval = READSTAT_ERROR_MALLOC; goto done; } if (ctx->compression == READSTAT_COMPRESS_ROWS) { retval = 
sav_read_compressed_data(ctx, &sav_process_row); } else if (ctx->compression == READSTAT_COMPRESS_BINARY) { #if HAVE_ZLIB retval = zsav_read_compressed_data(ctx, &sav_process_row); #else retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION; #endif } else { retval = sav_read_uncompressed_data(ctx, &sav_process_row); } if (retval != READSTAT_OK) goto done; if (ctx->record_count >= 0 && ctx->current_row != ctx->row_limit) { retval = READSTAT_ERROR_ROW_COUNT_MISMATCH; } done: return retval; } static readstat_error_t sav_read_uncompressed_data(sav_ctx_t *ctx, readstat_error_t (*row_handler)(unsigned char *, size_t, sav_ctx_t *)) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; unsigned char *buffer = NULL; size_t bytes_read = 0; size_t buffer_len = ctx->var_offset * 8; buffer = readstat_malloc(buffer_len); if (ctx->row_offset) { if (io->seek(buffer_len * ctx->row_offset, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto done; } ctx->row_offset = 0; } while (ctx->row_limit == -1 || ctx->current_row < ctx->row_limit) { retval = sav_update_progress(ctx); if (retval != READSTAT_OK) goto done; if ((bytes_read = io->read(buffer, buffer_len, io->io_ctx)) != buffer_len) goto done; retval = row_handler(buffer, buffer_len, ctx); if (retval != READSTAT_OK) goto done; } done: if (buffer) free(buffer); return retval; } static readstat_error_t sav_read_compressed_data(sav_ctx_t *ctx, readstat_error_t (*row_handler)(unsigned char *, size_t, sav_ctx_t *)) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; readstat_off_t data_offset = 0; unsigned char buffer[DATA_BUFFER_SIZE]; int buffer_used = 0; size_t uncompressed_row_len = ctx->var_offset * 8; readstat_off_t uncompressed_offset = 0; unsigned char *uncompressed_row = NULL; struct sav_row_stream_s state = { .missing_value = ctx->missing_double, .bias = ctx->bias, .bswap = ctx->bswap }; if (uncompressed_row_len && (uncompressed_row = readstat_malloc(uncompressed_row_len)) == 
NULL) { retval = READSTAT_ERROR_MALLOC; goto done; } while (1) { retval = sav_update_progress(ctx); if (retval != READSTAT_OK) goto done; buffer_used = io->read(buffer, sizeof(buffer), io->io_ctx); if (buffer_used == -1 || buffer_used == 0 || (buffer_used % 8) != 0) goto done; state.status = SAV_ROW_STREAM_HAVE_DATA; data_offset = 0; while (state.status != SAV_ROW_STREAM_NEED_DATA) { state.next_in = &buffer[data_offset]; state.avail_in = buffer_used - data_offset; state.next_out = &uncompressed_row[uncompressed_offset]; state.avail_out = uncompressed_row_len - uncompressed_offset; sav_decompress_row(&state); uncompressed_offset = uncompressed_row_len - state.avail_out; data_offset = buffer_used - state.avail_in; if (state.status == SAV_ROW_STREAM_FINISHED_ROW) { retval = row_handler(uncompressed_row, uncompressed_row_len, ctx); if (retval != READSTAT_OK) goto done; uncompressed_offset = 0; } if (state.status == SAV_ROW_STREAM_FINISHED_ALL) goto done; if (ctx->row_limit > 0 && ctx->current_row == ctx->row_limit) goto done; } } done: if (uncompressed_row) free(uncompressed_row); return retval; } static readstat_error_t sav_parse_machine_integer_info_record(const void *data, size_t data_len, sav_ctx_t *ctx) { if (data_len != 32) return READSTAT_ERROR_PARSE; const char *src_charset = NULL; const char *dst_charset = ctx->output_encoding; sav_machine_integer_info_record_t record; memcpy(&record, data, data_len); if (ctx->bswap) { record.character_code = byteswap4(record.character_code); } if (ctx->input_encoding) { src_charset = ctx->input_encoding; } else { int i; for (i=0; ihandle.error) { char error_buf[1024]; snprintf(error_buf, sizeof(error_buf), "Unsupported character set: %d\n", record.character_code); ctx->handle.error(error_buf, ctx->user_ctx); } return READSTAT_ERROR_UNSUPPORTED_CHARSET; } ctx->input_encoding = src_charset; } if (src_charset && dst_charset) { // You might be tempted to skip the charset conversion when src_charset // and dst_charset are the 
same. However, some versions of SPSS insert // illegally truncated strings (e.g. the last character is three bytes // but the field only has room for two bytes). So to prevent the client // from receiving an invalid byte sequence, we ram everything through // our iconv machinery. iconv_t converter = iconv_open(dst_charset, src_charset); if (converter == (iconv_t)-1) { return READSTAT_ERROR_UNSUPPORTED_CHARSET; } if (ctx->converter) { iconv_close(ctx->converter); } ctx->converter = converter; } return READSTAT_OK; } static readstat_error_t sav_parse_machine_floating_point_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx) { if (size != 8 || count != 3) return READSTAT_ERROR_PARSE; sav_machine_floating_point_info_record_t fp_info; memcpy(&fp_info, data, sizeof(sav_machine_floating_point_info_record_t)); ctx->missing_double = ctx->bswap ? byteswap8(fp_info.sysmis) : fp_info.sysmis; ctx->highest_double = ctx->bswap ? byteswap8(fp_info.highest) : fp_info.highest; ctx->lowest_double = ctx->bswap ? byteswap8(fp_info.lowest) : fp_info.lowest; return READSTAT_OK; } /* We don't yet know how many real variables there are, so store the values in the record * and make sense of them later. */ static readstat_error_t sav_store_variable_display_parameter_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx) { if (size != 4) return READSTAT_ERROR_PARSE; const uint32_t *data_ptr = data; int i; ctx->variable_display_values = readstat_realloc(ctx->variable_display_values, count * sizeof(uint32_t)); if (count > 0 && ctx->variable_display_values == NULL) return READSTAT_ERROR_MALLOC; ctx->variable_display_values_count = count; for (i=0; ivariable_display_values[i] = ctx->bswap ? 
byteswap4(data_ptr[i]) : data_ptr[i]; } return READSTAT_OK; } static readstat_error_t sav_parse_variable_display_parameter_record(sav_ctx_t *ctx) { if (!ctx->variable_display_values) return READSTAT_OK; int i; long count = ctx->variable_display_values_count; if (count != 2 * ctx->var_index && count != 3 * ctx->var_index) { return READSTAT_ERROR_PARSE; } int has_display_width = ctx->var_index > 0 && (count / ctx->var_index == 3); int offset = 0; for (i=0; ivar_index;) { spss_varinfo_t *info = ctx->varinfo[i]; offset = (2 + has_display_width)*i; info->measure = spss_measure_to_readstat_measure(ctx->variable_display_values[offset++]); if (has_display_width) { info->display_width = ctx->variable_display_values[offset++]; } info->alignment = spss_alignment_to_readstat_alignment(ctx->variable_display_values[offset++]); i += info->n_segments; } return READSTAT_OK; } static readstat_error_t sav_read_pascal_string(char *buf, size_t buf_len, const char **inout_data_ptr, size_t data_ptr_len, sav_ctx_t *ctx) { const char *data_ptr = *inout_data_ptr; const char *data_end = data_ptr + data_ptr_len; readstat_error_t retval = READSTAT_OK; uint32_t var_name_len = 0; if (data_ptr + sizeof(uint32_t) > data_end) { retval = READSTAT_ERROR_PARSE; goto cleanup; } memcpy(&var_name_len, data_ptr, sizeof(uint32_t)); if (ctx->bswap) var_name_len = byteswap4(var_name_len); data_ptr += sizeof(uint32_t); if (data_ptr + var_name_len > data_end) { retval = READSTAT_ERROR_PARSE; goto cleanup; } retval = readstat_convert(buf, buf_len, data_ptr, var_name_len, NULL); if (retval != READSTAT_OK) goto cleanup; data_ptr += var_name_len; cleanup: *inout_data_ptr = data_ptr; return retval; } static readstat_error_t sav_parse_long_string_value_labels_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx) { if (!ctx->handle.value_label) return READSTAT_OK; if (size != 1) return READSTAT_ERROR_PARSE; readstat_error_t retval = READSTAT_OK; uint32_t label_count = 0; uint32_t i = 0; const char 
*data_ptr = data; const char *data_end = data_ptr + count; char var_name_buf[256+1]; // unconverted char label_name_buf[256]; char *value_buffer = NULL; char *label_buffer = NULL; while (data_ptr < data_end) { memset(label_name_buf, '\0', sizeof(label_name_buf)); retval = sav_read_pascal_string(var_name_buf, sizeof(var_name_buf), &data_ptr, data_end - data_ptr, ctx); if (retval != READSTAT_OK) goto cleanup; for (i=0; ivar_index;) { spss_varinfo_t *info = ctx->varinfo[i]; if (strcmp(var_name_buf, info->longname) == 0) { info->labels_index = ctx->value_labels_count++; snprintf(label_name_buf, sizeof(label_name_buf), SAV_LABEL_NAME_PREFIX "%d", info->labels_index); break; } i += info->n_segments; } if (label_name_buf[0] == '\0') { retval = READSTAT_ERROR_PARSE; goto cleanup; } data_ptr += sizeof(uint32_t); if (data_ptr + sizeof(uint32_t) > data_end) { retval = READSTAT_ERROR_PARSE; goto cleanup; } memcpy(&label_count, data_ptr, sizeof(uint32_t)); if (ctx->bswap) label_count = byteswap4(label_count); data_ptr += sizeof(uint32_t); for (i=0; i data_end) { retval = READSTAT_ERROR_PARSE; goto cleanup; } memcpy(&value_len, data_ptr, sizeof(uint32_t)); if (ctx->bswap) value_len = byteswap4(value_len); data_ptr += sizeof(uint32_t); value_buffer_len = value_len*4+1; value_buffer = readstat_realloc(value_buffer, value_buffer_len); if (value_buffer == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (data_ptr + value_len > data_end) { retval = READSTAT_ERROR_PARSE; goto cleanup; } retval = readstat_convert(value_buffer, value_buffer_len, data_ptr, value_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; data_ptr += value_len; if (data_ptr + sizeof(uint32_t) > data_end) { retval = READSTAT_ERROR_PARSE; goto cleanup; } memcpy(&label_len, data_ptr, sizeof(uint32_t)); if (ctx->bswap) label_len = byteswap4(label_len); data_ptr += sizeof(uint32_t); label_buffer_len = label_len*4+1; label_buffer = readstat_realloc(label_buffer, label_buffer_len); if (label_buffer 
== NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (data_ptr + label_len > data_end) { retval = READSTAT_ERROR_PARSE; goto cleanup; } retval = readstat_convert(label_buffer, label_buffer_len, data_ptr, label_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; data_ptr += label_len; readstat_value_t value = { .type = READSTAT_TYPE_STRING }; value.v.string_value = value_buffer; if (ctx->handle.value_label(label_name_buf, value, label_buffer, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } } if (data_ptr != data_end) { retval = READSTAT_ERROR_PARSE; } cleanup: if (value_buffer) free(value_buffer); if (label_buffer) free(label_buffer); return retval; } static readstat_error_t sav_parse_long_string_missing_values_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx) { if (size != 1) return READSTAT_ERROR_PARSE; readstat_error_t retval = READSTAT_OK; uint32_t i = 0, j = 0; const char *data_ptr = data; const char *data_end = data_ptr + count; char var_name_buf[256+1]; while (data_ptr < data_end) { retval = sav_read_pascal_string(var_name_buf, sizeof(var_name_buf), &data_ptr, data_end - data_ptr, ctx); if (retval != READSTAT_OK) goto cleanup; if (data_ptr == data_end) { retval = READSTAT_ERROR_PARSE; goto cleanup; } char n_missing_values = *data_ptr++; if (n_missing_values < 1 || n_missing_values > 3) { retval = READSTAT_ERROR_PARSE; goto cleanup; } for (i=0; ivar_index;) { spss_varinfo_t *info = ctx->varinfo[i]; if (strcmp(var_name_buf, info->longname) == 0) { info->n_missing_values = n_missing_values; uint32_t var_name_len = 0; if (data_ptr + sizeof(uint32_t) > data_end) { retval = READSTAT_ERROR_PARSE; goto cleanup; } memcpy(&var_name_len, data_ptr, sizeof(uint32_t)); if (ctx->bswap) var_name_len = byteswap4(var_name_len); data_ptr += sizeof(uint32_t); for (j=0; j data_end) { retval = READSTAT_ERROR_PARSE; goto cleanup; } retval = readstat_convert(info->missing_string_values[j], 
sizeof(info->missing_string_values[0]), data_ptr, var_name_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; data_ptr += var_name_len; } break; } i += info->n_segments; } if (i == ctx->var_index) { retval = READSTAT_ERROR_PARSE; goto cleanup; } } if (data_ptr != data_end) { retval = READSTAT_ERROR_PARSE; } cleanup: return retval; } static readstat_error_t sav_parse_records_pass1(sav_ctx_t *ctx) { char data_buf[4096]; readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; while (1) { uint32_t rec_type; uint32_t extra_info[3]; size_t data_len = 0; int i; int done = 0; if (io->read(&rec_type, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) { rec_type = byteswap4(rec_type); } switch (rec_type) { case SAV_RECORD_TYPE_VARIABLE: retval = sav_skip_variable_record(ctx); if (retval != READSTAT_OK) goto cleanup; break; case SAV_RECORD_TYPE_VALUE_LABEL: retval = sav_skip_value_label_record(ctx); if (retval != READSTAT_OK) goto cleanup; break; case SAV_RECORD_TYPE_DOCUMENT: retval = sav_skip_document_record(ctx); if (retval != READSTAT_OK) goto cleanup; break; case SAV_RECORD_TYPE_DICT_TERMINATION: done = 1; break; case SAV_RECORD_TYPE_HAS_DATA: if (io->read(extra_info, sizeof(extra_info), io->io_ctx) < sizeof(extra_info)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) { for (i=0; i<3; i++) extra_info[i] = byteswap4(extra_info[i]); } uint32_t subtype = extra_info[0]; size_t size = extra_info[1]; size_t count = extra_info[2]; data_len = size * count; if (subtype == SAV_RECORD_SUBTYPE_INTEGER_INFO) { if (data_len > sizeof(data_buf)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (io->read(data_buf, data_len, io->io_ctx) < data_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } retval = sav_parse_machine_integer_info_record(data_buf, data_len, ctx); if (retval != READSTAT_OK) goto cleanup; } else { if (io->seek(data_len, READSTAT_SEEK_CUR, io->io_ctx) == -1) { 
retval = READSTAT_ERROR_SEEK; goto cleanup; } } break; default: retval = READSTAT_ERROR_PARSE; goto cleanup; break; } if (done) break; } cleanup: return retval; } static readstat_error_t sav_parse_records_pass2(sav_ctx_t *ctx) { void *data_buf = NULL; size_t data_buf_capacity = 4096; readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; if ((data_buf = readstat_malloc(data_buf_capacity)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } while (1) { uint32_t rec_type; uint32_t extra_info[3]; size_t data_len = 0; int i; int done = 0; if (io->read(&rec_type, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) { rec_type = byteswap4(rec_type); } switch (rec_type) { case SAV_RECORD_TYPE_VARIABLE: if ((retval = sav_read_variable_record(ctx)) != READSTAT_OK) goto cleanup; break; case SAV_RECORD_TYPE_VALUE_LABEL: if ((retval = sav_read_value_label_record(ctx)) != READSTAT_OK) goto cleanup; break; case SAV_RECORD_TYPE_DOCUMENT: if ((retval = sav_read_document_record(ctx)) != READSTAT_OK) goto cleanup; break; case SAV_RECORD_TYPE_DICT_TERMINATION: if ((retval = sav_read_dictionary_termination_record(ctx)) != READSTAT_OK) goto cleanup; done = 1; break; case SAV_RECORD_TYPE_HAS_DATA: if (io->read(extra_info, sizeof(extra_info), io->io_ctx) < sizeof(extra_info)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (ctx->bswap) { for (i=0; i<3; i++) extra_info[i] = byteswap4(extra_info[i]); } uint32_t subtype = extra_info[0]; size_t size = extra_info[1]; size_t count = extra_info[2]; data_len = size * count; if (data_buf_capacity < data_len) { if ((data_buf = readstat_realloc(data_buf, data_buf_capacity = data_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } } if (data_len == 0 || io->read(data_buf, data_len, io->io_ctx) < data_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } switch (subtype) { case SAV_RECORD_SUBTYPE_INTEGER_INFO: /* parsed in pass 1 */ break; case 
SAV_RECORD_SUBTYPE_FP_INFO: retval = sav_parse_machine_floating_point_record(data_buf, size, count, ctx); if (retval != READSTAT_OK) goto cleanup; break; case SAV_RECORD_SUBTYPE_VAR_DISPLAY: retval = sav_store_variable_display_parameter_record(data_buf, size, count, ctx); if (retval != READSTAT_OK) goto cleanup; break; case SAV_RECORD_SUBTYPE_LONG_VAR_NAME: retval = sav_parse_long_variable_names_record(data_buf, count, ctx); if (retval != READSTAT_OK) goto cleanup; break; case SAV_RECORD_SUBTYPE_VERY_LONG_STR: retval = sav_parse_very_long_string_record(data_buf, count, ctx); if (retval != READSTAT_OK) goto cleanup; break; case SAV_RECORD_SUBTYPE_LONG_STRING_VALUE_LABELS: retval = sav_parse_long_string_value_labels_record(data_buf, size, count, ctx); if (retval != READSTAT_OK) goto cleanup; break; case SAV_RECORD_SUBTYPE_LONG_STRING_MISSING_VALUES: retval = sav_parse_long_string_missing_values_record(data_buf, size, count, ctx); if (retval != READSTAT_OK) goto cleanup; break; default: /* misc. 
info */ break; } break; default: retval = READSTAT_ERROR_PARSE; goto cleanup; break; } if (done) break; } cleanup: if (data_buf) free(data_buf); return retval; } static readstat_error_t sav_set_n_segments_and_var_count(sav_ctx_t *ctx) { int i; ctx->var_count = 0; for (i=0; ivar_index;) { spss_varinfo_t *info = ctx->varinfo[i]; if (info->string_length > VERY_LONG_STRING_MAX_LENGTH) return READSTAT_ERROR_PARSE; if (info->string_length) { info->n_segments = (info->string_length + 251) / 252; } info->index = ctx->var_count++; i += info->n_segments; } ctx->variables = readstat_calloc(ctx->var_count, sizeof(readstat_variable_t *)); return READSTAT_OK; } static readstat_error_t sav_handle_variables(sav_ctx_t *ctx) { int i; int index_after_skipping = 0; readstat_error_t retval = READSTAT_OK; if (!ctx->handle.variable) return retval; for (i=0; ivar_index;) { char label_name_buf[256]; spss_varinfo_t *info = ctx->varinfo[i]; ctx->variables[info->index] = spss_init_variable_for_info(info, index_after_skipping, ctx->converter); snprintf(label_name_buf, sizeof(label_name_buf), SAV_LABEL_NAME_PREFIX "%d", info->labels_index); int cb_retval = ctx->handle.variable(info->index, ctx->variables[info->index], info->labels_index == -1 ? 
NULL : label_name_buf, ctx->user_ctx); if (cb_retval == READSTAT_HANDLER_ABORT) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } if (cb_retval == READSTAT_HANDLER_SKIP_VARIABLE) { ctx->variables[info->index]->skip = 1; } else { index_after_skipping++; } i += info->n_segments; } cleanup: return retval; } static readstat_error_t sav_handle_fweight(sav_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int i; if (ctx->handle.fweight && ctx->fweight_index >= 0) { for (i=0; ivar_index;) { spss_varinfo_t *info = ctx->varinfo[i]; if (info->offset == ctx->fweight_index - 1) { if (ctx->handle.fweight(ctx->variables[info->index], ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } break; } i += info->n_segments; } } cleanup: return retval; } readstat_error_t sav_parse_timestamp(sav_ctx_t *ctx, sav_file_header_record_t *header) { readstat_error_t retval = READSTAT_OK; struct tm timestamp = { .tm_isdst = -1 }; if ((retval = sav_parse_time(header->creation_time, sizeof(header->creation_time), ×tamp, ctx->handle.error, ctx->user_ctx)) != READSTAT_OK) goto cleanup; if ((retval = sav_parse_date(header->creation_date, sizeof(header->creation_date), ×tamp, ctx->handle.error, ctx->user_ctx)) != READSTAT_OK) goto cleanup; ctx->timestamp = mktime(×tamp); cleanup: return retval; } readstat_error_t readstat_parse_sav(readstat_parser_t *parser, const char *path, void *user_ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = parser->io; sav_file_header_record_t header; sav_ctx_t *ctx = NULL; size_t file_size = 0; if (io->open(path, io->io_ctx) == -1) { return READSTAT_ERROR_OPEN; } file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx); if (file_size == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->read(&header, sizeof(sav_file_header_record_t), io->io_ctx) < sizeof(sav_file_header_record_t)) { retval = 
READSTAT_ERROR_READ; goto cleanup; } ctx = sav_ctx_init(&header, io); if (ctx == NULL) { retval = READSTAT_ERROR_PARSE; goto cleanup; } ctx->handle = parser->handlers; ctx->input_encoding = parser->input_encoding; ctx->output_encoding = parser->output_encoding; ctx->user_ctx = user_ctx; ctx->file_size = file_size; if (parser->row_offset > 0) ctx->row_offset = parser->row_offset; if (ctx->record_count >= 0) { int record_count_after_skipping = ctx->record_count - ctx->row_offset; if (record_count_after_skipping < 0) { record_count_after_skipping = 0; ctx->row_offset = ctx->record_count; } ctx->row_limit = record_count_after_skipping; if (parser->row_limit > 0 && parser->row_limit < record_count_after_skipping) ctx->row_limit = parser->row_limit; } else if (parser->row_limit > 0) { ctx->row_limit = parser->row_limit; } if ((retval = sav_parse_timestamp(ctx, &header)) != READSTAT_OK) goto cleanup; if ((retval = sav_parse_records_pass1(ctx)) != READSTAT_OK) goto cleanup; if (io->seek(sizeof(sav_file_header_record_t), READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if ((retval = sav_update_progress(ctx)) != READSTAT_OK) goto cleanup; if ((retval = sav_parse_records_pass2(ctx)) != READSTAT_OK) goto cleanup; if ((retval = sav_set_n_segments_and_var_count(ctx)) != READSTAT_OK) goto cleanup; if (ctx->var_count == 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (ctx->handle.metadata) { readstat_metadata_t metadata = { .row_count = ctx->record_count < 0 ? 
-1 : ctx->row_limit, .var_count = ctx->var_count, .file_encoding = ctx->input_encoding, .file_format_version = ctx->format_version, .creation_time = ctx->timestamp, .modified_time = ctx->timestamp, .compression = ctx->compression, .endianness = ctx->endianness }; if ((retval = readstat_convert(ctx->file_label, sizeof(ctx->file_label), header.file_label, sizeof(header.file_label), ctx->converter)) != READSTAT_OK) goto cleanup; metadata.file_label = ctx->file_label; if (ctx->handle.metadata(&metadata, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } if ((retval = sav_parse_variable_display_parameter_record(ctx)) != READSTAT_OK) goto cleanup; if ((retval = sav_handle_variables(ctx)) != READSTAT_OK) goto cleanup; if ((retval = sav_handle_fweight(ctx)) != READSTAT_OK) goto cleanup; if (ctx->handle.value) { retval = sav_read_data(ctx); } cleanup: io->close(io->io_ctx); if (ctx) sav_ctx_free(ctx); return retval; } ReadStat-1.1.7/src/spss/readstat_sav_write.c000066400000000000000000001453101410722155500210520ustar00rootroot00000000000000 #include #include #include #include #include #include #include #include #include #include "../readstat.h" #include "../readstat_iconv.h" #include "../readstat_bits.h" #include "../readstat_malloc.h" #include "../readstat_writer.h" #include "../CKHashTable.h" #include "readstat_sav.h" #include "readstat_sav_compress.h" #include "readstat_spss_parse.h" #if HAVE_ZLIB #include #include "readstat_zsav_compress.h" #include "readstat_zsav_write.h" #endif #define MAX_STRING_SIZE 255 #define MAX_LABEL_SIZE 256 #define MAX_VALUE_LABEL_SIZE 120 typedef struct sav_varnames_s { char shortname[9]; char stem[6]; } sav_varnames_t; static long readstat_label_set_number_short_variables(readstat_label_set_t *r_label_set) { long count = 0; int j; for (j=0; jvariables_count; j++) { readstat_variable_t *r_variable = readstat_get_label_set_variable(r_label_set, j); if (r_variable->storage_width <= 8) { count++; } } 
return count; } static int readstat_label_set_needs_short_value_labels_record(readstat_label_set_t *r_label_set) { return readstat_label_set_number_short_variables(r_label_set) > 0; } static int readstat_label_set_needs_long_value_labels_record(readstat_label_set_t *r_label_set) { return readstat_label_set_number_short_variables(r_label_set) < r_label_set->variables_count; } static int32_t sav_encode_format(spss_format_t *spss_format) { uint8_t width = spss_format->width > 0xff ? 0xff : spss_format->width; return ((spss_format->type << 16) | (width << 8) | spss_format->decimal_places); } static readstat_error_t sav_encode_base_variable_format(readstat_variable_t *r_variable, int32_t *out_code) { spss_format_t spss_format; readstat_error_t retval = spss_format_for_variable(r_variable, &spss_format); if (retval == READSTAT_OK && out_code) *out_code = sav_encode_format(&spss_format); return retval; } static readstat_error_t sav_encode_ghost_variable_format(readstat_variable_t *r_variable, size_t user_width, int32_t *out_code) { spss_format_t spss_format; readstat_error_t retval = spss_format_for_variable(r_variable, &spss_format); spss_format.width = user_width; if (retval == READSTAT_OK && out_code) *out_code = sav_encode_format(&spss_format); return retval; } static size_t sav_format_variable_name(char *output, size_t output_len, sav_varnames_t *varnames) { snprintf(output, output_len, "%s", varnames->shortname); return strlen(output); } static size_t sav_format_ghost_variable_name(char *output, size_t output_len, sav_varnames_t *varnames, unsigned int segment) { snprintf(output, output_len, "%s", varnames->stem); size_t len = strlen(output); int letter = segment % 36; if (letter < 10) { output[len++] = '0' + letter; } else { output[len++] = 'A' + (letter - 10); } return len; } static int sav_variable_segments(readstat_type_t type, size_t user_width) { if (type == READSTAT_TYPE_STRING && user_width > MAX_STRING_SIZE) { return (user_width + 251) / 252; } return 1; } 
/*
 * Fill in and write the file header record.
 *
 * The record type is "$FL2", or "$FL3" when binary compression is selected.
 * The product name and file label fields are space-padded. The creation date
 * and time are formatted from writer->timestamp via localtime().
 *
 * Returns READSTAT_ERROR_BAD_TIMESTAMP_VALUE if localtime() fails, otherwise
 * the result of writing the header.
 */
static readstat_error_t sav_emit_header(readstat_writer_t *writer) {
    /* There are portability issues with strftime so hack something up */
    static const char month_names[12][4] = {
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
    };
    static const char product[] = "@(#) SPSS DATA FILE - " READSTAT_PRODUCT_URL;

    sav_file_header_record_t header = { { 0 } };
    char date_text[sizeof(header.creation_date)+1] = { 0 };
    char time_text[sizeof(header.creation_time)+1] = { 0 };
    time_t stamp = writer->timestamp;
    struct tm *parts = localtime(&stamp);
    int32_t weight_index = 0;
    size_t label_len;

    if (parts == NULL)
        return READSTAT_ERROR_BAD_TIMESTAMP_VALUE;

    /* Record type and compression code */
    memcpy(header.rec_type, "$FL2", sizeof("$FL2")-1);
    switch (writer->compression) {
        case READSTAT_COMPRESS_ROWS:
            header.compression = 1;
            break;
        case READSTAT_COMPRESS_BINARY:
            header.compression = 2;
            header.rec_type[3] = '3';
            break;
        default:
            break;
    }

    /* Product name, padded with spaces */
    memset(header.prod_name, ' ', sizeof(header.prod_name));
    memcpy(header.prod_name, product, sizeof(product)-1);

    header.layout_code = 2;
    header.nominal_case_size = writer->row_len / 8;
    header.ncases = writer->row_count;
    header.bias = 100.0;

    /* The weight index is 1-based, counted in 8-byte units; 0 means none */
    if (writer->fweight_variable)
        weight_index = 1 + writer->fweight_variable->offset / 8;
    header.weight_index = weight_index;

    /* Creation date and time */
    snprintf(date_text, sizeof(date_text), "%02d %3.3s %02d",
            (unsigned int)parts->tm_mday % 100,
            month_names[parts->tm_mon],
            (unsigned int)parts->tm_year % 100);
    memcpy(header.creation_date, date_text, sizeof(header.creation_date));

    snprintf(time_text, sizeof(time_text), "%02d:%02d:%02d",
            (unsigned int)parts->tm_hour % 100,
            (unsigned int)parts->tm_min % 100,
            (unsigned int)parts->tm_sec % 100);
    memcpy(header.creation_time, time_text, sizeof(header.creation_time));

    /* File label, truncated to the field width and padded with spaces */
    memset(header.file_label, ' ', sizeof(header.file_label));
    label_len = strlen(writer->file_label);
    if (label_len > sizeof(header.file_label))
        label_len = sizeof(header.file_label);
    if (label_len > 0)
        memcpy(header.file_label, writer->file_label, label_len);

    return readstat_write_bytes(writer, &header, sizeof(header));
}
MAX_STRING_SIZE : r_variable->user_width; } variable.has_var_label = (r_variable->label[0] != '\0'); retval = sav_n_missing_values(&variable.n_missing_values, r_variable); if (retval != READSTAT_OK) goto cleanup; retval = sav_encode_base_variable_format(r_variable, &variable.print); if (retval != READSTAT_OK) goto cleanup; variable.write = variable.print; memset(variable.name, ' ', sizeof(variable.name)); if (name_data_len > 0 && name_data_len <= sizeof(variable.name)) memcpy(variable.name, name_data, name_data_len); retval = readstat_write_bytes(writer, &variable, sizeof(variable)); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_variable_label(writer, r_variable); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_variable_missing_values(writer, r_variable); if (retval != READSTAT_OK) goto cleanup; int extra_fields = r_variable->storage_width / 8 - 1; if (extra_fields > 31) extra_fields = 31; retval = sav_emit_blank_variable_records(writer, extra_fields); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t sav_emit_ghost_variable_record(readstat_writer_t *writer, readstat_variable_t *r_variable, sav_varnames_t *varnames, int segment, size_t user_width) { readstat_error_t retval = READSTAT_OK; int32_t rec_type = SAV_RECORD_TYPE_VARIABLE; sav_variable_record_t variable = { 0 }; char name_data[9]; size_t name_len = sav_format_ghost_variable_name(name_data, sizeof(name_data), varnames, segment); retval = readstat_write_bytes(writer, &rec_type, sizeof(rec_type)); if (retval != READSTAT_OK) goto cleanup; variable.type = user_width; retval = sav_encode_ghost_variable_format(r_variable, user_width, &variable.print); if (retval != READSTAT_OK) goto cleanup; variable.write = variable.print; memset(variable.name, ' ', sizeof(variable.name)); if (name_len > 0 && name_len <= sizeof(variable.name)) memcpy(variable.name, name_data, name_len); retval = readstat_write_bytes(writer, &variable, sizeof(variable)); if (retval 
!= READSTAT_OK) goto cleanup; int extra_fields = (user_width + 7) / 8 - 1; if (extra_fields > 31) extra_fields = 31; retval = sav_emit_blank_variable_records(writer, extra_fields); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t sav_emit_full_variable_record(readstat_writer_t *writer, readstat_variable_t *r_variable, sav_varnames_t *varnames) { readstat_error_t retval = READSTAT_OK; retval = sav_emit_base_variable_record(writer, r_variable, varnames); if (retval != READSTAT_OK) goto cleanup; int n_segments = sav_variable_segments(r_variable->type, r_variable->user_width); int i; for (i=1; iuser_width - (n_segments - 1) * 252); } retval = sav_emit_ghost_variable_record(writer, r_variable, varnames, i, storage_size); if (retval != READSTAT_OK) goto cleanup; } cleanup: return retval; } static readstat_error_t sav_emit_variable_records(readstat_writer_t *writer, sav_varnames_t *varnames) { readstat_error_t retval = READSTAT_OK; int i; for (i=0; ivariables_count; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); retval = sav_emit_full_variable_record(writer, r_variable, &varnames[i]); if (retval != READSTAT_OK) goto cleanup; } cleanup: return retval; } static readstat_error_t sav_emit_value_label_records(readstat_writer_t *writer) { readstat_error_t retval = READSTAT_OK; int i, j; for (i=0; ilabel_sets_count; i++) { readstat_label_set_t *r_label_set = readstat_get_label_set(writer, i); if (!readstat_label_set_needs_short_value_labels_record(r_label_set)) continue; readstat_type_t user_type = r_label_set->type; int32_t label_count = r_label_set->value_labels_count; int32_t rec_type = 0; if (label_count) { rec_type = SAV_RECORD_TYPE_VALUE_LABEL; retval = readstat_write_bytes(writer, &rec_type, sizeof(rec_type)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &label_count, sizeof(label_count)); if (retval != READSTAT_OK) goto cleanup; for (j=0; jstring_key_len; if (key_len > 
sizeof(value)) key_len = sizeof(value); memset(value, ' ', sizeof(value)); memcpy(value, r_value_label->string_key, key_len); } else if (user_type == READSTAT_TYPE_DOUBLE) { double num_val = r_value_label->double_key; memcpy(value, &num_val, sizeof(double)); } else if (user_type == READSTAT_TYPE_INT32) { double num_val = r_value_label->int32_key; memcpy(value, &num_val, sizeof(double)); } retval = readstat_write_bytes(writer, value, sizeof(value)); const char *label_data = r_value_label->label; uint8_t label_len = MAX_VALUE_LABEL_SIZE; if (label_len > r_value_label->label_len) label_len = r_value_label->label_len; retval = readstat_write_bytes(writer, &label_len, sizeof(label_len)); if (retval != READSTAT_OK) goto cleanup; char label[MAX_VALUE_LABEL_SIZE+8]; memset(label, ' ', sizeof(label)); memcpy(label, label_data, label_len); retval = readstat_write_bytes(writer, label, (label_len + sizeof(label_len) + 7) / 8 * 8 - sizeof(label_len)); if (retval != READSTAT_OK) goto cleanup; } rec_type = SAV_RECORD_TYPE_VALUE_LABEL_VARIABLES; int32_t var_count = readstat_label_set_number_short_variables(r_label_set); retval = readstat_write_bytes(writer, &rec_type, sizeof(rec_type)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &var_count, sizeof(var_count)); if (retval != READSTAT_OK) goto cleanup; for (j=0; jvariables_count; j++) { readstat_variable_t *r_variable = readstat_get_label_set_variable(r_label_set, j); if (r_variable->storage_width > 8) continue; int32_t dictionary_index = 1 + r_variable->offset / 8; retval = readstat_write_bytes(writer, &dictionary_index, sizeof(dictionary_index)); if (retval != READSTAT_OK) goto cleanup; } } } cleanup: return retval; } static readstat_error_t sav_emit_document_record(readstat_writer_t *writer) { readstat_error_t retval = READSTAT_OK; int32_t rec_type = SAV_RECORD_TYPE_DOCUMENT; int32_t n_lines = writer->notes_count; if (n_lines == 0) goto cleanup; retval = readstat_write_bytes(writer, &rec_type, 
sizeof(rec_type)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &n_lines, sizeof(n_lines)); if (retval != READSTAT_OK) goto cleanup; int i; for (i=0; inotes_count; i++) { size_t len = strlen(writer->notes[i]); if (len > SPSS_DOC_LINE_SIZE) { retval = READSTAT_ERROR_NOTE_IS_TOO_LONG; goto cleanup; } retval = readstat_write_bytes(writer, writer->notes[i], len); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_spaces(writer, SPSS_DOC_LINE_SIZE - len); if (retval != READSTAT_OK) goto cleanup; } cleanup: return retval; } static readstat_error_t sav_emit_integer_info_record(readstat_writer_t *writer) { readstat_error_t retval = READSTAT_OK; sav_info_record_t info_header = { .rec_type = SAV_RECORD_TYPE_HAS_DATA, .subtype = SAV_RECORD_SUBTYPE_INTEGER_INFO, .size = 4, .count = 8 }; sav_machine_integer_info_record_t machine_info = { .version_major = 20, .version_minor = 0, .version_revision = 0, .machine_code = -1, .floating_point_rep = SAV_FLOATING_POINT_REP_IEEE, .compression_code = 1, .endianness = machine_is_little_endian() ? 
SAV_ENDIANNESS_LITTLE : SAV_ENDIANNESS_BIG, .character_code = 65001 // UTF-8 }; retval = readstat_write_bytes(writer, &info_header, sizeof(info_header)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &machine_info, sizeof(machine_info)); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t sav_emit_floating_point_info_record(readstat_writer_t *writer) { readstat_error_t retval = READSTAT_OK; sav_info_record_t info_header = { .rec_type = SAV_RECORD_TYPE_HAS_DATA, .subtype = SAV_RECORD_SUBTYPE_FP_INFO, .size = 8, .count = 3 }; retval = readstat_write_bytes(writer, &info_header, sizeof(info_header)); if (retval != READSTAT_OK) goto cleanup; sav_machine_floating_point_info_record_t fp_info = {0}; fp_info.sysmis = SAV_MISSING_DOUBLE; fp_info.highest = SAV_HIGHEST_DOUBLE; fp_info.lowest = SAV_LOWEST_DOUBLE; retval = readstat_write_bytes(writer, &fp_info, sizeof(fp_info)); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t sav_emit_variable_display_record(readstat_writer_t *writer) { readstat_error_t retval = READSTAT_OK; int i; sav_info_record_t info_header = { .rec_type = SAV_RECORD_TYPE_HAS_DATA, .subtype = SAV_RECORD_SUBTYPE_VAR_DISPLAY, .size = sizeof(int32_t) }; int total_segments = 0; for (i=0; ivariables_count; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); total_segments += sav_variable_segments(r_variable->type, r_variable->user_width); } info_header.count = 3 * total_segments; retval = readstat_write_bytes(writer, &info_header, sizeof(info_header)); if (retval != READSTAT_OK) goto cleanup; for (i=0; ivariables_count; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); readstat_measure_t measure = readstat_variable_get_measure(r_variable); int32_t sav_measure = spss_measure_from_readstat_measure(measure); int32_t sav_display_width = readstat_variable_get_display_width(r_variable); if (sav_display_width 
<= 0) sav_display_width = 8; readstat_alignment_t alignment = readstat_variable_get_alignment(r_variable); int32_t sav_alignment = spss_alignment_from_readstat_alignment(alignment); int n_segments = sav_variable_segments(r_variable->type, r_variable->user_width); while (n_segments--) { retval = readstat_write_bytes(writer, &sav_measure, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &sav_display_width, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &sav_alignment, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; } } cleanup: return retval; } static readstat_error_t sav_emit_long_var_name_record(readstat_writer_t *writer, sav_varnames_t *varnames) { readstat_error_t retval = READSTAT_OK; int i; sav_info_record_t info_header = { .rec_type = SAV_RECORD_TYPE_HAS_DATA, .subtype = SAV_RECORD_SUBTYPE_LONG_VAR_NAME, .size = 1, .count = 0 }; for (i=0; ivariables_count; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); char name_data[9]; size_t name_data_len = sav_format_variable_name(name_data, sizeof(name_data), &varnames[i]); const char *title_data = r_variable->name; size_t title_data_len = strlen(title_data); if (title_data_len > 0 && name_data_len > 0) { if (title_data_len > 64) title_data_len = 64; info_header.count += name_data_len; info_header.count += sizeof("=")-1; info_header.count += title_data_len; info_header.count += sizeof("\x09")-1; } } if (info_header.count > 0) { info_header.count--; /* no trailing 0x09 */ retval = readstat_write_bytes(writer, &info_header, sizeof(info_header)); if (retval != READSTAT_OK) goto cleanup; int is_first = 1; for (i=0; ivariables_count; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); char name_data[9]; sav_format_variable_name(name_data, sizeof(name_data), &varnames[i]); const char *title_data = r_variable->name; size_t title_data_len = strlen(title_data); char 
kv_separator = '='; char tuple_separator = 0x09; if (title_data_len > 0) { if (title_data_len > 64) title_data_len = 64; if (!is_first) { retval = readstat_write_bytes(writer, &tuple_separator, sizeof(tuple_separator)); if (retval != READSTAT_OK) goto cleanup; } retval = readstat_write_string(writer, name_data); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &kv_separator, sizeof(kv_separator)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, title_data, title_data_len); if (retval != READSTAT_OK) goto cleanup; is_first = 0; } } } cleanup: return retval; } static readstat_error_t sav_emit_very_long_string_record(readstat_writer_t *writer, sav_varnames_t *varnames) { readstat_error_t retval = READSTAT_OK; int i; char tuple_separator[2] = { 0x00, 0x09 }; sav_info_record_t info_header = { .rec_type = SAV_RECORD_TYPE_HAS_DATA, .subtype = SAV_RECORD_SUBTYPE_VERY_LONG_STR, .size = 1, .count = 0 }; for (i=0; ivariables_count; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); if (r_variable->user_width <= MAX_STRING_SIZE) continue; char name_data[9]; sav_format_variable_name(name_data, sizeof(name_data), &varnames[i]); char kv_data[8+1+5+1]; snprintf(kv_data, sizeof(kv_data), "%.8s=%d", name_data, (unsigned int)r_variable->user_width % 100000); info_header.count += strlen(kv_data) + sizeof(tuple_separator); } if (info_header.count == 0) return READSTAT_OK; retval = readstat_write_bytes(writer, &info_header, sizeof(info_header)); if (retval != READSTAT_OK) goto cleanup; for (i=0; ivariables_count; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); if (r_variable->user_width <= MAX_STRING_SIZE) continue; char name_data[9]; sav_format_variable_name(name_data, sizeof(name_data), &varnames[i]); char kv_data[8+1+5+1]; snprintf(kv_data, sizeof(kv_data), "%.8s=%d", name_data, (unsigned int)r_variable->user_width % 100000); retval = readstat_write_string(writer, kv_data); 
if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, tuple_separator, sizeof(tuple_separator)); if (retval != READSTAT_OK) goto cleanup; } cleanup: return retval; } static readstat_error_t sav_emit_long_string_value_labels_record(readstat_writer_t *writer) { readstat_error_t retval = READSTAT_OK; int i, j, k; char *space_buffer = NULL; sav_info_record_t info_header = { .rec_type = SAV_RECORD_TYPE_HAS_DATA, .subtype = SAV_RECORD_SUBTYPE_LONG_STRING_VALUE_LABELS, .size = 1, .count = 0 }; for (i=0; ilabel_sets_count; i++) { readstat_label_set_t *r_label_set = readstat_get_label_set(writer, i); if (!readstat_label_set_needs_long_value_labels_record(r_label_set)) continue; int32_t label_count = r_label_set->value_labels_count; int32_t var_count = r_label_set->variables_count; for (k=0; kname); int32_t storage_width = readstat_variable_get_storage_width(r_variable); if (storage_width <= 8) continue; info_header.count += sizeof(int32_t); // name length info_header.count += name_len; info_header.count += sizeof(int32_t); // variable width info_header.count += sizeof(int32_t); // label count for (j=0; jlabel_len; if (label_len > MAX_VALUE_LABEL_SIZE) label_len = MAX_VALUE_LABEL_SIZE; info_header.count += sizeof(int32_t); // value length info_header.count += storage_width; info_header.count += sizeof(int32_t); // label length info_header.count += label_len; } } } if (info_header.count == 0) goto cleanup; retval = readstat_write_bytes(writer, &info_header, sizeof(info_header)); if (retval != READSTAT_OK) goto cleanup; for (i=0; ilabel_sets_count; i++) { readstat_label_set_t *r_label_set = readstat_get_label_set(writer, i); if (!readstat_label_set_needs_long_value_labels_record(r_label_set)) continue; int32_t label_count = r_label_set->value_labels_count; int32_t var_count = r_label_set->variables_count; for (k=0; kname); int32_t storage_width = readstat_variable_get_storage_width(r_variable); if (storage_width <= 8) continue; space_buffer = 
realloc(space_buffer, storage_width); memset(space_buffer, ' ', storage_width); retval = readstat_write_bytes(writer, &name_len, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, r_variable->name, name_len); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &storage_width, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &label_count, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; for (j=0; jstring_key_len; int32_t label_len = r_value_label->label_len; if (label_len > MAX_VALUE_LABEL_SIZE) label_len = MAX_VALUE_LABEL_SIZE; retval = readstat_write_bytes(writer, &storage_width, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, r_value_label->string_key, value_len); if (retval != READSTAT_OK) goto cleanup; if (value_len < storage_width) { retval = readstat_write_bytes(writer, space_buffer, storage_width - value_len); if (retval != READSTAT_OK) goto cleanup; } retval = readstat_write_bytes(writer, &label_len, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, r_value_label->label, label_len); if (retval != READSTAT_OK) goto cleanup; } } } cleanup: if (space_buffer) free(space_buffer); return retval; } static readstat_error_t sav_emit_long_string_missing_values_record(readstat_writer_t *writer) { readstat_error_t retval = READSTAT_OK; int j, k; sav_info_record_t info_header = { .rec_type = SAV_RECORD_TYPE_HAS_DATA, .subtype = SAV_RECORD_SUBTYPE_LONG_STRING_MISSING_VALUES, .size = 1, .count = 0 }; int32_t var_count = writer->variables_count; for (k=0; kname); int32_t storage_width = readstat_variable_get_storage_width(r_variable); if (storage_width <= 8) continue; int n_missing_values = 0; for (j=0; jname); int8_t n_missing_values = 0; int32_t storage_width = readstat_variable_get_storage_width(r_variable); if (storage_width <= 8) continue; for 
(j=0; jname, name_len); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &n_missing_values, sizeof(int8_t)); if (retval != READSTAT_OK) goto cleanup; uint32_t value_len = 8; retval = readstat_write_bytes(writer, &value_len, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; for (j=0; jrow_count; retval = readstat_write_bytes(writer, &info_header, sizeof(sav_info_record_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &one, sizeof(uint64_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &ncases, sizeof(uint64_t)); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t sav_emit_termination_record(readstat_writer_t *writer) { sav_dictionary_termination_record_t termination_record = { .rec_type = SAV_RECORD_TYPE_DICT_TERMINATION }; return readstat_write_bytes(writer, &termination_record, sizeof(termination_record)); } static readstat_error_t sav_write_int8(void *row, const readstat_variable_t *var, int8_t value) { double dval = value; memcpy(row, &dval, sizeof(double)); return READSTAT_OK; } static readstat_error_t sav_write_int16(void *row, const readstat_variable_t *var, int16_t value) { double dval = value; memcpy(row, &dval, sizeof(double)); return READSTAT_OK; } static readstat_error_t sav_write_int32(void *row, const readstat_variable_t *var, int32_t value) { double dval = value; memcpy(row, &dval, sizeof(double)); return READSTAT_OK; } static readstat_error_t sav_write_float(void *row, const readstat_variable_t *var, float value) { double dval = value; memcpy(row, &dval, sizeof(double)); return READSTAT_OK; } static readstat_error_t sav_write_double(void *row, const readstat_variable_t *var, double value) { double dval = value; memcpy(row, &dval, sizeof(double)); return READSTAT_OK; } static readstat_error_t sav_write_string(void *row, const readstat_variable_t *var, const char *value) { memset(row, ' ', 
var->storage_width); if (value != NULL && value[0] != '\0') { size_t value_len = strlen(value); off_t row_offset = 0; off_t val_offset = 0; unsigned char *row_bytes = (unsigned char *)row; if (value_len > var->storage_width) return READSTAT_ERROR_STRING_VALUE_IS_TOO_LONG; while (value_len - val_offset > 255) { memcpy(&row_bytes[row_offset], &value[val_offset], 255); row_offset += 256; val_offset += 255; } memcpy(&row_bytes[row_offset], &value[val_offset], value_len - val_offset); } return READSTAT_OK; } static readstat_error_t sav_write_missing_string(void *row, const readstat_variable_t *var) { memset(row, ' ', var->storage_width); return READSTAT_OK; } static readstat_error_t sav_write_missing_number(void *row, const readstat_variable_t *var) { uint64_t missing_val = SAV_MISSING_DOUBLE; memcpy(row, &missing_val, sizeof(uint64_t)); return READSTAT_OK; } static size_t sav_variable_width(readstat_type_t type, size_t user_width) { if (type == READSTAT_TYPE_STRING) { if (user_width > MAX_STRING_SIZE) { size_t n_segments = sav_variable_segments(type, user_width); size_t last_segment_width = ((user_width - (n_segments - 1) * 252) + 7)/8*8; return (n_segments-1)*256 + last_segment_width; } if (user_width == 0) { return 8; } return (user_width + 7) / 8 * 8; } return 8; } static readstat_error_t sav_validate_name_chars(const char *name, int unicode) { /* TODO check Unicode class */ int j; for (j=0; name[j]; j++) { if (name[j] == ' ') return READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER; if ((name[j] > 0 || !unicode) && name[j] != '@' && name[j] != '.' 
&& name[j] != '_' && name[j] != '$' && name[j] != '#' && !(name[j] >= 'a' && name[j] <= 'z') && !(name[j] >= 'A' && name[j] <= 'Z') && !(name[j] >= '0' && name[j] <= '9')) { return READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER; } } char first_char = name[0]; if ((first_char > 0 || !unicode) && first_char != '@' && !(first_char >= 'a' && first_char <= 'z') && !(first_char >= 'A' && first_char <= 'Z')) { return READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER; } return READSTAT_OK; } static readstat_error_t sav_validate_name_unreserved(const char *name) { if (strcmp(name, "ALL") == 0 || strcmp(name, "AND") == 0 || strcmp(name, "BY") == 0 || strcmp(name, "EQ") == 0 || strcmp(name, "GE") == 0 || strcmp(name, "GT") == 0 || strcmp(name, "GT") == 0 || strcmp(name, "LE") == 0 || strcmp(name, "LT") == 0 || strcmp(name, "NE") == 0 || strcmp(name, "NOT") == 0 || strcmp(name, "OR") == 0 || strcmp(name, "TO") == 0 || strcmp(name, "WITH") == 0) return READSTAT_ERROR_NAME_IS_RESERVED_WORD; return READSTAT_OK; } static readstat_error_t sav_validate_name_length(size_t name_len) { if (name_len > 64) return READSTAT_ERROR_NAME_IS_TOO_LONG; if (name_len == 0) return READSTAT_ERROR_NAME_IS_ZERO_LENGTH; return READSTAT_OK; } static readstat_error_t sav_variable_ok(const readstat_variable_t *variable) { readstat_error_t error = READSTAT_OK; error = sav_validate_name_length(strlen(variable->name)); if (error != READSTAT_OK) return error; error = sav_validate_name_unreserved(variable->name); if (error != READSTAT_OK) return error; return sav_validate_name_chars(variable->name, 1); } static sav_varnames_t *sav_varnames_init(readstat_writer_t *writer) { sav_varnames_t *varnames = calloc(writer->variables_count, sizeof(sav_varnames_t)); ck_hash_table_t *table = ck_hash_table_init(writer->variables_count, 8); int i, k; for (i=0; ivariables_count; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); const char *name = r_variable->name; char *shortname = 
varnames[i].shortname; char *stem = varnames[i].stem; snprintf(shortname, sizeof(varnames[0].shortname), "%.8s", name); for (k=0; shortname[k]; k++) { // upcase shortname[k] = toupper(shortname[k]); } if (ck_str_hash_lookup(shortname, table)) { snprintf(shortname, sizeof(varnames[0].shortname), "V%d_A", ((unsigned int)i+1)%100000); } ck_str_hash_insert(shortname, r_variable, table); if (r_variable->user_width <= MAX_STRING_SIZE) continue; snprintf(stem, sizeof(varnames[0].stem), "%.5s", shortname); // conflict resolution? } ck_hash_table_free(table); return varnames; } static readstat_error_t sav_begin_data(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; readstat_error_t retval = READSTAT_OK; if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; sav_varnames_t *varnames = sav_varnames_init(writer); retval = sav_emit_header(writer); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_variable_records(writer, varnames); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_value_label_records(writer); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_document_record(writer); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_integer_info_record(writer); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_floating_point_info_record(writer); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_variable_display_record(writer); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_long_var_name_record(writer, varnames); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_very_long_string_record(writer, varnames); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_long_string_value_labels_record(writer); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_long_string_missing_values_record(writer); if (retval != READSTAT_OK) goto cleanup; retval = sav_emit_number_of_cases_record(writer); if (retval != READSTAT_OK) goto cleanup; retval = 
sav_emit_termination_record(writer); if (retval != READSTAT_OK) goto cleanup; cleanup: free(varnames); if (retval == READSTAT_OK) { size_t row_bound = sav_compressed_row_bound(writer->row_len); if (writer->compression == READSTAT_COMPRESS_ROWS) { writer->module_ctx = readstat_malloc(row_bound); #if HAVE_ZLIB } else if (writer->compression == READSTAT_COMPRESS_BINARY) { writer->module_ctx = zsav_ctx_init(row_bound, writer->bytes_written); #endif } } return retval; } static readstat_error_t sav_write_compressed_row(void *writer_ctx, void *row, size_t len) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; unsigned char *output = writer->module_ctx; size_t output_offset = sav_compress_row(output, row, len, writer); return readstat_write_bytes(writer, output, output_offset); } static readstat_error_t sav_metadata_ok(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; if (writer->version == 2 && writer->compression == READSTAT_COMPRESS_BINARY) return READSTAT_ERROR_UNSUPPORTED_COMPRESSION; if (writer->version != 2 && writer->version != 3) return READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION; return READSTAT_OK; } readstat_error_t readstat_begin_writing_sav(readstat_writer_t *writer, void *user_ctx, long row_count) { writer->callbacks.metadata_ok = &sav_metadata_ok; writer->callbacks.variable_width = &sav_variable_width; writer->callbacks.variable_ok = &sav_variable_ok; writer->callbacks.write_int8 = &sav_write_int8; writer->callbacks.write_int16 = &sav_write_int16; writer->callbacks.write_int32 = &sav_write_int32; writer->callbacks.write_float = &sav_write_float; writer->callbacks.write_double = &sav_write_double; writer->callbacks.write_string = &sav_write_string; writer->callbacks.write_missing_string = &sav_write_missing_string; writer->callbacks.write_missing_number = &sav_write_missing_number; writer->callbacks.begin_data = &sav_begin_data; if (writer->version == 3) { writer->compression = READSTAT_COMPRESS_BINARY; } else 
if (writer->version == 0) { writer->version = (writer->compression == READSTAT_COMPRESS_BINARY) ? 3 : 2; } if (writer->compression == READSTAT_COMPRESS_ROWS) { writer->callbacks.write_row = &sav_write_compressed_row; writer->callbacks.module_ctx_free = &free; #if HAVE_ZLIB } else if (writer->compression == READSTAT_COMPRESS_BINARY) { writer->callbacks.write_row = &zsav_write_compressed_row; writer->callbacks.end_data = &zsav_end_data; writer->callbacks.module_ctx_free = (readstat_module_ctx_free_callback)&zsav_ctx_free; #endif } else if (writer->compression == READSTAT_COMPRESS_NONE) { /* void */ } else { return READSTAT_ERROR_UNSUPPORTED_COMPRESSION; } return readstat_begin_writing_file(writer, user_ctx, row_count); } ReadStat-1.1.7/src/spss/readstat_spss.c000066400000000000000000000215131410722155500200350ustar00rootroot00000000000000 #include #include #include "../readstat.h" #include "../readstat_iconv.h" #include "../readstat_convert.h" #include "readstat_spss.h" #include "readstat_spss_parse.h" static char spss_type_strings[][16] = { [SPSS_FORMAT_TYPE_A] = "A", [SPSS_FORMAT_TYPE_AHEX] = "AHEX", [SPSS_FORMAT_TYPE_COMMA] = "COMMA", [SPSS_FORMAT_TYPE_DOLLAR] = "DOLLAR", [SPSS_FORMAT_TYPE_F] = "F", [SPSS_FORMAT_TYPE_IB] = "IB", [SPSS_FORMAT_TYPE_PIBHEX] = "PIBHEX", [SPSS_FORMAT_TYPE_P] = "P", [SPSS_FORMAT_TYPE_PIB] = "PIB", [SPSS_FORMAT_TYPE_PK] = "PK", [SPSS_FORMAT_TYPE_RB] = "RB", [SPSS_FORMAT_TYPE_RBHEX] = "RBHEX", [SPSS_FORMAT_TYPE_Z] = "Z", [SPSS_FORMAT_TYPE_N] = "N", [SPSS_FORMAT_TYPE_E] = "E", [SPSS_FORMAT_TYPE_DATE] = "DATE", [SPSS_FORMAT_TYPE_TIME] = "TIME", [SPSS_FORMAT_TYPE_DATETIME] = "DATETIME", [SPSS_FORMAT_TYPE_ADATE] = "ADATE", [SPSS_FORMAT_TYPE_JDATE] = "JDATE", [SPSS_FORMAT_TYPE_DTIME] = "DTIME", [SPSS_FORMAT_TYPE_WKDAY] = "WKDAY", [SPSS_FORMAT_TYPE_MONTH] = "MONTH", [SPSS_FORMAT_TYPE_MOYR] = "MOYR", [SPSS_FORMAT_TYPE_QYR] = "QYR", [SPSS_FORMAT_TYPE_WKYR] = "WKYR", [SPSS_FORMAT_TYPE_PCT] = "PCT", [SPSS_FORMAT_TYPE_DOT] = "DOT", 
[SPSS_FORMAT_TYPE_CCA] = "CCA", [SPSS_FORMAT_TYPE_CCB] = "CCB", [SPSS_FORMAT_TYPE_CCC] = "CCC", [SPSS_FORMAT_TYPE_CCD] = "CCD", [SPSS_FORMAT_TYPE_CCE] = "CCE", [SPSS_FORMAT_TYPE_EDATE] = "EDATE", [SPSS_FORMAT_TYPE_SDATE] = "SDATE", [SPSS_FORMAT_TYPE_MTIME] = "MTIME", [SPSS_FORMAT_TYPE_YMDHMS] = "YMDHMS", }; int spss_format(char *buffer, size_t len, spss_format_t *format) { if (format->type < 0 || format->type >= sizeof(spss_type_strings)/sizeof(spss_type_strings[0]) || spss_type_strings[format->type][0] == '\0') { return 0; } char *string = spss_type_strings[format->type]; if (format->decimal_places || format->type == SPSS_FORMAT_TYPE_F) { snprintf(buffer, len, "%s%d.%d", string, format->width, format->decimal_places); } else if (format->width) { snprintf(buffer, len, "%s%d", string, format->width); } else { snprintf(buffer, len, "%s", string); } return 1; } int spss_varinfo_compare(const void *elem1, const void *elem2) { int offset = *(int *)elem1; const spss_varinfo_t *v = *(const spss_varinfo_t **)elem2; if (offset < v->offset) return -1; return (offset > v->offset); } void spss_varinfo_free(spss_varinfo_t *info) { if (info) { if (info->label) free(info->label); free(info); } } uint64_t spss_64bit_value(readstat_value_t value) { double dval = readstat_double_value(value); uint64_t special_val; memcpy(&special_val, &dval, sizeof(double)); if (isinf(dval)) { if (dval < 0.0) { special_val = SAV_LOWEST_DOUBLE; } else { special_val = SAV_HIGHEST_DOUBLE; } } else if (isnan(dval)) { special_val = SAV_MISSING_DOUBLE; } return special_val; } static readstat_value_t spss_boxed_double_value(double fp_value) { readstat_value_t value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = fp_value }, .is_system_missing = isnan(fp_value) }; return value; } static readstat_value_t spss_boxed_string_value(const char *string) { readstat_value_t value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = string } }; return value; } static readstat_value_t 
spss_boxed_missing_value(spss_varinfo_t *info, int i) { if (info->type == READSTAT_TYPE_DOUBLE) { return spss_boxed_double_value(info->missing_double_values[i]); } return spss_boxed_string_value(info->missing_string_values[i]); } readstat_missingness_t spss_missingness_for_info(spss_varinfo_t *info) { readstat_missingness_t missingness; memset(&missingness, '\0', sizeof(readstat_missingness_t)); if (info->missing_range) { missingness.missing_ranges_count++; missingness.missing_ranges[0] = spss_boxed_missing_value(info, 0); missingness.missing_ranges[1] = spss_boxed_missing_value(info, 1); if (info->n_missing_values == 3) { missingness.missing_ranges_count++; missingness.missing_ranges[2] = missingness.missing_ranges[3] = spss_boxed_missing_value(info, 2); } } else if (info->n_missing_values > 0) { missingness.missing_ranges_count = info->n_missing_values; int i=0; for (i=0; in_missing_values; i++) { missingness.missing_ranges[2*i] = missingness.missing_ranges[2*i+1] = spss_boxed_missing_value(info, i); } } return missingness; } readstat_variable_t *spss_init_variable_for_info(spss_varinfo_t *info, int index_after_skipping, iconv_t converter) { readstat_variable_t *variable = calloc(1, sizeof(readstat_variable_t)); variable->index = info->index; variable->index_after_skipping = index_after_skipping; variable->type = info->type; if (info->string_length) { variable->storage_width = info->string_length; } else { variable->storage_width = 8 * info->width; } if (info->longname[0]) { readstat_convert(variable->name, sizeof(variable->name), info->longname, sizeof(info->longname), converter); } else { readstat_convert(variable->name, sizeof(variable->name), info->name, sizeof(info->name), converter); } if (info->label) { snprintf(variable->label, sizeof(variable->label), "%s", info->label); } spss_format(variable->format, sizeof(variable->format), &info->print_format); variable->missingness = spss_missingness_for_info(info); variable->measure = info->measure; if 
(info->display_width) { variable->display_width = info->display_width; } else { variable->display_width = info->print_format.width; } return variable; } uint32_t spss_measure_from_readstat_measure(readstat_measure_t measure) { uint32_t sav_measure = SAV_MEASURE_UNKNOWN; if (measure == READSTAT_MEASURE_NOMINAL) { sav_measure = SAV_MEASURE_NOMINAL; } else if (measure == READSTAT_MEASURE_ORDINAL) { sav_measure = SAV_MEASURE_ORDINAL; } else if (measure == READSTAT_MEASURE_SCALE) { sav_measure = SAV_MEASURE_SCALE; } return sav_measure; } readstat_measure_t spss_measure_to_readstat_measure(uint32_t sav_measure) { if (sav_measure == SAV_MEASURE_NOMINAL) return READSTAT_MEASURE_NOMINAL; if (sav_measure == SAV_MEASURE_ORDINAL) return READSTAT_MEASURE_ORDINAL; if (sav_measure == SAV_MEASURE_SCALE) return READSTAT_MEASURE_SCALE; return READSTAT_MEASURE_UNKNOWN; } uint32_t spss_alignment_from_readstat_alignment(readstat_alignment_t alignment) { uint32_t sav_alignment = 0; if (alignment == READSTAT_ALIGNMENT_LEFT) { sav_alignment = SAV_ALIGNMENT_LEFT; } else if (alignment == READSTAT_ALIGNMENT_CENTER) { sav_alignment = SAV_ALIGNMENT_CENTER; } else if (alignment == READSTAT_ALIGNMENT_RIGHT) { sav_alignment = SAV_ALIGNMENT_RIGHT; } return sav_alignment; } readstat_alignment_t spss_alignment_to_readstat_alignment(uint32_t sav_alignment) { if (sav_alignment == SAV_ALIGNMENT_LEFT) return READSTAT_ALIGNMENT_LEFT; if (sav_alignment == SAV_ALIGNMENT_CENTER) return READSTAT_ALIGNMENT_CENTER; if (sav_alignment == SAV_ALIGNMENT_RIGHT) return READSTAT_ALIGNMENT_RIGHT; return READSTAT_ALIGNMENT_UNKNOWN; } readstat_error_t spss_format_for_variable(readstat_variable_t *r_variable, spss_format_t *spss_format) { readstat_error_t retval = READSTAT_OK; memset(spss_format, 0, sizeof(spss_format_t)); if (r_variable->type == READSTAT_TYPE_STRING) { spss_format->type = SPSS_FORMAT_TYPE_A; if (r_variable->display_width) { spss_format->width = r_variable->display_width; } else if 
(r_variable->user_width) { spss_format->width = r_variable->user_width; } else { spss_format->width = r_variable->storage_width; } } else { spss_format->type = SPSS_FORMAT_TYPE_F; if (r_variable->display_width) { spss_format->width = r_variable->display_width; } else { spss_format->width = 8; } if (r_variable->type == READSTAT_TYPE_DOUBLE || r_variable->type == READSTAT_TYPE_FLOAT) { spss_format->decimal_places = 2; } } if (r_variable->format[0]) { spss_format->decimal_places = 0; const char *fmt = r_variable->format; if (spss_parse_format(fmt, strlen(fmt), spss_format) != READSTAT_OK) { retval = READSTAT_ERROR_BAD_FORMAT_STRING; goto cleanup; } } cleanup: return retval; } ReadStat-1.1.7/src/spss/readstat_spss.h000066400000000000000000000076171410722155500200530ustar00rootroot00000000000000 #define SPSS_FORMAT_TYPE_A 1 #define SPSS_FORMAT_TYPE_AHEX 2 #define SPSS_FORMAT_TYPE_COMMA 3 #define SPSS_FORMAT_TYPE_DOLLAR 4 #define SPSS_FORMAT_TYPE_F 5 #define SPSS_FORMAT_TYPE_IB 6 #define SPSS_FORMAT_TYPE_PIBHEX 7 #define SPSS_FORMAT_TYPE_P 8 #define SPSS_FORMAT_TYPE_PIB 9 #define SPSS_FORMAT_TYPE_PK 10 #define SPSS_FORMAT_TYPE_RB 11 #define SPSS_FORMAT_TYPE_RBHEX 12 #define SPSS_FORMAT_TYPE_Z 15 #define SPSS_FORMAT_TYPE_N 16 #define SPSS_FORMAT_TYPE_E 17 #define SPSS_FORMAT_TYPE_DATE 20 #define SPSS_FORMAT_TYPE_TIME 21 #define SPSS_FORMAT_TYPE_DATETIME 22 #define SPSS_FORMAT_TYPE_ADATE 23 #define SPSS_FORMAT_TYPE_JDATE 24 #define SPSS_FORMAT_TYPE_DTIME 25 #define SPSS_FORMAT_TYPE_WKDAY 26 #define SPSS_FORMAT_TYPE_MONTH 27 #define SPSS_FORMAT_TYPE_MOYR 28 #define SPSS_FORMAT_TYPE_QYR 29 #define SPSS_FORMAT_TYPE_WKYR 30 #define SPSS_FORMAT_TYPE_PCT 31 #define SPSS_FORMAT_TYPE_DOT 32 #define SPSS_FORMAT_TYPE_CCA 33 #define SPSS_FORMAT_TYPE_CCB 34 #define SPSS_FORMAT_TYPE_CCC 35 #define SPSS_FORMAT_TYPE_CCD 36 #define SPSS_FORMAT_TYPE_CCE 37 #define SPSS_FORMAT_TYPE_EDATE 38 #define SPSS_FORMAT_TYPE_SDATE 39 #define SPSS_FORMAT_TYPE_MTIME 40 #define SPSS_FORMAT_TYPE_YMDHMS 
41 #define SPSS_DOC_LINE_SIZE 80 #define SAV_HIGHEST_DOUBLE 0x7FEFFFFFFFFFFFFFUL #define SAV_MISSING_DOUBLE 0xFFEFFFFFFFFFFFFFUL #define SAV_LOWEST_DOUBLE 0xFFEFFFFFFFFFFFFEUL #define SAV_MEASURE_UNKNOWN 0 #define SAV_MEASURE_NOMINAL 1 #define SAV_MEASURE_ORDINAL 2 #define SAV_MEASURE_SCALE 3 #define SAV_ALIGNMENT_LEFT 0 #define SAV_ALIGNMENT_RIGHT 1 #define SAV_ALIGNMENT_CENTER 2 #include typedef struct spss_format_s { int type; int width; int decimal_places; } spss_format_t; // The reason some fields are stored unconverted is that some versions of SPSS // store truncated UTF-8 in the fields, and also use the truncated strings for // internal logic (such as matching names). If we convert them too early, the // last character of a truncated string will be dropped, and some of the column // information won't be found (e.g. in the key=value long variable record). typedef struct spss_varinfo_s { readstat_type_t type; int labels_index; int index; int offset; int width; unsigned int string_length; spss_format_t print_format; spss_format_t write_format; int n_segments; int n_missing_values; int missing_range; double missing_double_values[3]; char missing_string_values[3][8*4+1]; // stored UTF-8 char name[8+1]; // stored UNCONVERTED char longname[64+1]; // stored UNCONVERTED char *label; // stored UTF-8 readstat_measure_t measure; readstat_alignment_t alignment; int display_width; } spss_varinfo_t; int spss_format(char *buffer, size_t len, spss_format_t *format); int spss_varinfo_compare(const void *elem1, const void *elem2); void spss_varinfo_free(spss_varinfo_t *info); readstat_missingness_t spss_missingness_for_info(spss_varinfo_t *info); readstat_variable_t *spss_init_variable_for_info(spss_varinfo_t *info, int index_after_skipping, iconv_t converter); uint64_t spss_64bit_value(readstat_value_t value); uint32_t spss_measure_from_readstat_measure(readstat_measure_t measure); readstat_measure_t spss_measure_to_readstat_measure(uint32_t sav_measure); uint32_t 
spss_alignment_from_readstat_alignment(readstat_alignment_t alignment); readstat_alignment_t spss_alignment_to_readstat_alignment(uint32_t sav_alignment); readstat_error_t spss_format_for_variable(readstat_variable_t *r_variable, spss_format_t *spss_format); ReadStat-1.1.7/src/spss/readstat_spss_parse.c000066400000000000000000000460621410722155500212350ustar00rootroot00000000000000#line 1 "src/spss/readstat_spss_parse.rl" #include #include "../readstat.h" #include "readstat_spss.h" #include "readstat_spss_parse.h" #line 11 "src/spss/readstat_spss_parse.c" static const signed char _spss_format_parser_actions[] = { 0, 1, 1, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, 8, 1, 9, 1, 10, 1, 11, 1, 12, 1, 13, 1, 14, 1, 15, 1, 16, 1, 17, 1, 18, 1, 19, 1, 20, 1, 21, 1, 22, 1, 23, 1, 24, 1, 25, 1, 26, 1, 27, 1, 28, 1, 29, 1, 30, 1, 31, 1, 32, 1, 33, 1, 34, 1, 35, 1, 36, 1, 37, 1, 38, 1, 39, 1, 40, 2, 0, 1, 3, 4, 0, 1, 3, 5, 0, 1, 3, 6, 0, 1, 3, 7, 0, 1, 3, 8, 0, 1, 3, 9, 0, 1, 3, 10, 0, 1, 3, 11, 0, 1, 3, 12, 0, 1, 3, 13, 0, 1, 3, 14, 0, 1, 3, 15, 0, 1, 3, 16, 0, 1, 3, 17, 0, 1, 3, 18, 0, 1, 3, 19, 0, 1, 3, 20, 0, 1, 3, 21, 0, 1, 3, 22, 0, 1, 3, 23, 0, 1, 3, 24, 0, 1, 3, 25, 0, 1, 3, 26, 0, 1, 3, 27, 0, 1, 3, 28, 0, 1, 3, 29, 0, 1, 3, 30, 0, 1, 3, 31, 0, 1, 3, 32, 0, 1, 3, 33, 0, 1, 3, 34, 0, 1, 3, 35, 0, 1, 3, 36, 0, 1, 3, 37, 0, 1, 3, 38, 0, 1, 3, 39, 0, 1, 3, 40, 0, 1, 0 }; static const short _spss_format_parser_key_offsets[] = { 0, 0, 34, 36, 38, 40, 42, 44, 46, 50, 60, 62, 64, 66, 72, 74, 76, 78, 80, 82, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 118, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 172, 174, 176, 178, 180, 182, 184, 186, 188, 194, 197, 199, 201, 203, 205, 207, 209, 211, 213, 215, 219, 221, 223, 225, 227, 231, 233, 235, 237, 239, 241, 243, 245, 247, 255, 257, 261, 263, 265, 267, 271, 273, 275, 277, 279, 281, 283, 0 }; static const char 
_spss_format_parser_trans_keys[] = { 65, 67, 68, 69, 70, 73, 74, 77, 78, 80, 81, 82, 83, 84, 87, 89, 90, 97, 99, 100, 101, 102, 105, 106, 109, 110, 112, 113, 114, 115, 116, 119, 121, 122, 48, 57, 65, 97, 84, 116, 69, 101, 69, 101, 88, 120, 67, 79, 99, 111, 65, 66, 67, 68, 69, 97, 98, 99, 100, 101, 77, 109, 77, 109, 65, 97, 65, 79, 84, 97, 111, 116, 84, 116, 69, 101, 73, 105, 77, 109, 69, 101, 76, 84, 108, 116, 76, 108, 65, 97, 82, 114, 73, 105, 77, 109, 69, 101, 65, 97, 84, 116, 69, 101, 66, 98, 68, 100, 65, 97, 84, 116, 69, 101, 79, 84, 111, 116, 78, 89, 110, 121, 84, 116, 72, 104, 82, 114, 73, 105, 77, 109, 69, 101, 84, 116, 66, 98, 69, 101, 88, 120, 89, 121, 82, 114, 66, 98, 69, 101, 88, 120, 68, 100, 65, 97, 84, 116, 69, 101, 73, 105, 77, 109, 69, 101, 75, 107, 68, 89, 100, 121, 65, 97, 89, 121, 82, 114, 77, 109, 68, 100, 72, 104, 77, 109, 83, 115, 68, 72, 100, 104, 48, 57, 46, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 84, 116, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 68, 100, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 67, 73, 75, 99, 105, 107, 48, 57, 48, 57, 72, 104, 48, 57, 48, 57, 48, 57, 48, 57, 72, 104, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 48, 57, 0 }; static const signed char _spss_format_parser_single_lengths[] = { 0, 34, 0, 2, 2, 2, 2, 2, 4, 10, 2, 2, 2, 6, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0 }; static const signed char _spss_format_parser_range_lengths[] = { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0 }; static const short _spss_format_parser_index_offsets[] = { 0, 0, 35, 37, 40, 43, 46, 49, 52, 57, 68, 71, 74, 77, 84, 87, 90, 93, 96, 99, 104, 107, 110, 113, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 151, 156, 159, 162, 165, 168, 171, 174, 177, 180, 183, 186, 189, 192, 195, 198, 201, 204, 207, 210, 213, 216, 219, 222, 225, 230, 233, 236, 239, 242, 245, 248, 251, 254, 260, 263, 265, 267, 269, 271, 273, 275, 277, 279, 281, 285, 287, 289, 291, 293, 297, 299, 301, 303, 305, 307, 309, 311, 313, 321, 323, 327, 329, 331, 333, 337, 339, 341, 343, 345, 347, 349, 0 }; static const signed char _spss_format_parser_cond_targs[] = { 68, 8, 13, 84, 86, 29, 30, 34, 92, 93, 46, 48, 51, 55, 58, 63, 106, 68, 8, 13, 84, 86, 29, 30, 34, 92, 93, 46, 48, 51, 55, 58, 63, 106, 0, 70, 0, 4, 4, 0, 5, 5, 0, 71, 71, 0, 7, 7, 0, 72, 72, 0, 9, 10, 9, 10, 0, 73, 74, 75, 76, 77, 73, 74, 75, 76, 77, 0, 11, 11, 0, 12, 12, 0, 78, 78, 0, 14, 19, 23, 14, 19, 23, 0, 15, 15, 0, 79, 79, 0, 17, 17, 0, 18, 18, 0, 80, 80, 0, 20, 82, 20, 82, 0, 21, 21, 0, 22, 22, 0, 81, 81, 0, 24, 24, 0, 25, 25, 0, 83, 83, 0, 27, 27, 0, 28, 28, 0, 85, 85, 0, 87, 87, 0, 31, 31, 0, 32, 32, 0, 33, 33, 0, 88, 88, 0, 35, 39, 35, 39, 0, 36, 38, 36, 38, 0, 37, 37, 0, 89, 89, 0, 90, 90, 0, 40, 40, 0, 41, 41, 0, 91, 91, 0, 94, 94, 0, 95, 95, 0, 45, 45, 0, 96, 96, 0, 47, 47, 0, 98, 98, 0, 99, 99, 0, 50, 50, 0, 100, 100, 0, 52, 52, 0, 53, 53, 0, 54, 54, 0, 101, 101, 0, 56, 56, 0, 57, 57, 0, 102, 102, 0, 59, 59, 0, 60, 62, 60, 62, 0, 61, 61, 0, 103, 103, 0, 104, 104, 0, 64, 64, 0, 65, 65, 0, 66, 66, 0, 67, 67, 0, 105, 105, 0, 3, 6, 3, 6, 69, 0, 2, 69, 0, 70, 0, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 16, 16, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 26, 26, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 42, 43, 97, 42, 43, 97, 69, 0, 69, 0, 44, 44, 69, 0, 69, 0, 69, 0, 69, 0, 49, 49, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 69, 0, 0, 1, 2, 
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 0 }; static const short _spss_format_parser_cond_actions[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84, 0, 3, 1, 0, 1, 0, 160, 0, 88, 0, 204, 0, 208, 0, 212, 0, 216, 0, 220, 0, 92, 0, 0, 0, 144, 0, 152, 0, 96, 0, 200, 0, 168, 0, 0, 0, 140, 0, 224, 0, 100, 0, 104, 0, 164, 0, 180, 0, 184, 0, 172, 0, 136, 0, 0, 0, 0, 0, 0, 0, 112, 0, 196, 0, 0, 0, 116, 0, 108, 0, 120, 0, 188, 0, 0, 0, 124, 0, 128, 0, 228, 0, 148, 0, 176, 0, 192, 0, 156, 0, 132, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 3, 5, 45, 9, 67, 69, 71, 73, 75, 11, 37, 41, 13, 65, 49, 35, 77, 15, 17, 47, 55, 57, 51, 33, 21, 63, 23, 19, 25, 59, 27, 29, 79, 39, 53, 61, 43, 31, 0 }; static const short 
_spss_format_parser_eof_trans[] = { 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 0 }; static const int spss_format_parser_start = 1; static const int spss_format_parser_en_main = 1; #line 11 "src/spss/readstat_spss_parse.rl" // For minimum width information see // https://www.ibm.com/support/knowledgecenter/SSLVMB_sub/statistics_reference_project_ddita/spss/base/syn_date_and_time_date_time_formats.html readstat_error_t spss_parse_format(const char *data, int count, spss_format_t *fmt) { unsigned char *p = (unsigned char *)data; unsigned char *pe = (unsigned char *)data + count; unsigned char *eof = pe; int cs; unsigned int integer = 0; #line 310 "src/spss/readstat_spss_parse.c" { cs = (int)spss_format_parser_start; } #line 315 "src/spss/readstat_spss_parse.c" { int _klen; unsigned int _trans = 0; const char * _keys; const signed char * _acts; unsigned int _nacts; _resume: {} if ( p == pe && p != eof ) goto _out; if ( p == eof ) { if ( _spss_format_parser_eof_trans[cs] > 0 ) { _trans = (unsigned int)_spss_format_parser_eof_trans[cs] - 1; } } else { _keys = ( _spss_format_parser_trans_keys + (_spss_format_parser_key_offsets[cs])); _trans = (unsigned int)_spss_format_parser_index_offsets[cs]; _klen = (int)_spss_format_parser_single_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + _klen - 1; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _keys += _klen; _trans += (unsigned int)_klen; break; } _mid = _lower + ((_upper-_lower) >> 1); if ( ( 
(*( p))) < (*( _mid)) ) _upper = _mid - 1; else if ( ( (*( p))) > (*( _mid)) ) _lower = _mid + 1; else { _trans += (unsigned int)(_mid - _keys); goto _match; } } } _klen = (int)_spss_format_parser_range_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + (_klen<<1) - 2; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _trans += (unsigned int)_klen; break; } _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 2; else if ( ( (*( p))) > (*( _mid + 1)) ) _lower = _mid + 2; else { _trans += (unsigned int)((_mid - _keys)>>1); break; } } } _match: {} } cs = (int)_spss_format_parser_cond_targs[_trans]; if ( _spss_format_parser_cond_actions[_trans] != 0 ) { _acts = ( _spss_format_parser_actions + (_spss_format_parser_cond_actions[_trans])); _nacts = (unsigned int)(*( _acts)); _acts += 1; while ( _nacts > 0 ) { switch ( (*( _acts)) ) { case 0: { { #line 24 "src/spss/readstat_spss_parse.rl" integer = 0; } #line 400 "src/spss/readstat_spss_parse.c" break; } case 1: { { #line 28 "src/spss/readstat_spss_parse.rl" integer = 10 * integer + ((( (*( p)))) - '0'); } #line 411 "src/spss/readstat_spss_parse.c" break; } case 2: { { #line 32 "src/spss/readstat_spss_parse.rl" fmt->width = integer; } #line 422 "src/spss/readstat_spss_parse.c" break; } case 3: { { #line 36 "src/spss/readstat_spss_parse.rl" fmt->decimal_places = integer; } #line 433 "src/spss/readstat_spss_parse.c" break; } case 4: { { #line 40 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_A; } #line 442 "src/spss/readstat_spss_parse.c" break; } case 5: { { #line 41 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_AHEX; } #line 451 "src/spss/readstat_spss_parse.c" break; } case 6: { { #line 42 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_COMMA; } #line 460 "src/spss/readstat_spss_parse.c" break; } case 7: { { #line 43 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_DOLLAR; } 
#line 469 "src/spss/readstat_spss_parse.c" break; } case 8: { { #line 44 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_F; } #line 478 "src/spss/readstat_spss_parse.c" break; } case 9: { { #line 45 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_IB; } #line 487 "src/spss/readstat_spss_parse.c" break; } case 10: { { #line 46 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_PIBHEX; } #line 496 "src/spss/readstat_spss_parse.c" break; } case 11: { { #line 47 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_P; } #line 505 "src/spss/readstat_spss_parse.c" break; } case 12: { { #line 48 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_PIB; } #line 514 "src/spss/readstat_spss_parse.c" break; } case 13: { { #line 49 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_PK; } #line 523 "src/spss/readstat_spss_parse.c" break; } case 14: { { #line 50 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_RB; } #line 532 "src/spss/readstat_spss_parse.c" break; } case 15: { { #line 51 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_RBHEX; } #line 541 "src/spss/readstat_spss_parse.c" break; } case 16: { { #line 52 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_Z; } #line 550 "src/spss/readstat_spss_parse.c" break; } case 17: { { #line 53 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_N; } #line 559 "src/spss/readstat_spss_parse.c" break; } case 18: { { #line 54 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_E; } #line 568 "src/spss/readstat_spss_parse.c" break; } case 19: { { #line 55 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_DATE; fmt->width = 11; } #line 577 "src/spss/readstat_spss_parse.c" break; } case 20: { { #line 56 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_TIME; } #line 586 "src/spss/readstat_spss_parse.c" break; } case 21: { { #line 57 "src/spss/readstat_spss_parse.rl" fmt->type 
= SPSS_FORMAT_TYPE_DATETIME; fmt->width = 20; } #line 595 "src/spss/readstat_spss_parse.c" break; } case 22: { { #line 58 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_YMDHMS; fmt->width = 19; } #line 604 "src/spss/readstat_spss_parse.c" break; } case 23: { { #line 59 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_ADATE; fmt->width = 10; } #line 613 "src/spss/readstat_spss_parse.c" break; } case 24: { { #line 60 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_JDATE; } #line 622 "src/spss/readstat_spss_parse.c" break; } case 25: { { #line 61 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_DTIME; fmt->width = 23; } #line 631 "src/spss/readstat_spss_parse.c" break; } case 26: { { #line 62 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_MTIME; } #line 640 "src/spss/readstat_spss_parse.c" break; } case 27: { { #line 63 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_WKDAY; } #line 649 "src/spss/readstat_spss_parse.c" break; } case 28: { { #line 64 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_MONTH; } #line 658 "src/spss/readstat_spss_parse.c" break; } case 29: { { #line 65 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_MOYR; } #line 667 "src/spss/readstat_spss_parse.c" break; } case 30: { { #line 66 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_QYR; } #line 676 "src/spss/readstat_spss_parse.c" break; } case 31: { { #line 67 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_WKYR; fmt->width = 10; } #line 685 "src/spss/readstat_spss_parse.c" break; } case 32: { { #line 68 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_PCT; } #line 694 "src/spss/readstat_spss_parse.c" break; } case 33: { { #line 69 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_DOT; } #line 703 "src/spss/readstat_spss_parse.c" break; } case 34: { { #line 70 "src/spss/readstat_spss_parse.rl" fmt->type = 
SPSS_FORMAT_TYPE_CCA; } #line 712 "src/spss/readstat_spss_parse.c" break; } case 35: { { #line 71 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_CCB; } #line 721 "src/spss/readstat_spss_parse.c" break; } case 36: { { #line 72 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_CCC; } #line 730 "src/spss/readstat_spss_parse.c" break; } case 37: { { #line 73 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_CCD; } #line 739 "src/spss/readstat_spss_parse.c" break; } case 38: { { #line 74 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_CCE; } #line 748 "src/spss/readstat_spss_parse.c" break; } case 39: { { #line 75 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_EDATE; fmt->width = 10; } #line 757 "src/spss/readstat_spss_parse.c" break; } case 40: { { #line 76 "src/spss/readstat_spss_parse.rl" fmt->type = SPSS_FORMAT_TYPE_SDATE; fmt->width = 10; } #line 766 "src/spss/readstat_spss_parse.c" break; } } _nacts -= 1; _acts += 1; } } if ( p == eof ) { if ( cs >= 68 ) goto _out; } else { if ( cs != 0 ) { p += 1; goto _resume; } } _out: {} } #line 89 "src/spss/readstat_spss_parse.rl" /* suppress warning */ (void)spss_format_parser_en_main; if (cs < #line 797 "src/spss/readstat_spss_parse.c" 68 #line 94 "src/spss/readstat_spss_parse.rl" || p != eof) { return READSTAT_ERROR_PARSE; } return READSTAT_OK; } ReadStat-1.1.7/src/spss/readstat_spss_parse.h000066400000000000000000000001261410722155500212310ustar00rootroot00000000000000 readstat_error_t spss_parse_format(const char *data, int count, spss_format_t *fmt); ReadStat-1.1.7/src/spss/readstat_spss_parse.rl000066400000000000000000000075521410722155500214310ustar00rootroot00000000000000 #include #include "../readstat.h" #include "readstat_spss.h" #include "readstat_spss_parse.h" %%{ machine spss_format_parser; write data nofinal noerror; }%% // For minimum width information see // 
https://www.ibm.com/support/knowledgecenter/SSLVMB_sub/statistics_reference_project_ddita/spss/base/syn_date_and_time_date_time_formats.html readstat_error_t spss_parse_format(const char *data, int count, spss_format_t *fmt) { unsigned char *p = (unsigned char *)data; unsigned char *pe = (unsigned char *)data + count; unsigned char *eof = pe; int cs; unsigned int integer = 0; %%{ action start_integer { integer = 0; } action incr_integer { integer = 10 * integer + (fc - '0'); } action save_width { fmt->width = integer; } action save_precision { fmt->decimal_places = integer; } type = ("A"i %{ fmt->type = SPSS_FORMAT_TYPE_A; } | "AHEX"i %{ fmt->type = SPSS_FORMAT_TYPE_AHEX; } | "COMMA"i %{ fmt->type = SPSS_FORMAT_TYPE_COMMA; } | "DOLLAR"i %{ fmt->type = SPSS_FORMAT_TYPE_DOLLAR; } | "F"i %{ fmt->type = SPSS_FORMAT_TYPE_F; } | "IB"i %{ fmt->type = SPSS_FORMAT_TYPE_IB; } | "PIBHEX"i %{ fmt->type = SPSS_FORMAT_TYPE_PIBHEX; } | "P"i %{ fmt->type = SPSS_FORMAT_TYPE_P; } | "PIB"i %{ fmt->type = SPSS_FORMAT_TYPE_PIB; } | "PK"i %{ fmt->type = SPSS_FORMAT_TYPE_PK; } | "RB"i %{ fmt->type = SPSS_FORMAT_TYPE_RB; } | "RBHEX"i %{ fmt->type = SPSS_FORMAT_TYPE_RBHEX; } | "Z"i %{ fmt->type = SPSS_FORMAT_TYPE_Z; } | "N"i %{ fmt->type = SPSS_FORMAT_TYPE_N; } | "E"i %{ fmt->type = SPSS_FORMAT_TYPE_E; } | "DATE"i %{ fmt->type = SPSS_FORMAT_TYPE_DATE; fmt->width = 11; } | "TIME"i %{ fmt->type = SPSS_FORMAT_TYPE_TIME; } | "DATETIME"i %{ fmt->type = SPSS_FORMAT_TYPE_DATETIME; fmt->width = 20; } | "YMDHMS"i %{ fmt->type = SPSS_FORMAT_TYPE_YMDHMS; fmt->width = 19; } | "ADATE"i %{ fmt->type = SPSS_FORMAT_TYPE_ADATE; fmt->width = 10; } | "JDATE"i %{ fmt->type = SPSS_FORMAT_TYPE_JDATE; } | "DTIME"i %{ fmt->type = SPSS_FORMAT_TYPE_DTIME; fmt->width = 23; } | "MTIME"i %{ fmt->type = SPSS_FORMAT_TYPE_MTIME; } | "WKDAY"i %{ fmt->type = SPSS_FORMAT_TYPE_WKDAY; } | "MONTH"i %{ fmt->type = SPSS_FORMAT_TYPE_MONTH; } | "MOYR"i %{ fmt->type = SPSS_FORMAT_TYPE_MOYR; } | "QYR"i %{ fmt->type = 
SPSS_FORMAT_TYPE_QYR; } | "WKYR"i %{ fmt->type = SPSS_FORMAT_TYPE_WKYR; fmt->width = 10; } | "PCT"i %{ fmt->type = SPSS_FORMAT_TYPE_PCT; } | "DOT"i %{ fmt->type = SPSS_FORMAT_TYPE_DOT; } | "CCA"i %{ fmt->type = SPSS_FORMAT_TYPE_CCA; } | "CCB"i %{ fmt->type = SPSS_FORMAT_TYPE_CCB; } | "CCC"i %{ fmt->type = SPSS_FORMAT_TYPE_CCC; } | "CCD"i %{ fmt->type = SPSS_FORMAT_TYPE_CCD; } | "CCE"i %{ fmt->type = SPSS_FORMAT_TYPE_CCE; } | "EDATE"i %{ fmt->type = SPSS_FORMAT_TYPE_EDATE; fmt->width = 10; } | "SDATE"i %{ fmt->type = SPSS_FORMAT_TYPE_SDATE; fmt->width = 10; } ); integer = [0-9]+ >start_integer $incr_integer; width = integer %save_width; precision = integer %save_precision; main := type (width ("." precision)? )?; write init; write exec; }%% /* suppress warning */ (void)spss_format_parser_en_main; if (cs < %%{ write first_final; }%% || p != eof) { return READSTAT_ERROR_PARSE; } return READSTAT_OK; } ReadStat-1.1.7/src/spss/readstat_zsav_compress.c000066400000000000000000000065131410722155500217460ustar00rootroot00000000000000 #include #include #include #include "readstat_zsav_compress.h" zsav_ctx_t *zsav_ctx_init(size_t max_row_len, int64_t offset) { zsav_ctx_t *ctx = calloc(1, sizeof(zsav_ctx_t)); ctx->buffer = malloc(max_row_len); ctx->blocks_capacity = 10; ctx->blocks = calloc(ctx->blocks_capacity, sizeof(zsav_block_t *)); ctx->uncompressed_block_size = 0x3FF000; ctx->zheader_ofs = offset; ctx->compression_level = Z_DEFAULT_COMPRESSION; return ctx; } void zsav_ctx_free(zsav_ctx_t *ctx) { int i; for (i=0; iblocks_count; i++) { zsav_block_t *block = ctx->blocks[i]; deflateEnd(&block->stream); free(block->compressed_data); free(block); } free(ctx->blocks); free(ctx->buffer); free(ctx); } zsav_block_t *zsav_add_block(zsav_ctx_t *ctx) { zsav_block_t *block = NULL; if (ctx->blocks_count == ctx->blocks_capacity) { ctx->blocks = realloc(ctx->blocks, (ctx->blocks_capacity *= 2 ) * sizeof(zsav_block_t *)); } block = calloc(1, sizeof(zsav_block_t)); 
ctx->blocks[ctx->blocks_count++] = block; deflateInit(&block->stream, ctx->compression_level); block->compressed_data_capacity = deflateBound(&block->stream, ctx->uncompressed_block_size); block->compressed_data = malloc(block->compressed_data_capacity); return block; } zsav_block_t *zsav_current_block(zsav_ctx_t *ctx) { if (ctx->blocks_count == 0) return NULL; return ctx->blocks[ctx->blocks_count-1]; } int zsav_compress_row(void *input, size_t input_len, int finish, zsav_ctx_t *ctx) { off_t row_off = 0; unsigned char *row_buffer = input; size_t row_len = input_len; zsav_block_t *block = zsav_current_block(ctx); int deflate_status = Z_OK; if (block == NULL) { block = zsav_add_block(ctx); } block->stream.next_in = row_buffer; block->stream.avail_in = row_len; block->stream.next_out = &block->compressed_data[block->compressed_size]; block->stream.avail_out = block->compressed_data_capacity - block->compressed_size; /* If the row won't fit into this block, keep writing and flushing * until the remainder fits. */ while (row_len - row_off > ctx->uncompressed_block_size - block->uncompressed_size) { block->stream.avail_in = ctx->uncompressed_block_size - block->uncompressed_size; row_off += ctx->uncompressed_block_size - block->uncompressed_size; if ((deflate_status = deflate(&block->stream, Z_FINISH)) != Z_STREAM_END) { goto cleanup; } block->compressed_size = block->compressed_data_capacity - block->stream.avail_out; block->uncompressed_size = ctx->uncompressed_block_size - block->stream.avail_in; block = zsav_add_block(ctx); block->stream.next_in = &row_buffer[row_off]; block->stream.avail_in = row_len - row_off; block->stream.next_out = block->compressed_data; block->stream.avail_out = block->compressed_data_capacity; } /* Now the rest of the row will fit in the block */ deflate_status = deflate(&block->stream, finish ? 
Z_FINISH : Z_NO_FLUSH); block->compressed_size = block->compressed_data_capacity - block->stream.avail_out; block->uncompressed_size += (row_len - row_off) - block->stream.avail_in; cleanup: return deflate_status; } ReadStat-1.1.7/src/spss/readstat_zsav_compress.h000066400000000000000000000014441410722155500217510ustar00rootroot00000000000000 typedef struct zsav_block_s { int32_t uncompressed_size; int32_t compressed_size; z_stream stream; unsigned char *compressed_data; size_t compressed_data_capacity; } zsav_block_t; typedef struct zsav_ctx_s { void *buffer; zsav_block_t **blocks; int blocks_count; int blocks_capacity; int64_t uncompressed_block_size; int64_t zheader_ofs; int compression_level; } zsav_ctx_t; zsav_ctx_t *zsav_ctx_init(size_t max_row_len, int64_t offset); void zsav_ctx_free(zsav_ctx_t *ctx); zsav_block_t *zsav_add_block(zsav_ctx_t *ctx); zsav_block_t *zsav_current_block(zsav_ctx_t *ctx); int zsav_compress_row(void *input, size_t input_len, int finish, zsav_ctx_t *zctx); ReadStat-1.1.7/src/spss/readstat_zsav_read.c000066400000000000000000000145231410722155500210260ustar00rootroot00000000000000#include #include #include "../readstat.h" #include "../readstat_bits.h" #include "../readstat_iconv.h" #include "../readstat_malloc.h" #include "readstat_sav.h" #include "readstat_sav_compress.h" struct zheader { uint64_t zheader_ofs; uint64_t ztrailer_ofs; uint64_t ztrailer_len; }; struct ztrailer { int64_t bias; int64_t zero; int32_t block_size; int32_t n_blocks; }; struct ztrailer_entry { int64_t uncompressed_ofs; int64_t compressed_ofs; int32_t uncompressed_size; int32_t compressed_size; }; readstat_error_t zsav_read_compressed_data(sav_ctx_t *ctx, readstat_error_t (*row_handler)(unsigned char *, size_t, sav_ctx_t *)) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; readstat_off_t data_offset = 0; size_t uncompressed_row_len = ctx->var_offset * 8; readstat_off_t uncompressed_offset = 0; unsigned char *uncompressed_row = NULL; uLongf 
uncompressed_block_len = 0; unsigned char *compressed_block = NULL, *uncompressed_block = NULL; struct sav_row_stream_s state = { .missing_value = ctx->missing_double, .bias = ctx->bias, .bswap = ctx->bswap }; struct zheader zheader; struct ztrailer ztrailer; struct ztrailer_entry *ztrailer_entries = NULL; int n_blocks = 0; int block_i = 0; int i; if (io->read(&zheader, sizeof(struct zheader), io->io_ctx) < sizeof(struct zheader)) { retval = READSTAT_ERROR_READ; goto cleanup; } zheader.zheader_ofs = ctx->bswap ? byteswap8(zheader.zheader_ofs) : zheader.zheader_ofs; zheader.ztrailer_ofs = ctx->bswap ? byteswap8(zheader.ztrailer_ofs) : zheader.ztrailer_ofs; zheader.ztrailer_len = ctx->bswap ? byteswap8(zheader.ztrailer_len) : zheader.ztrailer_len; if (zheader.zheader_ofs != io->seek(0, READSTAT_SEEK_CUR, io->io_ctx) - sizeof(struct zheader)) { retval = READSTAT_ERROR_PARSE; goto cleanup; } n_blocks = (zheader.ztrailer_len - 24) / 24; if (io->seek(zheader.ztrailer_ofs, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->read(&ztrailer, sizeof(struct ztrailer), io->io_ctx) < sizeof(struct ztrailer)) { retval = READSTAT_ERROR_READ; goto cleanup; } ztrailer.bias = ctx->bswap ? byteswap8(ztrailer.bias) : ztrailer.bias; ztrailer.zero = ctx->bswap ? byteswap8(ztrailer.zero) : ztrailer.zero; ztrailer.block_size = ctx->bswap ? byteswap4(ztrailer.block_size) : ztrailer.block_size; ztrailer.n_blocks = ctx->bswap ? byteswap4(ztrailer.n_blocks) : ztrailer.n_blocks; if (n_blocks != ztrailer.n_blocks) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (n_blocks && (ztrailer_entries = readstat_malloc(n_blocks * sizeof(struct ztrailer_entry))) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (io->read(ztrailer_entries, n_blocks * sizeof(struct ztrailer_entry), io->io_ctx) < n_blocks * sizeof(struct ztrailer_entry)) { retval = READSTAT_ERROR_READ; goto cleanup; } for (i=0; iuncompressed_ofs = ctx->bswap ? 
byteswap8(entry->uncompressed_ofs) : entry->uncompressed_ofs; entry->compressed_ofs = ctx->bswap ? byteswap8(entry->compressed_ofs) : entry->compressed_ofs; entry->uncompressed_size = ctx->bswap ? byteswap4(entry->uncompressed_size) : entry->uncompressed_size; entry->compressed_size = ctx->bswap ? byteswap4(entry->compressed_size) : entry->compressed_size; } if (uncompressed_row_len && (uncompressed_row = readstat_malloc(uncompressed_row_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } while (1) { if (block_i == n_blocks) goto cleanup; struct ztrailer_entry *entry = &ztrailer_entries[block_i]; if (io->seek(entry->compressed_ofs, READSTAT_SEEK_SET, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } if ((compressed_block = readstat_realloc(compressed_block, entry->compressed_size)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (io->read(compressed_block, entry->compressed_size, io->io_ctx) != entry->compressed_size) { retval = READSTAT_ERROR_READ; goto cleanup; } uncompressed_block_len = entry->uncompressed_size; if ((uncompressed_block = readstat_realloc(uncompressed_block, uncompressed_block_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } int status = uncompress(uncompressed_block, &uncompressed_block_len, compressed_block, entry->compressed_size); if (status != Z_OK || uncompressed_block_len != entry->uncompressed_size) { retval = READSTAT_ERROR_PARSE; goto cleanup; } block_i++; state.status = SAV_ROW_STREAM_HAVE_DATA; data_offset = 0; while (state.status != SAV_ROW_STREAM_NEED_DATA) { state.next_in = &uncompressed_block[data_offset]; state.avail_in = uncompressed_block_len - data_offset; state.next_out = &uncompressed_row[uncompressed_offset]; state.avail_out = uncompressed_row_len - uncompressed_offset; sav_decompress_row(&state); uncompressed_offset = uncompressed_row_len - state.avail_out; data_offset = uncompressed_block_len - state.avail_in; if (state.status == SAV_ROW_STREAM_FINISHED_ROW) { 
retval = row_handler(uncompressed_row, uncompressed_row_len, ctx); if (retval != READSTAT_OK) goto cleanup; uncompressed_offset = 0; } if (state.status == SAV_ROW_STREAM_FINISHED_ALL) goto cleanup; if (ctx->row_limit > 0 && ctx->current_row == ctx->row_limit) goto cleanup; } } cleanup: if (uncompressed_row) free(uncompressed_row); if (ztrailer_entries) free(ztrailer_entries); if (compressed_block) free(compressed_block); if (uncompressed_block) free(uncompressed_block); return retval; } ReadStat-1.1.7/src/spss/readstat_zsav_read.h000066400000000000000000000002141410722155500210230ustar00rootroot00000000000000 readstat_error_t zsav_read_compressed_data(sav_ctx_t *ctx, readstat_error_t (*row_handler)(unsigned char *, size_t, sav_ctx_t *)); ReadStat-1.1.7/src/spss/readstat_zsav_write.c000066400000000000000000000113421410722155500212410ustar00rootroot00000000000000#include #include #include #include "../readstat.h" #include "../readstat_writer.h" #include "readstat_sav_compress.h" #include "readstat_zsav_compress.h" #include "readstat_zsav_write.h" readstat_error_t zsav_write_compressed_row(void *writer_ctx, void *row, size_t len) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; zsav_ctx_t *zctx = writer->module_ctx; /* Kind of frustrating that SPSS does double compression. If they just * z-compressed the uncompressed data, we could calculate the block count * in advance and write out the file in a streaming manner. As things stand * we have to build up the file in memory until we know the final block * count. A possible streaming solution would be to declare the number of * blocks required to hold the maximum possible row-compressed size and * then fill out the end with no-op zero bytes (that get z-compressed very * small). 
*/ size_t row_len = sav_compress_row(zctx->buffer, row, len, writer); int deflate_status = zsav_compress_row(zctx->buffer, row_len, writer->current_row + 1 == writer->row_count, zctx); if (deflate_status != Z_OK && deflate_status != Z_STREAM_END) return READSTAT_ERROR_WRITE; return READSTAT_OK; } static readstat_error_t zsav_write_data_header(readstat_writer_t *writer, zsav_ctx_t *zctx) { readstat_error_t retval = READSTAT_OK; uint64_t zheader_ofs = zctx->zheader_ofs; uint64_t ztrailer_ofs = zheader_ofs + 24; uint64_t ztrailer_len = 24 + zctx->blocks_count * 24; int i; for (i=0; iblocks_count; i++) { zsav_block_t *block = zctx->blocks[i]; ztrailer_ofs += block->compressed_size; } if ((retval = readstat_write_bytes(writer, &zheader_ofs, sizeof(uint64_t))) != READSTAT_OK) goto cleanup; if ((retval = readstat_write_bytes(writer, &ztrailer_ofs, sizeof(uint64_t))) != READSTAT_OK) goto cleanup; if ((retval = readstat_write_bytes(writer, &ztrailer_len, sizeof(uint64_t))) != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t zsav_write_data_blocks(readstat_writer_t *writer, zsav_ctx_t *zctx) { readstat_error_t retval = READSTAT_OK; int i; for (i=0; iblocks_count; i++) { zsav_block_t *block = zctx->blocks[i]; if ((retval = readstat_write_bytes(writer, block->compressed_data, block->compressed_size)) != READSTAT_OK) goto cleanup; } cleanup: return retval; } static readstat_error_t zsav_write_data_trailer(readstat_writer_t *writer, zsav_ctx_t *zctx) { readstat_error_t retval = READSTAT_OK; int64_t bias = -100; int64_t zero = 0; int32_t block_size = zctx->uncompressed_block_size; int32_t n_blocks = zctx->blocks_count; if ((retval = readstat_write_bytes(writer, &bias, sizeof(int64_t))) != READSTAT_OK) goto cleanup; if ((retval = readstat_write_bytes(writer, &zero, sizeof(int64_t))) != READSTAT_OK) goto cleanup; if ((retval = readstat_write_bytes(writer, &block_size, sizeof(int32_t))) != READSTAT_OK) goto cleanup; if ((retval = 
readstat_write_bytes(writer, &n_blocks, sizeof(int32_t))) != READSTAT_OK) goto cleanup; int i; int64_t uncompressed_ofs = zctx->zheader_ofs; int64_t compressed_ofs = zctx->zheader_ofs + 24; for (i=0; iblocks_count; i++) { zsav_block_t *block = zctx->blocks[i]; int32_t uncompressed_size = block->uncompressed_size; int32_t compressed_size = block->compressed_size; if ((retval = readstat_write_bytes(writer, &uncompressed_ofs, sizeof(int64_t))) != READSTAT_OK) goto cleanup; if ((retval = readstat_write_bytes(writer, &compressed_ofs, sizeof(int64_t))) != READSTAT_OK) goto cleanup; if ((retval = readstat_write_bytes(writer, &uncompressed_size, sizeof(int32_t))) != READSTAT_OK) goto cleanup; if ((retval = readstat_write_bytes(writer, &compressed_size, sizeof(int32_t))) != READSTAT_OK) goto cleanup; uncompressed_ofs += uncompressed_size; compressed_ofs += compressed_size; } cleanup: return retval; } readstat_error_t zsav_end_data(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; zsav_ctx_t *zctx = writer->module_ctx; readstat_error_t retval = READSTAT_OK; retval = zsav_write_data_header(writer, zctx); if (retval != READSTAT_OK) goto cleanup; retval = zsav_write_data_blocks(writer, zctx); if (retval != READSTAT_OK) goto cleanup; retval = zsav_write_data_trailer(writer, zctx); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } ReadStat-1.1.7/src/spss/readstat_zsav_write.h000066400000000000000000000002101410722155500212360ustar00rootroot00000000000000 readstat_error_t zsav_write_compressed_row(void *writer_ctx, void *row, size_t len); readstat_error_t zsav_end_data(void *writer_ctx); ReadStat-1.1.7/src/stata/000077500000000000000000000000001410722155500151345ustar00rootroot00000000000000ReadStat-1.1.7/src/stata/readstat_dta.c000066400000000000000000000232661410722155500177500ustar00rootroot00000000000000#include #include #include #include #include #include "../readstat.h" #include "../readstat_iconv.h" #include 
"../readstat_malloc.h" #include "../readstat_bits.h" #include "readstat_dta.h" #define DTA_MIN_VERSION 104 #define DTA_MAX_VERSION 119 dta_ctx_t *dta_ctx_alloc(readstat_io_t *io) { dta_ctx_t *ctx = calloc(1, sizeof(dta_ctx_t)); if (ctx == NULL) { return NULL; } ctx->io = io; ctx->initialized = 0; return ctx; } readstat_error_t dta_ctx_init(dta_ctx_t *ctx, uint32_t nvar, uint64_t nobs, unsigned char byteorder, unsigned char ds_format, const char *input_encoding, const char *output_encoding) { readstat_error_t retval = READSTAT_OK; int machine_byteorder = DTA_HILO; if (ds_format < DTA_MIN_VERSION || ds_format > DTA_MAX_VERSION) return READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION; if (machine_is_little_endian()) { machine_byteorder = DTA_LOHI; } ctx->bswap = (byteorder != machine_byteorder); ctx->ds_format = ds_format; ctx->endianness = byteorder == DTA_LOHI ? READSTAT_ENDIAN_LITTLE : READSTAT_ENDIAN_BIG; ctx->nvar = nvar; ctx->nobs = nobs; if (ctx->nvar) { if ((ctx->variables = readstat_calloc(ctx->nvar, sizeof(readstat_variable_t *))) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } } ctx->machine_is_twos_complement = READSTAT_MACHINE_IS_TWOS_COMPLEMENT; if (ds_format < 105) { ctx->fmtlist_entry_len = 7; } else if (ds_format < 114) { ctx->fmtlist_entry_len = 12; } else if (ds_format < 118) { ctx->fmtlist_entry_len = 49; } else { ctx->fmtlist_entry_len = 57; } if (ds_format >= 117) { ctx->typlist_version = 117; } else if (ds_format >= 111) { ctx->typlist_version = 111; } else { ctx->typlist_version = 0; } if (ds_format >= 118) { ctx->data_label_len_len = 2; ctx->strl_v_len = 2; ctx->strl_o_len = 6; } else if (ds_format >= 117) { ctx->data_label_len_len = 1; ctx->strl_v_len = 4; ctx->strl_o_len = 4; } if (ds_format < 105) { ctx->expansion_len_len = 0; } else if (ds_format < 110) { ctx->expansion_len_len = 2; } else { ctx->expansion_len_len = 4; } if (ds_format < 110) { ctx->lbllist_entry_len = 9; ctx->variable_name_len = 9; ctx->ch_metadata_len = 9; } else 
if (ds_format < 118) { ctx->lbllist_entry_len = 33; ctx->variable_name_len = 33; ctx->ch_metadata_len = 33; } else { ctx->lbllist_entry_len = 129; ctx->variable_name_len = 129; ctx->ch_metadata_len = 129; } if (ds_format < 108) { ctx->variable_labels_entry_len = 32; ctx->data_label_len = 32; } else if (ds_format < 118) { ctx->variable_labels_entry_len = 81; ctx->data_label_len = 81; } else { ctx->variable_labels_entry_len = 321; ctx->data_label_len = 321; } if (ds_format < 105) { ctx->timestamp_len = 0; ctx->value_label_table_len_len = 2; ctx->value_label_table_labname_len = 12; ctx->value_label_table_padding_len = 2; } else { ctx->timestamp_len = 18; ctx->value_label_table_len_len = 4; if (ds_format < 118) { ctx->value_label_table_labname_len = 33; } else { ctx->value_label_table_labname_len = 129; } ctx->value_label_table_padding_len = 3; } if (ds_format < 117) { ctx->typlist_entry_len = 1; ctx->file_is_xmlish = 0; } else { ctx->typlist_entry_len = 2; ctx->file_is_xmlish = 1; } if (ds_format < 113) { ctx->max_int8 = DTA_OLD_MAX_INT8; ctx->max_int16 = DTA_OLD_MAX_INT16; ctx->max_int32 = DTA_OLD_MAX_INT32; ctx->max_float = DTA_OLD_MAX_FLOAT; ctx->max_double = DTA_OLD_MAX_DOUBLE; } else { ctx->max_int8 = DTA_113_MAX_INT8; ctx->max_int16 = DTA_113_MAX_INT16; ctx->max_int32 = DTA_113_MAX_INT32; ctx->max_float = DTA_113_MAX_FLOAT; ctx->max_double = DTA_113_MAX_DOUBLE; ctx->supports_tagged_missing = 1; } if (output_encoding) { if (input_encoding) { ctx->converter = iconv_open(output_encoding, input_encoding); } else if (ds_format < 118) { ctx->converter = iconv_open(output_encoding, "WINDOWS-1252"); } else if (strcmp(output_encoding, "UTF-8") != 0) { ctx->converter = iconv_open(output_encoding, "UTF-8"); } if (ctx->converter == (iconv_t)-1) { ctx->converter = NULL; retval = READSTAT_ERROR_UNSUPPORTED_CHARSET; goto cleanup; } } if (ds_format < 119) { ctx->srtlist_len = (ctx->nvar + 1) * sizeof(int16_t); } else { ctx->srtlist_len = (ctx->nvar + 1) * sizeof(int32_t); } if 
((ctx->srtlist = readstat_malloc(ctx->srtlist_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (ctx->nvar > 0) { ctx->typlist_len = ctx->nvar * sizeof(uint16_t); ctx->varlist_len = ctx->variable_name_len * ctx->nvar * sizeof(char); ctx->fmtlist_len = ctx->fmtlist_entry_len * ctx->nvar * sizeof(char); ctx->lbllist_len = ctx->lbllist_entry_len * ctx->nvar * sizeof(char); ctx->variable_labels_len = ctx->variable_labels_entry_len * ctx->nvar * sizeof(char); if ((ctx->typlist = readstat_malloc(ctx->typlist_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if ((ctx->varlist = readstat_malloc(ctx->varlist_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if ((ctx->fmtlist = readstat_malloc(ctx->fmtlist_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if ((ctx->lbllist = readstat_malloc(ctx->lbllist_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if ((ctx->variable_labels = readstat_malloc(ctx->variable_labels_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } } ctx->initialized = 1; cleanup: return retval; } void dta_ctx_free(dta_ctx_t *ctx) { if (ctx->typlist) free(ctx->typlist); if (ctx->varlist) free(ctx->varlist); if (ctx->srtlist) free(ctx->srtlist); if (ctx->fmtlist) free(ctx->fmtlist); if (ctx->lbllist) free(ctx->lbllist); if (ctx->variable_labels) free(ctx->variable_labels); if (ctx->converter) iconv_close(ctx->converter); if (ctx->data_label) free(ctx->data_label); if (ctx->variables) { int i; for (i=0; invar; i++) { if (ctx->variables[i]) free(ctx->variables[i]); } free(ctx->variables); } if (ctx->strls) { int i; for (i=0; istrls_count; i++) { free(ctx->strls[i]); } free(ctx->strls); } free(ctx); } readstat_error_t dta_type_info(uint16_t typecode, dta_ctx_t *ctx, size_t *max_len, readstat_type_t *out_type) { readstat_error_t retval = READSTAT_OK; size_t len = 0; readstat_type_t type = READSTAT_TYPE_STRING; if (ctx->typlist_version == 111) { switch (typecode) { case 
DTA_111_TYPE_CODE_INT8: len = 1; type = READSTAT_TYPE_INT8; break; case DTA_111_TYPE_CODE_INT16: len = 2; type = READSTAT_TYPE_INT16; break; case DTA_111_TYPE_CODE_INT32: len = 4; type = READSTAT_TYPE_INT32; break; case DTA_111_TYPE_CODE_FLOAT: len = 4; type = READSTAT_TYPE_FLOAT; break; case DTA_111_TYPE_CODE_DOUBLE: len = 8; type = READSTAT_TYPE_DOUBLE; break; default: len = typecode; type = READSTAT_TYPE_STRING; break; } } else if (ctx->typlist_version == 117) { switch (typecode) { case DTA_117_TYPE_CODE_INT8: len = 1; type = READSTAT_TYPE_INT8; break; case DTA_117_TYPE_CODE_INT16: len = 2; type = READSTAT_TYPE_INT16; break; case DTA_117_TYPE_CODE_INT32: len = 4; type = READSTAT_TYPE_INT32; break; case DTA_117_TYPE_CODE_FLOAT: len = 4; type = READSTAT_TYPE_FLOAT; break; case DTA_117_TYPE_CODE_DOUBLE: len = 8; type = READSTAT_TYPE_DOUBLE; break; case DTA_117_TYPE_CODE_STRL: len = 8; type = READSTAT_TYPE_STRING_REF; break; default: len = typecode; type = READSTAT_TYPE_STRING; break; } } else if (typecode < 0x7F) { switch (typecode) { case DTA_OLD_TYPE_CODE_INT8: len = 1; type = READSTAT_TYPE_INT8; break; case DTA_OLD_TYPE_CODE_INT16: len = 2; type = READSTAT_TYPE_INT16; break; case DTA_OLD_TYPE_CODE_INT32: len = 4; type = READSTAT_TYPE_INT32; break; case DTA_OLD_TYPE_CODE_FLOAT: len = 4; type = READSTAT_TYPE_FLOAT; break; case DTA_OLD_TYPE_CODE_DOUBLE: len = 8; type = READSTAT_TYPE_DOUBLE; break; default: retval = READSTAT_ERROR_PARSE; break; } } else { len = typecode - 0x7F; type = READSTAT_TYPE_STRING; } if (max_len) *max_len = len; if (out_type) *out_type = type; return retval; } ReadStat-1.1.7/src/stata/readstat_dta.h000066400000000000000000000136021410722155500177460ustar00rootroot00000000000000#pragma pack(push, 1) // DTA files typedef struct dta_header_s { unsigned char ds_format; unsigned char byteorder; unsigned char filetype; unsigned char unused; uint16_t nvar; uint32_t nobs; } dta_header_t; typedef struct dta_header64_s { unsigned char ds_format; 
unsigned char byteorder; unsigned char filetype; unsigned char unused; uint32_t nvar; uint64_t nobs; } dta_header64_t; /* Some compilers will pack the elements, but still round up * the total size of the structure. So use macros in place of * sizeof(dta_117_strl_header_t)/sizeof(dta_118_strl_header_t). */ typedef struct dta_117_strl_header_s { uint32_t v; uint32_t o; unsigned char type; int32_t len; } dta_117_strl_header_t; #define SIZEOF_DTA_117_STRL_HEADER_T 13 typedef struct dta_118_strl_header_s { uint32_t v; uint64_t o; unsigned char type; int32_t len; } dta_118_strl_header_t; #define SIZEOF_DTA_118_STRL_HEADER_T 17 #pragma pack(pop) typedef struct dta_strl_s { uint16_t v; uint64_t o; unsigned char type; size_t len; char data[1]; // Flexible array; use [1] for C++98 compatibility } dta_strl_t; typedef struct dta_ctx_s { char *data_label; size_t data_label_len; size_t data_label_len_len; time_t timestamp; size_t timestamp_len; char typlist_version; size_t typlist_entry_len; uint16_t *typlist; size_t typlist_len; char *varlist; size_t varlist_len; int16_t *srtlist; size_t srtlist_len; char *fmtlist; size_t fmtlist_len; char *lbllist; size_t lbllist_len; char *variable_labels; size_t variable_labels_len; size_t variable_name_len; size_t fmtlist_entry_len; size_t lbllist_entry_len; size_t variable_labels_entry_len; size_t expansion_len_len; size_t ch_metadata_len; size_t value_label_table_len_len; size_t value_label_table_labname_len; size_t value_label_table_padding_len; size_t strl_v_len; size_t strl_o_len; int64_t data_offset; int64_t strls_offset; int64_t value_labels_offset; int ds_format; int nvar; int64_t nobs; size_t record_len; int64_t row_limit; int64_t row_offset; int64_t current_row; unsigned int bswap:1; unsigned int machine_is_twos_complement:1; unsigned int file_is_xmlish:1; unsigned int supports_tagged_missing:1; int8_t max_int8; int16_t max_int16; int32_t max_int32; int32_t max_float; int64_t max_double; dta_strl_t **strls; size_t strls_count; 
size_t strls_capacity; readstat_variable_t **variables; readstat_endian_t endianness; iconv_t converter; readstat_callbacks_t handle; size_t file_size; void *user_ctx; readstat_io_t *io; int initialized; char error_buf[256]; } dta_ctx_t; #define DTA_HILO 0x01 #define DTA_LOHI 0x02 #define DTA_OLD_MAX_INT8 0x7e #define DTA_OLD_MAX_INT16 0x7ffe #define DTA_OLD_MAX_INT32 0x7ffffffe #define DTA_OLD_MAX_FLOAT 0x7effffff // +1.7e38f #define DTA_OLD_MAX_DOUBLE 0x7fdfffffffffffffL // +8.9e307 #define DTA_OLD_MISSING_INT8 0x7F #define DTA_OLD_MISSING_INT16 0x7FFF #define DTA_OLD_MISSING_INT32 0x7FFFFFFF #define DTA_OLD_MISSING_FLOAT 0x7F000000 #define DTA_OLD_MISSING_DOUBLE 0x7FE0000000000000L #define DTA_113_MAX_INT8 0x64 #define DTA_113_MAX_INT16 0x7fe4 #define DTA_113_MAX_INT32 0x7fffffe4 #define DTA_113_MAX_FLOAT 0x7effffff // +1.7e38f #define DTA_113_MAX_DOUBLE 0x7fdfffffffffffffL // +8.9e307 #define DTA_113_MISSING_INT8 0x65 #define DTA_113_MISSING_INT16 0x7FE5 #define DTA_113_MISSING_INT32 0x7FFFFFE5 #define DTA_113_MISSING_FLOAT 0x7F000000 #define DTA_113_MISSING_DOUBLE 0x7FE0000000000000L #define DTA_113_MISSING_INT8_A (DTA_113_MISSING_INT8+1) #define DTA_113_MISSING_INT16_A (DTA_113_MISSING_INT16+1) #define DTA_113_MISSING_INT32_A (DTA_113_MISSING_INT32+1) #define DTA_113_MISSING_FLOAT_A (DTA_113_MISSING_FLOAT+0x0800) #define DTA_113_MISSING_DOUBLE_A (DTA_113_MISSING_DOUBLE+0x010000000000) #define DTA_GSO_TYPE_BINARY 0x81 #define DTA_GSO_TYPE_ASCII 0x82 #define DTA_117_TYPE_CODE_INT8 0xFFFA #define DTA_117_TYPE_CODE_INT16 0xFFF9 #define DTA_117_TYPE_CODE_INT32 0xFFF8 #define DTA_117_TYPE_CODE_FLOAT 0xFFF7 #define DTA_117_TYPE_CODE_DOUBLE 0xFFF6 #define DTA_117_TYPE_CODE_STRL 0x8000 #define DTA_111_TYPE_CODE_INT8 0xFB #define DTA_111_TYPE_CODE_INT16 0xFC #define DTA_111_TYPE_CODE_INT32 0xFD #define DTA_111_TYPE_CODE_FLOAT 0xFE #define DTA_111_TYPE_CODE_DOUBLE 0xFF #define DTA_OLD_TYPE_CODE_INT8 'b' #define DTA_OLD_TYPE_CODE_INT16 'i' #define DTA_OLD_TYPE_CODE_INT32 
'l' #define DTA_OLD_TYPE_CODE_FLOAT 'f' #define DTA_OLD_TYPE_CODE_DOUBLE 'd' dta_ctx_t *dta_ctx_alloc(readstat_io_t *io); readstat_error_t dta_ctx_init(dta_ctx_t *ctx, uint32_t nvar, uint64_t nobs, unsigned char byteorder, unsigned char ds_format, const char *input_encoding, const char *output_encoding); void dta_ctx_free(dta_ctx_t *ctx); readstat_error_t dta_type_info(uint16_t typecode, dta_ctx_t *ctx, size_t *max_len, readstat_type_t *out_type); ReadStat-1.1.7/src/stata/readstat_dta_parse_timestamp.c000066400000000000000000000246031410722155500232210ustar00rootroot00000000000000#line 1 "src/stata/readstat_dta_parse_timestamp.rl" #include #include "../readstat.h" #include "readstat_dta_parse_timestamp.h" #line 9 "src/stata/readstat_dta_parse_timestamp.c" static const signed char _dta_timestamp_parse_actions[] = { 0, 1, 0, 1, 2, 1, 3, 1, 4, 1, 5, 1, 6, 1, 7, 1, 8, 1, 9, 1, 10, 1, 11, 1, 12, 1, 13, 1, 14, 1, 15, 1, 16, 1, 17, 2, 1, 0, 0 }; static const signed char _dta_timestamp_parse_key_offsets[] = { 0, 0, 3, 5, 8, 26, 34, 36, 37, 39, 42, 45, 48, 50, 52, 53, 55, 59, 63, 64, 66, 68, 70, 71, 73, 75, 76, 80, 82, 86, 87, 88, 90, 96, 97, 98, 100, 102, 103, 107, 109, 110, 112, 114, 115, 0 }; static const char _dta_timestamp_parse_trans_keys[] = { 32, 48, 57, 48, 57, 32, 48, 57, 65, 68, 69, 70, 74, 77, 78, 79, 83, 97, 100, 101, 102, 106, 109, 110, 111, 115, 66, 71, 80, 85, 98, 103, 112, 117, 82, 114, 32, 48, 57, 32, 48, 57, 32, 48, 57, 58, 48, 57, 48, 57, 79, 111, 32, 71, 103, 69, 73, 101, 105, 67, 90, 99, 122, 32, 67, 99, 78, 110, 69, 101, 32, 69, 101, 66, 98, 32, 65, 85, 97, 117, 78, 110, 76, 78, 108, 110, 32, 32, 65, 97, 73, 82, 89, 105, 114, 121, 32, 32, 79, 111, 86, 118, 32, 67, 75, 99, 107, 84, 116, 32, 69, 101, 80, 112, 32, 48, 57, 0 }; static const signed char _dta_timestamp_parse_single_lengths[] = { 0, 1, 0, 1, 18, 8, 2, 1, 0, 1, 1, 1, 0, 2, 1, 2, 4, 4, 1, 2, 2, 2, 1, 2, 2, 1, 4, 2, 4, 1, 1, 2, 6, 1, 1, 2, 2, 1, 4, 2, 1, 2, 2, 1, 0, 0 }; static const signed 
char _dta_timestamp_parse_range_lengths[] = { 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0 }; static const short _dta_timestamp_parse_index_offsets[] = { 0, 0, 3, 5, 8, 27, 36, 39, 41, 43, 46, 49, 52, 54, 57, 59, 62, 67, 72, 74, 77, 80, 83, 85, 88, 91, 93, 98, 101, 106, 108, 110, 113, 120, 122, 124, 127, 130, 132, 137, 140, 142, 145, 148, 150, 0 }; static const signed char _dta_timestamp_parse_cond_targs[] = { 2, 3, 0, 3, 0, 4, 3, 0, 5, 16, 20, 23, 26, 31, 35, 38, 41, 5, 16, 20, 23, 26, 31, 35, 38, 41, 0, 6, 13, 6, 15, 6, 13, 6, 15, 0, 7, 7, 0, 8, 0, 9, 0, 10, 9, 0, 10, 11, 0, 12, 11, 0, 44, 0, 14, 14, 0, 8, 0, 14, 14, 0, 17, 19, 17, 19, 0, 18, 18, 18, 18, 0, 8, 0, 18, 18, 0, 21, 21, 0, 22, 22, 0, 8, 0, 24, 24, 0, 25, 25, 0, 8, 0, 27, 28, 27, 28, 0, 22, 22, 0, 29, 30, 29, 30, 0, 8, 0, 8, 0, 32, 32, 0, 33, 34, 33, 33, 34, 33, 0, 8, 0, 8, 0, 36, 36, 0, 37, 37, 0, 8, 0, 39, 39, 39, 39, 0, 40, 40, 0, 8, 0, 42, 42, 0, 43, 43, 0, 8, 0, 44, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 0 }; static const signed char _dta_timestamp_parse_cond_actions[] = { 0, 35, 0, 35, 0, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 35, 0, 29, 1, 0, 0, 35, 0, 31, 1, 0, 35, 0, 0, 0, 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 9, 0, 0, 0, 0, 0, 0, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 21, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0 }; static const short _dta_timestamp_parse_eof_trans[] = { 153, 154, 155, 156, 157, 158, 159, 160, 161, 
162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 0 }; static const int dta_timestamp_parse_start = 1; static const int dta_timestamp_parse_en_main = 1; #line 9 "src/stata/readstat_dta_parse_timestamp.rl" readstat_error_t dta_parse_timestamp(const char *data, size_t len, struct tm *timestamp, readstat_error_handler error_handler, void *user_ctx) { readstat_error_t retval = READSTAT_OK; const char *p = data; const char *pe = p + len; const char *eof = pe; int cs; unsigned int temp_val = 0; #line 154 "src/stata/readstat_dta_parse_timestamp.c" { cs = (int)dta_timestamp_parse_start; } #line 159 "src/stata/readstat_dta_parse_timestamp.c" { int _klen; unsigned int _trans = 0; const char * _keys; const signed char * _acts; unsigned int _nacts; _resume: {} if ( p == pe && p != eof ) goto _out; if ( p == eof ) { if ( _dta_timestamp_parse_eof_trans[cs] > 0 ) { _trans = (unsigned int)_dta_timestamp_parse_eof_trans[cs] - 1; } } else { _keys = ( _dta_timestamp_parse_trans_keys + (_dta_timestamp_parse_key_offsets[cs])); _trans = (unsigned int)_dta_timestamp_parse_index_offsets[cs]; _klen = (int)_dta_timestamp_parse_single_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + _klen - 1; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _keys += _klen; _trans += (unsigned int)_klen; break; } _mid = _lower + ((_upper-_lower) >> 1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 1; else if ( ( (*( p))) > (*( _mid)) ) _lower = _mid + 1; else { _trans += (unsigned int)(_mid - _keys); goto _match; } } } _klen = (int)_dta_timestamp_parse_range_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + (_klen<<1) - 2; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _trans += (unsigned int)_klen; break; } _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( ( (*( p))) < (*( _mid)) ) 
_upper = _mid - 2; else if ( ( (*( p))) > (*( _mid + 1)) ) _lower = _mid + 2; else { _trans += (unsigned int)((_mid - _keys)>>1); break; } } } _match: {} } cs = (int)_dta_timestamp_parse_cond_targs[_trans]; if ( _dta_timestamp_parse_cond_actions[_trans] != 0 ) { _acts = ( _dta_timestamp_parse_actions + (_dta_timestamp_parse_cond_actions[_trans])); _nacts = (unsigned int)(*( _acts)); _acts += 1; while ( _nacts > 0 ) { switch ( (*( _acts)) ) { case 0: { { #line 20 "src/stata/readstat_dta_parse_timestamp.rl" temp_val = 10 * temp_val + ((( (*( p)))) - '0'); } #line 244 "src/stata/readstat_dta_parse_timestamp.c" break; } case 1: { { #line 24 "src/stata/readstat_dta_parse_timestamp.rl" temp_val = 0; } #line 253 "src/stata/readstat_dta_parse_timestamp.c" break; } case 2: { { #line 26 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mday = temp_val; } #line 262 "src/stata/readstat_dta_parse_timestamp.c" break; } case 3: { { #line 29 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 0; } #line 271 "src/stata/readstat_dta_parse_timestamp.c" break; } case 4: { { #line 30 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 1; } #line 280 "src/stata/readstat_dta_parse_timestamp.c" break; } case 5: { { #line 31 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 2; } #line 289 "src/stata/readstat_dta_parse_timestamp.c" break; } case 6: { { #line 32 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 3; } #line 298 "src/stata/readstat_dta_parse_timestamp.c" break; } case 7: { { #line 33 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 4; } #line 307 "src/stata/readstat_dta_parse_timestamp.c" break; } case 8: { { #line 34 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 5; } #line 316 "src/stata/readstat_dta_parse_timestamp.c" break; } case 9: { { #line 35 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 6; } #line 325 "src/stata/readstat_dta_parse_timestamp.c" break; } case 10: 
{ { #line 36 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 7; } #line 334 "src/stata/readstat_dta_parse_timestamp.c" break; } case 11: { { #line 37 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 8; } #line 343 "src/stata/readstat_dta_parse_timestamp.c" break; } case 12: { { #line 38 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 9; } #line 352 "src/stata/readstat_dta_parse_timestamp.c" break; } case 13: { { #line 39 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 10; } #line 361 "src/stata/readstat_dta_parse_timestamp.c" break; } case 14: { { #line 40 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_mon = 11; } #line 370 "src/stata/readstat_dta_parse_timestamp.c" break; } case 15: { { #line 42 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_year = temp_val - 1900; } #line 379 "src/stata/readstat_dta_parse_timestamp.c" break; } case 16: { { #line 44 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_hour = temp_val; } #line 388 "src/stata/readstat_dta_parse_timestamp.c" break; } case 17: { { #line 46 "src/stata/readstat_dta_parse_timestamp.rl" timestamp->tm_min = temp_val; } #line 397 "src/stata/readstat_dta_parse_timestamp.c" break; } } _nacts -= 1; _acts += 1; } } if ( p == eof ) { if ( cs >= 44 ) goto _out; } else { if ( cs != 0 ) { p += 1; goto _resume; } } _out: {} } #line 52 "src/stata/readstat_dta_parse_timestamp.rl" if (cs < #line 425 "src/stata/readstat_dta_parse_timestamp.c" 44 #line 54 "src/stata/readstat_dta_parse_timestamp.rl" || p != pe) { char error_buf[1024]; if (error_handler) { snprintf(error_buf, sizeof(error_buf), "Invalid timestamp string (length=%d): %.*s", (int)len, (int)len, data); error_handler(error_buf, user_ctx); } retval = READSTAT_ERROR_BAD_TIMESTAMP_STRING; } (void)dta_timestamp_parse_en_main; return retval; } ReadStat-1.1.7/src/stata/readstat_dta_parse_timestamp.h000066400000000000000000000002311410722155500232150ustar00rootroot00000000000000 
readstat_error_t dta_parse_timestamp(const char *data, size_t len, struct tm *timestamp, readstat_error_handler error_handler, void *user_ctx); ReadStat-1.1.7/src/stata/readstat_dta_parse_timestamp.rl000066400000000000000000000041701410722155500234110ustar00rootroot00000000000000 #include #include "../readstat.h" #include "readstat_dta_parse_timestamp.h" %%{ machine dta_timestamp_parse; write data nofinal noerror; }%% readstat_error_t dta_parse_timestamp(const char *data, size_t len, struct tm *timestamp, readstat_error_handler error_handler, void *user_ctx) { readstat_error_t retval = READSTAT_OK; const char *p = data; const char *pe = p + len; const char *eof = pe; int cs; unsigned int temp_val = 0; %%{ action incr_val { temp_val = 10 * temp_val + (fc - '0'); } integer = [0-9]+ >{ temp_val = 0; } $incr_val; day = integer %{ timestamp->tm_mday = temp_val; }; month = # with some German and Spanish variants thrown in ("Jan"i | "Ene"i) %{ timestamp->tm_mon = 0; } | ("Feb"i) %{ timestamp->tm_mon = 1; } | ("Mar"i) %{ timestamp->tm_mon = 2; } | ("Apr"i | "Abr"i) %{ timestamp->tm_mon = 3; } | ("May"i | "Mai"i) %{ timestamp->tm_mon = 4; } | ("Jun"i) %{ timestamp->tm_mon = 5; } | ("Jul"i) %{ timestamp->tm_mon = 6; } | ("Aug"i | "Ago"i) %{ timestamp->tm_mon = 7; } | ("Sep"i) %{ timestamp->tm_mon = 8; } | ("Oct"i | "Okt"i) %{ timestamp->tm_mon = 9; } | ("Nov"i) %{ timestamp->tm_mon = 10; } | ("Dec"i | "Dez"i | "Dic"i) %{ timestamp->tm_mon = 11; }; year = integer %{ timestamp->tm_year = temp_val - 1900; }; hour = integer %{ timestamp->tm_hour = temp_val; }; minute = integer %{ timestamp->tm_min = temp_val; }; main := " "? 
day " " month " " year " "+ hour ":" minute; write init; write exec; }%% if (cs < %%{ write first_final; }%%|| p != pe) { char error_buf[1024]; if (error_handler) { snprintf(error_buf, sizeof(error_buf), "Invalid timestamp string (length=%d): %.*s", (int)len, (int)len, data); error_handler(error_buf, user_ctx); } retval = READSTAT_ERROR_BAD_TIMESTAMP_STRING; } (void)dta_timestamp_parse_en_main; return retval; } ReadStat-1.1.7/src/stata/readstat_dta_read.c000066400000000000000000001206201410722155500207330ustar00rootroot00000000000000 #define _XOPEN_SOURCE 700 /* for strnlen */ #include #include #include #include #include #include #include #include "../readstat.h" #include "../readstat_bits.h" #include "../readstat_iconv.h" #include "../readstat_convert.h" #include "../readstat_malloc.h" #include "readstat_dta.h" #include "readstat_dta_parse_timestamp.h" #define MAX_VALUE_LABEL_LEN 32000 static readstat_error_t dta_update_progress(dta_ctx_t *ctx); static readstat_error_t dta_read_descriptors(dta_ctx_t *ctx); static readstat_error_t dta_read_tag(dta_ctx_t *ctx, const char *tag); static readstat_error_t dta_read_expansion_fields(dta_ctx_t *ctx); static readstat_error_t dta_update_progress(dta_ctx_t *ctx) { double progress = 0.0; if (ctx->row_limit > 0) progress = 1.0 * ctx->current_row / ctx->row_limit; if (ctx->handle.progress && ctx->handle.progress(progress, ctx->user_ctx) != READSTAT_HANDLER_OK) return READSTAT_ERROR_USER_ABORT; return READSTAT_OK; } static readstat_variable_t *dta_init_variable(dta_ctx_t *ctx, int i, int index_after_skipping, readstat_type_t type, size_t max_len) { readstat_variable_t *variable = calloc(1, sizeof(readstat_variable_t)); variable->type = type; variable->index = i; variable->index_after_skipping = index_after_skipping; variable->storage_width = max_len; readstat_convert(variable->name, sizeof(variable->name), &ctx->varlist[ctx->variable_name_len*i], strnlen(&ctx->varlist[ctx->variable_name_len*i], ctx->variable_name_len), 
ctx->converter); if (ctx->variable_labels[ctx->variable_labels_entry_len*i]) { readstat_convert(variable->label, sizeof(variable->label), &ctx->variable_labels[ctx->variable_labels_entry_len*i], strnlen(&ctx->variable_labels[ctx->variable_labels_entry_len*i], ctx->variable_labels_entry_len), ctx->converter); } if (ctx->fmtlist[ctx->fmtlist_entry_len*i]) { readstat_convert(variable->format, sizeof(variable->format), &ctx->fmtlist[ctx->fmtlist_entry_len*i], strnlen(&ctx->fmtlist[ctx->fmtlist_entry_len*i], ctx->fmtlist_entry_len), ctx->converter); if (variable->format[0] == '%') { if (variable->format[1] == '-') { variable->alignment = READSTAT_ALIGNMENT_LEFT; } else if (variable->format[1] == '~') { variable->alignment = READSTAT_ALIGNMENT_CENTER; } else { variable->alignment = READSTAT_ALIGNMENT_RIGHT; } } int display_width; if (sscanf(variable->format, "%%%ds", &display_width) == 1 || sscanf(variable->format, "%%-%ds", &display_width) == 1) { variable->display_width = display_width; } } return variable; } static readstat_error_t dta_read_chunk( dta_ctx_t *ctx, const char *start_tag, void *dst, size_t dst_len, const char *end_tag) { char *dst_buffer = (char *)dst; readstat_io_t *io = ctx->io; readstat_error_t retval = READSTAT_OK; if ((retval = dta_read_tag(ctx, start_tag)) != READSTAT_OK) goto cleanup; if (io->read(dst_buffer, dst_len, io->io_ctx) != dst_len) { retval = READSTAT_ERROR_READ; goto cleanup; } if ((retval = dta_read_tag(ctx, end_tag)) != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t dta_read_map(dta_ctx_t *ctx) { if (!ctx->file_is_xmlish) return READSTAT_OK; readstat_error_t retval = READSTAT_OK; uint64_t map_buffer[14]; if ((retval = dta_read_chunk(ctx, "", map_buffer, sizeof(map_buffer), "")) != READSTAT_OK) { goto cleanup; } ctx->data_offset = ctx->bswap ? byteswap8(map_buffer[9]) : map_buffer[9]; ctx->strls_offset = ctx->bswap ? byteswap8(map_buffer[10]) : map_buffer[10]; ctx->value_labels_offset = ctx->bswap ? 
byteswap8(map_buffer[11]) : map_buffer[11]; cleanup: return retval; } static readstat_error_t dta_read_descriptors(dta_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; size_t buffer_len = ctx->nvar * ctx->typlist_entry_len; unsigned char *buffer = NULL; int i; if (ctx->nvar && (buffer = readstat_malloc(buffer_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if ((retval = dta_read_chunk(ctx, "", buffer, buffer_len, "")) != READSTAT_OK) goto cleanup; if (ctx->typlist_entry_len == 1) { for (i=0; invar; i++) { ctx->typlist[i] = buffer[i]; } } else if (ctx->typlist_entry_len == 2) { memcpy(ctx->typlist, buffer, buffer_len); if (ctx->bswap) { for (i=0; invar; i++) { ctx->typlist[i] = byteswap2(ctx->typlist[i]); } } } if ((retval = dta_read_chunk(ctx, "", ctx->varlist, ctx->varlist_len, "")) != READSTAT_OK) goto cleanup; if ((retval = dta_read_chunk(ctx, "", ctx->srtlist, ctx->srtlist_len, "")) != READSTAT_OK) goto cleanup; if ((retval = dta_read_chunk(ctx, "", ctx->fmtlist, ctx->fmtlist_len, "")) != READSTAT_OK) goto cleanup; if ((retval = dta_read_chunk(ctx, "", ctx->lbllist, ctx->lbllist_len, "")) != READSTAT_OK) goto cleanup; if ((retval = dta_read_chunk(ctx, "", ctx->variable_labels, ctx->variable_labels_len, "")) != READSTAT_OK) goto cleanup; cleanup: if (buffer) free(buffer); return retval; } static readstat_error_t dta_read_expansion_fields(dta_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; char *buffer = NULL; if (ctx->expansion_len_len == 0) return READSTAT_OK; if (ctx->file_is_xmlish && !ctx->handle.note) { if (io->seek(ctx->data_offset, READSTAT_SEEK_SET, io->io_ctx) == -1) { if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "Failed to seek to data section (offset=%" PRId64 ")", ctx->data_offset); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } return READSTAT_ERROR_SEEK; } return READSTAT_OK; } retval = dta_read_tag(ctx, ""); if (retval != READSTAT_OK) goto cleanup; while (1) { 
size_t len; char data_type; if (ctx->file_is_xmlish) { char start[4]; if (io->read(start, sizeof(start), io->io_ctx) != sizeof(start)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (memcmp(start, ""); if (retval != READSTAT_OK) goto cleanup; break; } else if (memcmp(start, "", sizeof(start)) != 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } data_type = 1; } else { if (io->read(&data_type, 1, io->io_ctx) != 1) { retval = READSTAT_ERROR_READ; goto cleanup; } } if (ctx->expansion_len_len == 2) { uint16_t len16; if (io->read(&len16, sizeof(uint16_t), io->io_ctx) != sizeof(uint16_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } len = ctx->bswap ? byteswap2(len16) : len16; } else { uint32_t len32; if (io->read(&len32, sizeof(uint32_t), io->io_ctx) != sizeof(uint32_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } len = ctx->bswap ? byteswap4(len32) : len32; } if (data_type == 0 && len == 0) break; if (data_type != 1 || len > (1<<20)) { retval = READSTAT_ERROR_NOTE_IS_TOO_LONG; goto cleanup; } if (ctx->handle.note && len >= 2 * ctx->ch_metadata_len) { if ((buffer = readstat_realloc(buffer, len + 1)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } buffer[len] = '\0'; if (io->read(buffer, len, io->io_ctx) != len) { retval = READSTAT_ERROR_READ; goto cleanup; } int index = 0; if (strncmp(&buffer[0], "_dta", 4) == 0 && sscanf(&buffer[ctx->ch_metadata_len], "note%d", &index) == 1) { if (ctx->handle.note(index, &buffer[2*ctx->ch_metadata_len], ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } } else { if (io->seek(len, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } } retval = dta_read_tag(ctx, ""); if (retval != READSTAT_OK) goto cleanup; } cleanup: if (buffer) free(buffer); return retval; } static readstat_error_t dta_read_tag(dta_ctx_t *ctx, const char *tag) { readstat_error_t retval = READSTAT_OK; if (ctx->initialized && !ctx->file_is_xmlish) return retval; char 
buffer[256]; size_t len = strlen(tag); if (ctx->io->read(buffer, len, ctx->io->io_ctx) != len) { retval = READSTAT_ERROR_READ; goto cleanup; } if (strncmp(buffer, tag, len) != 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } cleanup: return retval; } static int dta_compare_strls(const void *elem1, const void *elem2) { const dta_strl_t *key = (const dta_strl_t *)elem1; const dta_strl_t *target = *(const dta_strl_t **)elem2; if (key->o == target->o) return key->v - target->v; return key->o - target->o; } static dta_strl_t dta_interpret_strl_vo_bytes(dta_ctx_t *ctx, const unsigned char *vo_bytes) { dta_strl_t strl = {0}; if (ctx->strl_v_len == 2) { if (ctx->endianness == READSTAT_ENDIAN_BIG) { strl.v = (vo_bytes[0] << 8) + vo_bytes[1]; strl.o = (((uint64_t)vo_bytes[2] << 40) + ((uint64_t)vo_bytes[3] << 32) + ((uint64_t)vo_bytes[4] << 24) + (vo_bytes[5] << 16) + (vo_bytes[6] << 8) + vo_bytes[7]); } else { strl.v = vo_bytes[0] + (vo_bytes[1] << 8); strl.o = (vo_bytes[2] + (vo_bytes[3] << 8) + (vo_bytes[4] << 16) + ((uint64_t)vo_bytes[5] << 24) + ((uint64_t)vo_bytes[6] << 32) + ((uint64_t)vo_bytes[7] << 40)); } } else if (ctx->strl_v_len == 4) { uint32_t v, o; memcpy(&v, &vo_bytes[0], sizeof(uint32_t)); memcpy(&o, &vo_bytes[4], sizeof(uint32_t)); strl.v = ctx->bswap ? byteswap4(v) : v; strl.o = ctx->bswap ? byteswap4(o) : o; } return strl; } static readstat_error_t dta_117_read_strl(dta_ctx_t *ctx, dta_strl_t *strl) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; dta_117_strl_header_t header; if (io->read(&header, SIZEOF_DTA_117_STRL_HEADER_T, io->io_ctx) != SIZEOF_DTA_117_STRL_HEADER_T) { retval = READSTAT_ERROR_READ; goto cleanup; } strl->v = ctx->bswap ? byteswap4(header.v) : header.v; strl->o = ctx->bswap ? byteswap4(header.o) : header.o; strl->type = header.type; strl->len = ctx->bswap ? 
byteswap4(header.len) : header.len; cleanup: return retval; } static readstat_error_t dta_118_read_strl(dta_ctx_t *ctx, dta_strl_t *strl) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; dta_118_strl_header_t header; if (io->read(&header, SIZEOF_DTA_118_STRL_HEADER_T, io->io_ctx) != SIZEOF_DTA_118_STRL_HEADER_T) { retval = READSTAT_ERROR_READ; goto cleanup; } strl->v = ctx->bswap ? byteswap4(header.v) : header.v; strl->o = ctx->bswap ? byteswap8(header.o) : header.o; strl->type = header.type; strl->len = ctx->bswap ? byteswap4(header.len) : header.len; cleanup: return retval; } static readstat_error_t dta_read_strl(dta_ctx_t *ctx, dta_strl_t *strl) { if (ctx->strl_o_len > 4) { return dta_118_read_strl(ctx, strl); } return dta_117_read_strl(ctx, strl); } static readstat_error_t dta_read_strls(dta_ctx_t *ctx) { if (!ctx->file_is_xmlish) return READSTAT_OK; readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; if (io->seek(ctx->strls_offset, READSTAT_SEEK_SET, io->io_ctx) == -1) { if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "Failed to seek to strls section (offset=%" PRId64 ")", ctx->strls_offset); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } retval = READSTAT_ERROR_SEEK; goto cleanup; } retval = dta_read_tag(ctx, ""); if (retval != READSTAT_OK) goto cleanup; ctx->strls_capacity = 100; ctx->strls = readstat_malloc(ctx->strls_capacity * sizeof(dta_strl_t *)); while (1) { char tag[3]; if (io->read(tag, sizeof(tag), io->io_ctx) != sizeof(tag)) { retval = READSTAT_ERROR_READ; goto cleanup; } if (memcmp(tag, "GSO", sizeof(tag)) == 0) { dta_strl_t strl; retval = dta_read_strl(ctx, &strl); if (retval != READSTAT_OK) goto cleanup; if (strl.type != DTA_GSO_TYPE_ASCII) continue; if (ctx->strls_count == ctx->strls_capacity) { ctx->strls_capacity *= 2; if ((ctx->strls = readstat_realloc(ctx->strls, sizeof(dta_strl_t *) * ctx->strls_capacity)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } } 
dta_strl_t *strl_ptr = readstat_malloc(sizeof(dta_strl_t) + strl.len); if (strl_ptr == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } memcpy(strl_ptr, &strl, sizeof(dta_strl_t)); ctx->strls[ctx->strls_count++] = strl_ptr; if (io->read(&strl_ptr->data[0], strl_ptr->len, io->io_ctx) != strl_ptr->len) { retval = READSTAT_ERROR_READ; goto cleanup; } } else if (memcmp(tag, ""); if (retval != READSTAT_OK) goto cleanup; break; } else { retval = READSTAT_ERROR_PARSE; goto cleanup; } } cleanup: return retval; } static readstat_value_t dta_interpret_int8_bytes(dta_ctx_t *ctx, const void *buf) { readstat_value_t value = { .type = READSTAT_TYPE_INT8 }; int8_t byte = 0; memcpy(&byte, buf, sizeof(int8_t)); if (ctx->machine_is_twos_complement) { byte = ones_to_twos_complement1(byte); } if (byte > ctx->max_int8) { if (ctx->supports_tagged_missing && byte > DTA_113_MISSING_INT8) { value.tag = 'a' + (byte - DTA_113_MISSING_INT8_A); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } value.v.i8_value = byte; return value; } static readstat_value_t dta_interpret_int16_bytes(dta_ctx_t *ctx, const void *buf) { readstat_value_t value = { .type = READSTAT_TYPE_INT16 }; int16_t num = 0; memcpy(&num, buf, sizeof(int16_t)); if (ctx->bswap) { num = byteswap2(num); } if (ctx->machine_is_twos_complement) { num = ones_to_twos_complement2(num); } if (num > ctx->max_int16) { if (ctx->supports_tagged_missing && num > DTA_113_MISSING_INT16) { value.tag = 'a' + (num - DTA_113_MISSING_INT16_A); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } value.v.i16_value = num; return value; } static readstat_value_t dta_interpret_int32_bytes(dta_ctx_t *ctx, const void *buf) { readstat_value_t value = { .type = READSTAT_TYPE_INT32 }; int32_t num = 0; memcpy(&num, buf, sizeof(int32_t)); if (ctx->bswap) { num = byteswap4(num); } if (ctx->machine_is_twos_complement) { num = ones_to_twos_complement4(num); } if (num > ctx->max_int32) { if (ctx->supports_tagged_missing 
&& num > DTA_113_MISSING_INT32) { value.tag = 'a' + (num - DTA_113_MISSING_INT32_A); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } value.v.i32_value = num; return value; } static readstat_value_t dta_interpret_float_bytes(dta_ctx_t *ctx, const void *buf) { readstat_value_t value = { .type = READSTAT_TYPE_FLOAT }; float f_num = NAN; int32_t num = 0; memcpy(&num, buf, sizeof(int32_t)); if (ctx->bswap) { num = byteswap4(num); } if (num > ctx->max_float) { if (ctx->supports_tagged_missing && num > DTA_113_MISSING_FLOAT) { value.tag = 'a' + ((num - DTA_113_MISSING_FLOAT_A) >> 11); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } else { memcpy(&f_num, &num, sizeof(int32_t)); } value.v.float_value = f_num; return value; } static readstat_value_t dta_interpret_double_bytes(dta_ctx_t *ctx, const void *buf) { readstat_value_t value = { .type = READSTAT_TYPE_DOUBLE }; double d_num = NAN; int64_t num = 0; memcpy(&num, buf, sizeof(int64_t)); if (ctx->bswap) { num = byteswap8(num); } if (num > ctx->max_double) { if (ctx->supports_tagged_missing && num > DTA_113_MISSING_DOUBLE) { value.tag = 'a' + ((num - DTA_113_MISSING_DOUBLE_A) >> 40); value.is_tagged_missing = 1; } else { value.is_system_missing = 1; } } else { memcpy(&d_num, &num, sizeof(int64_t)); } value.v.double_value = d_num; return value; } static readstat_error_t dta_handle_row(const unsigned char *buf, dta_ctx_t *ctx) { char str_buf[2048]; int j; readstat_off_t offset = 0; readstat_error_t retval = READSTAT_OK; for (j=0; jnvar; j++) { size_t max_len; readstat_value_t value = { { 0 } }; retval = dta_type_info(ctx->typlist[j], ctx, &max_len, &value.type); if (retval != READSTAT_OK) goto cleanup; if (ctx->variables[j]->skip) { offset += max_len; continue; } if (offset + max_len > ctx->record_len) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if (value.type == READSTAT_TYPE_STRING) { if (max_len == 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } size_t str_len = 
strnlen((const char *)&buf[offset], max_len); retval = readstat_convert(str_buf, sizeof(str_buf), (const char *)&buf[offset], str_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; value.v.string_value = str_buf; } else if (value.type == READSTAT_TYPE_STRING_REF) { dta_strl_t key = dta_interpret_strl_vo_bytes(ctx, &buf[offset]); dta_strl_t **found = bsearch(&key, ctx->strls, ctx->strls_count, sizeof(dta_strl_t *), &dta_compare_strls); if (found) { value.v.string_value = (*found)->data; } value.type = READSTAT_TYPE_STRING; } else if (value.type == READSTAT_TYPE_INT8) { value = dta_interpret_int8_bytes(ctx, &buf[offset]); } else if (value.type == READSTAT_TYPE_INT16) { value = dta_interpret_int16_bytes(ctx, &buf[offset]); } else if (value.type == READSTAT_TYPE_INT32) { value = dta_interpret_int32_bytes(ctx, &buf[offset]); } else if (value.type == READSTAT_TYPE_FLOAT) { value = dta_interpret_float_bytes(ctx, &buf[offset]); } else if (value.type == READSTAT_TYPE_DOUBLE) { value = dta_interpret_double_bytes(ctx, &buf[offset]); } if (ctx->handle.value(ctx->current_row, ctx->variables[j], value, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } offset += max_len; } cleanup: return retval; } static readstat_error_t dta_handle_rows(dta_ctx_t *ctx) { readstat_io_t *io = ctx->io; unsigned char *buf = NULL; int i; readstat_error_t retval = READSTAT_OK; if (ctx->record_len && (buf = readstat_malloc(ctx->record_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (ctx->row_offset) { if (io->seek(ctx->record_len * ctx->row_offset, READSTAT_SEEK_CUR, io->io_ctx) == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } } for (i=0; irow_limit; i++) { if (io->read(buf, ctx->record_len, io->io_ctx) != ctx->record_len) { retval = READSTAT_ERROR_READ; goto cleanup; } if ((retval = dta_handle_row(buf, ctx)) != READSTAT_OK) { goto cleanup; } ctx->current_row++; if ((retval = dta_update_progress(ctx)) != READSTAT_OK) { 
goto cleanup; } } if (ctx->row_limit < ctx->nobs - ctx->row_offset) { if (io->seek(ctx->record_len * (ctx->nobs - ctx->row_offset - ctx->row_limit), READSTAT_SEEK_CUR, io->io_ctx) == -1) retval = READSTAT_ERROR_SEEK; } cleanup: if (buf) free(buf); return retval; } static readstat_error_t dta_read_data(dta_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; if (!ctx->handle.value) { return READSTAT_OK; } if (io->seek(ctx->data_offset, READSTAT_SEEK_SET, io->io_ctx) == -1) { if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "Failed to seek to data section (offset=%" PRId64 ")", ctx->data_offset); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } retval = READSTAT_ERROR_SEEK; goto cleanup; } if ((retval = dta_read_tag(ctx, "")) != READSTAT_OK) goto cleanup; if ((retval = dta_update_progress(ctx)) != READSTAT_OK) goto cleanup; if ((retval = dta_handle_rows(ctx)) != READSTAT_OK) goto cleanup; if ((retval = dta_read_tag(ctx, "")) != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t dta_read_header(dta_ctx_t *ctx, dta_header_t *header) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = ctx->io; int bswap = 0; if (io->read(header, sizeof(dta_header_t), io->io_ctx) != sizeof(dta_header_t)) { retval = READSTAT_ERROR_READ; goto cleanup; } bswap = (header->byteorder == DTA_LOHI) ^ machine_is_little_endian(); header->nvar = bswap ? byteswap2(header->nvar) : header->nvar; header->nobs = bswap ? byteswap4(header->nobs) : header->nobs; cleanup: return retval; } static readstat_error_t dta_read_xmlish_header(dta_ctx_t *ctx, dta_header64_t *header) { readstat_error_t retval = READSTAT_OK; if ((retval = dta_read_tag(ctx, "")) != READSTAT_OK) { goto cleanup; } if ((retval = dta_read_tag(ctx, "
")) != READSTAT_OK) { goto cleanup; } char ds_format[3]; if ((retval = dta_read_chunk(ctx, "", ds_format, sizeof(ds_format), "")) != READSTAT_OK) { goto cleanup; } header->ds_format = 100 * (ds_format[0] - '0') + 10 * (ds_format[1] - '0') + (ds_format[2] - '0'); char byteorder[3]; int byteswap = 0; if ((retval = dta_read_chunk(ctx, "", byteorder, sizeof(byteorder), "")) != READSTAT_OK) { goto cleanup; } if (strncmp(byteorder, "MSF", 3) == 0) { header->byteorder = DTA_HILO; } else if (strncmp(byteorder, "LSF", 3) == 0) { header->byteorder = DTA_LOHI; } else { retval = READSTAT_ERROR_PARSE; goto cleanup; } byteswap = (header->byteorder == DTA_LOHI) ^ machine_is_little_endian(); if (header->ds_format >= 119) { uint32_t nvar; if ((retval = dta_read_chunk(ctx, "", &nvar, sizeof(uint32_t), "")) != READSTAT_OK) { goto cleanup; } header->nvar = byteswap ? byteswap4(nvar) : nvar; } else { uint16_t nvar; if ((retval = dta_read_chunk(ctx, "", &nvar, sizeof(uint16_t), "")) != READSTAT_OK) { goto cleanup; } header->nvar = byteswap ? byteswap2(nvar) : nvar; } if (header->ds_format >= 118) { uint64_t nobs; if ((retval = dta_read_chunk(ctx, "", &nobs, sizeof(uint64_t), "")) != READSTAT_OK) { goto cleanup; } header->nobs = byteswap ? byteswap8(nobs) : nobs; } else { uint32_t nobs; if ((retval = dta_read_chunk(ctx, "", &nobs, sizeof(uint32_t), "")) != READSTAT_OK) { goto cleanup; } header->nobs = byteswap ? 
byteswap4(nobs) : nobs; } cleanup: return retval; } static readstat_error_t dta_read_label_and_timestamp(dta_ctx_t *ctx) { readstat_io_t *io = ctx->io; readstat_error_t retval = READSTAT_OK; char *data_label_buffer = NULL; char *timestamp_buffer = NULL; uint16_t label_len = 0; unsigned char timestamp_len = 0; char last_data_label_char = 0; struct tm timestamp = { .tm_isdst = -1 }; if (ctx->file_is_xmlish) { if ((retval = dta_read_tag(ctx, "")) != READSTAT_OK) { goto cleanup; } if ((retval = dta_read_tag(ctx, "")) != READSTAT_OK) { goto cleanup; } if (io->read(×tamp_len, 1, io->io_ctx) != 1) { retval = READSTAT_ERROR_READ; goto cleanup; } } else { timestamp_len = ctx->timestamp_len; } if (timestamp_len) { timestamp_buffer = readstat_malloc(timestamp_len); if (io->read(timestamp_buffer, timestamp_len, io->io_ctx) != timestamp_len) { retval = READSTAT_ERROR_READ; goto cleanup; } if (!ctx->file_is_xmlish) timestamp_len--; if (timestamp_buffer[0]) { if (timestamp_buffer[timestamp_len-1] == '\0' && last_data_label_char != '\0') { /* Stupid hack for miswritten files with off-by-one timestamp, DTA 114 era? 
*/ memmove(timestamp_buffer+1, timestamp_buffer, timestamp_len-1); timestamp_buffer[0] = last_data_label_char; } if (dta_parse_timestamp(timestamp_buffer, timestamp_len, ×tamp, ctx->handle.error, ctx->user_ctx) == READSTAT_OK) { ctx->timestamp = mktime(×tamp); } } } if ((retval = dta_read_tag(ctx, "")) != READSTAT_OK) { goto cleanup; } cleanup: if (data_label_buffer) free(data_label_buffer); if (timestamp_buffer) free(timestamp_buffer); return retval; } static readstat_error_t dta_handle_variables(dta_ctx_t *ctx) { if (!ctx->handle.variable) return READSTAT_OK; readstat_error_t retval = READSTAT_OK; int i; int index_after_skipping = 0; for (i=0; invar; i++) { size_t max_len; readstat_type_t type; retval = dta_type_info(ctx->typlist[i], ctx, &max_len, &type); if (retval != READSTAT_OK) goto cleanup; if (type == READSTAT_TYPE_STRING) max_len++; /* might append NULL */ if (type == READSTAT_TYPE_STRING_REF) { type = READSTAT_TYPE_STRING; max_len = 0; } ctx->variables[i] = dta_init_variable(ctx, i, index_after_skipping, type, max_len); const char *value_labels = NULL; if (ctx->lbllist[ctx->lbllist_entry_len*i]) value_labels = &ctx->lbllist[ctx->lbllist_entry_len*i]; int cb_retval = ctx->handle.variable(i, ctx->variables[i], value_labels, ctx->user_ctx); if (cb_retval == READSTAT_HANDLER_ABORT) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } if (cb_retval == READSTAT_HANDLER_SKIP_VARIABLE) { ctx->variables[i]->skip = 1; } else { index_after_skipping++; } } cleanup: return retval; } static readstat_error_t dta_handle_value_labels(dta_ctx_t *ctx) { readstat_io_t *io = ctx->io; readstat_error_t retval = READSTAT_OK; char *table_buffer = NULL; char *utf8_buffer = NULL; if (io->seek(ctx->value_labels_offset, READSTAT_SEEK_SET, io->io_ctx) == -1) { if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "Failed to seek to value labels section (offset=%" PRId64 ")", ctx->value_labels_offset); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } retval = 
READSTAT_ERROR_SEEK; goto cleanup; } if ((retval = dta_read_tag(ctx, "")) != READSTAT_OK) { goto cleanup; } if (!ctx->handle.value_label) { return READSTAT_OK; } while (1) { size_t len = 0; char labname[129]; uint32_t i = 0, n = 0; if (ctx->value_label_table_len_len == 2) { int16_t table_header_len; if (io->read(&table_header_len, sizeof(int16_t), io->io_ctx) < sizeof(int16_t)) break; len = table_header_len; if (ctx->bswap) len = byteswap2(table_header_len); n = len / 8; } else { if (dta_read_tag(ctx, "") != READSTAT_OK) { break; } int32_t table_header_len; if (io->read(&table_header_len, sizeof(int32_t), io->io_ctx) < sizeof(int32_t)) break; len = table_header_len; if (ctx->bswap) len = byteswap4(table_header_len); } if (io->read(labname, ctx->value_label_table_labname_len, io->io_ctx) < ctx->value_label_table_labname_len) break; if (io->seek(ctx->value_label_table_padding_len, READSTAT_SEEK_CUR, io->io_ctx) == -1) break; if ((table_buffer = readstat_realloc(table_buffer, len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (io->read(table_buffer, len, io->io_ctx) < len) { break; } if (ctx->value_label_table_len_len == 2) { for (i=0; iconverter); if (retval != READSTAT_OK) goto cleanup; if (label_buf[0] && ctx->handle.value_label(labname, value, label_buf, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } } else if (len >= 8) { if ((retval = dta_read_tag(ctx, "")) != READSTAT_OK) { goto cleanup; } n = *(uint32_t *)table_buffer; uint32_t txtlen = *((uint32_t *)table_buffer+1); if (ctx->bswap) { n = byteswap4(n); txtlen = byteswap4(txtlen); } if (txtlen > len - 8 || n > (len - 8 - txtlen) / 8) { break; } uint32_t *off = (uint32_t *)table_buffer+2; uint32_t *val = (uint32_t *)table_buffer+2+n; char *txt = &table_buffer[8LL*n+8]; size_t utf8_buffer_len = 4*txtlen+1; if (txtlen > MAX_VALUE_LABEL_LEN+1) utf8_buffer_len = 4*MAX_VALUE_LABEL_LEN+1; utf8_buffer = realloc(utf8_buffer, utf8_buffer_len); /* Much 
bigger than we need but whatever */ if (utf8_buffer == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (ctx->bswap) { for (i=0; i= txtlen) { retval = READSTAT_ERROR_PARSE; goto cleanup; } readstat_value_t value = dta_interpret_int32_bytes(ctx, &val[i]); size_t max_label_len = txtlen - off[i]; if (max_label_len > MAX_VALUE_LABEL_LEN) max_label_len = MAX_VALUE_LABEL_LEN; size_t label_len = strnlen(&txt[off[i]], max_label_len); retval = readstat_convert(utf8_buffer, utf8_buffer_len, &txt[off[i]], label_len, ctx->converter); if (retval != READSTAT_OK) goto cleanup; if (ctx->handle.value_label(labname, value, utf8_buffer, ctx->user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } } } cleanup: if (table_buffer) free(table_buffer); if (utf8_buffer) free(utf8_buffer); return retval; } readstat_error_t readstat_parse_dta(readstat_parser_t *parser, const char *path, void *user_ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = parser->io; int i; dta_ctx_t *ctx; size_t file_size = 0; ctx = dta_ctx_alloc(io); if (io->open(path, io->io_ctx) == -1) { retval = READSTAT_ERROR_OPEN; goto cleanup; } char magic[4]; if (io->read(magic, 4, io->io_ctx) != 4) { retval = READSTAT_ERROR_READ; goto cleanup; } file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx); if (file_size == -1) { if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "Failed to seek to end of file"); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } retval = READSTAT_ERROR_SEEK; goto cleanup; } if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) { if (ctx->handle.error) { snprintf(ctx->error_buf, sizeof(ctx->error_buf), "Failed to seek to start of file"); ctx->handle.error(ctx->error_buf, ctx->user_ctx); } retval = READSTAT_ERROR_SEEK; goto cleanup; } if (strncmp(magic, "input_encoding, parser->output_encoding); } else { dta_header_t header; if ((retval = dta_read_header(ctx, &header)) != READSTAT_OK) { goto cleanup; } retval = 
dta_ctx_init(ctx, header.nvar, header.nobs, header.byteorder, header.ds_format, parser->input_encoding, parser->output_encoding); } if (retval != READSTAT_OK) { goto cleanup; } ctx->user_ctx = user_ctx; ctx->file_size = file_size; ctx->handle = parser->handlers; if (parser->row_offset > 0) ctx->row_offset = parser->row_offset; int64_t nobs_after_skipping = ctx->nobs - ctx->row_offset; if (nobs_after_skipping < 0) { nobs_after_skipping = 0; ctx->row_offset = ctx->nobs; } ctx->row_limit = nobs_after_skipping; if (parser->row_limit > 0 && parser->row_limit < nobs_after_skipping) ctx->row_limit = parser->row_limit; retval = dta_update_progress(ctx); if (retval != READSTAT_OK) goto cleanup; if ((retval = dta_read_label_and_timestamp(ctx)) != READSTAT_OK) goto cleanup; if ((retval = dta_read_tag(ctx, "
")) != READSTAT_OK) { goto cleanup; } if (ctx->handle.metadata) { readstat_metadata_t metadata = { .row_count = ctx->row_limit, .var_count = ctx->nvar, .file_label = ctx->data_label, .creation_time = ctx->timestamp, .modified_time = ctx->timestamp, .file_format_version = ctx->ds_format, .is64bit = ctx->ds_format >= 118, .endianness = ctx->endianness }; if (ctx->handle.metadata(&metadata, user_ctx) != READSTAT_HANDLER_OK) { retval = READSTAT_ERROR_USER_ABORT; goto cleanup; } } if ((retval = dta_read_map(ctx)) != READSTAT_OK) { retval = READSTAT_ERROR_READ; goto cleanup; } if ((retval = dta_read_descriptors(ctx)) != READSTAT_OK) { goto cleanup; } for (i=0; invar; i++) { size_t max_len; if ((retval = dta_type_info(ctx->typlist[i], ctx, &max_len, NULL)) != READSTAT_OK) goto cleanup; ctx->record_len += max_len; } if ((ctx->nvar > 0 || ctx->nobs > 0) && ctx->record_len == 0) { retval = READSTAT_ERROR_PARSE; goto cleanup; } if ((retval = dta_handle_variables(ctx)) != READSTAT_OK) goto cleanup; if ((retval = dta_read_expansion_fields(ctx)) != READSTAT_OK) goto cleanup; if (!ctx->file_is_xmlish) { ctx->data_offset = io->seek(0, READSTAT_SEEK_CUR, io->io_ctx); if (ctx->data_offset == -1) { retval = READSTAT_ERROR_SEEK; goto cleanup; } ctx->value_labels_offset = ctx->data_offset + ctx->record_len * ctx->nobs; } if ((retval = dta_read_strls(ctx)) != READSTAT_OK) goto cleanup; if ((retval = dta_read_data(ctx)) != READSTAT_OK) goto cleanup; if ((retval = dta_handle_value_labels(ctx)) != READSTAT_OK) goto cleanup; cleanup: io->close(io->io_ctx); if (ctx) dta_ctx_free(ctx); return retval; } ReadStat-1.1.7/src/stata/readstat_dta_write.c000066400000000000000000001417641410722155500211660ustar00rootroot00000000000000 #include #include #include #include #include #include #include "../readstat.h" #include "../readstat_bits.h" #include "../readstat_iconv.h" #include "../readstat_writer.h" #include "readstat_dta.h" #define DTA_DEFAULT_DISPLAY_WIDTH_BYTE 8 #define 
DTA_DEFAULT_DISPLAY_WIDTH_INT16 8 #define DTA_DEFAULT_DISPLAY_WIDTH_INT32 12 #define DTA_DEFAULT_DISPLAY_WIDTH_FLOAT 9 #define DTA_DEFAULT_DISPLAY_WIDTH_DOUBLE 10 #define DTA_DEFAULT_DISPLAY_WIDTH_STRING 9 #define DTA_FILE_VERSION_MIN 104 #define DTA_FILE_VERSION_MAX 119 #define DTA_FILE_VERSION_DEFAULT 118 #define DTA_OLD_MAX_WIDTH 128 #define DTA_111_MAX_WIDTH 244 #define DTA_117_MAX_WIDTH 2045 #define DTA_OLD_MAX_NAME_LEN 9 #define DTA_110_MAX_NAME_LEN 33 #define DTA_118_MAX_NAME_LEN 129 static readstat_error_t dta_113_write_missing_numeric(void *row, const readstat_variable_t *var); static readstat_error_t dta_write_tag(readstat_writer_t *writer, dta_ctx_t *ctx, const char *tag) { if (!ctx->file_is_xmlish) return READSTAT_OK; return readstat_write_string(writer, tag); } static readstat_error_t dta_write_chunk(readstat_writer_t *writer, dta_ctx_t *ctx, const char *start_tag, const void *bytes, size_t len, const char *end_tag) { readstat_error_t error = READSTAT_OK; if ((error = dta_write_tag(writer, ctx, start_tag)) != READSTAT_OK) goto cleanup; if ((error = readstat_write_bytes(writer, bytes, len)) != READSTAT_OK) goto cleanup; if ((error = dta_write_tag(writer, ctx, end_tag)) != READSTAT_OK) goto cleanup; cleanup: return error; } static readstat_error_t dta_emit_header_data_label(readstat_writer_t *writer, dta_ctx_t *ctx) { readstat_error_t error = READSTAT_OK; char *data_label = NULL; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; cleanup: if (data_label) free(data_label); return error; } static readstat_error_t dta_emit_header_time_stamp(readstat_writer_t *writer, dta_ctx_t *ctx) { if (!ctx->timestamp_len) return READSTAT_OK; readstat_error_t error = READSTAT_OK; time_t now = writer->timestamp; struct tm *time_s = localtime(&now); char *timestamp = calloc(1, ctx->timestamp_len); /* There are locale/portability issues with strftime so hack something up */ char months[][4] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", 
"Sep", "Oct", "Nov", "Dec" }; if (!time_s) { error = READSTAT_ERROR_BAD_TIMESTAMP_VALUE; goto cleanup; } if (!timestamp) { error = READSTAT_ERROR_MALLOC; goto cleanup; } uint8_t actual_timestamp_len = snprintf(timestamp, ctx->timestamp_len, "%02d %3s %04d %02d:%02d", time_s->tm_mday, months[time_s->tm_mon], time_s->tm_year + 1900, time_s->tm_hour, time_s->tm_min); if (actual_timestamp_len == 0) { error = READSTAT_ERROR_WRITE; goto cleanup; } if (ctx->file_is_xmlish) { if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; if ((error = readstat_write_bytes(writer, &actual_timestamp_len, sizeof(uint8_t))) != READSTAT_OK) goto cleanup; if ((error = readstat_write_bytes(writer, timestamp, actual_timestamp_len)) != READSTAT_OK) goto cleanup; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; } else { error = readstat_write_bytes(writer, timestamp, ctx->timestamp_len); } cleanup: free(timestamp); return error; } static readstat_error_t dta_111_typecode_for_variable(readstat_variable_t *r_variable, uint16_t *out_typecode) { readstat_error_t retval = READSTAT_OK; size_t max_len = r_variable->storage_width; uint16_t typecode = 0; switch (r_variable->type) { case READSTAT_TYPE_INT8: typecode = DTA_111_TYPE_CODE_INT8; break; case READSTAT_TYPE_INT16: typecode = DTA_111_TYPE_CODE_INT16; break; case READSTAT_TYPE_INT32: typecode = DTA_111_TYPE_CODE_INT32; break; case READSTAT_TYPE_FLOAT: typecode = DTA_111_TYPE_CODE_FLOAT; break; case READSTAT_TYPE_DOUBLE: typecode = DTA_111_TYPE_CODE_DOUBLE; break; case READSTAT_TYPE_STRING: typecode = max_len; break; case READSTAT_TYPE_STRING_REF: retval = READSTAT_ERROR_STRING_REFS_NOT_SUPPORTED; break; } if (out_typecode && retval == READSTAT_OK) *out_typecode = typecode; return retval; } static readstat_error_t dta_117_typecode_for_variable(readstat_variable_t *r_variable, uint16_t *out_typecode) { readstat_error_t retval = READSTAT_OK; size_t max_len = r_variable->storage_width; uint16_t 
typecode = 0; switch (r_variable->type) { case READSTAT_TYPE_INT8: typecode = DTA_117_TYPE_CODE_INT8; break; case READSTAT_TYPE_INT16: typecode = DTA_117_TYPE_CODE_INT16; break; case READSTAT_TYPE_INT32: typecode = DTA_117_TYPE_CODE_INT32; break; case READSTAT_TYPE_FLOAT: typecode = DTA_117_TYPE_CODE_FLOAT; break; case READSTAT_TYPE_DOUBLE: typecode = DTA_117_TYPE_CODE_DOUBLE; break; case READSTAT_TYPE_STRING: typecode = max_len; break; case READSTAT_TYPE_STRING_REF: typecode = DTA_117_TYPE_CODE_STRL; break; } if (out_typecode) *out_typecode = typecode; return retval; } static readstat_error_t dta_old_typecode_for_variable(readstat_variable_t *r_variable, uint16_t *out_typecode) { readstat_error_t retval = READSTAT_OK; size_t max_len = r_variable->storage_width; uint16_t typecode = 0; switch (r_variable->type) { case READSTAT_TYPE_INT8: typecode = DTA_OLD_TYPE_CODE_INT8; break; case READSTAT_TYPE_INT16: typecode = DTA_OLD_TYPE_CODE_INT16; break; case READSTAT_TYPE_INT32: typecode = DTA_OLD_TYPE_CODE_INT32; break; case READSTAT_TYPE_FLOAT: typecode = DTA_OLD_TYPE_CODE_FLOAT; break; case READSTAT_TYPE_DOUBLE: typecode = DTA_OLD_TYPE_CODE_DOUBLE; break; case READSTAT_TYPE_STRING: typecode = max_len + 0x7F; break; case READSTAT_TYPE_STRING_REF: retval = READSTAT_ERROR_STRING_REFS_NOT_SUPPORTED; break; } if (out_typecode && retval == READSTAT_OK) *out_typecode = typecode; return retval; } static readstat_error_t dta_typecode_for_variable(readstat_variable_t *r_variable, int typlist_version, uint16_t *typecode) { if (typlist_version == 111) { return dta_111_typecode_for_variable(r_variable, typecode); } if (typlist_version == 117) { return dta_117_typecode_for_variable(r_variable, typecode); } return dta_old_typecode_for_variable(r_variable, typecode); } static readstat_error_t dta_emit_typlist(readstat_writer_t *writer, dta_ctx_t *ctx) { readstat_error_t error = READSTAT_OK; int i; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; for (i=0; 
invar; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); uint16_t typecode = 0; error = dta_typecode_for_variable(r_variable, ctx->typlist_version, &typecode); if (error != READSTAT_OK) goto cleanup; ctx->typlist[i] = typecode; } for (i=0; invar; i++) { if (ctx->typlist_entry_len == 1) { uint8_t byte = ctx->typlist[i]; error = readstat_write_bytes(writer, &byte, sizeof(uint8_t)); } else if (ctx->typlist_entry_len == 2) { uint16_t val = ctx->typlist[i]; error = readstat_write_bytes(writer, &val, sizeof(uint16_t)); } if (error != READSTAT_OK) goto cleanup; } if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; cleanup: return error; } static readstat_error_t dta_validate_name_chars(const char *name, int unicode) { /* TODO check Unicode class */ int j; for (j=0; name[j]; j++) { if ((name[j] > 0 || !unicode) && name[j] != '_' && !(name[j] >= 'a' && name[j] <= 'z') && !(name[j] >= 'A' && name[j] <= 'Z') && !(name[j] >= '0' && name[j] <= '9')) { return READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER; } } char first_char = name[0]; if ((first_char > 0 || !unicode) && first_char != '_' && !(first_char >= 'a' && first_char <= 'z') && !(first_char >= 'A' && first_char <= 'Z')) { return READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER; } return READSTAT_OK; } static readstat_error_t dta_validate_name_unreserved(const char *name) { if (strcmp(name, "_all") == 0 || strcmp(name, "_b") == 0 || strcmp(name, "byte") == 0 || strcmp(name, "_coef") == 0 || strcmp(name, "_cons") == 0 || strcmp(name, "double") == 0 || strcmp(name, "float") == 0 || strcmp(name, "if") == 0 || strcmp(name, "in") == 0 || strcmp(name, "int") == 0 || strcmp(name, "long") == 0 || strcmp(name, "_n") == 0 || strcmp(name, "_N") == 0 || strcmp(name, "_pi") == 0 || strcmp(name, "_pred") == 0 || strcmp(name, "_rc") == 0 || strcmp(name, "_skip") == 0 || strcmp(name, "strL") == 0 || strcmp(name, "using") == 0 || strcmp(name, "with") == 0) { return 
READSTAT_ERROR_NAME_IS_RESERVED_WORD; } int len; if (sscanf(name, "str%d", &len) == 1) return READSTAT_ERROR_NAME_IS_RESERVED_WORD; return READSTAT_OK; } static readstat_error_t dta_validate_name(const char *name, int unicode, size_t max_len) { readstat_error_t error = READSTAT_OK; if (strlen(name) > max_len) return READSTAT_ERROR_NAME_IS_TOO_LONG; if (strlen(name) == 0) return READSTAT_ERROR_NAME_IS_ZERO_LENGTH; if ((error = dta_validate_name_chars(name, unicode)) != READSTAT_OK) return error; return dta_validate_name_unreserved(name); } static readstat_error_t dta_old_variable_ok(const readstat_variable_t *variable) { return dta_validate_name(readstat_variable_get_name(variable), 0, DTA_OLD_MAX_NAME_LEN); } static readstat_error_t dta_110_variable_ok(const readstat_variable_t *variable) { return dta_validate_name(readstat_variable_get_name(variable), 0, DTA_110_MAX_NAME_LEN); } static readstat_error_t dta_118_variable_ok(const readstat_variable_t *variable) { return dta_validate_name(readstat_variable_get_name(variable), 1, DTA_118_MAX_NAME_LEN); } static readstat_error_t dta_emit_varlist(readstat_writer_t *writer, dta_ctx_t *ctx) { readstat_error_t error = READSTAT_OK; int i; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; for (i=0; invar; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); strncpy(&ctx->varlist[ctx->variable_name_len*i], r_variable->name, ctx->variable_name_len); } if ((error = readstat_write_bytes(writer, ctx->varlist, ctx->varlist_len)) != READSTAT_OK) goto cleanup; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; cleanup: return error; } static readstat_error_t dta_emit_srtlist(readstat_writer_t *writer, dta_ctx_t *ctx) { readstat_error_t error = READSTAT_OK; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; memset(ctx->srtlist, '\0', ctx->srtlist_len); if ((error = readstat_write_bytes(writer, ctx->srtlist, ctx->srtlist_len)) != READSTAT_OK) 
goto cleanup; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; cleanup: return error; } static readstat_error_t dta_emit_fmtlist(readstat_writer_t *writer, dta_ctx_t *ctx) { readstat_error_t error = READSTAT_OK; int i; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; for (i=0; invar; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); if (r_variable->format[0]) { strncpy(&ctx->fmtlist[ctx->fmtlist_entry_len*i], r_variable->format, ctx->fmtlist_entry_len); } else { char format_letter = 'g'; int display_width = r_variable->display_width; if (readstat_type_class(r_variable->type) == READSTAT_TYPE_CLASS_STRING) { format_letter = 's'; } if (!display_width) { if (r_variable->type == READSTAT_TYPE_INT8) { display_width = DTA_DEFAULT_DISPLAY_WIDTH_BYTE; } else if (r_variable->type == READSTAT_TYPE_INT16) { display_width = DTA_DEFAULT_DISPLAY_WIDTH_INT16; } else if (r_variable->type == READSTAT_TYPE_INT32) { display_width = DTA_DEFAULT_DISPLAY_WIDTH_INT32; } else if (r_variable->type == READSTAT_TYPE_FLOAT) { display_width = DTA_DEFAULT_DISPLAY_WIDTH_FLOAT; } else if (r_variable->type == READSTAT_TYPE_DOUBLE) { display_width = DTA_DEFAULT_DISPLAY_WIDTH_DOUBLE; } else { display_width = DTA_DEFAULT_DISPLAY_WIDTH_STRING; } } char format[64]; if (format_letter == 'g') { sprintf(format, "%%%s%d.0g", r_variable->alignment == READSTAT_ALIGNMENT_LEFT ? "-" : "", display_width); } else { sprintf(format, "%%%s%ds", r_variable->alignment == READSTAT_ALIGNMENT_LEFT ? 
"-" : "", display_width); } strncpy(&ctx->fmtlist[ctx->fmtlist_entry_len*i], format, ctx->fmtlist_entry_len); } } if ((error = readstat_write_bytes(writer, ctx->fmtlist, ctx->fmtlist_len)) != READSTAT_OK) goto cleanup; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; cleanup: return error; } static readstat_error_t dta_emit_lbllist(readstat_writer_t *writer, dta_ctx_t *ctx) { readstat_error_t error = READSTAT_OK; int i; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; for (i=0; invar; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); if (r_variable->label_set) { strncpy(&ctx->lbllist[ctx->lbllist_entry_len*i], r_variable->label_set->name, ctx->lbllist_entry_len); } else { memset(&ctx->lbllist[ctx->lbllist_entry_len*i], '\0', ctx->lbllist_entry_len); } } if ((error = readstat_write_bytes(writer, ctx->lbllist, ctx->lbllist_len)) != READSTAT_OK) goto cleanup; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; cleanup: return error; } static readstat_error_t dta_emit_descriptors(readstat_writer_t *writer, dta_ctx_t *ctx) { readstat_error_t error = READSTAT_OK; error = dta_emit_typlist(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_emit_varlist(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_emit_srtlist(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_emit_fmtlist(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_emit_lbllist(writer, ctx); if (error != READSTAT_OK) goto cleanup; cleanup: return error; } static readstat_error_t dta_emit_variable_labels(readstat_writer_t *writer, dta_ctx_t *ctx) { readstat_error_t error = READSTAT_OK; int i; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; for (i=0; invar; i++) { readstat_variable_t *r_variable = readstat_get_variable(writer, i); strncpy(&ctx->variable_labels[ctx->variable_labels_entry_len*i], r_variable->label, 
ctx->variable_labels_entry_len); } if ((error = readstat_write_bytes(writer, ctx->variable_labels, ctx->variable_labels_len)) != READSTAT_OK) goto cleanup; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; cleanup: return error; } static readstat_error_t dta_emit_characteristics(readstat_writer_t *writer, dta_ctx_t *ctx) { readstat_error_t error = READSTAT_OK; int i; char *buffer = NULL; if (ctx->expansion_len_len == 0) return READSTAT_OK; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) return error; buffer = malloc(ctx->ch_metadata_len); for (i=0; inotes_count; i++) { if (ctx->file_is_xmlish) { error = dta_write_tag(writer, ctx, ""); } else { char data_type = 1; error = readstat_write_bytes(writer, &data_type, 1); } if (error != READSTAT_OK) goto cleanup; size_t len = strlen(writer->notes[i]); if (ctx->expansion_len_len == 2) { int16_t len16 = 2*ctx->ch_metadata_len + len + 1; error = readstat_write_bytes(writer, &len16, sizeof(len16)); } else if (ctx->expansion_len_len == 4) { int32_t len32 = 2*ctx->ch_metadata_len + len + 1; error = readstat_write_bytes(writer, &len32, sizeof(len32)); } if (error != READSTAT_OK) goto cleanup; strncpy(buffer, "_dta", ctx->ch_metadata_len); error = readstat_write_bytes(writer, buffer, ctx->ch_metadata_len); if (error != READSTAT_OK) goto cleanup; snprintf(buffer, ctx->ch_metadata_len, "note%d", i+1); error = readstat_write_bytes(writer, buffer, ctx->ch_metadata_len); if (error != READSTAT_OK) goto cleanup; error = readstat_write_bytes(writer, writer->notes[i], len + 1); if (error != READSTAT_OK) goto cleanup; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; } if (ctx->file_is_xmlish) { error = dta_write_tag(writer, ctx, ""); } else { error = readstat_write_zeros(writer, 1 + ctx->expansion_len_len); } if (error != READSTAT_OK) goto cleanup; cleanup: free(buffer); return error; } static readstat_error_t dta_117_emit_strl_header(readstat_writer_t *writer, 
readstat_string_ref_t *ref) { dta_117_strl_header_t header = { .v = ref->first_v, .o = ref->first_o, .type = DTA_GSO_TYPE_ASCII, .len = ref->len }; return readstat_write_bytes(writer, &header, SIZEOF_DTA_117_STRL_HEADER_T); } static readstat_error_t dta_118_emit_strl_header(readstat_writer_t *writer, readstat_string_ref_t *ref) { dta_118_strl_header_t header = { .v = ref->first_v, .o = ref->first_o, .type = DTA_GSO_TYPE_ASCII, .len = ref->len }; return readstat_write_bytes(writer, &header, SIZEOF_DTA_118_STRL_HEADER_T); } static readstat_error_t dta_emit_strls(readstat_writer_t *writer, dta_ctx_t *ctx) { if (!ctx->file_is_xmlish) return READSTAT_OK; readstat_error_t retval = READSTAT_OK; retval = readstat_write_string(writer, ""); if (retval != READSTAT_OK) goto cleanup; int i; for (i=0; istring_refs_count; i++) { readstat_string_ref_t *ref = writer->string_refs[i]; retval = readstat_write_string(writer, "GSO"); if (retval != READSTAT_OK) goto cleanup; if (ctx->strl_o_len > 4) { retval = dta_118_emit_strl_header(writer, ref); } else { retval = dta_117_emit_strl_header(writer, ref); } if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &ref->data[0], ref->len); if (retval != READSTAT_OK) goto cleanup; } retval = readstat_write_string(writer, ""); if (retval != READSTAT_OK) goto cleanup; cleanup: return retval; } static readstat_error_t dta_old_emit_value_labels(readstat_writer_t *writer, dta_ctx_t *ctx) { readstat_error_t retval = READSTAT_OK; int i, j; char labname[12+2]; char *label_buffer = NULL; for (i=0; ilabel_sets_count; i++) { readstat_label_set_t *r_label_set = readstat_get_label_set(writer, i); int32_t max_value = 0; for (j=0; jvalue_labels_count; j++) { readstat_value_label_t *value_label = readstat_get_value_label(r_label_set, j); if (value_label->tag) { retval = READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED; goto cleanup; } if (value_label->int32_key < 0 || value_label->int32_key > 1024) { retval = 
READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE; goto cleanup; } if (value_label->int32_key > max_value) { max_value = value_label->int32_key; } } int16_t table_len = 8*(max_value + 1); retval = readstat_write_bytes(writer, &table_len, sizeof(int16_t)); if (retval != READSTAT_OK) goto cleanup; memset(labname, 0, sizeof(labname)); strncpy(labname, r_label_set->name, ctx->value_label_table_labname_len); retval = readstat_write_bytes(writer, labname, ctx->value_label_table_labname_len + ctx->value_label_table_padding_len); if (retval != READSTAT_OK) goto cleanup; label_buffer = realloc(label_buffer, table_len); memset(label_buffer, 0, table_len); for (j=0; jvalue_labels_count; j++) { readstat_value_label_t *value_label = readstat_get_value_label(r_label_set, j); size_t len = value_label->label_len; if (len > 8) len = 8; memcpy(&label_buffer[8*value_label->int32_key], value_label->label, len); } retval = readstat_write_bytes(writer, label_buffer, table_len); if (retval != READSTAT_OK) goto cleanup; } cleanup: if (label_buffer) free(label_buffer); return retval; } static int dta_compare_value_labels(const readstat_value_label_t *vl1, const readstat_value_label_t *vl2) { if (vl1->tag) { if (vl2->tag) { return vl1->tag - vl2->tag; } return 1; } if (vl2->tag) { return -1; } return vl1->int32_key - vl2->int32_key; } static readstat_error_t dta_emit_value_labels(readstat_writer_t *writer, dta_ctx_t *ctx) { if (ctx->value_label_table_len_len == 2) return dta_old_emit_value_labels(writer, ctx); readstat_error_t retval = READSTAT_OK; int i, j; int32_t *off = NULL; int32_t *val = NULL; char *txt = NULL; char *labname = calloc(1, ctx->value_label_table_labname_len + ctx->value_label_table_padding_len); retval = dta_write_tag(writer, ctx, ""); if (retval != READSTAT_OK) goto cleanup; for (i=0; ilabel_sets_count; i++) { readstat_label_set_t *r_label_set = readstat_get_label_set(writer, i); int32_t n = r_label_set->value_labels_count; int32_t txtlen = 0; for (j=0; jlabel_len + 1; } 
retval = dta_write_tag(writer, ctx, ""); if (retval != READSTAT_OK) goto cleanup; int32_t table_len = 8 + 8*n + txtlen; retval = readstat_write_bytes(writer, &table_len, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; strncpy(labname, r_label_set->name, ctx->value_label_table_labname_len); retval = readstat_write_bytes(writer, labname, ctx->value_label_table_labname_len + ctx->value_label_table_padding_len); if (retval != READSTAT_OK) goto cleanup; if (txtlen == 0) { retval = readstat_write_bytes(writer, &txtlen, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &txtlen, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; retval = dta_write_tag(writer, ctx, ""); if (retval != READSTAT_OK) goto cleanup; continue; } off = realloc(off, 4*n); val = realloc(val, 4*n); txt = realloc(txt, txtlen); readstat_off_t offset = 0; readstat_sort_label_set(r_label_set, &dta_compare_value_labels); for (j=0; jlabel; size_t label_data_len = value_label->label_len; off[j] = offset; if (value_label->tag) { if (writer->version < 113) { retval = READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED; goto cleanup; } val[j] = DTA_113_MISSING_INT32_A + (value_label->tag - 'a'); } else { val[j] = value_label->int32_key; } memcpy(txt + offset, label, label_data_len); offset += label_data_len; txt[offset++] = '\0'; } retval = readstat_write_bytes(writer, &n, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, &txtlen, sizeof(int32_t)); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, off, 4*n); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, val, 4*n); if (retval != READSTAT_OK) goto cleanup; retval = readstat_write_bytes(writer, txt, txtlen); if (retval != READSTAT_OK) goto cleanup; retval = dta_write_tag(writer, ctx, ""); if (retval != READSTAT_OK) goto cleanup; } retval = dta_write_tag(writer, ctx, ""); if (retval != READSTAT_OK) 
goto cleanup; cleanup: if (off) free(off); if (val) free(val); if (txt) free(txt); if (labname) free(labname); return retval; } static size_t dta_numeric_variable_width(readstat_type_t type, size_t user_width) { size_t len = 0; if (type == READSTAT_TYPE_DOUBLE) { len = 8; } else if (type == READSTAT_TYPE_FLOAT) { len = 4; } else if (type == READSTAT_TYPE_INT32) { len = 4; } else if (type == READSTAT_TYPE_INT16) { len = 2; } else if (type == READSTAT_TYPE_INT8) { len = 1; } return len; } static size_t dta_111_variable_width(readstat_type_t type, size_t user_width) { if (type == READSTAT_TYPE_STRING) { if (user_width > DTA_111_MAX_WIDTH || user_width == 0) user_width = DTA_111_MAX_WIDTH; return user_width; } return dta_numeric_variable_width(type, user_width); } static size_t dta_117_variable_width(readstat_type_t type, size_t user_width) { if (type == READSTAT_TYPE_STRING) { if (user_width > DTA_117_MAX_WIDTH || user_width == 0) user_width = DTA_117_MAX_WIDTH; return user_width; } if (type == READSTAT_TYPE_STRING_REF) return 8; return dta_numeric_variable_width(type, user_width); } static size_t dta_old_variable_width(readstat_type_t type, size_t user_width) { if (type == READSTAT_TYPE_STRING) { if (user_width > DTA_OLD_MAX_WIDTH || user_width == 0) user_width = DTA_OLD_MAX_WIDTH; return user_width; } return dta_numeric_variable_width(type, user_width); } static readstat_error_t dta_emit_xmlish_header(readstat_writer_t *writer, dta_ctx_t *ctx) { readstat_error_t error = READSTAT_OK; if ((error = dta_write_tag(writer, ctx, "")) != READSTAT_OK) goto cleanup; if ((error = dta_write_tag(writer, ctx, "
")) != READSTAT_OK) goto cleanup; char release[128]; snprintf(release, sizeof(release), "%ld", writer->version); if ((error = readstat_write_string(writer, release)) != READSTAT_OK) goto cleanup; error = dta_write_chunk(writer, ctx, "", machine_is_little_endian() ? "LSF" : "MSF", sizeof("MSF")-1, ""); if (error != READSTAT_OK) goto cleanup; if (writer->version >= 119) { uint32_t nvar = writer->variables_count; error = dta_write_chunk(writer, ctx, "", &nvar, sizeof(uint32_t), ""); if (error != READSTAT_OK) goto cleanup; } else { uint16_t nvar = writer->variables_count; error = dta_write_chunk(writer, ctx, "", &nvar, sizeof(uint16_t), ""); if (error != READSTAT_OK) goto cleanup; } if (writer->version >= 118) { uint64_t nobs = writer->row_count; error = dta_write_chunk(writer, ctx, "", &nobs, sizeof(uint64_t), ""); if (error != READSTAT_OK) goto cleanup; } else { uint32_t nobs = writer->row_count; error = dta_write_chunk(writer, ctx, "", &nobs, sizeof(uint32_t), ""); if (error != READSTAT_OK) goto cleanup; } error = dta_emit_header_data_label(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_emit_header_time_stamp(writer, ctx); if (error != READSTAT_OK) goto cleanup; if ((error = dta_write_tag(writer, ctx, "
")) != READSTAT_OK) goto cleanup; cleanup: return error; } static readstat_error_t dta_emit_header(readstat_writer_t *writer, dta_ctx_t *ctx) { if (ctx->file_is_xmlish) { return dta_emit_xmlish_header(writer, ctx); } readstat_error_t error = READSTAT_OK; dta_header_t header = {0}; header.ds_format = writer->version; header.byteorder = machine_is_little_endian() ? DTA_LOHI : DTA_HILO; header.filetype = 0x01; header.unused = 0x00; header.nvar = writer->variables_count; header.nobs = writer->row_count; if (writer->variables_count > 32767) { error = READSTAT_ERROR_TOO_MANY_COLUMNS; goto cleanup; } if ((error = readstat_write_bytes(writer, &header, sizeof(dta_header_t))) != READSTAT_OK) goto cleanup; if ((error = dta_emit_header_data_label(writer, ctx)) != READSTAT_OK) goto cleanup; if ((error = dta_emit_header_time_stamp(writer, ctx)) != READSTAT_OK) goto cleanup; cleanup: return READSTAT_OK; } static size_t dta_measure_tag(dta_ctx_t *ctx, const char *tag) { if (!ctx->file_is_xmlish) return 0; return strlen(tag); } static size_t dta_measure_map(dta_ctx_t *ctx) { return (dta_measure_tag(ctx, "") + 14 * sizeof(uint64_t) + dta_measure_tag(ctx, "")); } static size_t dta_measure_typlist(dta_ctx_t *ctx) { return (dta_measure_tag(ctx, "") + ctx->typlist_entry_len * ctx->nvar + dta_measure_tag(ctx, "")); } static size_t dta_measure_varlist(dta_ctx_t *ctx) { return (dta_measure_tag(ctx, "") + ctx->varlist_len + dta_measure_tag(ctx, "")); } static size_t dta_measure_srtlist(dta_ctx_t *ctx) { return (dta_measure_tag(ctx, "") + ctx->srtlist_len + dta_measure_tag(ctx, "")); } static size_t dta_measure_fmtlist(dta_ctx_t *ctx) { return (dta_measure_tag(ctx, "") + ctx->fmtlist_len + dta_measure_tag(ctx, "")); } static size_t dta_measure_lbllist(dta_ctx_t *ctx) { return (dta_measure_tag(ctx, "") + ctx->lbllist_len + dta_measure_tag(ctx, "")); } static size_t dta_measure_variable_labels(dta_ctx_t *ctx) { return (dta_measure_tag(ctx, "") + ctx->variable_labels_len + dta_measure_tag(ctx, 
"")); } static size_t dta_measure_characteristics(readstat_writer_t *writer, dta_ctx_t *ctx) { size_t characteristics_len = 0; int i; for (i=0; inotes_count; i++) { size_t ch_len = dta_measure_tag(ctx, "") + ctx->expansion_len_len + 2 * ctx->ch_metadata_len + strlen(writer->notes[i]) + 1 + dta_measure_tag(ctx, ""); characteristics_len += ch_len; } return (dta_measure_tag(ctx, "") + characteristics_len + dta_measure_tag(ctx, "")); } static size_t dta_measure_data(readstat_writer_t *writer, dta_ctx_t *ctx) { int i; for (i=0; invar; i++) { size_t max_len = 0; readstat_variable_t *r_variable = readstat_get_variable(writer, i); uint16_t typecode = 0; dta_typecode_for_variable(r_variable, ctx->typlist_version, &typecode); if (dta_type_info(typecode, ctx, &max_len, NULL) == READSTAT_OK) ctx->record_len += max_len; } return (dta_measure_tag(ctx, "") + ctx->record_len * ctx->nobs + dta_measure_tag(ctx, "")); } static size_t dta_measure_strls(readstat_writer_t *writer, dta_ctx_t *ctx) { int i; size_t strls_len = 0; for (i=0; istring_refs_count; i++) { readstat_string_ref_t *ref = writer->string_refs[i]; if (ctx->strl_o_len > 4) { strls_len += sizeof("GSO") - 1 + SIZEOF_DTA_118_STRL_HEADER_T + ref->len; } else { strls_len += sizeof("GSO") - 1 + SIZEOF_DTA_117_STRL_HEADER_T + ref->len; } } return (dta_measure_tag(ctx, "") + strls_len + dta_measure_tag(ctx, "")); } static size_t dta_measure_value_labels(readstat_writer_t *writer, dta_ctx_t *ctx) { size_t len = dta_measure_tag(ctx, ""); int i, j; for (i=0; ilabel_sets_count; i++) { readstat_label_set_t *r_label_set = readstat_get_label_set(writer, i); int32_t n = r_label_set->value_labels_count; int32_t txtlen = 0; for (j=0; jlabel_len + 1; } len += dta_measure_tag(ctx, ""); len += sizeof(int32_t); len += ctx->value_label_table_labname_len; len += ctx->value_label_table_padding_len; len += 8 + 8*n + txtlen; len += dta_measure_tag(ctx, ""); } len += dta_measure_tag(ctx, ""); return len; } static readstat_error_t 
dta_emit_map(readstat_writer_t *writer, dta_ctx_t *ctx) { if (!ctx->file_is_xmlish) return READSTAT_OK; uint64_t map[14]; map[0] = 0; /* */ map[1] = writer->bytes_written; /* */ map[2] = map[1] + dta_measure_map(ctx); /* */ map[3] = map[2] + dta_measure_typlist(ctx); /* */ map[4] = map[3] + dta_measure_varlist(ctx); /* */ map[5] = map[4] + dta_measure_srtlist(ctx); /* */ map[6] = map[5] + dta_measure_fmtlist(ctx); /* */ map[7] = map[6] + dta_measure_lbllist(ctx); /* */ map[8] = map[7] + dta_measure_variable_labels(ctx); /* */ map[9] = map[8] + dta_measure_characteristics(writer, ctx); /* */ map[10]= map[9] + dta_measure_data(writer, ctx); /* */ map[11]= map[10]+ dta_measure_strls(writer, ctx); /* */ map[12]= map[11]+ dta_measure_value_labels(writer, ctx); /* */ map[13]= map[12]+ dta_measure_tag(ctx, "
"); return dta_write_chunk(writer, ctx, "", map, sizeof(map), ""); } static readstat_error_t dta_begin_data(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; readstat_error_t error = READSTAT_OK; if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; dta_ctx_t *ctx = dta_ctx_alloc(NULL); error = dta_ctx_init(ctx, writer->variables_count, writer->row_count, machine_is_little_endian() ? DTA_LOHI : DTA_HILO, writer->version, NULL, NULL); if (error != READSTAT_OK) goto cleanup; error = dta_emit_header(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_emit_map(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_emit_descriptors(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_emit_variable_labels(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_emit_characteristics(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_write_tag(writer, ctx, ""); if (error != READSTAT_OK) goto cleanup; cleanup: if (error != READSTAT_OK) { dta_ctx_free(ctx); } else { writer->module_ctx = ctx; } return error; } static readstat_error_t dta_write_raw_int8(void *row, int8_t value) { memcpy(row, &value, sizeof(char)); return READSTAT_OK; } static readstat_error_t dta_write_raw_int16(void *row, int16_t value) { memcpy(row, &value, sizeof(int16_t)); return READSTAT_OK; } static readstat_error_t dta_write_raw_int32(void *row, int32_t value) { memcpy(row, &value, sizeof(int32_t)); return READSTAT_OK; } static readstat_error_t dta_write_raw_int64(void *row, int64_t value) { memcpy(row, &value, sizeof(int64_t)); return READSTAT_OK; } static readstat_error_t dta_write_raw_float(void *row, float value) { memcpy(row, &value, sizeof(float)); return READSTAT_OK; } static readstat_error_t dta_write_raw_double(void *row, double value) { memcpy(row, &value, sizeof(double)); return READSTAT_OK; } static readstat_error_t dta_113_write_int8(void *row, const readstat_variable_t *var, 
int8_t value) { if (value > DTA_113_MAX_INT8) { return READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE; } return dta_write_raw_int8(row, value); } static readstat_error_t dta_old_write_int8(void *row, const readstat_variable_t *var, int8_t value) { if (value > DTA_OLD_MAX_INT8) { return READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE; } return dta_write_raw_int8(row, value); } static readstat_error_t dta_113_write_int16(void *row, const readstat_variable_t *var, int16_t value) { if (value > DTA_113_MAX_INT16) { return READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE; } return dta_write_raw_int16(row, value); } static readstat_error_t dta_old_write_int16(void *row, const readstat_variable_t *var, int16_t value) { if (value > DTA_OLD_MAX_INT16) { return READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE; } return dta_write_raw_int16(row, value); } static readstat_error_t dta_113_write_int32(void *row, const readstat_variable_t *var, int32_t value) { if (value > DTA_113_MAX_INT32) { return READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE; } return dta_write_raw_int32(row, value); } static readstat_error_t dta_old_write_int32(void *row, const readstat_variable_t *var, int32_t value) { if (value > DTA_OLD_MAX_INT32) { return READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE; } return dta_write_raw_int32(row, value); } static readstat_error_t dta_write_float(void *row, const readstat_variable_t *var, float value) { int32_t max_flt_i32 = DTA_113_MAX_FLOAT; float max_flt; memcpy(&max_flt, &max_flt_i32, sizeof(float)); if (value > max_flt) { return READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE; } else if (isnan(value)) { return dta_113_write_missing_numeric(row, var); } return dta_write_raw_float(row, value); } static readstat_error_t dta_write_double(void *row, const readstat_variable_t *var, double value) { int64_t max_dbl_i64 = DTA_113_MAX_DOUBLE; double max_dbl; memcpy(&max_dbl, &max_dbl_i64, sizeof(double)); if (value > max_dbl) { return READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE; } else if 
(isnan(value)) { return dta_113_write_missing_numeric(row, var); } return dta_write_raw_double(row, value); } static readstat_error_t dta_write_string(void *row, const readstat_variable_t *var, const char *value) { size_t max_len = var->storage_width; if (value == NULL || value[0] == '\0') { memset(row, '\0', max_len); } else { size_t value_len = strlen(value); if (value_len > max_len) return READSTAT_ERROR_STRING_VALUE_IS_TOO_LONG; strncpy((char *)row, value, max_len); } return READSTAT_OK; } static readstat_error_t dta_118_write_string_ref(void *row, const readstat_variable_t *var, readstat_string_ref_t *ref) { if (ref == NULL) return READSTAT_ERROR_STRING_REF_IS_REQUIRED; int16_t v = ref->first_v; int64_t o = ref->first_o; char *row_bytes = (char *)row; memcpy(&row_bytes[0], &v, sizeof(int16_t)); if (!machine_is_little_endian()) { o <<= 16; } memcpy(&row_bytes[2], &o, 6); return READSTAT_OK; } static readstat_error_t dta_117_write_string_ref(void *row, const readstat_variable_t *var, readstat_string_ref_t *ref) { if (ref == NULL) return READSTAT_ERROR_STRING_REF_IS_REQUIRED; int32_t v = ref->first_v; int32_t o = ref->first_o; char *row_bytes = (char *)row; memcpy(&row_bytes[0], &v, sizeof(int32_t)); memcpy(&row_bytes[4], &o, sizeof(int32_t)); return READSTAT_OK; } static readstat_error_t dta_113_write_missing_numeric(void *row, const readstat_variable_t *var) { readstat_error_t retval = READSTAT_OK; if (var->type == READSTAT_TYPE_INT8) { retval = dta_write_raw_int8(row, DTA_113_MISSING_INT8); } else if (var->type == READSTAT_TYPE_INT16) { retval = dta_write_raw_int16(row, DTA_113_MISSING_INT16); } else if (var->type == READSTAT_TYPE_INT32) { retval = dta_write_raw_int32(row, DTA_113_MISSING_INT32); } else if (var->type == READSTAT_TYPE_FLOAT) { retval = dta_write_raw_int32(row, DTA_113_MISSING_FLOAT); } else if (var->type == READSTAT_TYPE_DOUBLE) { retval = dta_write_raw_int64(row, DTA_113_MISSING_DOUBLE); } return retval; } static readstat_error_t 
dta_old_write_missing_numeric(void *row, const readstat_variable_t *var) { readstat_error_t retval = READSTAT_OK; if (var->type == READSTAT_TYPE_INT8) { retval = dta_write_raw_int8(row, DTA_OLD_MISSING_INT8); } else if (var->type == READSTAT_TYPE_INT16) { retval = dta_write_raw_int16(row, DTA_OLD_MISSING_INT16); } else if (var->type == READSTAT_TYPE_INT32) { retval = dta_write_raw_int32(row, DTA_OLD_MISSING_INT32); } else if (var->type == READSTAT_TYPE_FLOAT) { retval = dta_write_raw_int32(row, DTA_OLD_MISSING_FLOAT); } else if (var->type == READSTAT_TYPE_DOUBLE) { retval = dta_write_raw_int64(row, DTA_OLD_MISSING_DOUBLE); } return retval; } static readstat_error_t dta_write_missing_string(void *row, const readstat_variable_t *var) { return dta_write_string(row, var, NULL); } static readstat_error_t dta_113_write_missing_tagged(void *row, const readstat_variable_t *var, char tag) { readstat_error_t retval = READSTAT_OK; if (tag < 'a' || tag > 'z') return READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE; if (var->type == READSTAT_TYPE_INT8) { retval = dta_write_raw_int8(row, DTA_113_MISSING_INT8_A + (tag - 'a')); } else if (var->type == READSTAT_TYPE_INT16) { retval = dta_write_raw_int16(row, DTA_113_MISSING_INT16_A + (tag - 'a')); } else if (var->type == READSTAT_TYPE_INT32) { retval = dta_write_raw_int32(row, DTA_113_MISSING_INT32_A + (tag - 'a')); } else if (var->type == READSTAT_TYPE_FLOAT) { retval = dta_write_raw_int32(row, DTA_113_MISSING_FLOAT_A + ((tag - 'a') << 11)); } else if (var->type == READSTAT_TYPE_DOUBLE) { retval = dta_write_raw_int64(row, DTA_113_MISSING_DOUBLE_A + ((int64_t)(tag - 'a') << 40)); } else { retval = READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED; } return retval; } static readstat_error_t dta_end_data(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; dta_ctx_t *ctx = writer->module_ctx; readstat_error_t error = READSTAT_OK; if (!writer->initialized) return READSTAT_ERROR_WRITER_NOT_INITIALIZED; error = 
dta_write_tag(writer, ctx, ""); if (error != READSTAT_OK) goto cleanup; error = dta_emit_strls(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_emit_value_labels(writer, ctx); if (error != READSTAT_OK) goto cleanup; error = dta_write_tag(writer, ctx, "
"); if (error != READSTAT_OK) goto cleanup; cleanup: return error; } static void dta_module_ctx_free(void *module_ctx) { dta_ctx_free(module_ctx); } readstat_error_t dta_metadata_ok(void *writer_ctx) { readstat_writer_t *writer = (readstat_writer_t *)writer_ctx; if (writer->compression != READSTAT_COMPRESS_NONE) return READSTAT_ERROR_UNSUPPORTED_COMPRESSION; if (writer->version > DTA_FILE_VERSION_MAX || writer->version < DTA_FILE_VERSION_MIN) return READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION; return READSTAT_OK; } readstat_error_t readstat_begin_writing_dta(readstat_writer_t *writer, void *user_ctx, long row_count) { if (writer->version == 0) writer->version = DTA_FILE_VERSION_DEFAULT; writer->callbacks.metadata_ok = &dta_metadata_ok; if (writer->version >= 117) { writer->callbacks.variable_width = &dta_117_variable_width; } else if (writer->version >= 111) { writer->callbacks.variable_width = &dta_111_variable_width; } else { writer->callbacks.variable_width = &dta_old_variable_width; } if (writer->version >= 118) { writer->callbacks.variable_ok = &dta_118_variable_ok; } else if (writer->version >= 110) { writer->callbacks.variable_ok = &dta_110_variable_ok; } else { writer->callbacks.variable_ok = &dta_old_variable_ok; } if (writer->version >= 118) { writer->callbacks.write_string_ref = &dta_118_write_string_ref; } else if (writer->version == 117) { writer->callbacks.write_string_ref = &dta_117_write_string_ref; } if (writer->version >= 113) { writer->callbacks.write_int8 = &dta_113_write_int8; writer->callbacks.write_int16 = &dta_113_write_int16; writer->callbacks.write_int32 = &dta_113_write_int32; writer->callbacks.write_missing_number = &dta_113_write_missing_numeric; writer->callbacks.write_missing_tagged = &dta_113_write_missing_tagged; } else { writer->callbacks.write_int8 = &dta_old_write_int8; writer->callbacks.write_int16 = &dta_old_write_int16; writer->callbacks.write_int32 = &dta_old_write_int32; writer->callbacks.write_missing_number = 
&dta_old_write_missing_numeric; } writer->callbacks.write_float = &dta_write_float; writer->callbacks.write_double = &dta_write_double; writer->callbacks.write_string = &dta_write_string; writer->callbacks.write_missing_string = &dta_write_missing_string; writer->callbacks.begin_data = &dta_begin_data; writer->callbacks.end_data = &dta_end_data; writer->callbacks.module_ctx_free = &dta_module_ctx_free; return readstat_begin_writing_file(writer, user_ctx, row_count); } ReadStat-1.1.7/src/test/000077500000000000000000000000001410722155500147775ustar00rootroot00000000000000ReadStat-1.1.7/src/test/test_buffer.c000066400000000000000000000015511410722155500174550ustar00rootroot00000000000000#include #include "test_buffer.h" rt_buffer_t *buffer_init() { rt_buffer_t *buffer = calloc(1, sizeof(rt_buffer_t)); buffer->size = 1024; buffer->bytes = malloc(buffer->size); return buffer; } void buffer_reset(rt_buffer_t *buffer) { buffer->used = 0; } void buffer_grow(rt_buffer_t *buffer, size_t len) { while (len > buffer->size - buffer->used) { buffer->size *= 2; } buffer->bytes = realloc(buffer->bytes, buffer->size); } void buffer_free(rt_buffer_t *buffer) { free(buffer->bytes); free(buffer); } rt_buffer_ctx_t *buffer_ctx_init(rt_buffer_t *buffer) { rt_buffer_ctx_t *buffer_ctx = calloc(1, sizeof(rt_buffer_ctx_t)); buffer_ctx->buffer = buffer; return buffer_ctx; } void buffer_ctx_reset(rt_buffer_ctx_t *buffer_ctx) { buffer_reset(buffer_ctx->buffer); buffer_ctx->pos = 0; } ReadStat-1.1.7/src/test/test_buffer.h000066400000000000000000000007401410722155500174610ustar00rootroot00000000000000 typedef struct rt_buffer_s { size_t used; size_t size; char *bytes; } rt_buffer_t; typedef struct rt_buffer_ctx_s { rt_buffer_t *buffer; size_t pos; } rt_buffer_ctx_t; rt_buffer_t *buffer_init(); void buffer_reset(rt_buffer_t *buffer); void buffer_grow(rt_buffer_t *buffer, size_t len); void buffer_free(rt_buffer_t *buffer); rt_buffer_ctx_t *buffer_ctx_init(rt_buffer_t *buffer); void 
buffer_ctx_reset(rt_buffer_ctx_t *buffer_ctx); ReadStat-1.1.7/src/test/test_buffer_io.c000066400000000000000000000034421410722155500201450ustar00rootroot00000000000000#include #include "../readstat.h" #include "test_buffer.h" #include "test_buffer_io.h" int rt_open_handler(const char *path, void *io_ctx) { return 0; } int rt_close_handler(void *io_ctx) { return 0; } readstat_off_t rt_seek_handler(readstat_off_t offset, readstat_io_flags_t whence, void *io_ctx) { rt_buffer_ctx_t *buffer_ctx = (rt_buffer_ctx_t *)io_ctx; readstat_off_t newpos = -1; if (whence == READSTAT_SEEK_SET) { newpos = offset; } else if (whence == READSTAT_SEEK_CUR) { newpos = buffer_ctx->pos + offset; } else if (whence == READSTAT_SEEK_END) { newpos = buffer_ctx->buffer->used + offset; } if (newpos < 0) return -1; if (newpos > buffer_ctx->buffer->used) return -1; buffer_ctx->pos = newpos; return newpos; } ssize_t rt_read_handler(void *buf, size_t nbytes, void *io_ctx) { rt_buffer_ctx_t *buffer_ctx = (rt_buffer_ctx_t *)io_ctx; ssize_t bytes_copied = 0; ssize_t bytes_left = buffer_ctx->buffer->used - buffer_ctx->pos; if (nbytes <= bytes_left) { memcpy(buf, buffer_ctx->buffer->bytes + buffer_ctx->pos, nbytes); bytes_copied = nbytes; } else if (bytes_left > 0) { memcpy(buf, buffer_ctx->buffer->bytes + buffer_ctx->pos, bytes_left); bytes_copied = bytes_left; } buffer_ctx->pos += bytes_copied; return bytes_copied; } readstat_error_t rt_update_handler(long file_size, readstat_progress_handler progress_handler, void *user_ctx, void *io_ctx) { if (!progress_handler) return READSTAT_OK; rt_buffer_ctx_t *buffer_ctx = (rt_buffer_ctx_t *)io_ctx; if (progress_handler(1.0 * buffer_ctx->pos / buffer_ctx->buffer->used, user_ctx)) return READSTAT_ERROR_USER_ABORT; return READSTAT_OK; } ReadStat-1.1.7/src/test/test_buffer_io.h000066400000000000000000000006121410722155500201460ustar00rootroot00000000000000 int rt_open_handler(const char *path, void *io_ctx); int rt_close_handler(void *io_ctx); readstat_off_t 
rt_seek_handler(readstat_off_t offset, readstat_io_flags_t whence, void *io_ctx); ssize_t rt_read_handler(void *buf, size_t nbytes, void *io_ctx); readstat_error_t rt_update_handler(long file_size, readstat_progress_handler progress_handler, void *user_ctx, void *io_ctx); ReadStat-1.1.7/src/test/test_double_decimals.c000066400000000000000000000017731410722155500213250ustar00rootroot00000000000000#include #include #include #include "../bin/write/double_decimals.h" int main(int argc, char *argv[]) { #define EXPECT_DECIMALS(v, expected) \ printf("%s:%d Expecting %.14f to have %d decimals ... \n", __FILE__, __LINE__, v, expected); \ if (double_decimals(v) != expected) { \ printf("%s:%d error got %d decimals, expected %d\n", __FILE__, __LINE__, double_decimals(v), expected); \ exit(EXIT_FAILURE); \ } else { \ printf("%s:%d OK got %d decimals\n", __FILE__, __LINE__, expected); \ } EXPECT_DECIMALS(-123.123, 3); EXPECT_DECIMALS(-100.0, 0); EXPECT_DECIMALS(0.0, 0); EXPECT_DECIMALS(123.0, 0); EXPECT_DECIMALS(123.56, 2); EXPECT_DECIMALS(123.123, 3); EXPECT_DECIMALS(123.123456789, 9); EXPECT_DECIMALS(123.1234567891, 10); EXPECT_DECIMALS(123.123456789012, 12); EXPECT_DECIMALS(123.100000000012, 12); EXPECT_DECIMALS(123.12345678901234, 14); return 0; } ReadStat-1.1.7/src/test/test_dta.c000066400000000000000000000015031410722155500167510ustar00rootroot00000000000000#include #include "../readstat.h" #include "test_readstat.h" long dta_file_format_version(long format_code) { long version = -1; if (format_code == RT_FORMAT_DTA_104) { version = 104; } else if (format_code == RT_FORMAT_DTA_105) { version = 105; } else if (format_code == RT_FORMAT_DTA_108) { version = 108; } else if (format_code == RT_FORMAT_DTA_110) { version = 110; } else if (format_code == RT_FORMAT_DTA_111) { version = 111; } else if (format_code == RT_FORMAT_DTA_114) { version = 114; } else if (format_code == RT_FORMAT_DTA_117) { version = 117; } else if (format_code == RT_FORMAT_DTA_118) { version = 118; } else if 
(format_code == RT_FORMAT_DTA_119) { version = 119; } return version; } ReadStat-1.1.7/src/test/test_dta.h000066400000000000000000000000611410722155500167540ustar00rootroot00000000000000 long dta_file_format_version(long format_code); ReadStat-1.1.7/src/test/test_dta_days.c000066400000000000000000000043041410722155500177730ustar00rootroot00000000000000#include #include #include #include "../bin/util/readstat_dta_days.h" static inline int is_leap(int year) { return ((year % 4 == 0 && year % 100 != 0) || year % 400 ==0); } void test_dta_dates() { int daysPerMonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; int daysPerMonthLeap[] = {31,29,31,30,31,30,31,31,30,31,30,31}; char buf[1024]; char buf2[1024]; for (int yr=1600; yr<2050; yr++) { for (int month=0; month<12; month++) { int max_days = is_leap(yr) ? daysPerMonthLeap[month] : daysPerMonth[month]; for (int days=1; days<=max_days; days++) { snprintf(buf, sizeof(buf), "%04d-%02d-%02d", yr, month+1, days); char *dest; int numdays = readstat_dta_num_days(buf, &dest); if (dest == buf) { fprintf(stderr, "failure to parse date\n"); exit(EXIT_FAILURE); } readstat_dta_days_string(numdays, buf2, sizeof(buf2)-1); if (0 != strncmp(buf, buf2, strlen(buf))) { fprintf(stderr, "failure. 
Expected %s, got %s\n", buf, buf2); exit(EXIT_FAILURE); } } } } { char *dest; char b[] = "2016-00-01"; readstat_dta_num_days(b, &dest); if (dest != b) { fprintf(stderr, "Expected failure!\n"); exit(EXIT_FAILURE); } } { char *dest; char b[] = ""; readstat_dta_num_days(b, &dest); if (dest != b) { fprintf(stderr, "Expected failure!\n"); exit(EXIT_FAILURE); } } { char *dest; char b[] = "2016-13-01"; readstat_dta_num_days(b, &dest); if (dest != b) { fprintf(stderr, "Expected failure!\n"); exit(EXIT_FAILURE); } } { char *dest; char b[] = "2016-04-31"; readstat_dta_num_days(b, &dest); if (dest != b) { fprintf(stderr, "Expected failure!\n"); exit(EXIT_FAILURE); } } } int main(int argc, char *argv[]) { test_dta_dates(); return 0; } ReadStat-1.1.7/src/test/test_error.c000066400000000000000000000160421410722155500173360ustar00rootroot00000000000000#include #include #include "../readstat.h" #include "test_buffer.h" #include "test_types.h" #include "test_error.h" #include "test_readstat.h" int strings_equal(const char *expected, const char *received) { if ((expected == NULL || expected[0] == '\0') && (received == NULL || received[0] == '\0')) return 1; return (expected && received && strcmp(expected, received) == 0); } int doubles_equal(double expected, double received) { if (isnan(expected) && isnan(received)) return 1; return (expected == received); } static readstat_value_t copy_value(rt_parse_ctx_t *ctx, readstat_value_t value) { if (value.type == READSTAT_TYPE_STRING) { if (value.v.string_value) { size_t len = strlen(value.v.string_value); ctx->strings = realloc(ctx->strings, ctx->strings_len + len + 1); memcpy(&ctx->strings[ctx->strings_len], value.v.string_value, len + 1); value.v.string_value = &ctx->strings[ctx->strings_len]; ctx->strings_len += len + 1; } } return value; } void push_error(rt_parse_ctx_t *ctx, readstat_value_t expected, readstat_value_t received, const char *msg) { ctx->errors = realloc(ctx->errors, (ctx->errors_count+1) * sizeof(rt_error_t)); 
rt_error_t *error = &ctx->errors[ctx->errors_count]; error->expected = copy_value(ctx, expected); error->received = copy_value(ctx, received); error->file = ctx->file; error->file_format = ctx->file_format; error->file_extension = ctx->file_extension; error->pos = ((rt_buffer_ctx_t *)ctx->buffer_ctx)->pos; error->var_index = ctx->var_index; error->obs_index = ctx->obs_index; snprintf(error->msg, sizeof(error->msg), "%s", msg); ctx->errors_count++; } void push_error_if_strings_differ(rt_parse_ctx_t *ctx, const char *expected, const char *received, const char *msg) { if (strings_equal(expected, received)) return; readstat_value_t expected_value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = expected } }; readstat_value_t received_value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = received } }; push_error(ctx, expected_value, received_value, msg); } void push_error_if_strings_differ_n(rt_parse_ctx_t *ctx, const char *expected, const char *received, size_t len, const char *msg) { if ((expected == NULL || expected[0] == '\0') && (received == NULL || received[0] == '\0')) return; if (expected && received && strncmp(expected, received, len) == 0) return; readstat_value_t expected_value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = expected } }; readstat_value_t received_value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = received } }; push_error(ctx, expected_value, received_value, msg); } int values_equal(readstat_value_t expected, readstat_value_t received) { readstat_type_t expected_type = readstat_value_type(expected); readstat_type_t received_type = readstat_value_type(received); if (expected_type == READSTAT_TYPE_STRING) { if (received_type == READSTAT_TYPE_STRING) { return strings_equal(readstat_string_value(expected), readstat_string_value(received)); } else { return 0; } } else if (received_type != READSTAT_TYPE_STRING) { if (readstat_value_is_tagged_missing(expected) || readstat_value_is_tagged_missing(received)) { 
return readstat_value_tag(expected) == readstat_value_tag(received); } else if (readstat_value_is_system_missing(expected) || readstat_value_is_system_missing(received)) { return readstat_value_is_system_missing(expected) == readstat_value_is_system_missing(received); } else if (received_type == READSTAT_TYPE_DOUBLE || received_type == READSTAT_TYPE_FLOAT) { return doubles_equal(readstat_double_value(expected), readstat_double_value(received)); } else { return readstat_int32_value(expected) == readstat_int32_value(received); } } else { return 0; } return 1; } void push_error_if_values_differ(rt_parse_ctx_t *ctx, readstat_value_t expected, readstat_value_t received, const char *msg) { if (values_equal(expected, received)) return; push_error(ctx, expected, received, msg); } void push_error_if_doubles_differ(rt_parse_ctx_t *ctx, double expected, double received, const char *msg) { if (doubles_equal(expected, received)) return; readstat_value_t expected_value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = expected } }; readstat_value_t received_value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = received } }; push_error(ctx, expected_value, received_value, msg); } void push_error_if_codes_differ(rt_parse_ctx_t *ctx, readstat_error_t expected, readstat_error_t received) { if (expected == received) return; readstat_value_t expected_value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = readstat_error_message(expected) } }; readstat_value_t received_value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = readstat_error_message(received) } }; push_error(ctx, expected_value, received_value, "Error codes"); } static void print_value(readstat_value_t value) { if (value.type == READSTAT_TYPE_STRING) { const char *string = readstat_string_value(value); if (string) { printf("\"%s\" (length=%" PRId64 ")", string, (int64_t)strlen(string)); } else { printf("(null)"); } } else if (value.tag) { printf(".%c (tagged)", readstat_value_tag(value)); } 
else if (value.type == READSTAT_TYPE_DOUBLE) { printf("%lf (double)", readstat_double_value(value)); } else if (value.type == READSTAT_TYPE_FLOAT) { printf("%f (float)", readstat_float_value(value)); } else if (value.type == READSTAT_TYPE_INT8) { printf("%" PRId8 " (int8)", readstat_int8_value(value)); } else if (value.type == READSTAT_TYPE_INT16) { printf("%" PRId16 " (int16)", readstat_int16_value(value)); } else if (value.type == READSTAT_TYPE_INT32) { printf("%" PRId32 " (int32)", readstat_int32_value(value)); } } void print_error(rt_error_t *error) { if (error->file) { printf("Test \"%s\" failed: %s\n", error->file->label, error->msg); } else { printf("Test failed: %s\n", error->msg); } printf(" * Format: %s (0x%04lx)\n", error->file_extension, error->file_format); printf(" * Expected: "); print_value(error->expected); printf("\n"); printf(" * Received: "); print_value(error->received); printf("\n"); if (error->obs_index != -1) { printf(" * Row: %ld\n", error->obs_index + 1); } if (error->var_index != -1) { printf(" * Column: %ld\n", error->var_index + 1); } } ReadStat-1.1.7/src/test/test_error.h000066400000000000000000000014741410722155500173460ustar00rootroot00000000000000 int values_equal(readstat_value_t expected, readstat_value_t received); void push_error_if_values_differ(rt_parse_ctx_t *ctx, readstat_value_t expected, readstat_value_t received, const char *msg); void push_error_if_doubles_differ(rt_parse_ctx_t *ctx, double expected, double received, const char *msg); void push_error_if_strings_differ(rt_parse_ctx_t *ctx, const char *expected, const char *received, const char *msg); void push_error_if_strings_differ_n(rt_parse_ctx_t *ctx, const char *expected, const char *received, size_t len, const char *msg); void push_error_if_codes_differ(rt_parse_ctx_t *ctx, readstat_error_t expected, readstat_error_t received); void print_error(rt_error_t *error); 
ReadStat-1.1.7/src/test/test_list.h000066400000000000000000002637631410722155500172030ustar00rootroot00000000000000 #define RT_FORMAT_TEST_TIMESTAMPS (RT_FORMAT_DTA_105_AND_NEWER | RT_FORMAT_SPSS | RT_FORMAT_SAS7BDAT) static rt_test_group_t _test_groups[] = { { .label = "Table name", .tests = { { .label = "Legal name", .test_formats = RT_FORMAT_XPORT, .table_name = "hello", .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .label = "Double-precision variable" } } }, { .label = "Illegal name", .test_formats = RT_FORMAT_XPORT, .table_name = "#&(@!", .write_error = READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .label = "Double-precision variable" } } }, } }, { .label = "Notes", .tests = { { .label = "Short notes", .test_formats = RT_FORMAT_DTA_105_AND_NEWER | RT_FORMAT_SPSS, .notes_count = 2, .notes = { "This is a note", "This is another note" }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .label = "Double-precision variable" } } }, { .label = "Long notes", .write_error = READSTAT_ERROR_NOTE_IS_TOO_LONG, .test_formats = RT_FORMAT_SPSS, .notes_count = 1, .notes = { "This is a note that is longer than the 80-byte line length of " "the SPSS document record, and will produce an error." 
}, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .label = "Double-precision variable" } } } } }, { .label = "Compression tests", .tests = { { .label = "SAV row compression", .test_formats = RT_FORMAT_SAV_COMP, .rows = 3, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .label = "Double-precision variable", .values = { { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = -100.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } }, { .type = READSTAT_TYPE_DOUBLE, .is_system_missing = 1 } } }, { .name = "VAR2", .type = READSTAT_TYPE_STRING, .label = "String variable", .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = "spaces-> <-- here" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "blah" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "blahblahblah" } } } } } }, { .label = "SAV short row compression", .test_formats = RT_FORMAT_SAV_COMP, .rows = 4, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .label = "Double-precision variable", .values = { { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } } } } } }, { .label = "SAS7BDAT RLE compression", .test_formats = RT_FORMAT_SAS7BDAT_COMP_ROWS, .rows = 10, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .label = "Double-precision variable", .values = { { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = -100.0 } }, { .type = READSTAT_TYPE_DOUBLE, .is_system_missing = 1 }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 0.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 0.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 0.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 0.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 0.0 } }, { .type = 
READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 0.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 0.0 } } } }, { .name = "VAR2", .type = READSTAT_TYPE_STRING, .label = "String variable", .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = "spaces-> <-- here" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "spaces-> <-- here" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "blah" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "blahblahblahblahblah" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "blahblahblahblahblahblahblahblahbba" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "blahblahblahsafhuweyeoyraewayfeawopyfhewuhafeywfdhsfdsaf" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "atsyms->@@@@@@@@<--FFFFFFFFFFFFFFFFFFFFFFFFFF" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "atsyms->@@@@@@@@@@@@@@@@@@@@@@@@@@@@@<--FFFFF" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "jiafojdsaufwejfiewnfiabfiuaewbfiuwhfeiuwfuienawuifnwauiefnhfuiwheufhwfuiewfjwuifewuif" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "Fchars->GGGGGGGGGGGGGGGGGGGGGGGGGGGGG<-- here" } } } } } } } }, { .label = "Long strings and string refs", .tests = { { .label = "25x-byte strings in SAV", .test_formats = RT_FORMAT_SAV, .rows = 1, .columns = { { .name = "VAR252", .type = READSTAT_TYPE_STRING, .display_width = 252, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = /* 252 bytes long */ "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "01" } } } }, { .name = "VAR253", .type = READSTAT_TYPE_STRING, .display_width = 
253, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = /* 253 bytes long */ "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "012" } } } }, { .name = "VAR254", .type = READSTAT_TYPE_STRING, .display_width = 254, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = /* 254 bytes long */ "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123" } } } }, { .name = "VAR255", .type = READSTAT_TYPE_STRING, .display_width = 255, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = /* 255 bytes long */ "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "01234" } } } }, { .name = "VAR256", .type = READSTAT_TYPE_STRING, .display_width = 256, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = /* 256 bytes long */ "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "012345" } } } } } }, { .label = "300-byte string in SAV, new DTA, and 
SAS7BDAT", .test_formats = RT_FORMAT_DTA_117_AND_NEWER | RT_FORMAT_SAV | RT_FORMAT_SAS7BDAT, .rows = 1, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_STRING, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = /* 300 bytes long */ "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" } } } } } }, { .label = "Four 1024-byte strings in SAS7BDAT", /* Test 4096+ byte rows */ .test_formats = RT_FORMAT_SAS7BDAT, .rows = 1, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_STRING, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = /* 1024 bytes long */ "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" 
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123" } } } }, { .name = "VAR2", .type = READSTAT_TYPE_STRING, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = /* 1024 bytes long */ "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123" } } } }, { .name = "VAR3", .type = READSTAT_TYPE_STRING, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = /* 1024 bytes long */ "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" 
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123" } } } }, { .name = "VAR4", .type = READSTAT_TYPE_STRING, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = /* 1024 bytes long */ "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" 
"0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123" } } } } } }, { .label = "String refs in new DTA", .test_formats = RT_FORMAT_DTA_117_AND_NEWER, .string_refs_count = 3, .string_refs = { "Hello", "Goodbye", "Hello again" }, .rows = 2, .columns = { { .name = "var1", .type = READSTAT_TYPE_STRING_REF, .values = { { .type = READSTAT_TYPE_INT32, .v = { .i32_value = 0 } }, { .type = READSTAT_TYPE_INT32, .v = { .i32_value = 1 } } } }, { .name = "var2", .type = READSTAT_TYPE_STRING_REF, .values = { { .type = READSTAT_TYPE_INT32, .v = { .i32_value = 2 } }, { .type = READSTAT_TYPE_INT32, .v = { .i32_value = 0 } } } } } } } }, { .label = "ASCII tests", .tests = { { .label = "Alphanumeric", .test_formats = RT_FORMAT_ALL, .rows = 3, .columns = { { .name = "VAR1", .label = "String variable", .type = READSTAT_TYPE_STRING, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = "abcdefghijklmnopqrstuvwxyz" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "0123456789" } } } } } }, { .label = "Symbol characters", .test_formats = RT_FORMAT_ALL, .rows = 10, .columns = { { .name = "VAR1", .label = "String variable", .type = READSTAT_TYPE_STRING, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = "!@#" } }, { .type = 
READSTAT_TYPE_STRING, .v = { .string_value = "$%^" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "&*()" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "`~" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "-=_+" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "[]{}" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = ";:" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "'\"" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "<>,./?" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "\\|" } } } } } } } }, { .label = "UTF-8 tests", .tests = { { .label = "UTF-8 value", .test_formats = RT_FORMAT_DTA_118_AND_NEWER | RT_FORMAT_SAV | RT_FORMAT_SAS7BDAT, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_STRING, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = "Stra" "\xc3\x9f" "e" } } } } } }, { .label = "UTF-8 column name", .test_formats = RT_FORMAT_SAV, .rows = 0, .columns = { { .name = "stra" "\xc3\x9f" "e", .type = READSTAT_TYPE_DOUBLE }, { /* https://github.com/WizardMac/ReadStat/issues/206 */ .name = "\xd7\x95\xd7\xaa\xd7\xa7_\xd7\x91", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "UTF-8 column label", .test_formats = RT_FORMAT_DTA_118_AND_NEWER | RT_FORMAT_SAV | RT_FORMAT_SAS7BDAT, .rows = 0, .columns = { { .name = "strasse", .label = "Stra" "\xc3\x9f" "e", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "UTF-8 value label", .test_formats = RT_FORMAT_DTA_118_AND_NEWER, .rows = 0, .label_sets_count = 1, .label_sets = { { .name = "somelbl", .type = READSTAT_TYPE_INT32, .value_labels_count = 1, .value_labels = { { .value = { .type = READSTAT_TYPE_INT32, .v = { .i32_value = 6 } }, .label = "F" "\xc3\xbc" "nf" } } } }, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT32, .label_set = "somelbl" } } } } }, { .label = "Illegal column names", .tests = { { .label = "Column name begins with number", .write_error = 
READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER, .test_formats = RT_FORMAT_DTA | RT_FORMAT_SAS | RT_FORMAT_SAV, .rows = 0, .columns = { { .name = "1var", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "Column name contains dollar sign", .write_error = READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER, .test_formats = RT_FORMAT_DTA | RT_FORMAT_SAS, .rows = 0, .columns = { { .name = "var$", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "DTA column name is a reserved word", .write_error = READSTAT_ERROR_NAME_IS_RESERVED_WORD, .test_formats = RT_FORMAT_DTA, .rows = 0, .columns = { { .name = "double", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "DTA column name is a reserved pattern", .write_error = READSTAT_ERROR_NAME_IS_RESERVED_WORD, .test_formats = RT_FORMAT_DTA, .rows = 0, .columns = { { .name = "str123", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "DTA 13-byte column name is too long", .write_error = READSTAT_ERROR_NAME_IS_TOO_LONG, .test_formats = RT_FORMAT_DTA_108_AND_OLDER, .columns = { { .name = "VAR1234567890", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "DTA 34-byte column name is too long", .write_error = READSTAT_ERROR_NAME_IS_TOO_LONG, .test_formats = RT_FORMAT_DTA_117_AND_OLDER, .columns = { { .name = "VAR1234567890123456789012345678901", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "SAS column name is a reserved word", .write_error = READSTAT_ERROR_NAME_IS_RESERVED_WORD, .test_formats = RT_FORMAT_SAS, .rows = 0, .columns = { { .name = "_NUMERIC_", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "SAS column name is too long", .write_error = READSTAT_ERROR_NAME_IS_TOO_LONG, .test_formats = RT_FORMAT_SAS, .columns = { { .name = "VAR123456789012345678901234567890", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "POR column name is too long", .write_error = READSTAT_ERROR_NAME_IS_TOO_LONG, .test_formats = RT_FORMAT_POR, .columns = { { .name = "VAR123456789", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "POR column name starts with number", 
.write_error = READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER, .test_formats = RT_FORMAT_POR, .columns = { { .name = "1VAR", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "POR column name has lower-case letter", .write_error = READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER, .test_formats = RT_FORMAT_POR, .columns = { { .name = "var1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "SAV column name is a reserved word", .write_error = READSTAT_ERROR_NAME_IS_RESERVED_WORD, .test_formats = RT_FORMAT_SAV, .columns = { { .name = "ALL", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "SAV column name contains punctuation", .write_error = READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER, .test_formats = RT_FORMAT_SAV, .columns = { { .name = "VAR!", .type = READSTAT_TYPE_DOUBLE } } } } }, { .label = "Variable labels", .tests = { { .label = "XPORT long variable label", .test_formats = RT_FORMAT_XPORT_8, .columns = { { .name = "VAR1", .label = "This is a variable label that is longer than 40 bytes!" 
} } } } }, { .label = "Display widths", .tests = { { .label = "Display width", .test_formats = RT_FORMAT_SPSS | RT_FORMAT_DTA | RT_FORMAT_XPORT, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .display_width = 12 }, { .name = "VAR2", .type = READSTAT_TYPE_DOUBLE, .display_width = 100 }, { .name = "VAR3", .type = READSTAT_TYPE_STRING, .display_width = 255 }, { .name = "VAR4", .type = READSTAT_TYPE_STRING, .display_width = 1000 } } }, }, }, { .label = "Formats", .tests = { { .label = "SPSS basic formats", .test_formats = RT_FORMAT_SPSS, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .format = "F6.4" }, { .name = "VAR2", .type = READSTAT_TYPE_DOUBLE, .format = "F8.2" }, { .name = "VAR3", .type = READSTAT_TYPE_STRING, .format = "A12" } } }, { .label = "SPSS bad format", .write_error = READSTAT_ERROR_BAD_FORMAT_STRING, .test_formats = RT_FORMAT_SPSS, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .format = "BARF6.4" } } }, { .label = "SPSS date formats", .test_formats = RT_FORMAT_SPSS, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .format = "DATE11" }, { .name = "VAR2", .type = READSTAT_TYPE_DOUBLE, .format = "TIME10" }, { .name = "VAR3", .type = READSTAT_TYPE_DOUBLE, .format = "DATETIME15" }, { .name = "VAR4", .type = READSTAT_TYPE_DOUBLE, .format = "ADATE10" }, { .name = "VAR5", .type = READSTAT_TYPE_DOUBLE, .format = "JDATE10" }, { .name = "VAR6", .type = READSTAT_TYPE_DOUBLE, .format = "DTIME10" }, { .name = "VAR7", .type = READSTAT_TYPE_DOUBLE, .format = "EDATE10" }, { .name = "VAR8", .type = READSTAT_TYPE_DOUBLE, .format = "SDATE10" } } }, { .label = "SPSS calendar formats", .test_formats = RT_FORMAT_SPSS, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .format = "WKDAY11" }, { .name = "VAR2", .type = READSTAT_TYPE_DOUBLE, .format = "MONTH11" }, { .name = "VAR3", .type = READSTAT_TYPE_DOUBLE, .format = "MOYR11" }, { .name = "VAR4", .type = READSTAT_TYPE_DOUBLE, .format = "QYR11" }, { .name = 
"VAR5", .type = READSTAT_TYPE_DOUBLE, .format = "WKYR11" } } }, { .label = "SPSS extended formats", .test_formats = RT_FORMAT_SPSS, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .format = "AHEX6.4" }, { .name = "VAR2", .type = READSTAT_TYPE_DOUBLE, .format = "COMMA6.4" }, { .name = "VAR3", .type = READSTAT_TYPE_DOUBLE, .format = "Z10" }, { .name = "VAR4", .type = READSTAT_TYPE_DOUBLE, .format = "N10" }, { .name = "VAR5", .type = READSTAT_TYPE_DOUBLE, .format = "E10" }, { .name = "VAR6", .type = READSTAT_TYPE_DOUBLE, .format = "PCT10" }, { .name = "VAR7", .type = READSTAT_TYPE_DOUBLE, .format = "DOT10" } } }, { .label = "SPSS CC formats", .test_formats = RT_FORMAT_SPSS, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .format = "CCA10" }, { .name = "VAR2", .type = READSTAT_TYPE_DOUBLE, .format = "CCB10" }, { .name = "VAR3", .type = READSTAT_TYPE_DOUBLE, .format = "CCC10" }, { .name = "VAR4", .type = READSTAT_TYPE_DOUBLE, .format = "CCD10" }, { .name = "VAR5", .type = READSTAT_TYPE_DOUBLE, .format = "CCE10" } } }, { .label = "SPSS currency formats", .test_formats = RT_FORMAT_SPSS, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .format = "DOLLAR6.2" }, { .name = "VAR2", .type = READSTAT_TYPE_DOUBLE, .format = "IB6.2" }, { .name = "VAR3", .type = READSTAT_TYPE_DOUBLE, .format = "PIBHEX9" }, { .name = "VAR4", .type = READSTAT_TYPE_DOUBLE, .format = "P9" }, { .name = "VAR5", .type = READSTAT_TYPE_DOUBLE, .format = "PIB9" }, { .name = "VAR6", .type = READSTAT_TYPE_DOUBLE, .format = "PK9" }, { .name = "VAR7", .type = READSTAT_TYPE_DOUBLE, .format = "RB9" }, { .name = "VAR8", .type = READSTAT_TYPE_DOUBLE, .format = "RBHEX9" } } }, { .label = "DTA formats", .test_formats = RT_FORMAT_DTA, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .format = "%6.4g" }, { .name = "VAR2", .type = READSTAT_TYPE_STRING, .format = "%20s" } } }, { .label = "SAS formats", .test_formats = RT_FORMAT_SAS, .columns = { { .name = "VAR1", .type = 
READSTAT_TYPE_DOUBLE, .format = "10.3", .label_set = "10.3" }, { .name = "VAR2", .type = READSTAT_TYPE_STRING, .format = "$CHAR3.", .label_set = "$CHAR3." } } }, { .label = "SAS long format", .test_formats = RT_FORMAT_SAS7BDAT | RT_FORMAT_XPORT_8, .columns = { { .name = "VAR3", .type = READSTAT_TYPE_DOUBLE, .format = "FAKEFORMAT12.8", .label_set = "FAKEFORMAT12.8" } } } } }, { .label = "Missing value definitions", .tests = { { .label = "SPSS missing values", .test_formats = RT_FORMAT_SPSS, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .missing_ranges_count = 1, .missing_ranges= { { .lo = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } }, .hi = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } } } } }, { .name = "VAR2", .type = READSTAT_TYPE_DOUBLE, .missing_ranges_count = 3, .missing_ranges= { { .lo = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } }, .hi = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } } }, { .lo = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = -100.0 } }, .hi = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = -100.0 } } }, { .lo = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 2.5 } }, .hi = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 2.5 } } } } }, { .name = "VAR3", .type = READSTAT_TYPE_STRING, .missing_ranges_count = 1, .missing_ranges= { { .lo = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "MISSING" } }, .hi = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "MISSING" } } } } } } }, { .label = "SPSS missing ranges", .test_formats = RT_FORMAT_SPSS, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .missing_ranges_count = 1, .missing_ranges = { { .lo = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = -HUGE_VAL } }, .hi = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = -100.0 } } } } }, { .name = "VAR2", .type = READSTAT_TYPE_DOUBLE, .missing_ranges_count = 1, .missing_ranges = { { .lo = { .type = 
READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } }, .hi = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = HUGE_VAL } } } } }, { .name = "VAR3", .type = READSTAT_TYPE_DOUBLE, .missing_ranges_count = 1, .missing_ranges = { { .lo = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = -100.0 } }, .hi = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } } } } }, { .name = "VAR4", .type = READSTAT_TYPE_STRING, .missing_ranges_count = 1, .missing_ranges = { { .lo = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "AAA" } }, .hi = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "ZZZ" } } } } } } }, { .label = "SPSS too many missing ranges", .write_error = READSTAT_ERROR_TOO_MANY_MISSING_VALUE_DEFINITIONS, .test_formats = RT_FORMAT_SPSS, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .missing_ranges_count = 2, .missing_ranges = { { .lo = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 1.0 } }, .hi = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } } }, { .lo = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = -100.0 } }, .hi = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = -1.0 } } }, } } } }, { .label = "SPSS missing values for long strings", .test_formats = RT_FORMAT_SPSS, .rows = 3, .columns = { { .name = "VAR3", .type = READSTAT_TYPE_STRING, .missing_ranges_count = 2, .missing_ranges= { { .lo = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "MISSING" } }, .hi = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "MISSING" } } }, { .lo = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "MISSING2" } }, .hi = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "MISSING2" } } } }, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = "MISSING" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "MISSING2" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "NOT MISSING!!!!!!!!!" 
} } } } } } } }, { .label = "Tagged missing values", .tests = { { .label = "SAV tagged missing values", .write_error = READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED, .test_formats = RT_FORMAT_SAV, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_DOUBLE, .values = { { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'a' } } } } }, { .label = "Old DTA tagged missing values", .write_error = READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED, .test_formats = RT_FORMAT_DTA_111_AND_OLDER, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_DOUBLE, .values = { { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'a' } } } } }, { .label = "Out-of-range tagged missing values", .write_error = READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE, .test_formats = RT_FORMAT_DTA_114_AND_NEWER | RT_FORMAT_SAS, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_DOUBLE, .values = { { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = '$' } } } } }, { .label = "SAS in-range tagged missing doubles", .test_formats = RT_FORMAT_SAS, .rows = 7, .columns = { { .name = "var1", .type = READSTAT_TYPE_DOUBLE, .values = { { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'A' }, { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'B' }, { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'C' }, { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'X' }, { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'Y' }, { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'Z' }, { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = '_' } } } } }, { .label = "DTA in-range tagged missing doubles", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .rows = 6, .columns = { { .name = "var1", .type = READSTAT_TYPE_DOUBLE, .values = { { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'a' }, { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'b' }, { .type = 
READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'c' }, { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'x' }, { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'y' }, { .type = READSTAT_TYPE_DOUBLE, .is_tagged_missing = 1, .tag = 'z' } } } } }, { .label = "DTA in-range tagged missing floats", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .rows = 6, .columns = { { .name = "var2", .type = READSTAT_TYPE_FLOAT, .values = { { .type = READSTAT_TYPE_FLOAT, .is_tagged_missing = 1, .tag = 'a' }, { .type = READSTAT_TYPE_FLOAT, .is_tagged_missing = 1, .tag = 'b' }, { .type = READSTAT_TYPE_FLOAT, .is_tagged_missing = 1, .tag = 'c' }, { .type = READSTAT_TYPE_FLOAT, .is_tagged_missing = 1, .tag = 'x' }, { .type = READSTAT_TYPE_FLOAT, .is_tagged_missing = 1, .tag = 'y' }, { .type = READSTAT_TYPE_FLOAT, .is_tagged_missing = 1, .tag = 'z' } } } } }, { .label = "DTA in-range tagged missing int32s", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .rows = 6, .columns = { { .name = "var3", .type = READSTAT_TYPE_INT32, .values = { { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 1, .tag = 'a' }, { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 1, .tag = 'b' }, { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 1, .tag = 'c' }, { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 1, .tag = 'x' }, { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 1, .tag = 'y' }, { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 1, .tag = 'z' } } } } }, { .label = "DTA in-range tagged missing int16s", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .rows = 6, .columns = { { .name = "var4", .type = READSTAT_TYPE_INT16, .values = { { .type = READSTAT_TYPE_INT16, .is_tagged_missing = 1, .tag = 'a' }, { .type = READSTAT_TYPE_INT16, .is_tagged_missing = 1, .tag = 'b' }, { .type = READSTAT_TYPE_INT16, .is_tagged_missing = 1, .tag = 'c' }, { .type = READSTAT_TYPE_INT16, .is_tagged_missing = 1, .tag = 'x' }, { .type = READSTAT_TYPE_INT16, .is_tagged_missing = 1, .tag = 'y' }, { 
.type = READSTAT_TYPE_INT16, .is_tagged_missing = 1, .tag = 'z' } } } } }, { .label = "DTA in-range tagged missing int8s", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .rows = 6, .columns = { { .name = "var5", .type = READSTAT_TYPE_INT8, .values = { { .type = READSTAT_TYPE_INT8, .is_tagged_missing = 1, .tag = 'a' }, { .type = READSTAT_TYPE_INT8, .is_tagged_missing = 1, .tag = 'b' }, { .type = READSTAT_TYPE_INT8, .is_tagged_missing = 1, .tag = 'c' }, { .type = READSTAT_TYPE_INT8, .is_tagged_missing = 1, .tag = 'x' }, { .type = READSTAT_TYPE_INT8, .is_tagged_missing = 1, .tag = 'y' }, { .type = READSTAT_TYPE_INT8, .is_tagged_missing = 1, .tag = 'z' } } } } } }, }, { .label = "Value labels", .tests = { { .label = "SAS numeric value labels", .test_formats = RT_FORMAT_SAS7BCAT, .label_sets_count = 1, .label_sets = { { .name = "LabelSet", .type = READSTAT_TYPE_DOUBLE, .value_labels_count = 2, .value_labels = { { .value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 1.0 } }, .label = "One" }, { .value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 2.0 } }, .label = "Two" }, } } } }, { .label = "SAS string value labels", .test_formats = RT_FORMAT_SAS7BCAT, .label_sets_count = 1, .label_sets = { { .name = "$StringLabelSet", .type = READSTAT_TYPE_STRING, .value_labels_count = 2, .value_labels = { { .value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "1" } }, .label = "One" }, { .value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "2" } }, .label = "Two" }, } } } }, { .label = "DTA ancient value labels", .write_error = READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE, .test_formats = RT_FORMAT_DTA_104, .label_sets_count = 1, .label_sets = { { .name = "somelbl", .type = READSTAT_TYPE_INT32, .value_labels_count = 1, .value_labels = { { .value = { .type = READSTAT_TYPE_INT32, .v = { .i32_value = -1 } }, .label = "One" } } } }, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT32, .label_set = "somelbl" } } }, { .label = "DTA integer 
value labels", .test_formats = RT_FORMAT_DTA, .label_sets_count = 1, .label_sets = { { .name = "somelbl", .type = READSTAT_TYPE_INT32, .value_labels_count = 2, .value_labels = { { .value = { .type = READSTAT_TYPE_INT32, .v = { .i32_value = 1 } }, .label = "One" }, { .value = { .type = READSTAT_TYPE_INT32, .v = { .i32_value = 2 } }, .label = "Two" } } } }, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT32, .label_set = "somelbl" }, { .name = "var2", .type = READSTAT_TYPE_INT32, .label_set = "somelbl" }, } }, { .label = "DTA negative value labels", .test_formats = RT_FORMAT_DTA_105_AND_NEWER, .label_sets_count = 1, .label_sets = { { .name = "somelbl", .type = READSTAT_TYPE_INT32, .value_labels_count = 2, .value_labels = { { .value = { .type = READSTAT_TYPE_INT32, .v = { .i32_value = -1 } }, .label = "Negative One" }, { .value = { .type = READSTAT_TYPE_INT32, .v = { .i32_value = 1 } }, .label = "Positive One" } } } }, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT32, .label_set = "somelbl" } } }, { .label = "DTA tagged value labels", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .label_sets_count = 1, .label_sets = { { .name = "somelbl", .type = READSTAT_TYPE_INT32, .value_labels_count = 2, .value_labels = { { .value = { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 1, .tag = 'a' }, .label = "One" }, { .value = { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 1, .tag = 'b' }, .label = "Two" }, } } }, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT32, .label_set = "somelbl" } } }, { .label = "DTA unsupported tagged value labels", .write_error = READSTAT_ERROR_TAGGED_VALUES_NOT_SUPPORTED, .test_formats = RT_FORMAT_DTA_111_AND_OLDER, .label_sets_count = 1, .label_sets = { { .name = "somelbl", .type = READSTAT_TYPE_INT32, .value_labels_count = 2, .value_labels = { { .value = { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 1, .tag = 'a' }, .label = "One" }, { .value = { .type = READSTAT_TYPE_INT32, .is_tagged_missing = 1, .tag = 'b' }, 
.label = "Two" }, } } }, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT32, .label_set = "somelbl" } } }, { .label = "SPSS numeric value labels", .test_formats = RT_FORMAT_SPSS, .label_sets_count = 2, .label_sets = { { .name = "labels0", .type = READSTAT_TYPE_DOUBLE, .value_labels_count = 2, .value_labels = { { .value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 1 } }, .label = "One" }, { .value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 2 } }, .label = "Two" } } }, { .name = "labels1", .type = READSTAT_TYPE_DOUBLE, .value_labels_count = 2, .value_labels = { { .value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 3 } }, .label = "Three" }, { .value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 4 } }, .label = "Four" } } } }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE, .label_set = "labels0" }, { .name = "VAR2", .type = READSTAT_TYPE_DOUBLE, .label_set = "labels1" } } }, { .label = "SPSS string value labels", .test_formats = RT_FORMAT_SPSS, .label_sets_count = 1, .label_sets = { { .name = "labels0", .type = READSTAT_TYPE_STRING, .value_labels_count = 2, .value_labels = { { .value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "1" } }, .label = "One" }, { .value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "2" } }, .label = "Two" } } } }, .columns = { { .name = "VAR2", .type = READSTAT_TYPE_STRING, .label_set = "labels0" } } }, { .label = "SPSS short labels for long values", .test_formats = RT_FORMAT_SPSS, .label_sets_count = 1, .label_sets = { { .name = "labels0", .type = READSTAT_TYPE_STRING, .value_labels_count = 2, .value_labels = { { .value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "1,000,000" } }, .label = "One million" }, { .value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "2,000,000" } }, .label = "Two million" } } } }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_STRING, .label_set = "labels0" } } }, { .label = "SPSS long labels 
for short values", .test_formats = RT_FORMAT_SPSS, .label_sets_count = 1, .label_sets = { { .name = "labels0", .type = READSTAT_TYPE_STRING, .value_labels_count = 2, .value_labels = { { .value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "One" } }, .label = "One" }, { .value = { .type = READSTAT_TYPE_STRING, .v = { .string_value = "Two" } }, .label = "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" "0123456789" } } } }, .columns = { { .name = "VAR2", .type = READSTAT_TYPE_STRING, .label_set = "labels0" } } } } }, { .label = "Out-of-range floating-point values", .tests = { { .label = "DTA out-of-range double value", .test_formats = RT_FORMAT_DTA, .write_error = READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_DOUBLE, .values = { { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = HUGE_VAL } } } } } }, { .label = "DTA out-of-range float value", .test_formats = RT_FORMAT_DTA, .write_error = READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_FLOAT, .values = { { .type = READSTAT_TYPE_FLOAT, .v = { .float_value = HUGE_VALF } } } } } } } }, { .label = "Out-of-range integer values (pre-113 DTA)", .tests = { { .label = "Pre-113 DTA out-of-range int32 value", .test_formats = RT_FORMAT_DTA_111_AND_OLDER, .write_error = READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT32, .values = { { .type = READSTAT_TYPE_INT32, .v = { .i32_value = DTA_OLD_MAX_INT32+1 } } } } } }, { .label = "Pre-113 DTA in-range int32 value", .test_formats = RT_FORMAT_DTA_111_AND_OLDER, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT32, .values = { { .type = READSTAT_TYPE_INT32, .v = { .i32_value = DTA_OLD_MAX_INT32 } } } } } }, { .label = "Pre-113 DTA out-of-range int16 value", .test_formats = 
RT_FORMAT_DTA_111_AND_OLDER, .write_error = READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT16, .values = { { .type = READSTAT_TYPE_INT16, .v = { .i16_value = DTA_OLD_MAX_INT16+1 } } } } } }, { .label = "Pre-113 DTA in-range int16 value", .test_formats = RT_FORMAT_DTA_111_AND_OLDER, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT16, .values = { { .type = READSTAT_TYPE_INT16, .v = { .i16_value = DTA_OLD_MAX_INT16 } } } } } }, { .label = "Pre-113 DTA out-of-range int8 value", .test_formats = RT_FORMAT_DTA_111_AND_OLDER, .write_error = READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT8, .values = { { .type = READSTAT_TYPE_INT8, .v = { .i8_value = DTA_OLD_MAX_INT8+1 } } } } } }, { .label = "Pre-113 DTA in-range int8 value", .test_formats = RT_FORMAT_DTA_111_AND_OLDER, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT8, .values = { { .type = READSTAT_TYPE_INT8, .v = { .i8_value = DTA_OLD_MAX_INT8 } } } } } } } }, { .label = "Out-of-range integer values (post-113 DTA)", .tests = { { .label = "Post-113 DTA out-of-range int32 value", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .write_error = READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT32, .values = { { .type = READSTAT_TYPE_INT32, .v = { .i32_value = DTA_113_MAX_INT32+1 } } } } } }, { .label = "Post-113 DTA in-range int32 value", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT32, .values = { { .type = READSTAT_TYPE_INT32, .v = { .i32_value = DTA_113_MAX_INT32 } } } } } }, { .label = "Post-113 DTA out-of-range int16 value", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .write_error = READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT16, .values = { { .type = 
READSTAT_TYPE_INT16, .v = { .i16_value = DTA_113_MAX_INT16+1 } } } } } }, { .label = "Post-113 DTA in-range int16 value", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT16, .values = { { .type = READSTAT_TYPE_INT16, .v = { .i16_value = DTA_113_MAX_INT16 } } } } } }, { .label = "Post-113 DTA out-of-range int8 value", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .write_error = READSTAT_ERROR_NUMERIC_VALUE_IS_OUT_OF_RANGE, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT8, .values = { { .type = READSTAT_TYPE_INT8, .v = { .i8_value = DTA_113_MAX_INT8+1 } } } } } }, { .label = "Post-113 DTA in-range int8 value", .test_formats = RT_FORMAT_DTA_114_AND_NEWER, .rows = 1, .columns = { { .name = "var1", .type = READSTAT_TYPE_INT8, .values = { { .type = READSTAT_TYPE_INT8, .v = { .i8_value = DTA_113_MAX_INT8 } } } } } } } }, { .label = "Timestamps", .tests = { { .label = "January 2, 1970", /* Windows localtime can't handle negative UNIX timestamps */ .test_formats = RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */70, .tm_mon = 0, .tm_mday = 2, .tm_hour = 0, .tm_min = 0 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "February 16, 1988", .test_formats = RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */88, .tm_mon = 1, .tm_mday = 16, .tm_hour = 9, .tm_min = 30 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "March 14, 1990", .test_formats = RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */90, .tm_mon = 2, .tm_mday = 14, .tm_hour = 15, .tm_min = 15 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "April 15, 1995", .test_formats = RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */95, .tm_mon = 3, .tm_mday = 15, .tm_hour = 12, .tm_min = 0 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "May 1, 1995", .test_formats = 
RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */95, .tm_mon = 4, .tm_mday = 1, .tm_hour = 0, .tm_min = 0 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "June 6, 1994", .test_formats = RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */94, .tm_mon = 5, .tm_mday = 6, .tm_hour = 5, .tm_min = 30 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "July 4, 1976", .test_formats = RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */76, .tm_mon = 6, .tm_mday = 4, .tm_hour = 10, .tm_min = 30 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "August 2, 1984", .test_formats = RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */84, .tm_mon = 7, .tm_mday = 2, .tm_hour = 3, .tm_min = 4 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "September 20, 1999", .test_formats = RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */99, .tm_mon = 8, .tm_mday = 20, .tm_hour = 3, .tm_min = 4 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "October 31, 1992", .test_formats = RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */92, .tm_mon = 9, .tm_mday = 31, .tm_hour = 23, .tm_min = 59 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "November 3, 1986", .test_formats = RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */86, .tm_mon = 10, .tm_mday = 3, .tm_hour = 16, .tm_min = 30 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "December 25, 2020", .test_formats = RT_FORMAT_TEST_TIMESTAMPS, .timestamp = { .tm_year = /* 19 */120, .tm_mon = 11, .tm_mday = 25, .tm_hour = 6, .tm_min = 0 }, .columns = { { .name = "VAR1", .type = READSTAT_TYPE_DOUBLE } } } } }, { .label = "Frequency weights", .tests = { { .label = "Good frequency weight", .test_formats = RT_FORMAT_SPSS, .fweight = "VAR1", .columns = { { .name = 
"VAR1", .label = "Double-precision variable", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "Non-existent frequency weight", .write_error = READSTAT_ERROR_BAD_FREQUENCY_WEIGHT, .test_formats = RT_FORMAT_SPSS, .fweight = "VAR2", .columns = { { .name = "VAR1", .label = "Double-precision variable", .type = READSTAT_TYPE_DOUBLE } } }, { .label = "String frequency weight", .write_error = READSTAT_ERROR_BAD_FREQUENCY_WEIGHT, .test_formats = RT_FORMAT_SPSS, .fweight = "VAR1", .columns = { { .name = "VAR1", .label = "String variable", .type = READSTAT_TYPE_STRING } } } } }, { .label = "Generic tests", .tests = { { .label = "Generic test file with all column types", .test_formats = RT_FORMAT_ALL, .write_error = READSTAT_OK, .rows = 6, .columns = { { .name = "VAR1", .label = "Double-precision variable", .type = READSTAT_TYPE_DOUBLE, .alignment = READSTAT_ALIGNMENT_CENTER, .measure = READSTAT_MEASURE_SCALE, .values = { { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 100.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 30.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = 10.0 } }, { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = -3.14159, } }, { .type = READSTAT_TYPE_DOUBLE, .is_system_missing = 1, .v = { .double_value = NAN } }, { .type = READSTAT_TYPE_DOUBLE, .is_system_missing = 1 }, } }, { .name = "VAR2", .label = "Single-precision variable", .type = READSTAT_TYPE_FLOAT, .alignment = READSTAT_ALIGNMENT_CENTER, .measure = READSTAT_MEASURE_SCALE, .values = { { .type = READSTAT_TYPE_FLOAT, .v = { .float_value = 30.0 } }, { .type = READSTAT_TYPE_FLOAT, .v = { .float_value = 20.0 } }, { .type = READSTAT_TYPE_FLOAT, .v = { .float_value = 15.0 } }, { .type = READSTAT_TYPE_FLOAT, .v = { .float_value = 3.14159 } }, { .type = READSTAT_TYPE_FLOAT, .is_system_missing = 1, .v = { .float_value = NAN } }, { .type = READSTAT_TYPE_FLOAT, .is_system_missing = 1 } } }, { .name = "VAR3", .label = "Int32 variable", .type = READSTAT_TYPE_INT32, .alignment 
= READSTAT_ALIGNMENT_CENTER, .measure = READSTAT_MEASURE_SCALE, .values = { { .type = READSTAT_TYPE_INT32, .v = { .i32_value = 30 } }, { .type = READSTAT_TYPE_INT32, .v = { .i32_value = 20 } }, { .type = READSTAT_TYPE_INT32, .v = { .i32_value = 15 } }, { .type = READSTAT_TYPE_INT32, .v = { .i32_value = -281817 } }, { .type = READSTAT_TYPE_INT32, .v = { .i32_value = DTA_113_MAX_INT32 } }, { .type = READSTAT_TYPE_INT32, .is_system_missing = 1 } } }, { .name = "VAR4", .label = "Int16 variable", .type = READSTAT_TYPE_INT16, .alignment = READSTAT_ALIGNMENT_CENTER, .measure = READSTAT_MEASURE_SCALE, .values = { { .type = READSTAT_TYPE_INT16, .v = { .i16_value = 30 } }, { .type = READSTAT_TYPE_INT16, .v = { .i16_value = 20 } }, { .type = READSTAT_TYPE_INT16, .v = { .i16_value = 15 } }, { .type = READSTAT_TYPE_INT16, .v = { .i16_value = -28117 } }, { .type = READSTAT_TYPE_INT16, .v = { .i16_value = DTA_113_MAX_INT16 } }, { .type = READSTAT_TYPE_INT16, .is_system_missing = 1 } } }, { .name = "VAR5", .label = "Int8 variable", .type = READSTAT_TYPE_INT8, .alignment = READSTAT_ALIGNMENT_CENTER, .measure = READSTAT_MEASURE_SCALE, .values = { { .type = READSTAT_TYPE_INT8, .v = { .i8_value = 30 } }, { .type = READSTAT_TYPE_INT8, .v = { .i8_value = 20 } }, { .type = READSTAT_TYPE_INT8, .v = { .i8_value = 15 } }, { .type = READSTAT_TYPE_INT8, .v = { .i8_value = -28 } }, { .type = READSTAT_TYPE_INT8, .v = { .i8_value = DTA_113_MAX_INT8 } }, { .type = READSTAT_TYPE_INT8, .is_system_missing = 1 } } }, { .name = "VAR6", .label = "String variable", .type = READSTAT_TYPE_STRING, .alignment = READSTAT_ALIGNMENT_LEFT, .measure = READSTAT_MEASURE_ORDINAL, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = "Hello" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "Hello" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "Goodbye" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "Goodbye" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value 
= "Goodbye" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "" } }, } }, { .name = "VAR7", .label = "Empty string variable", .type = READSTAT_TYPE_STRING, .alignment = READSTAT_ALIGNMENT_LEFT, .measure = READSTAT_MEASURE_ORDINAL, .values = { { .type = READSTAT_TYPE_STRING, .v = { .string_value = "" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "" } }, { .type = READSTAT_TYPE_STRING, .v = { .string_value = "" } }, } } } }, { .label = "Extreme values", .test_formats = RT_FORMAT_ALL, .rows = 1, .columns = { { .name = "VAR1", .label = "Double-precision variable", .type = READSTAT_TYPE_DOUBLE, .values = { { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = -HUGE_VAL } } } }, { .name = "VAR2", .label = "Single-precision variable", .type = READSTAT_TYPE_FLOAT, .values = { { .type = READSTAT_TYPE_FLOAT, .v = { .float_value = -HUGE_VALF } } } }, { .name = "VAR3", .label = "Int32 variable", .type = READSTAT_TYPE_INT32, .values = { { .type = READSTAT_TYPE_INT32, .v = { .i32_value = INT32_MIN } } } }, { .name = "VAR4", .label = "Int16 variable", .type = READSTAT_TYPE_INT16, .values = { { .type = READSTAT_TYPE_INT16, .v = { .i16_value = INT16_MIN } } } }, { .name = "VAR5", .label = "Int8 variable", .type = READSTAT_TYPE_INT8, .values = { { .type = READSTAT_TYPE_INT8, .v = { .i8_value = INT8_MIN } } } } } } } } }; ReadStat-1.1.7/src/test/test_read.c000066400000000000000000000307371410722155500171270ustar00rootroot00000000000000#include #include "../readstat.h" #include "test_buffer.h" #include "test_types.h" #include "test_error.h" #include "test_buffer_io.h" #include "test_readstat.h" #include "test_read.h" #include "test_dta.h" #include "test_sas.h" #include "test_sav.h" char *file_extension(long format) { if (format == RT_FORMAT_DTA_104) return "dta104"; if 
(format == RT_FORMAT_DTA_105) return "dta105"; if (format == RT_FORMAT_DTA_108) return "dta108"; if (format == RT_FORMAT_DTA_110) return "dta110"; if (format == RT_FORMAT_DTA_111) return "dta111"; if (format == RT_FORMAT_DTA_114) return "dta114"; if (format == RT_FORMAT_DTA_117) return "dta117"; if (format == RT_FORMAT_DTA_118) return "dta118"; if (format == RT_FORMAT_DTA_119) return "dta119"; if (format == RT_FORMAT_SAV_COMP_NONE) return "sav"; if (format == RT_FORMAT_SAV_COMP_ROWS) return "savrow"; if (format == RT_FORMAT_SAV_COMP_ZLIB) return "zsav"; if (format == RT_FORMAT_POR) return "por"; if (format == RT_FORMAT_SAS7BCAT) return "sas7bcat"; if (format == RT_FORMAT_SAS7BDAT_32BIT_COMP_NONE) return "sas7bdat32"; if (format == RT_FORMAT_SAS7BDAT_32BIT_COMP_ROWS) return "sas7bdat32row"; if (format == RT_FORMAT_SAS7BDAT_64BIT_COMP_NONE) return "sas7bdat64"; if (format == RT_FORMAT_SAS7BDAT_64BIT_COMP_ROWS) return "sas7bdat64row"; if (format == RT_FORMAT_XPORT_5) return "xpt5"; if (format == RT_FORMAT_XPORT_8) return "xpt8"; return "data"; } rt_parse_ctx_t *parse_ctx_init(rt_buffer_t *buffer, rt_test_file_t *file, rt_test_args_t *args) { rt_parse_ctx_t *parse_ctx = calloc(1, sizeof(rt_parse_ctx_t)); parse_ctx->buffer_ctx = buffer_ctx_init(buffer); parse_ctx->file = file; parse_ctx->args = args; return parse_ctx; } void parse_ctx_reset(rt_parse_ctx_t *parse_ctx, long file_format) { parse_ctx->file_format = file_format; parse_ctx->file_extension = file_extension(file_format); if ((file_format & RT_FORMAT_DTA_118_AND_NEWER)) { parse_ctx->max_file_label_len = 321; } else if ((file_format & RT_FORMAT_DTA_105_AND_OLDER)) { parse_ctx->max_file_label_len = 32; } else if ((file_format & RT_FORMAT_DTA)) { parse_ctx->max_file_label_len = 81; } else if ((file_format & RT_FORMAT_SAV)) { parse_ctx->max_file_label_len = 64; } else if ((file_format & RT_FORMAT_SAS7BDAT)) { parse_ctx->max_table_name_len = 32; parse_ctx->max_file_label_len = 256; } else { 
parse_ctx->max_file_label_len = 20; } if ((file_format & RT_FORMAT_XPORT_5)) { parse_ctx->max_table_name_len = 8; } else if ((file_format & RT_FORMAT_XPORT_8)) { parse_ctx->max_table_name_len = 32; parse_ctx->max_file_label_len = 256; } parse_ctx->var_index = -1; parse_ctx->obs_index = -1; parse_ctx->notes_count = 0; parse_ctx->variables_count = 0; parse_ctx->value_labels_count = 0; buffer_ctx_reset(parse_ctx->buffer_ctx); } void parse_ctx_free(rt_parse_ctx_t *parse_ctx) { if (parse_ctx->buffer_ctx) { free(parse_ctx->buffer_ctx); } free(parse_ctx); } long expected_row_count(rt_parse_ctx_t *parse_ctx) { long expected_rows = parse_ctx->file->rows; if (parse_ctx->args->row_offset > 0) expected_rows -= parse_ctx->args->row_offset; if (expected_rows < 0) expected_rows = 0; if (parse_ctx->args->row_limit > 0 && parse_ctx->args->row_limit < expected_rows) expected_rows = parse_ctx->args->row_limit; return expected_rows; } static int handle_metadata(readstat_metadata_t *metadata, void *ctx) { rt_parse_ctx_t *rt_ctx = (rt_parse_ctx_t *)ctx; rt_ctx->var_index = -1; rt_ctx->obs_index = -1; int var_count = readstat_get_var_count(metadata); int obs_count = readstat_get_row_count(metadata); const char *file_label = readstat_get_file_label(metadata); const char *table_name = readstat_get_table_name(metadata); time_t timestamp = readstat_get_creation_time(metadata); long format_version = readstat_get_file_format_version(metadata); push_error_if_doubles_differ(rt_ctx, rt_ctx->file->columns_count, var_count, "Number of variables"); if (obs_count != -1) { push_error_if_doubles_differ(rt_ctx, expected_row_count(rt_ctx), obs_count, "Number of observations"); } push_error_if_strings_differ_n(rt_ctx, rt_ctx->file->label, file_label, rt_ctx->max_file_label_len-1, "File labels"); if (table_name == NULL || strcmp(table_name, "DATASET") != 0) { push_error_if_strings_differ_n(rt_ctx, rt_ctx->file->table_name, table_name, rt_ctx->max_table_name_len, "Table names"); } if 
(rt_ctx->file->timestamp.tm_year) { struct tm timestamp_s = rt_ctx->file->timestamp; timestamp_s.tm_isdst = -1; push_error_if_doubles_differ(rt_ctx, mktime(×tamp_s), timestamp, "File timestamps"); } if (rt_ctx->file_format_version) { push_error_if_doubles_differ(rt_ctx, rt_ctx->file_format_version, format_version, "Format versions"); } return READSTAT_HANDLER_OK; } static int handle_note(int index, const char *note, void *ctx) { rt_parse_ctx_t *rt_ctx = (rt_parse_ctx_t *)ctx; push_error_if_strings_differ(rt_ctx, rt_ctx->file->notes[rt_ctx->notes_count++], note, "Note"); return READSTAT_HANDLER_OK; } static int handle_fweight(readstat_variable_t *variable, void *ctx) { rt_parse_ctx_t *rt_ctx = (rt_parse_ctx_t *)ctx; int var_index = readstat_variable_get_index(variable); rt_column_t *column = &rt_ctx->file->columns[var_index]; push_error_if_strings_differ(rt_ctx, rt_ctx->file->fweight, column->name, "Frequency weight"); return READSTAT_HANDLER_OK; } static int handle_variable(int index, readstat_variable_t *variable, const char *val_labels, void *ctx) { rt_parse_ctx_t *rt_ctx = (rt_parse_ctx_t *)ctx; rt_column_t *column = &rt_ctx->file->columns[index]; rt_ctx->var_index = index; push_error_if_strings_differ(rt_ctx, column->label_set, val_labels, "Column label sets"); push_error_if_strings_differ(rt_ctx, column->name, readstat_variable_get_name(variable), "Column names"); push_error_if_strings_differ(rt_ctx, column->label, readstat_variable_get_label(variable), "Column labels"); if (column->format[0]) push_error_if_strings_differ(rt_ctx, column->format, readstat_variable_get_format(variable), "Column formats"); if (column->display_width) push_error_if_doubles_differ(rt_ctx, column->display_width, readstat_variable_get_display_width(variable), "Column display widths"); push_error_if_doubles_differ(rt_ctx, column->missing_ranges_count, readstat_variable_get_missing_ranges_count(variable), "Missing values count"); long i; for (i=0; imissing_ranges_count; i++) { 
push_error_if_values_differ(rt_ctx, column->missing_ranges[i].lo, readstat_variable_get_missing_range_lo(variable, i), "Missing range definition (lo value)"); push_error_if_values_differ(rt_ctx, column->missing_ranges[i].hi, readstat_variable_get_missing_range_hi(variable, i), "Missing range definition (hi value)"); } rt_ctx->variables_count++; return READSTAT_HANDLER_OK; } static int handle_value_label(const char *val_labels, readstat_value_t value, const char *label, void *ctx) { rt_parse_ctx_t *rt_ctx = (rt_parse_ctx_t *)ctx; long i, j; for (i=0; ifile->label_sets_count; i++) { rt_label_set_t *label_set = &rt_ctx->file->label_sets[i]; if (strcmp(val_labels, label_set->name) == 0) { for (j=0; jvalue_labels_count; j++) { if (values_equal(value, label_set->value_labels[j].value)) { push_error_if_strings_differ(rt_ctx, label_set->value_labels[j].label, label, "Value label"); break; } } if (j == label_set->value_labels_count) { push_error_if_strings_differ(rt_ctx, NULL, label, "Value label (no match)"); } break; } } if (i == rt_ctx->file->label_sets_count) { push_error_if_strings_differ(rt_ctx, NULL, val_labels, "Label set"); } rt_ctx->value_labels_count++; return READSTAT_HANDLER_OK; } static int handle_value(int obs_index, readstat_variable_t *variable, readstat_value_t value, void *ctx) { rt_parse_ctx_t *rt_ctx = (rt_parse_ctx_t *)ctx; rt_ctx->obs_index = obs_index; rt_ctx->var_index = readstat_variable_get_index(variable); long file_obs_index = obs_index + rt_ctx->args->row_offset; rt_column_t *column = &rt_ctx->file->columns[rt_ctx->var_index]; if (column->type == READSTAT_TYPE_STRING_REF) { push_error_if_strings_differ(rt_ctx, rt_ctx->file->string_refs[readstat_int32_value(column->values[file_obs_index])], readstat_string_value(value), "String ref values"); } else { push_error_if_values_differ(rt_ctx, column->values[file_obs_index], value, "Data values"); } return READSTAT_HANDLER_OK; } static void handle_error(const char *error_message, void *ctx) { 
printf("%s\n", error_message); } readstat_error_t read_file(rt_parse_ctx_t *parse_ctx, long format) { readstat_error_t error = READSTAT_OK; readstat_parser_t *parser = readstat_parser_init(); readstat_set_open_handler(parser, rt_open_handler); readstat_set_close_handler(parser, rt_close_handler); readstat_set_seek_handler(parser, rt_seek_handler); readstat_set_read_handler(parser, rt_read_handler); readstat_set_update_handler(parser, rt_update_handler); readstat_set_io_ctx(parser, parse_ctx->buffer_ctx); readstat_set_metadata_handler(parser, &handle_metadata); readstat_set_note_handler(parser, &handle_note); readstat_set_variable_handler(parser, &handle_variable); readstat_set_fweight_handler(parser, &handle_fweight); readstat_set_value_handler(parser, &handle_value); readstat_set_value_label_handler(parser, &handle_value_label); readstat_set_error_handler(parser, &handle_error); readstat_set_row_limit(parser, parse_ctx->args->row_limit); readstat_set_row_offset(parser, parse_ctx->args->row_offset); if ((format & RT_FORMAT_DTA)) { parse_ctx->file_format_version = dta_file_format_version(format); error = readstat_parse_dta(parser, NULL, parse_ctx); } else if ((format & RT_FORMAT_SAV)) { parse_ctx->file_format_version = sav_file_format_version(format); error = readstat_parse_sav(parser, NULL, parse_ctx); } else if (format == RT_FORMAT_POR) { parse_ctx->file_format_version = 0; error = readstat_parse_por(parser, NULL, parse_ctx); } else if ((format & RT_FORMAT_SAS7BDAT)) { parse_ctx->file_format_version = sas_file_format_version(format); error = readstat_parse_sas7bdat(parser, NULL, parse_ctx); } else if ((format & RT_FORMAT_SAS7BCAT)) { error = readstat_parse_sas7bcat(parser, NULL, parse_ctx); } else if ((format & RT_FORMAT_XPORT)) { parse_ctx->file_format_version = sas_file_format_version(format); error = readstat_parse_xport(parser, NULL, parse_ctx); } if (error != READSTAT_OK) goto cleanup; push_error_if_doubles_differ(parse_ctx, parse_ctx->file->notes_count, 
parse_ctx->notes_count, "Note count"); push_error_if_doubles_differ(parse_ctx, parse_ctx->file->columns_count, parse_ctx->variables_count, "Column count"); push_error_if_doubles_differ(parse_ctx, expected_row_count(parse_ctx), parse_ctx->obs_index + 1, "Row count"); long value_labels_count = 0; long i; for (i=0; ifile->label_sets_count; i++) { value_labels_count += parse_ctx->file->label_sets[i].value_labels_count; } push_error_if_doubles_differ(parse_ctx, value_labels_count, parse_ctx->value_labels_count, "Value labels count"); cleanup: readstat_parser_free(parser); return error; } ReadStat-1.1.7/src/test/test_read.h000066400000000000000000000004751410722155500171300ustar00rootroot00000000000000 rt_parse_ctx_t *parse_ctx_init(rt_buffer_t *buffer, rt_test_file_t *file, rt_test_args_t *args); void parse_ctx_reset(rt_parse_ctx_t *parse_ctx, long file_format); void parse_ctx_free(rt_parse_ctx_t *parse_ctx); char *file_extension(long format); readstat_error_t read_file(rt_parse_ctx_t *parse_ctx, long format); ReadStat-1.1.7/src/test/test_readstat.c000066400000000000000000000067531410722155500200240ustar00rootroot00000000000000 #include #include #include #include #if !defined(_MSC_VER) # include #endif #include "../readstat.h" #include "../readstat_iconv.h" #include "../stata/readstat_dta.h" #include "test_buffer.h" #include "test_types.h" #include "test_error.h" #include "test_readstat.h" #include "test_read.h" #include "test_write.h" #include "test_list.h" static rt_test_args_t _test_args[] = { { .row_limit = 0, .row_offset = 0, }, { .row_limit = 1, .row_offset = 1, } }; static void dump_buffer(rt_buffer_t *buffer, long format) { char filename[128]; #if !defined _MSC_VER snprintf(filename, sizeof(filename), "/tmp/test_readstat.%s", file_extension(format)); #else snprintf(filename, sizeof(filename), "test_readstat.%s", file_extension(format)); #endif #if DEBUG printf("Writing file buffer to %s\n", filename); FILE *file = fopen(filename, "wb"); int bytes_written = 
fwrite(buffer->bytes, 1, buffer->used, file); if (bytes_written != buffer->used) printf("Failed to write file!\n"); fclose(file); #endif } int main(int argc, char *argv[]) { rt_buffer_t *buffer = buffer_init(); readstat_error_t error = READSTAT_OK; int g, t, a, f; for (g=0; gtest_formats & f)) continue; int old_errors_count = parse_ctx->errors_count; parse_ctx_reset(parse_ctx, f); error = write_file_to_buffer(file, buffer, f); if (error != file->write_error) { push_error_if_codes_differ(parse_ctx, file->write_error, error); error = READSTAT_OK; continue; } if (error != READSTAT_OK) { error = READSTAT_OK; continue; } error = read_file(parse_ctx, f); if (error != READSTAT_OK) break; if (old_errors_count != parse_ctx->errors_count) dump_buffer(buffer, f); } if (parse_ctx->errors_count) { int i; for (i=0; ierrors_count; i++) { print_error(&parse_ctx->errors[i]); } parse_ctx_free(parse_ctx); buffer_free(buffer); return 1; } parse_ctx_free(parse_ctx); if (error != READSTAT_OK) goto cleanup; } } } cleanup: if (error != READSTAT_OK) { dump_buffer(buffer, f); printf("Error running test \"%s\" (format=%s): %s\n", _test_groups[g].tests[t].label, file_extension(f), readstat_error_message(error)); buffer_free(buffer); return 1; } buffer_free(buffer); return 0; } ReadStat-1.1.7/src/test/test_readstat.h000066400000000000000000000060061410722155500200200ustar00rootroot00000000000000 #define RT_FORMAT_DTA_104 0x000001 #define RT_FORMAT_DTA_105 0x000002 #define RT_FORMAT_DTA_108 0x000004 #define RT_FORMAT_DTA_110 0x000008 #define RT_FORMAT_DTA_111 0x000010 #define RT_FORMAT_DTA_114 0x000020 #define RT_FORMAT_DTA_117 0x000040 #define RT_FORMAT_DTA_118 0x000080 #define RT_FORMAT_DTA_119 0x000100 #define RT_FORMAT_DTA_105_AND_OLDER (RT_FORMAT_DTA_105 | RT_FORMAT_DTA_104) #define RT_FORMAT_DTA_108_AND_OLDER (RT_FORMAT_DTA_108 | RT_FORMAT_DTA_105_AND_OLDER) #define RT_FORMAT_DTA_110_AND_OLDER (RT_FORMAT_DTA_110 | RT_FORMAT_DTA_108_AND_OLDER) #define RT_FORMAT_DTA_111_AND_OLDER 
(RT_FORMAT_DTA_111 | RT_FORMAT_DTA_110_AND_OLDER) #define RT_FORMAT_DTA_114_AND_OLDER (RT_FORMAT_DTA_114 | RT_FORMAT_DTA_111_AND_OLDER) #define RT_FORMAT_DTA_117_AND_OLDER (RT_FORMAT_DTA_117 | RT_FORMAT_DTA_114_AND_OLDER) #define RT_FORMAT_DTA_118_AND_OLDER (RT_FORMAT_DTA_118 | RT_FORMAT_DTA_117_AND_OLDER) #define RT_FORMAT_DTA_118_AND_NEWER (RT_FORMAT_DTA_118 | RT_FORMAT_DTA_119) #define RT_FORMAT_DTA_117_AND_NEWER (RT_FORMAT_DTA_117 | RT_FORMAT_DTA_118_AND_NEWER) #define RT_FORMAT_DTA_114_AND_NEWER (RT_FORMAT_DTA_114 | RT_FORMAT_DTA_117_AND_NEWER) #define RT_FORMAT_DTA_111_AND_NEWER (RT_FORMAT_DTA_111 | RT_FORMAT_DTA_114_AND_NEWER) #define RT_FORMAT_DTA_110_AND_NEWER (RT_FORMAT_DTA_110 | RT_FORMAT_DTA_111_AND_NEWER) #define RT_FORMAT_DTA_108_AND_NEWER (RT_FORMAT_DTA_108 | RT_FORMAT_DTA_110_AND_NEWER) #define RT_FORMAT_DTA_105_AND_NEWER (RT_FORMAT_DTA_105 | RT_FORMAT_DTA_108_AND_NEWER) #define RT_FORMAT_DTA (RT_FORMAT_DTA_104 | RT_FORMAT_DTA_105_AND_NEWER) #define RT_FORMAT_SAV_COMP_NONE 0x001000 #define RT_FORMAT_SAV_COMP_ROWS 0x002000 #define RT_FORMAT_SAV_COMP_ZLIB 0x004000 #define RT_FORMAT_POR 0x008000 #define RT_FORMAT_SAV_COMP (RT_FORMAT_SAV_COMP_ROWS | RT_FORMAT_SAV_COMP_ZLIB) #define RT_FORMAT_SAV (RT_FORMAT_SAV_COMP_NONE | RT_FORMAT_SAV_COMP) #define RT_FORMAT_SPSS (RT_FORMAT_SAV | RT_FORMAT_POR) #define RT_FORMAT_SAS7BDAT_32BIT_COMP_NONE 0x010000 #define RT_FORMAT_SAS7BDAT_32BIT_COMP_ROWS 0x020000 #define RT_FORMAT_SAS7BDAT_32BIT (RT_FORMAT_SAS7BDAT_32BIT_COMP_NONE | RT_FORMAT_SAS7BDAT_32BIT_COMP_ROWS) #define RT_FORMAT_SAS7BDAT_64BIT_COMP_NONE 0x040000 #define RT_FORMAT_SAS7BDAT_64BIT_COMP_ROWS 0x080000 #define RT_FORMAT_SAS7BDAT_64BIT (RT_FORMAT_SAS7BDAT_64BIT_COMP_NONE | RT_FORMAT_SAS7BDAT_64BIT_COMP_ROWS) #define RT_FORMAT_SAS7BDAT_COMP_NONE (RT_FORMAT_SAS7BDAT_32BIT_COMP_NONE | RT_FORMAT_SAS7BDAT_64BIT_COMP_NONE) #define RT_FORMAT_SAS7BDAT_COMP_ROWS (RT_FORMAT_SAS7BDAT_32BIT_COMP_ROWS | RT_FORMAT_SAS7BDAT_64BIT_COMP_ROWS) #define RT_FORMAT_SAS7BDAT 
(RT_FORMAT_SAS7BDAT_32BIT | RT_FORMAT_SAS7BDAT_64BIT) #define RT_FORMAT_SAS7BCAT 0x100000 #define RT_FORMAT_XPORT_5 0x200000 #define RT_FORMAT_XPORT_8 0x400000 #define RT_FORMAT_XPORT (RT_FORMAT_XPORT_5 | RT_FORMAT_XPORT_8) #define RT_FORMAT_SAS (RT_FORMAT_SAS7BDAT | RT_FORMAT_XPORT) #define RT_FORMAT_ALL (RT_FORMAT_DTA | RT_FORMAT_SPSS | RT_FORMAT_SAS) ReadStat-1.1.7/src/test/test_sas.c000066400000000000000000000005151410722155500167710ustar00rootroot00000000000000#include #include "../readstat.h" #include "test_readstat.h" long sas_file_format_version(long format_code) { if ((format_code & RT_FORMAT_XPORT_5)) return 5; if ((format_code & RT_FORMAT_XPORT_8)) return 8; if ((format_code & RT_FORMAT_SAS7BDAT_32BIT)) return 8; return 9; } ReadStat-1.1.7/src/test/test_sas.h000066400000000000000000000000611410722155500167720ustar00rootroot00000000000000 long sas_file_format_version(long format_code); ReadStat-1.1.7/src/test/test_sav.c000066400000000000000000000003201410722155500167660ustar00rootroot00000000000000#include #include "../readstat.h" #include "test_readstat.h" long sav_file_format_version(long format_code) { if ((format_code & RT_FORMAT_SAV_COMP_ZLIB)) return 3; return 2; } ReadStat-1.1.7/src/test/test_sav.h000066400000000000000000000000611410722155500167750ustar00rootroot00000000000000 long sav_file_format_version(long format_code); ReadStat-1.1.7/src/test/test_sav_date.c000066400000000000000000000055661410722155500200040ustar00rootroot00000000000000#include #include #include #include "../bin/util/readstat_sav_date.h" static inline int is_leap(int year) { return ((year % 4 == 0 && year % 100 != 0) || year % 400 ==0); } int main(int argc, char *argv[]) { int daysPerMonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; int daysPerMonthLeap[] = {31,29,31,30,31,30,31,31,30,31,30,31}; { char *dest; char s[] = "1582-10-14"; double v = readstat_sav_date_parse(s, &dest); if (s == dest) { fprintf(stderr, "parse error\n"); exit(EXIT_FAILURE); } if (v != 0) { 
fprintf(stderr, "expected value zero, was %lf\n", v); exit(EXIT_FAILURE); } } { char *dest; char s[] = "1582-10-15"; double v = readstat_sav_date_parse(s, &dest); if (s == dest) { fprintf(stderr, "parse error\n"); exit(EXIT_FAILURE); } if (v != 86400) { fprintf(stderr, "expected value 86400, was %lf\n", v); exit(EXIT_FAILURE); } } double expected = 0; char *dest; char buf[1024]; char buf2[1024]; int count = 0; for (int year=1582; year<2050; year++) { int start_month = year==1582 ? (10-1) : 0; for (int month=start_month; month<12; month++) { int start_day = (year==1582 && month==10-1) ? 14 : 1; int max_days = is_leap(year) ? daysPerMonthLeap[month] : daysPerMonth[month]; for (int day=start_day; day<=max_days; day++) { snprintf(buf, sizeof(buf), "%04d-%02d-%02d", year, month+1, day); double v = readstat_sav_date_parse(buf, &dest); if (buf == dest) { fprintf(stderr, "parse error\n"); exit(EXIT_FAILURE); } if (v != expected) { fprintf(stderr, "got %lf but expected %lf for date %s\n", v, expected, buf); exit(EXIT_FAILURE); } char *s = readstat_sav_date_string(v, buf2, sizeof(buf2)-1); if (!s) { fprintf(stderr, "could not make string of spss double date %lf, expected date was %s\n", v, buf); exit(EXIT_FAILURE); } if (0 != strcmp(buf2, buf)) { fprintf(stderr, "Expected %s, got %s\n", buf, s); exit(EXIT_FAILURE); } if ((++count % 1000) == 0) { fprintf(stdout, "verified date %s => %lf => %s OK\n", buf, v, buf2); } expected+=86400.0; } } } char *s = readstat_sav_date_string(1.0, buf2, sizeof(buf2)-1); if (s!=NULL) { fprintf(stderr, "expected parse failure!\n"); exit(EXIT_FAILURE); } return 0; } ReadStat-1.1.7/src/test/test_types.h000066400000000000000000000063241410722155500173600ustar00rootroot00000000000000#include #define RT_MAX_ROWS 10 #define RT_MAX_COLS 10 #define RT_MAX_LABEL_SETS 2 #define RT_MAX_NOTES 2 #define RT_MAX_STRING_REFS 3 #define RT_MAX_NOTE_SIZE 120 #define RT_MAX_VALUE_LABELS 2 #define RT_MAX_STRING 64 #define RT_MAX_VALUE_LABEL_STRING 121 #define 
MAX_TESTS_PER_GROUP 20 typedef struct rt_label_set_s { char name[RT_MAX_STRING]; readstat_type_t type; struct { readstat_value_t value; char label[RT_MAX_VALUE_LABEL_STRING]; } value_labels[RT_MAX_VALUE_LABELS]; long value_labels_count; } rt_label_set_t; typedef struct rt_column_s { char name[RT_MAX_STRING]; char label[RT_MAX_STRING]; char format[RT_MAX_STRING]; int display_width; readstat_alignment_t alignment; readstat_measure_t measure; readstat_type_t type; readstat_value_t values[RT_MAX_ROWS]; struct { readstat_value_t lo; readstat_value_t hi; } missing_ranges[3]; long missing_ranges_count; char label_set[RT_MAX_STRING]; } rt_column_t; typedef struct rt_test_file_s { readstat_error_t write_error; long test_formats; char label[80]; char table_name[32]; struct tm timestamp; long rows; rt_column_t columns[RT_MAX_COLS]; long columns_count; rt_label_set_t label_sets[RT_MAX_LABEL_SETS]; long label_sets_count; char notes[RT_MAX_NOTES][RT_MAX_NOTE_SIZE]; long notes_count; char string_refs[RT_MAX_STRING_REFS][RT_MAX_STRING]; long string_refs_count; char fweight[RT_MAX_STRING]; } rt_test_file_t; typedef struct rt_test_group_s { char label[80]; rt_test_file_t tests[MAX_TESTS_PER_GROUP]; } rt_test_group_t; typedef struct rt_test_args_s { long row_limit; long row_offset; } rt_test_args_t; typedef struct rt_error_s { readstat_value_t received; readstat_value_t expected; rt_test_file_t *file; long file_format; const char *file_extension; size_t pos; long var_index; long obs_index; char msg[256]; } rt_error_t; typedef struct rt_parse_ctx_s { rt_error_t *errors; long errors_count; char *strings; size_t strings_len; long var_index; long obs_index; long variables_count; long value_labels_count; long notes_count; rt_test_args_t *args; rt_test_file_t *file; long file_format; long file_format_version; const char *file_extension; size_t max_file_label_len; size_t max_table_name_len; void *buffer_ctx; } rt_parse_ctx_t; 
ReadStat-1.1.7/src/test/test_write.c000066400000000000000000000237421410722155500173440ustar00rootroot00000000000000#include #include "../readstat.h" #include "../CKHashTable.h" #include "test_buffer.h" #include "test_types.h" #include "test_readstat.h" #include "test_dta.h" #include "test_sas.h" static void handle_error(const char *error_message, void *ctx) { printf("%s\n", error_message); } static ssize_t write_data(const void *bytes, size_t len, void *ctx) { rt_buffer_t *buffer = (rt_buffer_t *)ctx; buffer_grow(buffer, len); if (buffer->bytes == NULL) { return -1; } memcpy(buffer->bytes + buffer->used, bytes, len); buffer->used += len; return len; } readstat_error_t write_file_to_buffer(rt_test_file_t *file, rt_buffer_t *buffer, long format) { readstat_error_t error = READSTAT_OK; ck_hash_table_t *label_sets = ck_hash_table_init(100, 16); readstat_writer_t *writer = readstat_writer_init(); readstat_set_data_writer(writer, &write_data); if ((format & RT_FORMAT_SAS7BCAT)) { strncpy(file->label, "", 1); } else readstat_writer_set_file_label(writer, file->label); readstat_writer_set_table_name(writer, file->table_name); readstat_writer_set_error_handler(writer, &handle_error); if (file->timestamp.tm_year) { struct tm timestamp = file->timestamp; timestamp.tm_isdst = -1; readstat_writer_set_file_timestamp(writer, mktime(×tamp)); } int i, j; int did_set_fweight = 0; for (j=0; jnotes_count; j++) { readstat_add_note(writer, file->notes[j]); } for (j=0; jlabel_sets_count; j++) { rt_label_set_t *label_set = &file->label_sets[j]; readstat_label_set_t *r_label_set = readstat_add_label_set(writer, label_set->type, label_set->name); for (i=0; ivalue_labels_count; i++) { if (readstat_value_is_tagged_missing(label_set->value_labels[i].value)) { readstat_label_tagged_value(r_label_set, readstat_value_tag(label_set->value_labels[i].value), label_set->value_labels[i].label); } else if (label_set->type == READSTAT_TYPE_DOUBLE) { readstat_label_double_value(r_label_set, 
readstat_double_value(label_set->value_labels[i].value), label_set->value_labels[i].label); } else if (label_set->type == READSTAT_TYPE_INT32) { readstat_label_int32_value(r_label_set, readstat_int32_value(label_set->value_labels[i].value), label_set->value_labels[i].label); } else if (label_set->type == READSTAT_TYPE_STRING) { readstat_label_string_value(r_label_set, readstat_string_value(label_set->value_labels[i].value), label_set->value_labels[i].label); } } ck_str_hash_insert(label_set->name, r_label_set, label_sets); } for (j=0; jstring_refs_count; j++) { readstat_add_string_ref(writer, file->string_refs[j]); } if (file->columns_count == 0) { int c; for (c=0; ccolumns[c].name[0]) break; file->columns_count++; } } for (j=0; jcolumns_count; j++) { rt_column_t *column = &file->columns[j]; readstat_label_set_t *label_set = (readstat_label_set_t *)ck_str_hash_lookup(column->label_set, label_sets); size_t max_len = 0; if (column->type == READSTAT_TYPE_STRING) { max_len = 8; for (i=0; irows; i++) { const char *value = readstat_string_value(column->values[i]); if (value) { size_t len = strlen(value); if (len > max_len) max_len = len; } } if (label_set) { for (i=0; ivalue_labels_count; i++) { if (label_set->value_labels[i].string_key_len > max_len) max_len = label_set->value_labels[i].string_key_len; } } } readstat_variable_t *variable = readstat_add_variable(writer, column->name, column->type, max_len); readstat_variable_set_alignment(variable, column->alignment); readstat_variable_set_measure(variable, column->measure); readstat_variable_set_label(variable, column->label); readstat_variable_set_label_set(variable, label_set); if (column->format[0]) readstat_variable_set_format(variable, column->format); if (column->display_width) readstat_variable_set_display_width(variable, column->display_width); if (column->type == READSTAT_TYPE_STRING) { for (i=0; imissing_ranges_count; i++) { readstat_variable_add_missing_string_range(variable, 
readstat_string_value(column->missing_ranges[i].lo), readstat_string_value(column->missing_ranges[i].hi)); } } else { for (i=0; imissing_ranges_count; i++) { readstat_variable_add_missing_double_range(variable, readstat_double_value(column->missing_ranges[i].lo), readstat_double_value(column->missing_ranges[i].hi)); } } if (strcmp(column->name, file->fweight) == 0) { error = readstat_writer_set_fweight_variable(writer, variable); if (error != READSTAT_OK) goto cleanup; did_set_fweight = 1; } } if (file->fweight[0] && !did_set_fweight) { error = READSTAT_ERROR_BAD_FREQUENCY_WEIGHT; goto cleanup; } if ((format & RT_FORMAT_DTA)) { long version = dta_file_format_version(format); if (version == -1) { error = READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION; goto cleanup; } readstat_writer_set_file_format_version(writer, version); error = readstat_begin_writing_dta(writer, buffer, file->rows); } else if ((format & RT_FORMAT_SAS7BDAT)) { if ((format & RT_FORMAT_SAS7BDAT_COMP_ROWS)) { readstat_writer_set_compression(writer, READSTAT_COMPRESS_ROWS); } readstat_writer_set_file_format_version(writer, sas_file_format_version(format)); readstat_writer_set_file_format_is_64bit(writer, !!(format & RT_FORMAT_SAS7BDAT_64BIT)); error = readstat_begin_writing_sas7bdat(writer, buffer, file->rows); } else if ((format & RT_FORMAT_SAS7BCAT)) { error = readstat_begin_writing_sas7bcat(writer, buffer); } else if ((format & RT_FORMAT_XPORT)) { readstat_writer_set_file_format_version(writer, sas_file_format_version(format)); error = readstat_begin_writing_xport(writer, buffer, file->rows); } else if ((format & RT_FORMAT_SAV)) { if (format == RT_FORMAT_SAV_COMP_ROWS) { readstat_writer_set_compression(writer, READSTAT_COMPRESS_ROWS); } else if (format == RT_FORMAT_SAV_COMP_ZLIB) { readstat_writer_set_compression(writer, READSTAT_COMPRESS_BINARY); } error = readstat_begin_writing_sav(writer, buffer, file->rows); } else if (format == RT_FORMAT_POR) { error = readstat_begin_writing_por(writer, 
buffer, file->rows); } else { error = READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION; } if (error != READSTAT_OK) goto cleanup; for (i=0; irows; i++) { error = readstat_begin_row(writer); if (error != READSTAT_OK) goto cleanup; for (j=0; jcolumns_count; j++) { rt_column_t *column = &file->columns[j]; readstat_variable_t *variable = readstat_get_variable(writer, j); if (readstat_value_is_tagged_missing(column->values[i])) { error = readstat_insert_tagged_missing_value(writer, variable, readstat_value_tag(column->values[i])); } else if (readstat_value_is_system_missing(column->values[i])) { error = readstat_insert_missing_value(writer, variable); } else if (column->type == READSTAT_TYPE_STRING) { error = readstat_insert_string_value(writer, variable, readstat_string_value(column->values[i])); } else if (column->type == READSTAT_TYPE_STRING_REF) { error = readstat_insert_string_ref(writer, variable, readstat_get_string_ref(writer, readstat_int32_value(column->values[i]))); } else if (column->type == READSTAT_TYPE_DOUBLE) { error = readstat_insert_double_value(writer, variable, readstat_double_value(column->values[i])); } else if (column->type == READSTAT_TYPE_FLOAT) { error = readstat_insert_float_value(writer, variable, readstat_float_value(column->values[i])); } else if (column->type == READSTAT_TYPE_INT32) { error = readstat_insert_int32_value(writer, variable, readstat_int32_value(column->values[i])); } else if (column->type == READSTAT_TYPE_INT16) { error = readstat_insert_int16_value(writer, variable, readstat_int16_value(column->values[i])); } else if (column->type == READSTAT_TYPE_INT8) { error = readstat_insert_int8_value(writer, variable, readstat_int8_value(column->values[i])); } if (error != READSTAT_OK) { goto cleanup; } } error = readstat_end_row(writer); if (error != READSTAT_OK) goto cleanup; } error = readstat_end_writing(writer); if (error != READSTAT_OK) goto cleanup; cleanup: ck_hash_table_free(label_sets); readstat_writer_free(writer); return 
error; } ReadStat-1.1.7/src/test/test_write.h000066400000000000000000000001401410722155500173340ustar00rootroot00000000000000 readstat_error_t write_file_to_buffer(rt_test_file_t *file, rt_buffer_t *buffer, long format); ReadStat-1.1.7/src/txt/000077500000000000000000000000001410722155500146375ustar00rootroot00000000000000ReadStat-1.1.7/src/txt/commands_util.c000066400000000000000000000053551410722155500176510ustar00rootroot00000000000000#include #include "../readstat.h" #include "readstat_schema.h" #include "commands_util.h" readstat_error_t submit_value_label(readstat_parser_t *parser, const char *labelset, label_type_t label_type, int64_t first_integer, int64_t last_integer, double double_value, const char *string_value, const char *buf, void *user_ctx) { if (!parser->handlers.value_label) return READSTAT_OK; int cb_retval = READSTAT_HANDLER_OK; if (label_type == LABEL_TYPE_RANGE) { int64_t i; for (i=first_integer; i<=last_integer; i++) { readstat_value_t value = { .type = READSTAT_TYPE_DOUBLE, .v = { .double_value = i } }; cb_retval = parser->handlers.value_label(labelset, value, buf, user_ctx); if (cb_retval != READSTAT_HANDLER_OK) goto cleanup; } } else if (label_type != LABEL_TYPE_OTHER) { readstat_value_t value = { { 0 } }; if (label_type == LABEL_TYPE_DOUBLE) { value.type = READSTAT_TYPE_DOUBLE; value.v.double_value = double_value; } else if (label_type == LABEL_TYPE_STRING) { value.type = READSTAT_TYPE_STRING; value.v.string_value = string_value; } else if (label_type == LABEL_TYPE_NAN) { value.type = READSTAT_TYPE_DOUBLE; value.v.double_value = NAN; } cb_retval = parser->handlers.value_label(labelset, value, buf, user_ctx); } cleanup: return (cb_retval == READSTAT_HANDLER_OK) ? 
READSTAT_OK : READSTAT_ERROR_USER_ABORT; } readstat_error_t submit_columns(readstat_parser_t *parser, readstat_schema_t *dct, void *user_ctx) { int i; int partial_entry_count = 0; for (i=0; ientry_count; i++) { readstat_schema_entry_t *entry = &dct->entries[i]; if (dct->rows_per_observation < entry->row + 1) { dct->rows_per_observation = entry->row + 1; } } if (!parser->handlers.variable) return READSTAT_OK; for (i=0; ientry_count; i++) { readstat_schema_entry_t *entry = &dct->entries[i]; entry->variable.index = i; entry->variable.index_after_skipping = partial_entry_count; if (entry->variable.type == READSTAT_TYPE_STRING) entry->variable.storage_width = entry->len; int cb_retval = parser->handlers.variable(i, &entry->variable, entry->labelset[0] ? entry->labelset : NULL, user_ctx); if (cb_retval == READSTAT_HANDLER_SKIP_VARIABLE) { entry->skip = 1; } else if (cb_retval == READSTAT_HANDLER_ABORT) { return READSTAT_ERROR_USER_ABORT; } else { partial_entry_count++; } } return READSTAT_OK; } ReadStat-1.1.7/src/txt/commands_util.h000066400000000000000000000007631410722155500176540ustar00rootroot00000000000000 typedef enum { LABEL_TYPE_NAN = -1, LABEL_TYPE_DOUBLE, LABEL_TYPE_STRING, LABEL_TYPE_RANGE, LABEL_TYPE_OTHER } label_type_t; readstat_error_t submit_columns(readstat_parser_t *parser, readstat_schema_t *dct, void *user_ctx); readstat_error_t submit_value_label(readstat_parser_t *parser, const char *labelset, label_type_t label_type, int64_t first_integer, int64_t last_integer, double double_value, const char *string_value, const char *buf, void *user_ctx); ReadStat-1.1.7/src/txt/readstat_copy.c000066400000000000000000000023001410722155500176370ustar00rootroot00000000000000#include #include #include void readstat_copy(char *buf, size_t buf_len, const char *str_start, size_t str_len) { size_t this_len = str_len; if (this_len >= buf_len) { this_len = buf_len - 1; } memcpy(buf, str_start, this_len); buf[this_len] = '\0'; } void readstat_copy_lower(char *buf, size_t 
buf_len, const char *str_start, size_t str_len) { int i; readstat_copy(buf, buf_len, str_start, str_len); for (i=0; i= buf_len) { this_len = buf_len - 1; } size_t i=0; size_t j=0; int slash = 0; for (i=0; i #include "../readstat.h" #include "../readstat_strings.h" #include "readstat_schema.h" #include "readstat_copy.h" #include "commands_util.h" #line 13 "src/txt/readstat_sas_commands_read.c" static const signed char _sas_commands_actions[] = { 0, 1, 0, 1, 1, 1, 2, 1, 3, 1, 7, 1, 12, 1, 13, 1, 16, 1, 18, 1, 19, 1, 20, 1, 21, 1, 22, 1, 23, 1, 24, 1, 25, 1, 26, 1, 28, 1, 33, 1, 34, 2, 0, 1, 2, 0, 23, 2, 1, 0, 2, 5, 32, 2, 6, 32, 2, 7, 12, 2, 7, 15, 2, 7, 17, 2, 7, 23, 2, 18, 19, 2, 20, 21, 2, 22, 0, 2, 22, 23, 2, 22, 34, 2, 24, 6, 2, 24, 8, 2, 24, 10, 2, 24, 11, 2, 24, 12, 2, 24, 23, 2, 29, 9, 2, 34, 23, 3, 4, 6, 13, 3, 5, 24, 6, 3, 5, 32, 14, 3, 6, 32, 14, 3, 7, 15, 13, 3, 7, 30, 9, 3, 13, 0, 1, 3, 18, 0, 2, 3, 22, 0, 1, 3, 22, 0, 23, 3, 22, 34, 23, 3, 24, 0, 1, 3, 24, 1, 0, 3, 27, 0, 1, 3, 29, 9, 31, 4, 4, 5, 13, 6, 4, 4, 6, 13, 5, 4, 5, 32, 14, 13, 4, 5, 32, 24, 6, 4, 6, 32, 14, 13, 4, 22, 27, 0, 1, 0 }; static const short _sas_commands_key_offsets[] = { 0, 0, 1, 3, 5, 7, 12, 23, 34, 35, 48, 54, 60, 61, 62, 63, 65, 76, 87, 88, 89, 90, 96, 102, 108, 109, 110, 111, 113, 114, 115, 116, 117, 119, 132, 133, 134, 136, 137, 142, 144, 145, 146, 148, 150, 152, 154, 156, 158, 163, 174, 185, 186, 198, 207, 216, 217, 218, 219, 221, 223, 225, 227, 229, 231, 237, 243, 244, 245, 246, 248, 255, 262, 263, 264, 265, 267, 270, 278, 294, 310, 311, 312, 313, 315, 329, 343, 357, 371, 385, 398, 408, 418, 419, 420, 421, 423, 427, 429, 431, 433, 439, 445, 446, 447, 448, 450, 457, 464, 465, 466, 467, 473, 474, 475, 476, 477, 479, 481, 483, 485, 487, 493, 499, 500, 501, 502, 504, 512, 520, 521, 528, 535, 536, 537, 538, 540, 548, 549, 550, 552, 560, 576, 590, 604, 618, 631, 641, 651, 652, 653, 654, 656, 670, 684, 698, 712, 725, 735, 745, 746, 747, 748, 750, 758, 759, 760, 762, 764, 766, 768, 
779, 791, 803, 804, 820, 833, 846, 847, 853, 854, 855, 857, 858, 859, 871, 872, 873, 888, 900, 916, 917, 918, 920, 924, 926, 928, 930, 932, 934, 936, 941, 954, 967, 968, 981, 995, 1009, 1010, 1011, 1012, 1014, 1030, 1046, 1047, 1048, 1049, 1054, 1055, 1056, 1058, 1071, 1076, 1082, 1089, 1090, 1097, 1103, 1104, 1105, 1107, 1114, 1118, 1120, 1122, 1124, 1126, 1128, 1133, 1140, 1147, 1148, 1149, 1150, 1152, 1154, 1156, 1158, 1163, 1174, 1185, 1186, 1198, 1219, 1240, 1241, 1249, 1255, 1267, 1279, 1280, 1281, 1282, 1284, 1285, 1286, 1288, 1291, 1299, 1307, 1318, 1328, 1337, 1347, 1355, 1363, 1372, 1381, 1390, 1399, 1408, 1417, 1426, 1436, 1444, 1452, 1461, 1470, 1479, 1488, 1497, 1507, 1516, 1525, 1534, 1543, 1553, 1563, 1572, 1581, 1591, 1600, 1611, 1620, 1629, 1638, 1647, 1656, 1665, 1674, 1683, 1692, 1701, 1711, 1720, 1729, 1738, 1747, 1756, 1765, 1774, 1784, 1793, 1802, 1811, 1820, 1830, 1839, 1848, 1857, 1866, 1875, 1885, 1895, 1904, 1913, 1923, 1932, 1943, 1954, 1955, 1956, 1958, 1962, 1979, 1997, 2015, 2016, 2034, 2036, 2054, 2055, 2056, 2058, 2076, 2082, 2084, 2086, 2088, 2100, 2114, 2128, 2129, 2130, 2131, 2143, 2155, 2167, 2168, 2182, 2195, 2208, 2209, 2210, 2211, 2213, 2229, 2245, 2246, 2247, 2248, 2254, 2255, 2256, 2258, 2272, 2278, 2284, 2291, 2292, 2299, 2306, 2307, 2308, 2310, 2318, 2319, 2320, 2321, 2322, 2324, 2326, 2328, 2333, 2346, 2359, 2360, 2362, 2370, 2384, 2398, 2399, 2412, 2428, 2444, 2445, 2458, 2474, 2490, 2491, 2492, 2493, 2495, 2498, 2500, 2508, 2509, 2510, 2512, 2514, 2521, 2532, 2543, 2544, 2556, 2569, 2582, 2583, 2597, 2604, 2611, 2612, 2613, 2614, 2616, 2619, 2627, 2635, 2643, 2651, 2660, 2669, 2678, 2686, 2694, 2695, 2696, 2698, 2701, 2702, 2703, 2705, 2706, 2707, 2709, 2712, 2714, 2722, 2735, 2751, 2767, 2768, 2781, 2797, 2813, 2814, 2815, 2816, 2818, 2821, 2823, 2831, 2832, 2833, 2835, 2836, 2837, 2839, 2841, 2843, 2845, 2847, 2852, 2863, 2874, 2875, 2887, 2894, 2901, 2902, 2903, 2904, 2910, 2916, 2917, 2918, 2919, 2921, 2927, 2933, 
2934, 2942, 2950, 2958, 2959, 2960, 2961, 2963, 2964, 2965, 2966, 2967, 2973, 2979, 2980, 2981, 2982, 2984, 2990, 2996, 2997, 2998, 3008, 3011, 3014, 3017, 3018, 3024, 3027, 3029, 3030, 3033, 3034, 3035, 3036, 3037, 3038, 3039, 3040, 3041, 3045, 3047, 3048, 3049, 3050, 3051, 3052, 3054, 3055, 3056, 3057, 3058, 3060, 3062, 3063, 3064, 3067, 3068, 3071, 3072, 3073, 3074, 3075, 3076, 3077, 3078, 3079, 3080, 3082, 3084, 3085, 3086, 3088, 3089, 3090, 3091, 3092, 3094, 3095, 3096, 3097, 3098, 3100, 3101, 3102, 3103, 3104, 3105, 3107, 3109, 3110, 3111, 3113, 3114, 3118, 3122, 3123, 3124, 3126, 3127, 3128, 3130, 3131, 3132, 3134, 3135, 3136, 3138, 3144, 3146, 3148, 3150, 3155, 3166, 3177, 3178, 3191, 3197, 3203, 3204, 3205, 3206, 3208, 3215, 3222, 3223, 3224, 3225, 3231, 3243, 3255, 3256, 3257, 3258, 3260, 3261, 3262, 3263, 3264, 3266, 3267, 3268, 3270, 3272, 3274, 3276, 3278, 3283, 3294, 3305, 3306, 3318, 3326, 3334, 3335, 3342, 3349, 3350, 3351, 3352, 3354, 3362, 3374, 3386, 3387, 3388, 3389, 3391, 3392, 3393, 3395, 3403, 3404, 3405, 3407, 3409, 3411, 3413, 3415, 3417, 3422, 3433, 3444, 3445, 3457, 3469, 3481, 3482, 3483, 3484, 3490, 3491, 3492, 3493, 3494, 3496, 3498, 3500, 3502, 3504, 3506, 3508, 3510, 3512, 3514, 3516, 3517, 3526, 3535, 3536, 3537, 3538, 3540, 3541, 3542, 3544, 3546, 3548, 3550, 3552, 3554, 3556, 3561, 3572, 3583, 3584, 3597, 3598, 3599, 3601, 3603, 3605, 3607, 3609, 3611, 3613, 3618, 3629, 3640, 3641, 3642, 3643, 3645, 3647, 3649, 3651, 3656, 3667, 3678, 3679, 3680, 3681, 3683, 3685, 3687, 3689, 3691, 3693, 3695, 3697, 3699, 3701, 3703, 3705, 3717, 3729, 3730, 3744, 3757, 3770, 3771, 3772, 3773, 3775, 3783, 3784, 3785, 3786, 3788, 3790, 3792, 3794, 3796, 3798, 3800, 3805, 3816, 3827, 3828, 3840, 3856, 3872, 3873, 3874, 3875, 3881, 3887, 3893, 3894, 3895, 3896, 3898, 3905, 3912, 3913, 3914, 3915, 3921, 3937, 3953, 3954, 3955, 3956, 3958, 3966, 3967, 3968, 3970, 3978, 3985, 3992, 3993, 3998, 4005, 4012, 4013, 4014, 4015, 4017, 4025, 4026, 4027, 4029, 
4042, 4056, 4070, 4084, 4098, 4111, 4112, 4113, 4114, 4115, 4117, 4123, 4137, 4149, 4161, 4162, 4163, 4164, 4166, 4182, 4198, 4199, 4200, 4201, 4207, 4218, 4229, 4230, 4231, 4232, 4234, 4249, 4264, 4265, 4266, 4267, 4269, 4283, 4289, 4295, 4302, 4303, 4310, 4317, 4318, 4319, 4321, 4329, 4330, 4331, 4333, 4334, 4335, 4337, 4353, 4369, 4370, 4371, 4372, 4378, 4379, 4380, 4382, 4396, 4402, 4408, 4415, 4416, 4423, 4430, 4431, 4432, 4434, 4442, 4444, 4446, 4448, 4450, 4455, 4466, 4477, 4478, 4492, 4506, 4520, 4521, 4527, 4541, 4553, 4565, 4566, 4567, 4568, 4570, 4586, 4602, 4603, 4604, 4605, 4611, 4622, 4633, 4634, 4635, 4636, 4638, 4652, 4658, 4664, 4671, 4672, 4679, 4686, 4687, 4688, 4690, 4698, 4699, 4700, 4702, 4718, 4734, 4735, 4736, 4737, 4743, 4756, 4769, 4770, 4771, 4772, 4774, 4788, 4794, 4800, 4807, 4808, 4815, 4822, 4823, 4824, 4826, 4834, 4835, 4836, 4838, 4840, 4842, 4844, 4846, 4851, 4862, 4873, 4874, 4886, 4902, 4918, 4919, 4920, 4921, 4927, 4933, 4939, 4940, 4941, 4942, 4944, 4951, 4958, 4959, 4960, 4961, 4967, 4983, 4999, 5000, 5001, 5002, 5004, 5012, 5013, 5014, 5016, 5024, 5031, 5038, 5039, 5044, 5051, 5058, 5059, 5060, 5061, 5063, 5071, 5072, 5073, 5075, 5088, 5102, 5116, 5130, 5144, 5157, 5158, 5159, 5160, 5161, 5163, 5169, 5183, 5195, 5207, 5208, 5209, 5210, 5212, 5228, 5244, 5245, 5246, 5247, 5253, 5264, 5275, 5276, 5277, 5278, 5280, 5295, 5310, 5311, 5312, 5313, 5315, 5329, 5335, 5341, 5348, 5349, 5356, 5363, 5364, 5365, 5367, 5375, 5376, 5377, 5379, 5380, 5381, 5383, 5410, 5437, 5465, 0 }; static const char _sas_commands_trans_keys[] = { 10, 76, 108, 69, 101, 84, 116, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 9, 10, 13, 32, 47, 61, 10, 42, 42, 42, 47, 9, 10, 13, 32, 34, 39, 47, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 47, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 47, 59, 9, 10, 13, 32, 47, 59, 9, 10, 
13, 32, 47, 59, 10, 42, 42, 42, 47, 39, 39, 42, 42, 42, 47, 9, 10, 13, 32, 47, 59, 95, 46, 57, 65, 90, 97, 122, 42, 42, 42, 47, 59, 9, 10, 13, 32, 59, 10, 59, 42, 42, 42, 47, 84, 116, 84, 116, 82, 114, 73, 105, 66, 98, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 70, 76, 102, 108, 9, 10, 13, 32, 47, 70, 76, 102, 108, 10, 42, 42, 42, 47, 79, 111, 82, 114, 77, 109, 65, 97, 84, 116, 9, 10, 13, 32, 47, 61, 9, 10, 13, 32, 47, 61, 10, 42, 42, 42, 47, 9, 10, 13, 32, 47, 48, 57, 9, 10, 13, 32, 47, 48, 57, 10, 42, 42, 42, 47, 46, 48, 57, 9, 10, 13, 32, 47, 59, 48, 57, 9, 10, 13, 32, 36, 47, 59, 70, 76, 95, 102, 108, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 59, 70, 76, 95, 102, 108, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 47, 79, 95, 111, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 82, 95, 114, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 77, 95, 109, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 65, 95, 97, 48, 57, 66, 90, 98, 122, 9, 10, 13, 32, 47, 84, 95, 116, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 70, 76, 102, 108, 9, 10, 13, 32, 47, 61, 70, 76, 102, 108, 10, 42, 42, 42, 47, 65, 69, 97, 101, 66, 98, 69, 101, 76, 108, 9, 10, 13, 32, 47, 61, 9, 10, 13, 32, 47, 61, 10, 42, 42, 42, 47, 9, 10, 13, 32, 34, 39, 47, 9, 10, 13, 32, 34, 39, 47, 10, 34, 34, 9, 10, 13, 32, 47, 59, 39, 39, 42, 42, 42, 47, 78, 110, 71, 103, 84, 116, 72, 104, 9, 10, 13, 32, 47, 61, 9, 10, 13, 32, 47, 61, 10, 42, 42, 42, 47, 9, 10, 13, 32, 36, 47, 48, 57, 9, 10, 13, 32, 36, 47, 48, 57, 10, 9, 10, 13, 32, 47, 48, 57, 9, 10, 13, 32, 47, 48, 57, 10, 42, 42, 42, 47, 9, 10, 13, 32, 47, 59, 48, 57, 42, 42, 42, 47, 9, 10, 13, 32, 47, 59, 48, 57, 9, 10, 13, 32, 47, 65, 69, 95, 97, 101, 48, 57, 66, 90, 98, 122, 9, 10, 13, 32, 47, 66, 95, 98, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 69, 95, 101, 48, 57, 65, 90, 
97, 122, 9, 10, 13, 32, 47, 76, 95, 108, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 70, 76, 102, 108, 9, 10, 13, 32, 47, 61, 70, 76, 102, 108, 10, 42, 42, 42, 47, 9, 10, 13, 32, 47, 78, 95, 110, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 71, 95, 103, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 84, 95, 116, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 72, 95, 104, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 70, 76, 102, 108, 9, 10, 13, 32, 47, 61, 70, 76, 102, 108, 10, 42, 42, 42, 47, 9, 10, 13, 32, 47, 59, 48, 57, 42, 42, 42, 47, 65, 97, 84, 116, 65, 97, 9, 10, 13, 32, 34, 39, 47, 65, 90, 97, 122, 9, 10, 13, 32, 36, 38, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 38, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 34, 39, 46, 47, 59, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 38, 47, 59, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 38, 47, 59, 95, 65, 90, 97, 122, 10, 36, 95, 65, 90, 97, 122, 42, 42, 42, 47, 34, 34, 9, 10, 13, 32, 34, 39, 47, 59, 65, 90, 97, 122, 39, 39, 9, 10, 13, 32, 34, 39, 47, 59, 95, 46, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 47, 59, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 46, 47, 59, 95, 48, 57, 65, 90, 97, 122, 42, 42, 42, 47, 73, 79, 105, 111, 76, 108, 69, 101, 78, 110, 65, 97, 77, 109, 69, 101, 9, 10, 13, 32, 47, 9, 10, 13, 32, 34, 36, 39, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 39, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 39, 47, 61, 95, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 39, 47, 61, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 47, 42, 42, 42, 47, 9, 10, 13, 32, 46, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 36, 95, 65, 90, 97, 122, 39, 48, 57, 65, 70, 97, 102, 39, 39, 48, 57, 65, 70, 97, 102, 
9, 10, 13, 32, 47, 120, 42, 42, 42, 47, 9, 10, 13, 32, 47, 48, 57, 79, 82, 111, 114, 84, 116, 78, 110, 79, 111, 84, 116, 69, 101, 9, 10, 13, 32, 47, 9, 10, 13, 32, 34, 39, 47, 9, 10, 13, 32, 34, 39, 47, 10, 42, 42, 42, 47, 77, 109, 65, 97, 84, 116, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 68, 74, 77, 80, 81, 84, 87, 89, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 68, 74, 77, 80, 81, 84, 87, 89, 95, 48, 57, 65, 90, 97, 122, 10, 46, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 59, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 42, 42, 42, 47, 46, 48, 57, 9, 10, 13, 32, 47, 59, 48, 57, 9, 10, 13, 32, 47, 59, 48, 57, 46, 65, 68, 79, 95, 48, 57, 66, 90, 97, 122, 46, 84, 89, 95, 48, 57, 65, 90, 97, 122, 46, 69, 95, 48, 57, 65, 90, 97, 122, 46, 57, 84, 95, 48, 56, 65, 90, 97, 122, 9, 10, 13, 32, 47, 59, 48, 57, 46, 95, 48, 57, 65, 90, 97, 122, 46, 73, 95, 48, 57, 65, 90, 97, 122, 46, 77, 95, 48, 57, 65, 90, 97, 122, 46, 69, 95, 48, 57, 65, 90, 97, 122, 46, 77, 95, 48, 57, 65, 90, 97, 122, 46, 77, 95, 48, 57, 65, 90, 97, 122, 46, 89, 95, 48, 57, 65, 90, 97, 122, 46, 89, 95, 48, 57, 65, 90, 97, 122, 46, 78, 83, 95, 48, 57, 65, 90, 97, 122, 46, 95, 48, 57, 65, 90, 97, 122, 46, 95, 48, 57, 65, 90, 97, 122, 46, 87, 95, 48, 57, 65, 90, 97, 122, 46, 78, 95, 48, 57, 65, 90, 97, 122, 46, 65, 95, 48, 57, 66, 90, 97, 122, 46, 85, 95, 48, 57, 65, 90, 97, 122, 46, 76, 95, 48, 57, 65, 90, 97, 122, 46, 68, 73, 95, 48, 57, 65, 90, 97, 122, 46, 65, 95, 48, 57, 66, 90, 97, 122, 46, 89, 95, 48, 57, 65, 90, 97, 122, 46, 65, 95, 48, 57, 66, 90, 97, 122, 46, 78, 95, 48, 57, 65, 90, 97, 122, 46, 77, 79, 95, 48, 57, 65, 90, 97, 122, 46, 68, 89, 95, 48, 57, 65, 90, 97, 122, 46, 68, 95, 48, 57, 65, 90, 97, 122, 46, 89, 95, 48, 57, 65, 90, 97, 122, 46, 78, 83, 95, 48, 57, 65, 90, 
97, 122, 46, 78, 95, 48, 57, 65, 90, 97, 122, 46, 78, 84, 89, 95, 48, 57, 65, 90, 97, 122, 46, 72, 95, 48, 57, 65, 90, 97, 122, 46, 68, 95, 48, 57, 65, 90, 97, 122, 46, 70, 95, 48, 57, 65, 90, 97, 122, 46, 74, 95, 48, 57, 65, 90, 97, 122, 46, 85, 95, 48, 57, 65, 90, 97, 122, 46, 76, 95, 48, 57, 65, 90, 97, 122, 46, 71, 95, 48, 57, 65, 90, 97, 122, 46, 84, 95, 48, 57, 65, 90, 97, 122, 46, 82, 95, 48, 57, 65, 90, 97, 122, 46, 82, 95, 48, 57, 65, 90, 97, 122, 46, 73, 79, 95, 48, 57, 65, 90, 97, 122, 46, 77, 95, 48, 57, 65, 90, 97, 122, 46, 69, 95, 48, 57, 65, 90, 97, 122, 46, 65, 95, 48, 57, 66, 90, 97, 122, 46, 77, 95, 48, 57, 65, 90, 97, 122, 46, 80, 95, 48, 57, 65, 90, 97, 122, 46, 77, 95, 48, 57, 65, 90, 97, 122, 46, 68, 95, 48, 57, 65, 90, 97, 122, 46, 69, 79, 95, 48, 57, 65, 90, 97, 122, 46, 69, 95, 48, 57, 65, 90, 97, 122, 46, 75, 95, 48, 57, 65, 90, 97, 122, 46, 68, 95, 48, 57, 65, 90, 97, 122, 46, 65, 95, 48, 57, 66, 90, 97, 122, 46, 84, 89, 95, 48, 57, 65, 90, 97, 122, 46, 82, 95, 48, 57, 65, 90, 97, 122, 46, 68, 95, 48, 57, 65, 90, 97, 122, 46, 68, 95, 48, 57, 65, 90, 97, 122, 46, 65, 95, 48, 57, 66, 90, 97, 122, 46, 84, 95, 48, 57, 65, 90, 97, 122, 46, 69, 88, 95, 48, 57, 65, 90, 97, 122, 46, 69, 89, 95, 48, 57, 65, 90, 97, 122, 46, 65, 95, 48, 57, 66, 90, 97, 122, 46, 82, 95, 48, 57, 65, 90, 97, 122, 46, 77, 81, 95, 48, 57, 65, 90, 97, 122, 46, 77, 95, 48, 57, 65, 90, 97, 122, 46, 68, 78, 83, 95, 48, 57, 65, 90, 97, 122, 46, 78, 82, 83, 95, 48, 57, 65, 90, 97, 122, 42, 42, 42, 47, 70, 78, 102, 110, 9, 10, 13, 32, 36, 45, 46, 47, 95, 40, 41, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 45, 46, 47, 59, 95, 40, 41, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 45, 46, 47, 59, 95, 40, 41, 48, 57, 65, 90, 97, 122, 10, 9, 10, 13, 32, 36, 45, 46, 47, 59, 95, 40, 41, 48, 57, 65, 90, 97, 122, 48, 57, 9, 10, 13, 32, 36, 45, 46, 47, 59, 95, 40, 41, 48, 57, 65, 90, 97, 122, 42, 42, 42, 47, 9, 10, 13, 32, 36, 45, 46, 47, 59, 95, 40, 41, 48, 57, 65, 90, 97, 122, 70, 80, 
86, 102, 112, 118, 73, 105, 76, 108, 69, 101, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 39, 47, 59, 95, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 39, 47, 59, 95, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 59, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 59, 61, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 59, 61, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 47, 59, 42, 42, 42, 47, 9, 10, 13, 32, 46, 47, 59, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 59, 36, 95, 65, 90, 97, 122, 39, 48, 57, 65, 70, 97, 102, 39, 39, 48, 57, 65, 70, 97, 102, 9, 10, 13, 32, 47, 59, 120, 42, 42, 42, 47, 9, 10, 13, 32, 47, 59, 48, 57, 39, 39, 42, 42, 42, 47, 85, 117, 84, 116, 9, 10, 13, 32, 47, 9, 10, 13, 32, 35, 36, 47, 64, 95, 65, 90, 97, 122, 9, 10, 13, 32, 35, 36, 47, 64, 95, 65, 90, 97, 122, 10, 48, 57, 9, 10, 13, 32, 47, 59, 48, 57, 9, 10, 13, 32, 35, 36, 47, 59, 64, 95, 65, 90, 97, 122, 9, 10, 13, 32, 35, 36, 47, 59, 64, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 59, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 35, 36, 47, 59, 64, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 35, 36, 47, 59, 64, 95, 48, 57, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 59, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 35, 36, 47, 59, 64, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 35, 36, 47, 59, 64, 95, 48, 57, 65, 90, 97, 122, 10, 42, 42, 42, 47, 45, 48, 57, 48, 57, 9, 10, 13, 32, 47, 59, 48, 57, 42, 42, 42, 47, 48, 57, 9, 10, 13, 32, 47, 48, 57, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 48, 
57, 65, 90, 97, 122, 10, 9, 10, 13, 32, 46, 47, 67, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 48, 57, 9, 10, 13, 32, 47, 48, 57, 10, 42, 42, 42, 47, 46, 48, 57, 9, 10, 13, 32, 47, 59, 48, 57, 9, 10, 13, 32, 47, 59, 48, 57, 46, 95, 48, 57, 65, 90, 97, 122, 46, 95, 48, 57, 65, 90, 97, 122, 46, 72, 95, 48, 57, 65, 90, 97, 122, 46, 65, 95, 48, 57, 66, 90, 97, 122, 46, 82, 95, 48, 57, 65, 90, 97, 122, 46, 95, 48, 57, 65, 90, 97, 122, 46, 95, 48, 57, 65, 90, 97, 122, 42, 42, 42, 47, 46, 48, 57, 42, 42, 42, 47, 42, 42, 42, 47, 45, 48, 57, 48, 57, 9, 10, 13, 32, 47, 59, 48, 57, 9, 10, 13, 32, 47, 59, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 35, 36, 47, 59, 64, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 35, 36, 47, 59, 64, 95, 48, 57, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 59, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 35, 36, 47, 59, 64, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 35, 36, 47, 59, 64, 95, 48, 57, 65, 90, 97, 122, 10, 42, 42, 42, 47, 45, 48, 57, 48, 57, 9, 10, 13, 32, 47, 59, 48, 57, 42, 42, 42, 47, 42, 42, 42, 47, 65, 97, 76, 108, 85, 117, 69, 101, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 39, 47, 79, 9, 10, 13, 32, 39, 47, 79, 10, 39, 39, 9, 10, 13, 32, 47, 61, 9, 10, 13, 32, 47, 61, 10, 42, 42, 42, 47, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 46, 47, 10, 9, 10, 13, 32, 47, 59, 65, 90, 9, 10, 13, 32, 39, 47, 59, 79, 9, 10, 13, 32, 39, 47, 59, 79, 10, 42, 42, 42, 47, 84, 72, 69, 82, 9, 10, 13, 32, 47, 61, 9, 10, 13, 32, 47, 61, 10, 42, 42, 42, 47, 9, 10, 13, 32, 40, 47, 9, 10, 13, 32, 40, 47, 10, 124, 68, 74, 77, 80, 81, 84, 87, 89, 48, 57, 46, 48, 57, 124, 48, 57, 124, 48, 57, 41, 9, 10, 13, 32, 47, 59, 65, 68, 79, 84, 89, 69, 46, 57, 84, 46, 73, 77, 69, 77, 77, 89, 89, 78, 83, 48, 57, 48, 57, 87, 78, 65, 85, 76, 68, 73, 65, 89, 65, 78, 77, 79, 68, 89, 68, 89, 46, 78, 83, 78, 78, 84, 89, 72, 68, 70, 74, 85, 76, 71, 
84, 82, 46, 82, 73, 79, 77, 69, 46, 65, 77, 80, 77, 68, 69, 79, 69, 75, 68, 65, 84, 89, 82, 68, 68, 65, 84, 69, 88, 69, 89, 65, 82, 77, 81, 77, 46, 68, 78, 83, 46, 78, 82, 83, 42, 42, 42, 47, 42, 42, 42, 47, 42, 42, 42, 47, 42, 42, 42, 47, 65, 69, 73, 97, 101, 105, 66, 98, 69, 101, 76, 108, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 9, 10, 13, 32, 47, 61, 10, 42, 42, 42, 47, 9, 10, 13, 32, 34, 39, 47, 9, 10, 13, 32, 34, 39, 47, 10, 34, 34, 9, 10, 13, 32, 47, 59, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 39, 39, 42, 42, 42, 47, 42, 42, 42, 47, 78, 110, 71, 103, 84, 116, 72, 104, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 48, 57, 9, 10, 13, 32, 36, 47, 48, 57, 10, 9, 10, 13, 32, 47, 48, 57, 9, 10, 13, 32, 47, 48, 57, 10, 42, 42, 42, 47, 9, 10, 13, 32, 47, 59, 48, 57, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 42, 42, 42, 47, 9, 10, 13, 32, 47, 59, 48, 57, 42, 42, 42, 47, 66, 98, 78, 110, 65, 97, 77, 109, 69, 101, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 47, 67, 76, 95, 99, 108, 9, 10, 13, 32, 34, 39, 47, 67, 76, 95, 99, 108, 10, 34, 34, 9, 10, 13, 32, 47, 59, 39, 39, 42, 42, 42, 47, 76, 108, 69, 101, 65, 97, 82, 114, 73, 105, 83, 115, 84, 116, 65, 97, 76, 108, 76, 108, 95, 9, 10, 13, 32, 47, 67, 76, 99, 108, 9, 10, 13, 32, 47, 67, 76, 99, 108, 10, 42, 42, 42, 47, 42, 42, 42, 47, 73, 105, 83, 115, 83, 115, 73, 105, 78, 110, 71, 103, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 
36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 59, 95, 48, 57, 65, 90, 97, 122, 42, 42, 42, 47, 80, 112, 84, 116, 73, 105, 79, 111, 78, 110, 83, 115, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 82, 114, 79, 111, 67, 99, 9, 10, 13, 32, 47, 9, 10, 13, 32, 47, 67, 70, 80, 99, 102, 112, 9, 10, 13, 32, 47, 67, 70, 80, 99, 102, 112, 10, 42, 42, 42, 47, 79, 111, 78, 110, 84, 116, 69, 101, 78, 110, 84, 116, 79, 111, 82, 114, 77, 109, 65, 97, 84, 116, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 59, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 59, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 59, 61, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 59, 61, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 47, 59, 86, 118, 10, 42, 42, 42, 47, 85, 117, 78, 110, 65, 97, 76, 108, 85, 117, 69, 101, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 40, 45, 47, 111, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 40, 45, 47, 111, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 47, 61, 9, 10, 13, 32, 47, 61, 9, 10, 13, 32, 47, 61, 10, 42, 42, 42, 47, 9, 10, 13, 32, 34, 39, 47, 9, 10, 13, 32, 34, 39, 47, 10, 34, 34, 9, 10, 13, 32, 47, 59, 9, 10, 13, 32, 34, 39, 45, 47, 59, 111, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 47, 59, 111, 48, 57, 65, 90, 97, 122, 10, 39, 39, 48, 57, 9, 10, 13, 32, 47, 61, 48, 57, 42, 42, 42, 47, 9, 10, 13, 32, 47, 61, 48, 57, 9, 10, 13, 32, 45, 47, 61, 9, 10, 13, 32, 45, 47, 61, 10, 9, 10, 13, 32, 47, 9, 10, 13, 32, 47, 48, 57, 9, 10, 13, 32, 47, 48, 57, 10, 42, 42, 42, 47, 9, 10, 13, 32, 47, 61, 48, 57, 42, 42, 42, 47, 9, 10, 13, 32, 47, 61, 95, 46, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 116, 46, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 104, 46, 57, 65, 90, 97, 122, 9, 10, 
13, 32, 47, 61, 95, 101, 46, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 114, 46, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 46, 57, 65, 90, 97, 122, 39, 39, 42, 42, 42, 47, 36, 95, 65, 90, 97, 122, 9, 10, 13, 32, 41, 47, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 61, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 61, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 41, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 34, 39, 45, 47, 111, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 47, 111, 48, 57, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 41, 46, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 41, 47, 36, 95, 65, 90, 97, 122, 39, 48, 57, 65, 70, 97, 102, 39, 39, 48, 57, 65, 70, 97, 102, 9, 10, 13, 32, 41, 47, 120, 42, 42, 42, 47, 9, 10, 13, 32, 41, 47, 48, 57, 42, 42, 42, 47, 42, 42, 42, 47, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 47, 59, 42, 42, 42, 47, 9, 10, 13, 32, 46, 47, 59, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 59, 36, 95, 65, 90, 97, 122, 39, 48, 57, 65, 70, 97, 102, 39, 39, 48, 57, 65, 70, 97, 102, 9, 10, 13, 32, 47, 59, 120, 42, 42, 42, 47, 9, 10, 13, 32, 47, 59, 48, 57, 82, 114, 73, 105, 78, 110, 84, 116, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 59, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 40, 47, 59, 61, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 40, 47, 59, 61, 95, 65, 90, 97, 122, 10, 36, 95, 65, 90, 97, 122, 9, 10, 13, 32, 41, 47, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 61, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 61, 95, 65, 90, 97, 122, 10, 42, 42, 42, 
47, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 41, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 41, 46, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 41, 47, 36, 95, 65, 90, 97, 122, 39, 48, 57, 65, 70, 97, 102, 39, 39, 48, 57, 65, 70, 97, 102, 9, 10, 13, 32, 41, 47, 120, 42, 42, 42, 47, 9, 10, 13, 32, 41, 47, 48, 57, 42, 42, 42, 47, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 47, 59, 9, 10, 13, 32, 36, 40, 47, 59, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 40, 47, 59, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 46, 47, 59, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 59, 36, 95, 65, 90, 97, 122, 39, 48, 57, 65, 70, 97, 102, 39, 39, 48, 57, 65, 70, 97, 102, 9, 10, 13, 32, 47, 59, 120, 42, 42, 42, 47, 9, 10, 13, 32, 47, 59, 48, 57, 42, 42, 42, 47, 65, 97, 76, 108, 85, 117, 69, 101, 9, 10, 13, 32, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 9, 10, 13, 32, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 40, 45, 47, 111, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 40, 45, 47, 111, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 47, 61, 9, 10, 13, 32, 47, 61, 9, 10, 13, 32, 47, 61, 10, 42, 42, 42, 47, 9, 10, 13, 32, 34, 39, 47, 9, 10, 13, 32, 34, 39, 47, 10, 34, 34, 9, 10, 13, 32, 47, 59, 9, 10, 13, 32, 34, 39, 45, 47, 59, 111, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 47, 59, 111, 48, 57, 65, 90, 97, 122, 10, 39, 39, 48, 57, 9, 10, 13, 32, 47, 61, 48, 57, 42, 42, 42, 47, 9, 10, 13, 32, 47, 61, 48, 57, 9, 10, 13, 32, 45, 47, 61, 9, 10, 13, 32, 45, 47, 61, 10, 9, 10, 13, 32, 47, 9, 10, 13, 32, 47, 48, 57, 9, 10, 13, 32, 47, 48, 57, 10, 42, 42, 42, 47, 9, 10, 13, 32, 47, 61, 48, 57, 
42, 42, 42, 47, 9, 10, 13, 32, 47, 61, 95, 46, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 116, 46, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 104, 46, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 101, 46, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 114, 46, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 61, 95, 46, 57, 65, 90, 97, 122, 39, 39, 42, 42, 42, 47, 36, 95, 65, 90, 97, 122, 9, 10, 13, 32, 41, 47, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 61, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 61, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 36, 38, 39, 47, 95, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 41, 47, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 9, 10, 13, 32, 36, 47, 95, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 34, 39, 45, 47, 111, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 47, 111, 48, 57, 65, 90, 97, 122, 10, 42, 42, 42, 47, 9, 10, 13, 32, 41, 46, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 41, 47, 36, 95, 65, 90, 97, 122, 39, 48, 57, 65, 70, 97, 102, 39, 39, 48, 57, 65, 70, 97, 102, 9, 10, 13, 32, 41, 47, 120, 42, 42, 42, 47, 9, 10, 13, 32, 41, 47, 48, 57, 42, 42, 42, 47, 42, 42, 42, 47, 9, 10, 13, 32, 37, 42, 47, 65, 68, 70, 73, 76, 77, 79, 80, 82, 86, 97, 100, 102, 105, 108, 109, 111, 112, 114, 118, 9, 10, 13, 32, 37, 42, 47, 65, 68, 70, 73, 76, 77, 79, 80, 82, 86, 97, 100, 102, 105, 108, 109, 111, 112, 114, 118, 9, 10, 13, 32, 37, 42, 47, 59, 65, 68, 70, 73, 76, 77, 79, 80, 82, 86, 97, 100, 102, 105, 108, 109, 111, 112, 114, 118, 9, 10, 13, 32, 37, 42, 47, 59, 65, 68, 70, 73, 76, 77, 79, 80, 82, 86, 97, 100, 102, 105, 108, 109, 111, 112, 114, 118, 0 }; static const signed char _sas_commands_single_lengths[] = { 0, 1, 2, 2, 2, 5, 7, 7, 1, 7, 6, 6, 1, 1, 1, 2, 7, 7, 1, 1, 1, 6, 6, 6, 1, 1, 1, 2, 1, 1, 1, 1, 2, 7, 1, 1, 2, 1, 5, 2, 1, 1, 2, 2, 2, 2, 2, 2, 5, 7, 7, 1, 6, 9, 9, 1, 1, 1, 2, 2, 2, 2, 2, 2, 6, 6, 1, 1, 1, 
2, 5, 5, 1, 1, 1, 2, 1, 6, 12, 12, 1, 1, 1, 2, 8, 8, 8, 8, 8, 7, 10, 10, 1, 1, 1, 2, 4, 2, 2, 2, 6, 6, 1, 1, 1, 2, 7, 7, 1, 1, 1, 6, 1, 1, 1, 1, 2, 2, 2, 2, 2, 6, 6, 1, 1, 1, 2, 6, 6, 1, 5, 5, 1, 1, 1, 2, 6, 1, 1, 2, 6, 10, 8, 8, 8, 7, 10, 10, 1, 1, 1, 2, 8, 8, 8, 8, 7, 10, 10, 1, 1, 1, 2, 6, 1, 1, 2, 2, 2, 2, 7, 8, 8, 1, 10, 9, 9, 1, 2, 1, 1, 2, 1, 1, 8, 1, 1, 9, 8, 10, 1, 1, 2, 4, 2, 2, 2, 2, 2, 2, 5, 9, 9, 1, 7, 10, 10, 1, 1, 1, 2, 10, 10, 1, 1, 1, 5, 1, 1, 2, 7, 5, 2, 1, 1, 1, 6, 1, 1, 2, 5, 4, 2, 2, 2, 2, 2, 5, 7, 7, 1, 1, 1, 2, 2, 2, 2, 5, 7, 7, 1, 6, 15, 15, 1, 2, 6, 8, 8, 1, 1, 1, 2, 1, 1, 2, 1, 6, 6, 5, 4, 3, 4, 6, 2, 3, 3, 3, 3, 3, 3, 3, 4, 2, 2, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 4, 4, 3, 3, 4, 3, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 4, 4, 3, 3, 4, 3, 5, 5, 1, 1, 2, 4, 9, 10, 10, 1, 10, 0, 10, 1, 1, 2, 10, 6, 2, 2, 2, 8, 10, 10, 1, 1, 1, 8, 8, 8, 1, 8, 9, 9, 1, 1, 1, 2, 10, 10, 1, 1, 1, 6, 1, 1, 2, 8, 6, 2, 1, 1, 1, 7, 1, 1, 2, 6, 1, 1, 1, 1, 2, 2, 2, 5, 9, 9, 1, 0, 6, 10, 10, 1, 7, 10, 10, 1, 7, 10, 10, 1, 1, 1, 2, 1, 0, 6, 1, 1, 2, 0, 5, 7, 7, 1, 6, 7, 7, 1, 8, 5, 5, 1, 1, 1, 2, 1, 6, 6, 2, 2, 3, 3, 3, 2, 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2, 1, 0, 6, 7, 10, 10, 1, 7, 10, 10, 1, 1, 1, 2, 1, 0, 6, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 5, 7, 7, 1, 6, 7, 7, 1, 1, 1, 6, 6, 1, 1, 1, 2, 6, 6, 1, 6, 8, 8, 1, 1, 1, 2, 1, 1, 1, 1, 6, 6, 1, 1, 1, 2, 6, 6, 1, 1, 8, 1, 1, 1, 1, 6, 3, 2, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 4, 4, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 6, 2, 2, 2, 5, 7, 7, 1, 7, 6, 6, 1, 1, 1, 2, 7, 7, 1, 1, 1, 6, 8, 8, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 5, 7, 7, 1, 6, 6, 6, 1, 5, 5, 1, 1, 1, 2, 6, 8, 8, 1, 1, 1, 2, 1, 1, 2, 6, 1, 1, 2, 2, 2, 2, 2, 2, 5, 7, 7, 1, 6, 12, 12, 1, 1, 1, 6, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 9, 9, 1, 1, 1, 2, 1, 
1, 2, 2, 2, 2, 2, 2, 2, 5, 7, 7, 1, 7, 1, 1, 2, 2, 2, 2, 2, 2, 2, 5, 7, 7, 1, 1, 1, 2, 2, 2, 2, 5, 11, 11, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 8, 8, 1, 8, 9, 9, 1, 1, 1, 2, 8, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 5, 7, 7, 1, 6, 10, 10, 1, 1, 1, 6, 6, 6, 1, 1, 1, 2, 7, 7, 1, 1, 1, 6, 10, 10, 1, 1, 1, 0, 6, 1, 1, 2, 6, 7, 7, 1, 5, 5, 5, 1, 1, 1, 2, 6, 1, 1, 2, 7, 8, 8, 8, 8, 7, 1, 1, 1, 1, 2, 2, 8, 8, 8, 1, 1, 1, 2, 10, 10, 1, 1, 1, 6, 7, 7, 1, 1, 1, 2, 9, 9, 1, 1, 1, 2, 8, 6, 2, 1, 1, 1, 7, 1, 1, 2, 6, 1, 1, 2, 1, 1, 2, 10, 10, 1, 1, 1, 6, 1, 1, 2, 8, 6, 2, 1, 1, 1, 7, 1, 1, 2, 6, 2, 2, 2, 2, 5, 7, 7, 1, 8, 10, 10, 1, 2, 8, 8, 8, 1, 1, 1, 2, 10, 10, 1, 1, 1, 6, 7, 7, 1, 1, 1, 2, 8, 6, 2, 1, 1, 1, 7, 1, 1, 2, 6, 1, 1, 2, 10, 10, 1, 1, 1, 6, 9, 9, 1, 1, 1, 2, 8, 6, 2, 1, 1, 1, 7, 1, 1, 2, 6, 1, 1, 2, 2, 2, 2, 2, 5, 7, 7, 1, 6, 10, 10, 1, 1, 1, 6, 6, 6, 1, 1, 1, 2, 7, 7, 1, 1, 1, 6, 10, 10, 1, 1, 1, 0, 6, 1, 1, 2, 6, 7, 7, 1, 5, 5, 5, 1, 1, 1, 2, 6, 1, 1, 2, 7, 8, 8, 8, 8, 7, 1, 1, 1, 1, 2, 2, 8, 8, 8, 1, 1, 1, 2, 10, 10, 1, 1, 1, 6, 7, 7, 1, 1, 1, 2, 9, 9, 1, 1, 1, 2, 8, 6, 2, 1, 1, 1, 7, 1, 1, 2, 6, 1, 1, 2, 1, 1, 2, 27, 27, 28, 28, 0 }; static const signed char _sas_commands_range_lengths[] = { 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 2, 2, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 3, 2, 2, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 3, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 2, 2, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 3, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 3, 3, 0, 3, 0, 2, 2, 0, 0, 
0, 0, 0, 0, 0, 1, 1, 1, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 4, 4, 4, 0, 4, 1, 4, 0, 0, 0, 4, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 2, 2, 2, 0, 3, 2, 2, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 3, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 1, 1, 2, 2, 0, 3, 3, 3, 0, 3, 3, 3, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 2, 2, 0, 3, 3, 3, 0, 3, 1, 1, 0, 0, 0, 0, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3, 3, 0, 3, 3, 3, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 2, 3, 2, 2, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 3, 0, 2, 3, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 3, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 2, 0, 3, 2, 2, 
0, 2, 3, 2, 2, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 3, 0, 2, 3, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 3, 3, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 3, 0, 2, 3, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 2, 3, 2, 2, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 3, 0, 2, 3, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static const short _sas_commands_index_offsets[] = { 0, 0, 2, 5, 8, 11, 17, 27, 37, 39, 50, 57, 64, 66, 68, 70, 73, 83, 93, 95, 97, 99, 106, 113, 120, 122, 124, 126, 129, 131, 133, 135, 137, 140, 151, 153, 155, 158, 160, 166, 169, 171, 173, 176, 179, 182, 185, 188, 191, 197, 207, 217, 219, 229, 239, 249, 251, 253, 255, 258, 261, 264, 267, 270, 273, 280, 287, 289, 291, 293, 296, 303, 310, 312, 314, 316, 319, 322, 330, 345, 360, 362, 364, 366, 369, 381, 393, 405, 417, 429, 440, 451, 462, 464, 466, 468, 471, 476, 479, 482, 485, 492, 499, 501, 503, 505, 508, 516, 524, 526, 528, 530, 537, 539, 541, 543, 545, 548, 551, 554, 557, 560, 567, 574, 576, 578, 580, 583, 591, 599, 601, 608, 615, 617, 619, 621, 624, 632, 634, 636, 639, 647, 661, 673, 685, 697, 708, 719, 730, 732, 734, 736, 739, 751, 763, 775, 787, 798, 809, 820, 822, 824, 826, 829, 837, 839, 841, 844, 847, 850, 853, 863, 874, 885, 887, 901, 913, 925, 927, 932, 934, 936, 939, 941, 943, 954, 956, 958, 971, 982, 996, 998, 1000, 1003, 1008, 1011, 1014, 1017, 1020, 1023, 1026, 1032, 1044, 1056, 1058, 1069, 1082, 1095, 1097, 1099, 1101, 1104, 1118, 1132, 1134, 1136, 1138, 1144, 1146, 1148, 1151, 1162, 1168, 1173, 1178, 1180, 1185, 1192, 1194, 1196, 1199, 1206, 1211, 1214, 1217, 1220, 1223, 1226, 1232, 1240, 1248, 1250, 1252, 1254, 1257, 1260, 1263, 1266, 1272, 1282, 1292, 1294, 1304, 1323, 1342, 1344, 1350, 1357, 1368, 1379, 1381, 1383, 1385, 1388, 1390, 1392, 1395, 1398, 1406, 1414, 1423, 1431, 1438, 1446, 
1454, 1460, 1467, 1474, 1481, 1488, 1495, 1502, 1509, 1517, 1523, 1529, 1536, 1543, 1550, 1557, 1564, 1572, 1579, 1586, 1593, 1600, 1608, 1616, 1623, 1630, 1638, 1645, 1654, 1661, 1668, 1675, 1682, 1689, 1696, 1703, 1710, 1717, 1724, 1732, 1739, 1746, 1753, 1760, 1767, 1774, 1781, 1789, 1796, 1803, 1810, 1817, 1825, 1832, 1839, 1846, 1853, 1860, 1868, 1876, 1883, 1890, 1898, 1905, 1914, 1923, 1925, 1927, 1930, 1935, 1949, 1964, 1979, 1981, 1996, 1998, 2013, 2015, 2017, 2020, 2035, 2042, 2045, 2048, 2051, 2062, 2075, 2088, 2090, 2092, 2094, 2105, 2116, 2127, 2129, 2141, 2153, 2165, 2167, 2169, 2171, 2174, 2188, 2202, 2204, 2206, 2208, 2215, 2217, 2219, 2222, 2234, 2241, 2246, 2251, 2253, 2258, 2266, 2268, 2270, 2273, 2281, 2283, 2285, 2287, 2289, 2292, 2295, 2298, 2304, 2316, 2328, 2330, 2332, 2340, 2353, 2366, 2368, 2379, 2393, 2407, 2409, 2420, 2434, 2448, 2450, 2452, 2454, 2457, 2460, 2462, 2470, 2472, 2474, 2477, 2479, 2486, 2496, 2506, 2508, 2518, 2529, 2540, 2542, 2554, 2561, 2568, 2570, 2572, 2574, 2577, 2580, 2588, 2596, 2602, 2608, 2615, 2622, 2629, 2635, 2641, 2643, 2645, 2648, 2651, 2653, 2655, 2658, 2660, 2662, 2665, 2668, 2670, 2678, 2689, 2703, 2717, 2719, 2730, 2744, 2758, 2760, 2762, 2764, 2767, 2770, 2772, 2780, 2782, 2784, 2787, 2789, 2791, 2794, 2797, 2800, 2803, 2806, 2812, 2822, 2832, 2834, 2844, 2852, 2860, 2862, 2864, 2866, 2873, 2880, 2882, 2884, 2886, 2889, 2896, 2903, 2905, 2913, 2922, 2931, 2933, 2935, 2937, 2940, 2942, 2944, 2946, 2948, 2955, 2962, 2964, 2966, 2968, 2971, 2978, 2985, 2987, 2989, 2999, 3002, 3005, 3008, 3010, 3017, 3021, 3024, 3026, 3030, 3032, 3034, 3036, 3038, 3040, 3042, 3044, 3046, 3050, 3052, 3054, 3056, 3058, 3060, 3062, 3065, 3067, 3069, 3071, 3073, 3076, 3079, 3081, 3083, 3087, 3089, 3093, 3095, 3097, 3099, 3101, 3103, 3105, 3107, 3109, 3111, 3114, 3117, 3119, 3121, 3124, 3126, 3128, 3130, 3132, 3135, 3137, 3139, 3141, 3143, 3146, 3148, 3150, 3152, 3154, 3156, 3159, 3162, 3164, 3166, 3169, 3171, 3176, 3181, 3183, 
3185, 3188, 3190, 3192, 3195, 3197, 3199, 3202, 3204, 3206, 3209, 3216, 3219, 3222, 3225, 3231, 3241, 3251, 3253, 3264, 3271, 3278, 3280, 3282, 3284, 3287, 3295, 3303, 3305, 3307, 3309, 3316, 3327, 3338, 3340, 3342, 3344, 3347, 3349, 3351, 3353, 3355, 3358, 3360, 3362, 3365, 3368, 3371, 3374, 3377, 3383, 3393, 3403, 3405, 3415, 3423, 3431, 3433, 3440, 3447, 3449, 3451, 3453, 3456, 3464, 3475, 3486, 3488, 3490, 3492, 3495, 3497, 3499, 3502, 3510, 3512, 3514, 3517, 3520, 3523, 3526, 3529, 3532, 3538, 3548, 3558, 3560, 3570, 3583, 3596, 3598, 3600, 3602, 3609, 3611, 3613, 3615, 3617, 3620, 3623, 3626, 3629, 3632, 3635, 3638, 3641, 3644, 3647, 3650, 3652, 3662, 3672, 3674, 3676, 3678, 3681, 3683, 3685, 3688, 3691, 3694, 3697, 3700, 3703, 3706, 3712, 3722, 3732, 3734, 3745, 3747, 3749, 3752, 3755, 3758, 3761, 3764, 3767, 3770, 3776, 3786, 3796, 3798, 3800, 3802, 3805, 3808, 3811, 3814, 3820, 3832, 3844, 3846, 3848, 3850, 3853, 3856, 3859, 3862, 3865, 3868, 3871, 3874, 3877, 3880, 3883, 3886, 3897, 3908, 3910, 3922, 3934, 3946, 3948, 3950, 3952, 3955, 3964, 3966, 3968, 3970, 3973, 3976, 3979, 3982, 3985, 3988, 3991, 3997, 4007, 4017, 4019, 4029, 4043, 4057, 4059, 4061, 4063, 4070, 4077, 4084, 4086, 4088, 4090, 4093, 4101, 4109, 4111, 4113, 4115, 4122, 4136, 4150, 4152, 4154, 4156, 4158, 4166, 4168, 4170, 4173, 4181, 4189, 4197, 4199, 4205, 4212, 4219, 4221, 4223, 4225, 4228, 4236, 4238, 4240, 4243, 4254, 4266, 4278, 4290, 4302, 4313, 4315, 4317, 4319, 4321, 4324, 4329, 4341, 4352, 4363, 4365, 4367, 4369, 4372, 4386, 4400, 4402, 4404, 4406, 4413, 4423, 4433, 4435, 4437, 4439, 4442, 4455, 4468, 4470, 4472, 4474, 4477, 4489, 4496, 4501, 4506, 4508, 4513, 4521, 4523, 4525, 4528, 4536, 4538, 4540, 4543, 4545, 4547, 4550, 4564, 4578, 4580, 4582, 4584, 4591, 4593, 4595, 4598, 4610, 4617, 4622, 4627, 4629, 4634, 4642, 4644, 4646, 4649, 4657, 4660, 4663, 4666, 4669, 4675, 4685, 4695, 4697, 4709, 4722, 4735, 4737, 4742, 4754, 4765, 4776, 4778, 4780, 4782, 4785, 4799, 4813, 4815, 
4817, 4819, 4826, 4836, 4846, 4848, 4850, 4852, 4855, 4867, 4874, 4879, 4884, 4886, 4891, 4899, 4901, 4903, 4906, 4914, 4916, 4918, 4921, 4935, 4949, 4951, 4953, 4955, 4962, 4974, 4986, 4988, 4990, 4992, 4995, 5007, 5014, 5019, 5024, 5026, 5031, 5039, 5041, 5043, 5046, 5054, 5056, 5058, 5061, 5064, 5067, 5070, 5073, 5079, 5089, 5099, 5101, 5111, 5125, 5139, 5141, 5143, 5145, 5152, 5159, 5166, 5168, 5170, 5172, 5175, 5183, 5191, 5193, 5195, 5197, 5204, 5218, 5232, 5234, 5236, 5238, 5240, 5248, 5250, 5252, 5255, 5263, 5271, 5279, 5281, 5287, 5294, 5301, 5303, 5305, 5307, 5310, 5318, 5320, 5322, 5325, 5336, 5348, 5360, 5372, 5384, 5395, 5397, 5399, 5401, 5403, 5406, 5411, 5423, 5434, 5445, 5447, 5449, 5451, 5454, 5468, 5482, 5484, 5486, 5488, 5495, 5505, 5515, 5517, 5519, 5521, 5524, 5537, 5550, 5552, 5554, 5556, 5559, 5571, 5578, 5583, 5588, 5590, 5595, 5603, 5605, 5607, 5610, 5618, 5620, 5622, 5625, 5627, 5629, 5632, 5660, 5688, 5717, 0 }; static const short _sas_commands_cond_targs[] = { 1095, 0, 3, 3, 0, 4, 4, 0, 5, 5, 0, 6, 7, 8, 6, 34, 0, 6, 7, 8, 6, 9, 34, 9, 9, 9, 0, 6, 7, 8, 6, 9, 34, 9, 9, 9, 0, 7, 0, 10, 11, 12, 10, 13, 16, 9, 9, 9, 9, 0, 10, 11, 12, 10, 13, 16, 0, 10, 11, 12, 10, 13, 16, 0, 11, 0, 14, 0, 15, 14, 15, 10, 14, 16, 17, 18, 16, 19, 28, 30, 33, 33, 0, 16, 17, 18, 16, 19, 28, 30, 33, 33, 0, 17, 0, 21, 20, 21, 20, 22, 23, 24, 22, 25, 1094, 0, 22, 23, 24, 22, 25, 1094, 0, 22, 23, 24, 22, 25, 1094, 0, 23, 0, 26, 0, 27, 26, 27, 22, 26, 21, 29, 21, 29, 31, 0, 32, 31, 32, 16, 31, 22, 23, 24, 22, 25, 1094, 33, 33, 33, 33, 0, 35, 0, 36, 35, 36, 6, 35, 38, 37, 38, 1095, 39, 38, 38, 37, 1095, 38, 37, 41, 0, 42, 41, 42, 1094, 41, 44, 44, 0, 45, 45, 0, 46, 46, 0, 47, 47, 0, 48, 48, 0, 49, 50, 51, 49, 164, 0, 49, 50, 51, 49, 52, 164, 52, 52, 52, 0, 49, 50, 51, 49, 52, 164, 52, 52, 52, 0, 50, 0, 53, 54, 55, 53, 56, 52, 52, 52, 52, 0, 53, 54, 55, 53, 56, 59, 96, 59, 96, 0, 53, 54, 55, 53, 56, 59, 96, 59, 96, 0, 54, 0, 57, 0, 58, 57, 58, 53, 57, 60, 60, 0, 61, 
61, 0, 62, 62, 0, 63, 63, 0, 64, 64, 0, 64, 65, 66, 64, 67, 70, 0, 64, 65, 66, 64, 67, 70, 0, 65, 0, 68, 0, 69, 68, 69, 64, 68, 70, 71, 72, 70, 73, 76, 0, 70, 71, 72, 70, 73, 76, 0, 71, 0, 74, 0, 75, 74, 75, 70, 74, 77, 76, 0, 78, 79, 80, 78, 81, 1094, 163, 0, 78, 79, 80, 78, 52, 81, 1094, 84, 141, 52, 84, 141, 52, 52, 0, 78, 79, 80, 78, 52, 81, 1094, 84, 141, 52, 84, 141, 52, 52, 0, 79, 0, 82, 0, 83, 82, 83, 78, 82, 53, 54, 55, 53, 56, 85, 52, 85, 52, 52, 52, 0, 53, 54, 55, 53, 56, 86, 52, 86, 52, 52, 52, 0, 53, 54, 55, 53, 56, 87, 52, 87, 52, 52, 52, 0, 53, 54, 55, 53, 56, 88, 52, 88, 52, 52, 52, 0, 53, 54, 55, 53, 56, 89, 52, 89, 52, 52, 52, 0, 90, 91, 92, 90, 93, 70, 52, 52, 52, 52, 0, 90, 91, 92, 90, 93, 70, 59, 96, 59, 96, 0, 90, 91, 92, 90, 93, 70, 59, 96, 59, 96, 0, 91, 0, 94, 0, 95, 94, 95, 90, 94, 97, 117, 97, 117, 0, 98, 98, 0, 99, 99, 0, 100, 100, 0, 100, 101, 102, 100, 103, 106, 0, 100, 101, 102, 100, 103, 106, 0, 101, 0, 104, 0, 105, 104, 105, 100, 104, 106, 107, 108, 106, 109, 112, 114, 0, 106, 107, 108, 106, 109, 112, 114, 0, 107, 0, 111, 110, 111, 110, 78, 79, 80, 78, 81, 1094, 0, 111, 113, 111, 113, 115, 0, 116, 115, 116, 106, 115, 118, 118, 0, 119, 119, 0, 120, 120, 0, 121, 121, 0, 121, 122, 123, 121, 124, 127, 0, 121, 122, 123, 121, 124, 127, 0, 122, 0, 125, 0, 126, 125, 126, 121, 125, 127, 128, 129, 127, 130, 137, 140, 0, 127, 128, 129, 127, 130, 137, 140, 0, 128, 0, 130, 131, 132, 130, 133, 136, 0, 130, 131, 132, 130, 133, 136, 0, 131, 0, 134, 0, 135, 134, 135, 130, 134, 78, 79, 80, 78, 81, 1094, 136, 0, 138, 0, 139, 138, 139, 127, 138, 78, 79, 80, 78, 81, 1094, 140, 0, 53, 54, 55, 53, 56, 142, 152, 52, 142, 152, 52, 52, 52, 0, 53, 54, 55, 53, 56, 143, 52, 143, 52, 52, 52, 0, 53, 54, 55, 53, 56, 144, 52, 144, 52, 52, 52, 0, 53, 54, 55, 53, 56, 145, 52, 145, 52, 52, 52, 0, 146, 147, 148, 146, 149, 106, 52, 52, 52, 52, 0, 146, 147, 148, 146, 149, 106, 59, 96, 59, 96, 0, 146, 147, 148, 146, 149, 106, 59, 96, 59, 96, 0, 147, 0, 150, 0, 151, 150, 
151, 146, 150, 53, 54, 55, 53, 56, 153, 52, 153, 52, 52, 52, 0, 53, 54, 55, 53, 56, 154, 52, 154, 52, 52, 52, 0, 53, 54, 55, 53, 56, 155, 52, 155, 52, 52, 52, 0, 53, 54, 55, 53, 56, 156, 52, 156, 52, 52, 52, 0, 157, 158, 159, 157, 160, 127, 52, 52, 52, 52, 0, 157, 158, 159, 157, 160, 127, 59, 96, 59, 96, 0, 157, 158, 159, 157, 160, 127, 59, 96, 59, 96, 0, 158, 0, 161, 0, 162, 161, 162, 157, 161, 78, 79, 80, 78, 81, 1094, 163, 0, 165, 0, 166, 165, 166, 49, 165, 168, 168, 0, 169, 169, 0, 170, 170, 0, 171, 172, 173, 171, 182, 185, 190, 187, 187, 0, 171, 172, 173, 171, 174, 178, 190, 174, 174, 174, 0, 171, 172, 173, 171, 174, 178, 190, 174, 174, 174, 0, 172, 0, 175, 176, 177, 175, 182, 185, 188, 179, 1094, 174, 174, 189, 189, 0, 175, 176, 177, 175, 174, 178, 179, 1094, 174, 174, 174, 0, 175, 176, 177, 175, 174, 178, 179, 1094, 174, 174, 174, 0, 176, 0, 174, 174, 174, 174, 0, 180, 0, 181, 180, 181, 175, 180, 184, 183, 184, 183, 175, 176, 177, 175, 182, 185, 179, 1094, 187, 187, 0, 184, 186, 184, 186, 175, 176, 177, 175, 182, 185, 179, 1094, 187, 187, 187, 187, 0, 175, 176, 177, 175, 182, 185, 179, 1094, 187, 187, 0, 175, 176, 177, 175, 182, 185, 187, 179, 1094, 189, 189, 189, 189, 0, 191, 0, 192, 191, 192, 171, 191, 194, 231, 194, 231, 0, 195, 195, 0, 196, 196, 0, 197, 197, 0, 198, 198, 0, 199, 199, 0, 200, 200, 0, 201, 202, 203, 201, 217, 0, 201, 202, 203, 201, 19, 204, 28, 217, 204, 204, 204, 0, 201, 202, 203, 201, 19, 204, 28, 217, 204, 204, 204, 0, 202, 0, 205, 206, 207, 205, 208, 211, 204, 204, 204, 204, 0, 205, 206, 207, 205, 19, 204, 28, 208, 211, 204, 204, 204, 0, 205, 206, 207, 205, 19, 204, 28, 208, 211, 204, 204, 204, 0, 206, 0, 209, 0, 210, 209, 210, 205, 209, 211, 212, 213, 211, 214, 220, 222, 223, 227, 220, 230, 220, 220, 0, 211, 212, 213, 211, 214, 220, 222, 223, 227, 220, 230, 220, 220, 0, 212, 0, 216, 215, 216, 215, 201, 202, 203, 201, 217, 0, 218, 0, 219, 218, 219, 201, 218, 201, 202, 203, 201, 221, 217, 220, 220, 220, 220, 0, 201, 202, 203, 201, 217, 
0, 220, 220, 220, 220, 0, 216, 225, 225, 225, 224, 216, 224, 226, 225, 225, 225, 224, 201, 202, 203, 201, 217, 221, 0, 228, 0, 229, 228, 229, 211, 228, 201, 202, 203, 201, 217, 230, 0, 232, 244, 232, 244, 0, 233, 233, 0, 234, 234, 0, 235, 235, 0, 236, 236, 0, 237, 237, 0, 238, 239, 240, 238, 241, 0, 238, 239, 240, 238, 19, 28, 241, 0, 238, 239, 240, 238, 19, 28, 241, 0, 239, 0, 242, 0, 243, 242, 243, 238, 242, 245, 245, 0, 246, 246, 0, 247, 247, 0, 248, 249, 250, 248, 339, 0, 248, 249, 250, 248, 251, 339, 251, 251, 251, 0, 248, 249, 250, 248, 251, 339, 251, 251, 251, 0, 249, 0, 252, 253, 254, 252, 263, 251, 251, 251, 251, 0, 252, 253, 254, 252, 255, 263, 269, 288, 295, 303, 309, 312, 320, 332, 255, 266, 255, 255, 0, 252, 253, 254, 252, 255, 263, 269, 288, 295, 303, 309, 312, 320, 332, 255, 266, 255, 255, 0, 253, 0, 256, 255, 255, 255, 255, 0, 257, 258, 259, 257, 260, 1094, 0, 257, 258, 259, 257, 251, 260, 1094, 251, 251, 251, 0, 257, 258, 259, 257, 251, 260, 1094, 251, 251, 251, 0, 258, 0, 261, 0, 262, 261, 262, 257, 261, 264, 0, 265, 264, 265, 252, 264, 267, 266, 0, 257, 258, 259, 257, 260, 1094, 268, 0, 257, 258, 259, 257, 260, 1094, 268, 0, 256, 270, 278, 285, 255, 255, 255, 255, 0, 256, 271, 274, 255, 255, 255, 255, 0, 256, 272, 255, 255, 255, 255, 0, 273, 274, 275, 255, 255, 255, 255, 0, 257, 258, 259, 257, 260, 1094, 268, 0, 273, 255, 255, 255, 255, 0, 256, 276, 255, 255, 255, 255, 0, 256, 277, 255, 255, 255, 255, 0, 256, 274, 255, 255, 255, 255, 0, 256, 279, 255, 255, 255, 255, 0, 256, 280, 255, 255, 255, 255, 0, 256, 281, 255, 255, 255, 255, 0, 256, 282, 255, 255, 255, 255, 0, 256, 284, 284, 255, 283, 255, 255, 0, 273, 255, 283, 255, 255, 0, 256, 255, 283, 255, 255, 0, 256, 286, 255, 255, 255, 255, 0, 256, 287, 255, 255, 255, 255, 0, 256, 276, 255, 255, 255, 255, 0, 256, 289, 255, 255, 255, 255, 0, 256, 290, 255, 255, 255, 255, 0, 256, 291, 293, 255, 255, 255, 255, 0, 256, 292, 255, 255, 255, 255, 0, 256, 274, 255, 255, 255, 255, 0, 256, 294, 255, 255, 255, 
255, 0, 256, 274, 255, 255, 255, 255, 0, 256, 296, 300, 255, 255, 255, 255, 0, 256, 297, 298, 255, 255, 255, 255, 0, 256, 280, 255, 255, 255, 255, 0, 256, 299, 255, 255, 255, 255, 0, 273, 274, 274, 255, 255, 255, 255, 0, 256, 301, 255, 255, 255, 255, 0, 256, 287, 302, 292, 255, 255, 255, 255, 0, 256, 274, 255, 255, 255, 255, 0, 256, 304, 255, 255, 255, 255, 0, 256, 305, 255, 255, 255, 255, 0, 256, 306, 255, 255, 255, 255, 0, 256, 307, 255, 255, 255, 255, 0, 256, 308, 255, 255, 255, 255, 0, 256, 274, 255, 255, 255, 255, 0, 256, 310, 255, 255, 255, 255, 0, 256, 311, 255, 255, 255, 255, 0, 273, 274, 255, 255, 255, 255, 0, 256, 313, 319, 255, 255, 255, 255, 0, 256, 314, 255, 255, 255, 255, 0, 256, 315, 255, 255, 255, 255, 0, 273, 316, 255, 255, 255, 255, 0, 256, 317, 255, 255, 255, 255, 0, 256, 318, 255, 255, 255, 255, 0, 256, 274, 255, 255, 255, 255, 0, 256, 274, 255, 255, 255, 255, 0, 256, 321, 326, 255, 255, 255, 255, 0, 256, 322, 255, 255, 255, 255, 0, 256, 323, 255, 255, 255, 255, 0, 256, 324, 255, 255, 255, 255, 0, 256, 325, 255, 255, 255, 255, 0, 256, 277, 274, 255, 255, 255, 255, 0, 256, 327, 255, 255, 255, 255, 0, 256, 328, 255, 255, 255, 255, 0, 256, 329, 255, 255, 255, 255, 0, 256, 330, 255, 255, 255, 255, 0, 256, 331, 255, 255, 255, 255, 0, 256, 274, 274, 255, 255, 255, 255, 0, 256, 333, 335, 255, 255, 255, 255, 0, 256, 334, 255, 255, 255, 255, 0, 256, 274, 255, 255, 255, 255, 0, 256, 336, 338, 255, 255, 255, 255, 0, 256, 337, 255, 255, 255, 255, 0, 273, 319, 274, 274, 255, 255, 255, 255, 0, 273, 274, 299, 274, 255, 255, 255, 255, 0, 340, 0, 341, 340, 341, 248, 340, 343, 354, 343, 354, 0, 344, 345, 346, 344, 347, 348, 344, 350, 347, 344, 349, 347, 347, 0, 344, 345, 346, 344, 347, 348, 344, 350, 1094, 347, 344, 349, 347, 347, 0, 344, 345, 346, 344, 347, 348, 344, 350, 1094, 347, 344, 349, 347, 347, 0, 345, 0, 344, 345, 346, 344, 347, 348, 344, 350, 1094, 347, 344, 353, 347, 347, 0, 349, 0, 344, 345, 346, 344, 347, 348, 344, 350, 1094, 347, 344, 349, 347, 
347, 0, 351, 0, 352, 351, 352, 344, 351, 344, 345, 346, 344, 347, 348, 344, 350, 1094, 347, 344, 353, 347, 347, 0, 355, 400, 487, 355, 400, 487, 0, 356, 356, 0, 357, 357, 0, 358, 358, 0, 359, 360, 361, 359, 368, 397, 1094, 368, 368, 368, 0, 359, 360, 361, 359, 362, 368, 395, 397, 1094, 368, 368, 368, 0, 359, 360, 361, 359, 362, 368, 395, 397, 1094, 368, 368, 368, 0, 360, 0, 364, 363, 364, 363, 365, 366, 367, 365, 368, 381, 1094, 368, 368, 368, 0, 365, 366, 367, 365, 368, 381, 1094, 368, 368, 368, 0, 365, 366, 367, 365, 368, 381, 1094, 368, 368, 368, 0, 366, 0, 369, 370, 371, 369, 372, 1094, 375, 368, 368, 368, 368, 0, 369, 370, 371, 369, 368, 372, 1094, 375, 368, 368, 368, 0, 369, 370, 371, 369, 368, 372, 1094, 375, 368, 368, 368, 0, 370, 0, 373, 0, 374, 373, 374, 369, 373, 375, 376, 377, 375, 378, 384, 386, 387, 391, 384, 394, 384, 384, 0, 375, 376, 377, 375, 378, 384, 386, 387, 391, 384, 394, 384, 384, 0, 376, 0, 380, 379, 380, 379, 365, 366, 367, 365, 381, 1094, 0, 382, 0, 383, 382, 383, 365, 382, 365, 366, 367, 365, 385, 381, 1094, 384, 384, 384, 384, 0, 365, 366, 367, 365, 381, 1094, 0, 384, 384, 384, 384, 0, 380, 389, 389, 389, 388, 380, 388, 390, 389, 389, 389, 388, 365, 366, 367, 365, 381, 1094, 385, 0, 392, 0, 393, 392, 393, 375, 392, 365, 366, 367, 365, 381, 1094, 394, 0, 364, 396, 364, 396, 398, 0, 399, 398, 399, 359, 398, 401, 401, 0, 402, 402, 0, 403, 404, 405, 403, 484, 0, 403, 404, 405, 403, 406, 467, 484, 428, 467, 467, 467, 0, 403, 404, 405, 403, 406, 467, 484, 428, 467, 467, 467, 0, 404, 0, 407, 0, 408, 409, 410, 408, 425, 1094, 407, 0, 408, 409, 410, 408, 406, 411, 425, 1094, 428, 411, 411, 411, 0, 408, 409, 410, 408, 406, 411, 425, 1094, 428, 411, 411, 411, 0, 409, 0, 412, 413, 414, 412, 461, 1094, 411, 411, 411, 411, 0, 412, 413, 414, 412, 406, 415, 461, 1094, 428, 411, 464, 411, 411, 0, 412, 413, 414, 412, 406, 415, 461, 1094, 428, 411, 464, 411, 411, 0, 413, 0, 416, 417, 418, 416, 419, 1094, 411, 411, 411, 411, 0, 416, 417, 418, 416, 406, 
415, 419, 1094, 428, 411, 422, 411, 411, 0, 416, 417, 418, 416, 406, 415, 419, 1094, 428, 411, 422, 411, 411, 0, 417, 0, 420, 0, 421, 420, 421, 416, 420, 423, 422, 0, 424, 0, 408, 409, 410, 408, 425, 1094, 424, 0, 426, 0, 427, 426, 427, 408, 426, 429, 0, 430, 431, 432, 430, 458, 429, 0, 430, 431, 432, 430, 433, 458, 433, 433, 433, 0, 430, 431, 432, 430, 433, 458, 433, 433, 433, 0, 431, 0, 434, 435, 436, 434, 454, 433, 433, 433, 433, 0, 434, 435, 436, 434, 437, 454, 448, 457, 448, 448, 0, 434, 435, 436, 434, 437, 454, 448, 457, 448, 448, 0, 435, 0, 438, 439, 440, 438, 445, 441, 449, 448, 447, 448, 448, 0, 438, 439, 440, 438, 441, 444, 0, 438, 439, 440, 438, 441, 444, 0, 439, 0, 442, 0, 443, 442, 443, 438, 442, 445, 444, 0, 408, 409, 410, 408, 425, 1094, 446, 0, 408, 409, 410, 408, 425, 1094, 446, 0, 445, 448, 447, 448, 448, 0, 445, 448, 448, 448, 448, 0, 445, 450, 448, 448, 448, 448, 0, 445, 451, 448, 448, 448, 448, 0, 445, 452, 448, 448, 448, 448, 0, 445, 448, 453, 448, 448, 0, 445, 448, 453, 448, 448, 0, 455, 0, 456, 455, 456, 434, 455, 445, 457, 0, 459, 0, 460, 459, 460, 430, 459, 462, 0, 463, 462, 463, 412, 462, 465, 464, 0, 466, 0, 408, 409, 410, 408, 425, 1094, 466, 0, 468, 469, 470, 468, 481, 1094, 467, 467, 467, 467, 0, 468, 469, 470, 468, 406, 471, 481, 1094, 428, 411, 464, 411, 411, 0, 468, 469, 470, 468, 406, 471, 481, 1094, 428, 411, 464, 411, 411, 0, 469, 0, 472, 473, 474, 472, 475, 1094, 411, 411, 411, 411, 0, 472, 473, 474, 472, 406, 415, 475, 1094, 428, 411, 478, 411, 411, 0, 472, 473, 474, 472, 406, 415, 475, 1094, 428, 411, 478, 411, 411, 0, 473, 0, 476, 0, 477, 476, 477, 472, 476, 479, 478, 0, 480, 0, 408, 409, 410, 408, 425, 1094, 480, 0, 482, 0, 483, 482, 483, 468, 482, 485, 0, 486, 485, 486, 403, 485, 488, 488, 0, 489, 489, 0, 490, 490, 0, 491, 491, 0, 492, 493, 494, 492, 614, 0, 492, 493, 494, 492, 495, 614, 495, 495, 495, 0, 492, 493, 494, 492, 495, 614, 495, 495, 495, 0, 493, 0, 496, 497, 498, 496, 611, 495, 495, 495, 495, 0, 496, 497, 498, 
496, 499, 611, 517, 0, 496, 497, 498, 496, 499, 611, 517, 0, 497, 0, 501, 500, 501, 500, 501, 502, 503, 501, 504, 507, 0, 501, 502, 503, 501, 504, 507, 0, 502, 0, 505, 0, 506, 505, 506, 501, 505, 507, 508, 509, 507, 510, 608, 0, 507, 508, 509, 507, 510, 608, 0, 508, 0, 511, 512, 513, 511, 514, 1094, 536, 0, 511, 512, 513, 511, 499, 514, 1094, 517, 0, 511, 512, 513, 511, 499, 514, 1094, 517, 0, 512, 0, 515, 0, 516, 515, 516, 511, 515, 518, 0, 519, 0, 520, 0, 521, 0, 521, 522, 523, 521, 524, 527, 0, 521, 522, 523, 521, 524, 527, 0, 522, 0, 525, 0, 526, 525, 526, 521, 525, 527, 528, 529, 527, 530, 605, 0, 527, 528, 529, 527, 530, 605, 0, 528, 0, 531, 0, 537, 554, 561, 569, 575, 578, 586, 598, 532, 0, 533, 532, 0, 535, 534, 0, 535, 534, 0, 536, 0, 511, 512, 513, 511, 514, 1094, 0, 538, 545, 551, 0, 539, 541, 0, 540, 0, 533, 541, 542, 0, 533, 0, 543, 0, 544, 0, 541, 0, 546, 0, 547, 0, 548, 0, 549, 0, 550, 550, 532, 0, 532, 0, 552, 0, 553, 0, 543, 0, 555, 0, 556, 0, 557, 559, 0, 558, 0, 541, 0, 560, 0, 541, 0, 562, 566, 0, 563, 564, 0, 547, 0, 565, 0, 533, 541, 541, 0, 567, 0, 553, 568, 558, 0, 541, 0, 570, 0, 571, 0, 572, 0, 573, 0, 574, 0, 541, 0, 576, 0, 577, 0, 533, 541, 0, 579, 585, 0, 580, 0, 581, 0, 533, 582, 0, 583, 0, 584, 0, 541, 0, 541, 0, 587, 592, 0, 588, 0, 589, 0, 590, 0, 591, 0, 544, 541, 0, 593, 0, 594, 0, 595, 0, 596, 0, 597, 0, 541, 541, 0, 599, 601, 0, 600, 0, 541, 0, 602, 604, 0, 603, 0, 533, 585, 541, 541, 0, 533, 541, 565, 541, 0, 606, 0, 607, 606, 607, 527, 606, 609, 0, 610, 609, 610, 507, 609, 612, 0, 613, 612, 613, 496, 612, 615, 0, 616, 615, 616, 492, 615, 618, 652, 684, 618, 652, 684, 0, 619, 619, 0, 620, 620, 0, 621, 621, 0, 622, 623, 624, 622, 649, 0, 622, 623, 624, 622, 625, 649, 625, 625, 625, 0, 622, 623, 624, 622, 625, 649, 625, 625, 625, 0, 623, 0, 626, 627, 628, 626, 629, 632, 625, 625, 625, 625, 0, 626, 627, 628, 626, 629, 632, 0, 626, 627, 628, 626, 629, 632, 0, 627, 0, 630, 0, 631, 630, 631, 626, 630, 632, 633, 634, 632, 635, 644, 
646, 0, 632, 633, 634, 632, 635, 644, 646, 0, 633, 0, 637, 636, 637, 636, 638, 639, 640, 638, 641, 1094, 0, 638, 639, 640, 638, 625, 641, 1094, 625, 625, 625, 0, 638, 639, 640, 638, 625, 641, 1094, 625, 625, 625, 0, 639, 0, 642, 0, 643, 642, 643, 638, 642, 637, 645, 637, 645, 647, 0, 648, 647, 648, 632, 647, 650, 0, 651, 650, 651, 622, 650, 653, 653, 0, 654, 654, 0, 655, 655, 0, 656, 656, 0, 657, 658, 659, 657, 681, 0, 657, 658, 659, 657, 660, 681, 660, 660, 660, 0, 657, 658, 659, 657, 660, 681, 660, 660, 660, 0, 658, 0, 661, 662, 663, 661, 677, 660, 660, 660, 660, 0, 661, 662, 663, 661, 664, 677, 680, 0, 661, 662, 663, 661, 664, 677, 680, 0, 662, 0, 664, 665, 666, 664, 667, 670, 0, 664, 665, 666, 664, 667, 670, 0, 665, 0, 668, 0, 669, 668, 669, 664, 668, 671, 672, 673, 671, 674, 1094, 670, 0, 671, 672, 673, 671, 660, 674, 1094, 660, 660, 660, 0, 671, 672, 673, 671, 660, 674, 1094, 660, 660, 660, 0, 672, 0, 675, 0, 676, 675, 676, 671, 675, 678, 0, 679, 678, 679, 661, 678, 671, 672, 673, 671, 674, 1094, 680, 0, 682, 0, 683, 682, 683, 657, 682, 685, 685, 0, 686, 686, 0, 687, 687, 0, 688, 688, 0, 689, 689, 0, 690, 691, 692, 690, 722, 0, 690, 691, 692, 690, 693, 722, 693, 693, 693, 0, 690, 691, 692, 690, 693, 722, 693, 693, 693, 0, 691, 0, 694, 695, 696, 694, 702, 693, 693, 693, 693, 0, 694, 695, 696, 694, 697, 700, 702, 705, 709, 712, 705, 709, 0, 694, 695, 696, 694, 697, 700, 702, 705, 709, 712, 705, 709, 0, 695, 0, 699, 698, 699, 698, 365, 366, 367, 365, 381, 1094, 0, 699, 701, 699, 701, 703, 0, 704, 703, 704, 694, 703, 706, 706, 0, 707, 707, 0, 708, 708, 0, 22, 22, 0, 710, 710, 0, 711, 711, 0, 22, 22, 0, 713, 713, 0, 714, 714, 0, 715, 715, 0, 716, 0, 716, 717, 718, 716, 719, 705, 709, 705, 709, 0, 716, 717, 718, 716, 719, 705, 709, 705, 709, 0, 717, 0, 720, 0, 721, 720, 721, 716, 720, 723, 0, 724, 723, 724, 690, 723, 726, 726, 0, 727, 727, 0, 728, 728, 0, 729, 729, 0, 730, 730, 0, 731, 731, 0, 732, 733, 734, 732, 736, 0, 732, 733, 734, 732, 735, 736, 735, 735, 735, 
0, 732, 733, 734, 732, 735, 736, 735, 735, 735, 0, 733, 0, 22, 23, 24, 22, 25, 1094, 735, 735, 735, 735, 0, 737, 0, 738, 737, 738, 732, 737, 740, 740, 0, 741, 741, 0, 742, 742, 0, 743, 743, 0, 744, 744, 0, 745, 745, 0, 746, 747, 748, 746, 749, 0, 746, 747, 748, 746, 368, 749, 368, 368, 368, 0, 746, 747, 748, 746, 368, 749, 368, 368, 368, 0, 747, 0, 750, 0, 751, 750, 751, 746, 750, 753, 753, 0, 754, 754, 0, 755, 755, 0, 756, 757, 758, 756, 759, 0, 756, 757, 758, 756, 759, 762, 768, 916, 762, 768, 916, 0, 756, 757, 758, 756, 759, 762, 768, 916, 762, 768, 916, 0, 757, 0, 760, 0, 761, 760, 761, 756, 760, 763, 763, 0, 764, 764, 0, 765, 765, 0, 766, 766, 0, 767, 767, 0, 744, 744, 0, 769, 769, 0, 770, 770, 0, 771, 771, 0, 772, 772, 0, 773, 773, 0, 773, 774, 775, 773, 776, 902, 783, 776, 776, 776, 0, 773, 774, 775, 773, 776, 902, 783, 776, 776, 776, 0, 774, 0, 777, 778, 779, 777, 780, 783, 896, 776, 776, 776, 776, 0, 777, 778, 779, 777, 776, 780, 783, 896, 776, 776, 776, 0, 777, 778, 779, 777, 776, 780, 783, 896, 776, 776, 776, 0, 778, 0, 781, 0, 782, 781, 782, 777, 781, 1096, 1097, 784, 1096, 785, 1096, 790, 790, 0, 1097, 0, 786, 0, 787, 786, 787, 1096, 786, 789, 789, 0, 22, 22, 0, 791, 791, 0, 792, 792, 0, 793, 793, 0, 794, 794, 0, 795, 796, 797, 795, 893, 0, 795, 796, 797, 795, 798, 893, 798, 798, 798, 0, 795, 796, 797, 795, 798, 893, 798, 798, 798, 0, 796, 0, 799, 800, 801, 799, 890, 798, 798, 798, 798, 0, 799, 800, 801, 799, 802, 820, 853, 822, 890, 843, 827, 842, 842, 0, 799, 800, 801, 799, 802, 820, 853, 822, 890, 843, 827, 842, 842, 0, 800, 0, 804, 803, 804, 803, 805, 806, 807, 805, 808, 811, 0, 805, 806, 807, 805, 808, 811, 0, 805, 806, 807, 805, 808, 811, 0, 806, 0, 809, 0, 810, 809, 810, 805, 809, 811, 812, 813, 811, 814, 848, 850, 0, 811, 812, 813, 811, 814, 848, 850, 0, 812, 0, 816, 815, 816, 815, 817, 818, 819, 817, 824, 1096, 0, 817, 818, 819, 817, 802, 820, 822, 824, 1096, 843, 827, 842, 842, 0, 817, 818, 819, 817, 802, 820, 822, 824, 1096, 843, 827, 842, 
842, 0, 818, 0, 804, 821, 804, 821, 823, 0, 805, 806, 807, 805, 808, 811, 823, 0, 825, 0, 826, 825, 826, 817, 825, 828, 829, 830, 828, 839, 811, 827, 0, 828, 829, 830, 828, 831, 839, 811, 0, 828, 829, 830, 828, 831, 839, 811, 0, 829, 0, 832, 833, 834, 832, 835, 0, 832, 833, 834, 832, 835, 838, 0, 832, 833, 834, 832, 835, 838, 0, 833, 0, 836, 0, 837, 836, 837, 832, 836, 805, 806, 807, 805, 808, 811, 838, 0, 840, 0, 841, 840, 841, 828, 840, 805, 806, 807, 805, 808, 811, 842, 842, 842, 842, 0, 805, 806, 807, 805, 808, 811, 842, 844, 842, 842, 842, 0, 805, 806, 807, 805, 808, 811, 842, 845, 842, 842, 842, 0, 805, 806, 807, 805, 808, 811, 842, 846, 842, 842, 842, 0, 805, 806, 807, 805, 808, 811, 842, 847, 842, 842, 842, 0, 805, 806, 807, 805, 808, 811, 842, 842, 842, 842, 0, 816, 849, 816, 849, 851, 0, 852, 851, 852, 811, 851, 854, 854, 854, 854, 0, 855, 856, 857, 855, 873, 858, 861, 854, 854, 854, 854, 0, 855, 856, 857, 855, 854, 858, 861, 854, 854, 854, 0, 855, 856, 857, 855, 854, 858, 861, 854, 854, 854, 0, 856, 0, 859, 0, 860, 859, 860, 855, 859, 861, 862, 863, 861, 864, 879, 881, 882, 886, 879, 889, 879, 879, 0, 861, 862, 863, 861, 864, 879, 881, 882, 886, 879, 889, 879, 879, 0, 862, 0, 866, 865, 866, 865, 867, 868, 869, 867, 873, 870, 0, 867, 868, 869, 867, 854, 870, 854, 854, 854, 0, 867, 868, 869, 867, 854, 870, 854, 854, 854, 0, 868, 0, 871, 0, 872, 871, 872, 867, 871, 873, 874, 875, 873, 802, 820, 822, 876, 843, 827, 842, 842, 0, 873, 874, 875, 873, 802, 820, 822, 876, 843, 827, 842, 842, 0, 874, 0, 877, 0, 878, 877, 878, 873, 877, 867, 868, 869, 867, 873, 880, 870, 879, 879, 879, 879, 0, 867, 868, 869, 867, 873, 870, 0, 879, 879, 879, 879, 0, 866, 884, 884, 884, 883, 866, 883, 885, 884, 884, 884, 883, 867, 868, 869, 867, 873, 870, 880, 0, 887, 0, 888, 887, 888, 861, 887, 867, 868, 869, 867, 873, 870, 889, 0, 891, 0, 892, 891, 892, 799, 891, 894, 0, 895, 894, 895, 795, 894, 896, 897, 898, 896, 899, 905, 907, 908, 912, 905, 915, 905, 905, 0, 896, 897, 898, 896, 
899, 905, 907, 908, 912, 905, 915, 905, 905, 0, 897, 0, 901, 900, 901, 900, 773, 774, 775, 773, 902, 783, 0, 903, 0, 904, 903, 904, 773, 903, 773, 774, 775, 773, 906, 902, 783, 905, 905, 905, 905, 0, 773, 774, 775, 773, 902, 783, 0, 905, 905, 905, 905, 0, 901, 910, 910, 910, 909, 901, 909, 911, 910, 910, 910, 909, 773, 774, 775, 773, 902, 783, 906, 0, 913, 0, 914, 913, 914, 896, 913, 773, 774, 775, 773, 902, 783, 915, 0, 917, 917, 0, 918, 918, 0, 919, 919, 0, 920, 920, 0, 921, 922, 923, 921, 985, 0, 921, 922, 923, 921, 924, 985, 924, 924, 924, 0, 921, 922, 923, 921, 924, 985, 924, 924, 924, 0, 922, 0, 925, 926, 927, 925, 959, 1094, 962, 924, 924, 924, 924, 0, 925, 926, 927, 925, 924, 928, 959, 1094, 962, 924, 924, 924, 0, 925, 926, 927, 925, 924, 928, 959, 1094, 962, 924, 924, 924, 0, 926, 0, 929, 929, 929, 929, 0, 930, 931, 932, 930, 22, 933, 936, 929, 929, 929, 929, 0, 930, 931, 932, 930, 929, 933, 936, 929, 929, 929, 0, 930, 931, 932, 930, 929, 933, 936, 929, 929, 929, 0, 931, 0, 934, 0, 935, 934, 935, 930, 934, 936, 937, 938, 936, 939, 948, 950, 951, 955, 948, 958, 948, 948, 0, 936, 937, 938, 936, 939, 948, 950, 951, 955, 948, 958, 948, 948, 0, 937, 0, 941, 940, 941, 940, 942, 943, 944, 942, 22, 945, 0, 942, 943, 944, 942, 929, 945, 929, 929, 929, 0, 942, 943, 944, 942, 929, 945, 929, 929, 929, 0, 943, 0, 946, 0, 947, 946, 947, 942, 946, 942, 943, 944, 942, 22, 949, 945, 948, 948, 948, 948, 0, 942, 943, 944, 942, 22, 945, 0, 948, 948, 948, 948, 0, 941, 953, 953, 953, 952, 941, 952, 954, 953, 953, 953, 952, 942, 943, 944, 942, 22, 945, 949, 0, 956, 0, 957, 956, 957, 936, 956, 942, 943, 944, 942, 22, 945, 958, 0, 960, 0, 961, 960, 961, 925, 960, 962, 963, 964, 962, 965, 974, 976, 977, 981, 974, 984, 974, 974, 0, 962, 963, 964, 962, 965, 974, 976, 977, 981, 974, 984, 974, 974, 0, 963, 0, 967, 966, 967, 966, 968, 969, 970, 968, 971, 1094, 0, 968, 969, 970, 968, 924, 928, 971, 1094, 924, 924, 924, 0, 968, 969, 970, 968, 924, 928, 971, 1094, 924, 924, 924, 0, 969, 0, 
972, 0, 973, 972, 973, 968, 972, 968, 969, 970, 968, 975, 971, 1094, 974, 974, 974, 974, 0, 968, 969, 970, 968, 971, 1094, 0, 974, 974, 974, 974, 0, 967, 979, 979, 979, 978, 967, 978, 980, 979, 979, 979, 978, 968, 969, 970, 968, 971, 1094, 975, 0, 982, 0, 983, 982, 983, 962, 982, 968, 969, 970, 968, 971, 1094, 984, 0, 986, 0, 987, 986, 987, 921, 986, 989, 989, 0, 990, 990, 0, 991, 991, 0, 992, 992, 0, 993, 994, 995, 993, 1091, 0, 993, 994, 995, 993, 996, 1091, 996, 996, 996, 0, 993, 994, 995, 993, 996, 1091, 996, 996, 996, 0, 994, 0, 997, 998, 999, 997, 1088, 996, 996, 996, 996, 0, 997, 998, 999, 997, 1000, 1018, 1051, 1020, 1088, 1041, 1025, 1040, 1040, 0, 997, 998, 999, 997, 1000, 1018, 1051, 1020, 1088, 1041, 1025, 1040, 1040, 0, 998, 0, 1002, 1001, 1002, 1001, 1003, 1004, 1005, 1003, 1006, 1009, 0, 1003, 1004, 1005, 1003, 1006, 1009, 0, 1003, 1004, 1005, 1003, 1006, 1009, 0, 1004, 0, 1007, 0, 1008, 1007, 1008, 1003, 1007, 1009, 1010, 1011, 1009, 1012, 1046, 1048, 0, 1009, 1010, 1011, 1009, 1012, 1046, 1048, 0, 1010, 0, 1014, 1013, 1014, 1013, 1015, 1016, 1017, 1015, 1022, 1094, 0, 1015, 1016, 1017, 1015, 1000, 1018, 1020, 1022, 1094, 1041, 1025, 1040, 1040, 0, 1015, 1016, 1017, 1015, 1000, 1018, 1020, 1022, 1094, 1041, 1025, 1040, 1040, 0, 1016, 0, 1002, 1019, 1002, 1019, 1021, 0, 1003, 1004, 1005, 1003, 1006, 1009, 1021, 0, 1023, 0, 1024, 1023, 1024, 1015, 1023, 1026, 1027, 1028, 1026, 1037, 1009, 1025, 0, 1026, 1027, 1028, 1026, 1029, 1037, 1009, 0, 1026, 1027, 1028, 1026, 1029, 1037, 1009, 0, 1027, 0, 1030, 1031, 1032, 1030, 1033, 0, 1030, 1031, 1032, 1030, 1033, 1036, 0, 1030, 1031, 1032, 1030, 1033, 1036, 0, 1031, 0, 1034, 0, 1035, 1034, 1035, 1030, 1034, 1003, 1004, 1005, 1003, 1006, 1009, 1036, 0, 1038, 0, 1039, 1038, 1039, 1026, 1038, 1003, 1004, 1005, 1003, 1006, 1009, 1040, 1040, 1040, 1040, 0, 1003, 1004, 1005, 1003, 1006, 1009, 1040, 1042, 1040, 1040, 1040, 0, 1003, 1004, 1005, 1003, 1006, 1009, 1040, 1043, 1040, 1040, 1040, 0, 1003, 1004, 1005, 
1003, 1006, 1009, 1040, 1044, 1040, 1040, 1040, 0, 1003, 1004, 1005, 1003, 1006, 1009, 1040, 1045, 1040, 1040, 1040, 0, 1003, 1004, 1005, 1003, 1006, 1009, 1040, 1040, 1040, 1040, 0, 1014, 1047, 1014, 1047, 1049, 0, 1050, 1049, 1050, 1009, 1049, 1052, 1052, 1052, 1052, 0, 1053, 1054, 1055, 1053, 1071, 1056, 1059, 1052, 1052, 1052, 1052, 0, 1053, 1054, 1055, 1053, 1052, 1056, 1059, 1052, 1052, 1052, 0, 1053, 1054, 1055, 1053, 1052, 1056, 1059, 1052, 1052, 1052, 0, 1054, 0, 1057, 0, 1058, 1057, 1058, 1053, 1057, 1059, 1060, 1061, 1059, 1062, 1077, 1079, 1080, 1084, 1077, 1087, 1077, 1077, 0, 1059, 1060, 1061, 1059, 1062, 1077, 1079, 1080, 1084, 1077, 1087, 1077, 1077, 0, 1060, 0, 1064, 1063, 1064, 1063, 1065, 1066, 1067, 1065, 1071, 1068, 0, 1065, 1066, 1067, 1065, 1052, 1068, 1052, 1052, 1052, 0, 1065, 1066, 1067, 1065, 1052, 1068, 1052, 1052, 1052, 0, 1066, 0, 1069, 0, 1070, 1069, 1070, 1065, 1069, 1071, 1072, 1073, 1071, 1000, 1018, 1020, 1074, 1041, 1025, 1040, 1040, 0, 1071, 1072, 1073, 1071, 1000, 1018, 1020, 1074, 1041, 1025, 1040, 1040, 0, 1072, 0, 1075, 0, 1076, 1075, 1076, 1071, 1075, 1065, 1066, 1067, 1065, 1071, 1078, 1068, 1077, 1077, 1077, 1077, 0, 1065, 1066, 1067, 1065, 1071, 1068, 0, 1077, 1077, 1077, 1077, 0, 1064, 1082, 1082, 1082, 1081, 1064, 1081, 1083, 1082, 1082, 1082, 1081, 1065, 1066, 1067, 1065, 1071, 1068, 1078, 0, 1085, 0, 1086, 1085, 1086, 1059, 1085, 1065, 1066, 1067, 1065, 1071, 1068, 1087, 0, 1089, 0, 1090, 1089, 1090, 997, 1089, 1092, 0, 1093, 1092, 1093, 993, 1092, 1094, 1095, 1, 1094, 2, 37, 40, 43, 167, 193, 342, 617, 725, 739, 752, 788, 988, 43, 167, 193, 342, 617, 725, 739, 752, 788, 988, 0, 1094, 1095, 1, 1094, 2, 37, 40, 43, 167, 193, 342, 617, 725, 739, 752, 788, 988, 43, 167, 193, 342, 617, 725, 739, 752, 788, 988, 0, 1096, 1097, 784, 1096, 2, 37, 785, 1096, 43, 167, 193, 342, 617, 725, 739, 752, 788, 790, 43, 167, 193, 342, 617, 725, 739, 752, 788, 790, 0, 1096, 1097, 784, 1096, 2, 37, 785, 1096, 43, 167, 193, 342, 617, 725, 
739, 752, 788, 790, 43, 167, 193, 342, 617, 725, 739, 752, 788, 790, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 
407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 
807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 0 }; static const short _sas_commands_cond_actions[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 29, 29, 29, 29, 29, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 71, 21, 23, 0, 9, 9, 9, 9, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 
25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 17, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 25, 25, 25, 25, 25, 139, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 13, 13, 13, 13, 13, 13, 41, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 27, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 77, 77, 77, 77, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 71, 21, 23, 0, 123, 123, 123, 123, 123, 123, 0, 68, 17, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 25, 25, 25, 25, 25, 25, 139, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 25, 25, 25, 25, 25, 139, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177, 177, 177, 177, 177, 177, 3, 0, 0, 0, 0, 0, 0, 0, 0, 187, 187, 187, 187, 187, 187, 3, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 13, 13, 13, 13, 13, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 77, 77, 77, 0, 0, 0, 29, 29, 29, 29, 29, 29, 29, 29, 29, 0, 0, 29, 29, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 25, 77, 77, 77, 0, 0, 0, 27, 27, 27, 27, 0, 0, 0, 0, 0, 0, 0, 0, 71, 21, 23, 0, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 0, 68, 17, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 29, 29, 29, 29, 29, 29, 29, 29, 0, 0, 29, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 25, 77, 25, 25, 77, 77, 77, 0, 0, 0, 89, 89, 89, 89, 89, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 25, 77, 25, 25, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 44, 1, 1, 0, 44, 41, 44, 44, 0, 25, 25, 25, 25, 74, 143, 74, 74, 25, 143, 139, 143, 143, 0, 0, 0, 71, 21, 23, 0, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 0, 0, 0, 95, 95, 95, 95, 29, 95, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 0, 27, 27, 27, 27, 0, 68, 135, 135, 135, 17, 19, 0, 19, 5, 5, 5, 0, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 92, 92, 
92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 27, 27, 27, 27, 27, 27, 41, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 77, 77, 77, 77, 77, 77, 139, 77, 77, 0, 0, 0, 86, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 0, 0, 0, 0, 3, 0, 86, 0, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 41, 0, 86, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 41, 0, 0, 0, 86, 0, 3, 0, 0, 0, 86, 0, 41, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 86, 0, 0, 0, 0, 0, 0, 0, 0, 86, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 0, 41, 27, 27, 0, 0, 0, 0, 0, 27, 0, 0, 0, 0, 27, 0, 41, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 25, 25, 77, 25, 139, 77, 77, 0, 0, 0, 29, 29, 29, 29, 98, 29, 29, 29, 29, 98, 29, 151, 98, 98, 0, 41, 0, 0, 0, 0, 0, 27, 0, 0, 0, 0, 27, 0, 47, 27, 27, 0, 0, 0, 0, 0, 0, 0, 0, 29, 29, 29, 29, 98, 29, 29, 29, 29, 98, 29, 155, 98, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 25, 77, 25, 25, 25, 77, 77, 77, 0, 0, 0, 71, 21, 23, 0, 9, 9, 9, 9, 65, 9, 9, 65, 65, 65, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 77, 77, 77, 0, 0, 0, 89, 89, 89, 89, 89, 89, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 44, 1, 1, 0, 44, 41, 44, 44, 0, 25, 25, 25, 25, 74, 143, 74, 74, 25, 143, 139, 143, 143, 0, 0, 0, 71, 21, 23, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 0, 0, 0, 95, 95, 95, 95, 29, 95, 95, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 0, 27, 27, 27, 27, 0, 68, 135, 135, 135, 17, 19, 0, 19, 5, 5, 5, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 3, 0, 68, 17, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 104, 0, 39, 104, 104, 104, 0, 25, 25, 25, 25, 80, 147, 25, 80, 147, 147, 147, 0, 0, 0, 41, 0, 37, 37, 37, 37, 37, 37, 3, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 25, 77, 25, 25, 25, 77, 77, 77, 0, 0, 0, 92, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 41, 27, 27, 0, 25, 25, 25, 25, 25, 77, 25, 25, 25, 77, 139, 77, 77, 0, 0, 0, 92, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 41, 27, 27, 0, 25, 25, 25, 25, 25, 77, 25, 25, 25, 77, 139, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 3, 0, 41, 0, 172, 172, 172, 172, 172, 172, 3, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 7, 
7, 7, 7, 7, 3, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 41, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 139, 77, 77, 0, 0, 0, 0, 0, 0, 0, 83, 0, 0, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 25, 25, 25, 25, 25, 139, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 3, 0, 13, 13, 13, 13, 13, 13, 131, 0, 0, 0, 0, 0, 0, 0, 3, 0, 182, 0, 3, 0, 0, 0, 83, 0, 0, 0, 0, 0, 83, 0, 0, 0, 0, 0, 0, 83, 0, 0, 0, 0, 0, 0, 83, 0, 0, 0, 0, 0, 0, 83, 0, 41, 0, 0, 0, 111, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 3, 0, 41, 0, 107, 107, 107, 107, 107, 107, 3, 0, 92, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 41, 27, 27, 0, 25, 25, 25, 25, 25, 77, 25, 25, 25, 77, 139, 77, 77, 0, 0, 0, 92, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 41, 27, 27, 0, 25, 25, 25, 25, 25, 77, 25, 25, 25, 77, 139, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 3, 0, 41, 0, 167, 167, 167, 167, 167, 167, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 29, 29, 29, 29, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 68, 17, 19, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 3, 0, 0, 41, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 41, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 92, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 71, 21, 23, 0, 59, 59, 59, 59, 59, 59, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 17, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 92, 92, 92, 92, 92, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 25, 25, 25, 25, 25, 25, 139, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 25, 25, 25, 25, 25, 139, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 115, 115, 115, 115, 115, 115, 3, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119, 119, 119, 119, 119, 119, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 29, 29, 29, 29, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 71, 21, 23, 0, 9, 9, 9, 9, 9, 9, 0, 68, 17, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 29, 29, 29, 29, 29, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 77, 77, 77, 0, 0, 0, 89, 89, 89, 89, 89, 89, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 86, 86, 86, 86, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 139, 25, 25, 0, 0, 0, 71, 21, 23, 0, 127, 127, 127, 127, 127, 127, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 71, 21, 23, 0, 62, 62, 62, 62, 62, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 139, 25, 25, 0, 0, 0, 68, 17, 19, 0, 41, 0, 31, 31, 31, 31, 31, 31, 3, 0, 0, 0, 0, 0, 0, 0, 0, 33, 33, 33, 33, 33, 33, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 159, 0, 25, 25, 25, 25, 25, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35, 35, 35, 3, 0, 0, 0, 0, 0, 0, 0, 0, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0, 0, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0, 0, 0, 101, 101, 101, 101, 101, 101, 0, 
0, 0, 0, 0, 0, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0, 0, 0, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0, 0, 0, 163, 163, 163, 163, 163, 163, 0, 0, 0, 0, 0, 68, 17, 19, 0, 0, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 0, 89, 89, 89, 89, 89, 89, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 44, 1, 1, 0, 44, 41, 44, 44, 0, 25, 25, 25, 25, 74, 143, 74, 74, 25, 143, 139, 143, 143, 0, 0, 0, 71, 21, 23, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 139, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 95, 95, 95, 95, 29, 95, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 0, 27, 27, 27, 27, 0, 68, 135, 135, 135, 17, 19, 0, 19, 5, 5, 5, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 44, 1, 1, 0, 44, 41, 44, 44, 0, 25, 25, 25, 25, 74, 143, 74, 74, 25, 143, 139, 143, 143, 0, 0, 0, 71, 21, 23, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 0, 0, 0, 95, 95, 95, 95, 29, 95, 95, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 0, 27, 27, 27, 27, 0, 68, 135, 135, 135, 17, 19, 0, 19, 5, 5, 5, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 89, 89, 89, 89, 89, 89, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 25, 25, 77, 77, 77, 0, 0, 0, 27, 27, 27, 27, 0, 89, 89, 89, 89, 89, 89, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 44, 1, 1, 0, 44, 41, 44, 44, 0, 25, 25, 25, 25, 74, 143, 74, 74, 25, 143, 139, 143, 143, 0, 0, 0, 71, 21, 23, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 
27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 95, 95, 95, 95, 29, 95, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 0, 27, 27, 27, 27, 0, 68, 135, 135, 135, 17, 19, 0, 19, 5, 5, 5, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 44, 1, 1, 0, 44, 41, 44, 44, 0, 25, 25, 25, 25, 74, 143, 74, 74, 25, 143, 139, 143, 143, 0, 0, 0, 71, 21, 23, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 27, 0, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 95, 95, 95, 29, 95, 95, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 0, 27, 27, 27, 27, 0, 68, 135, 135, 135, 17, 19, 0, 19, 5, 5, 5, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 86, 86, 86, 86, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 139, 25, 25, 0, 0, 0, 71, 21, 23, 0, 127, 127, 127, 127, 127, 127, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 71, 21, 23, 0, 62, 62, 62, 62, 62, 62, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 139, 25, 25, 0, 0, 0, 68, 17, 19, 0, 41, 0, 31, 31, 31, 31, 31, 31, 3, 0, 0, 0, 0, 0, 0, 0, 0, 33, 33, 33, 33, 33, 33, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 159, 0, 25, 25, 25, 25, 25, 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35, 35, 35, 35, 35, 35, 3, 0, 0, 0, 0, 0, 0, 0, 0, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0, 0, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0, 0, 0, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0, 0, 0, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0, 0, 0, 101, 101, 101, 101, 101, 101, 0, 0, 0, 0, 0, 0, 163, 163, 163, 
163, 163, 163, 0, 0, 0, 0, 0, 68, 17, 19, 0, 0, 0, 0, 0, 0, 0, 0, 27, 27, 27, 27, 0, 89, 89, 89, 89, 89, 89, 89, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 0, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 44, 1, 1, 0, 44, 41, 44, 44, 0, 25, 25, 25, 25, 74, 143, 74, 74, 25, 143, 139, 143, 143, 0, 0, 0, 71, 21, 23, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 27, 0, 27, 27, 27, 0, 25, 25, 25, 25, 77, 25, 77, 77, 77, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 139, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 95, 95, 95, 95, 29, 95, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 0, 27, 27, 27, 27, 0, 68, 135, 135, 135, 17, 19, 0, 19, 5, 5, 5, 0, 56, 56, 56, 56, 56, 56, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 11, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 0, 25, 0 }; static const short _sas_commands_eof_trans[] = { 5747, 5748, 5749, 5750, 5751, 5752, 5753, 5754, 5755, 5756, 5757, 5758, 5759, 5760, 5761, 5762, 5763, 5764, 5765, 5766, 5767, 5768, 5769, 5770, 5771, 5772, 5773, 5774, 5775, 5776, 5777, 5778, 5779, 5780, 5781, 5782, 5783, 5784, 5785, 5786, 5787, 5788, 5789, 5790, 5791, 5792, 5793, 5794, 5795, 5796, 5797, 5798, 5799, 5800, 5801, 5802, 5803, 5804, 5805, 5806, 5807, 5808, 5809, 5810, 5811, 5812, 5813, 5814, 5815, 5816, 5817, 5818, 5819, 5820, 5821, 5822, 5823, 5824, 5825, 5826, 5827, 5828, 5829, 5830, 5831, 5832, 5833, 5834, 5835, 5836, 5837, 5838, 5839, 5840, 5841, 5842, 5843, 5844, 5845, 5846, 5847, 5848, 5849, 5850, 5851, 5852, 5853, 5854, 5855, 5856, 5857, 5858, 5859, 5860, 5861, 5862, 5863, 5864, 5865, 5866, 5867, 5868, 5869, 5870, 5871, 5872, 5873, 5874, 5875, 5876, 5877, 5878, 5879, 5880, 5881, 5882, 5883, 5884, 5885, 5886, 5887, 5888, 5889, 5890, 5891, 5892, 5893, 5894, 5895, 5896, 5897, 5898, 5899, 5900, 5901, 5902, 5903, 5904, 5905, 5906, 5907, 5908, 5909, 5910, 5911, 5912, 5913, 5914, 5915, 5916, 5917, 5918, 5919, 5920, 5921, 5922, 5923, 5924, 5925, 5926, 5927, 5928, 5929, 5930, 5931, 5932, 5933, 5934, 5935, 5936, 5937, 5938, 5939, 5940, 5941, 5942, 5943, 5944, 5945, 5946, 5947, 5948, 5949, 5950, 5951, 5952, 5953, 5954, 5955, 5956, 5957, 5958, 5959, 5960, 5961, 5962, 5963, 5964, 5965, 5966, 5967, 5968, 5969, 5970, 5971, 5972, 5973, 5974, 5975, 5976, 
5977, 5978, 5979, 5980, 5981, 5982, 5983, 5984, 5985, 5986, 5987, 5988, 5989, 5990, 5991, 5992, 5993, 5994, 5995, 5996, 5997, 5998, 5999, 6000, 6001, 6002, 6003, 6004, 6005, 6006, 6007, 6008, 6009, 6010, 6011, 6012, 6013, 6014, 6015, 6016, 6017, 6018, 6019, 6020, 6021, 6022, 6023, 6024, 6025, 6026, 6027, 6028, 6029, 6030, 6031, 6032, 6033, 6034, 6035, 6036, 6037, 6038, 6039, 6040, 6041, 6042, 6043, 6044, 6045, 6046, 6047, 6048, 6049, 6050, 6051, 6052, 6053, 6054, 6055, 6056, 6057, 6058, 6059, 6060, 6061, 6062, 6063, 6064, 6065, 6066, 6067, 6068, 6069, 6070, 6071, 6072, 6073, 6074, 6075, 6076, 6077, 6078, 6079, 6080, 6081, 6082, 6083, 6084, 6085, 6086, 6087, 6088, 6089, 6090, 6091, 6092, 6093, 6094, 6095, 6096, 6097, 6098, 6099, 6100, 6101, 6102, 6103, 6104, 6105, 6106, 6107, 6108, 6109, 6110, 6111, 6112, 6113, 6114, 6115, 6116, 6117, 6118, 6119, 6120, 6121, 6122, 6123, 6124, 6125, 6126, 6127, 6128, 6129, 6130, 6131, 6132, 6133, 6134, 6135, 6136, 6137, 6138, 6139, 6140, 6141, 6142, 6143, 6144, 6145, 6146, 6147, 6148, 6149, 6150, 6151, 6152, 6153, 6154, 6155, 6156, 6157, 6158, 6159, 6160, 6161, 6162, 6163, 6164, 6165, 6166, 6167, 6168, 6169, 6170, 6171, 6172, 6173, 6174, 6175, 6176, 6177, 6178, 6179, 6180, 6181, 6182, 6183, 6184, 6185, 6186, 6187, 6188, 6189, 6190, 6191, 6192, 6193, 6194, 6195, 6196, 6197, 6198, 6199, 6200, 6201, 6202, 6203, 6204, 6205, 6206, 6207, 6208, 6209, 6210, 6211, 6212, 6213, 6214, 6215, 6216, 6217, 6218, 6219, 6220, 6221, 6222, 6223, 6224, 6225, 6226, 6227, 6228, 6229, 6230, 6231, 6232, 6233, 6234, 6235, 6236, 6237, 6238, 6239, 6240, 6241, 6242, 6243, 6244, 6245, 6246, 6247, 6248, 6249, 6250, 6251, 6252, 6253, 6254, 6255, 6256, 6257, 6258, 6259, 6260, 6261, 6262, 6263, 6264, 6265, 6266, 6267, 6268, 6269, 6270, 6271, 6272, 6273, 6274, 6275, 6276, 6277, 6278, 6279, 6280, 6281, 6282, 6283, 6284, 6285, 6286, 6287, 6288, 6289, 6290, 6291, 6292, 6293, 6294, 6295, 6296, 6297, 6298, 6299, 6300, 6301, 6302, 6303, 6304, 6305, 6306, 6307, 6308, 6309, 
6310, 6311, 6312, 6313, 6314, 6315, 6316, 6317, 6318, 6319, 6320, 6321, 6322, 6323, 6324, 6325, 6326, 6327, 6328, 6329, 6330, 6331, 6332, 6333, 6334, 6335, 6336, 6337, 6338, 6339, 6340, 6341, 6342, 6343, 6344, 6345, 6346, 6347, 6348, 6349, 6350, 6351, 6352, 6353, 6354, 6355, 6356, 6357, 6358, 6359, 6360, 6361, 6362, 6363, 6364, 6365, 6366, 6367, 6368, 6369, 6370, 6371, 6372, 6373, 6374, 6375, 6376, 6377, 6378, 6379, 6380, 6381, 6382, 6383, 6384, 6385, 6386, 6387, 6388, 6389, 6390, 6391, 6392, 6393, 6394, 6395, 6396, 6397, 6398, 6399, 6400, 6401, 6402, 6403, 6404, 6405, 6406, 6407, 6408, 6409, 6410, 6411, 6412, 6413, 6414, 6415, 6416, 6417, 6418, 6419, 6420, 6421, 6422, 6423, 6424, 6425, 6426, 6427, 6428, 6429, 6430, 6431, 6432, 6433, 6434, 6435, 6436, 6437, 6438, 6439, 6440, 6441, 6442, 6443, 6444, 6445, 6446, 6447, 6448, 6449, 6450, 6451, 6452, 6453, 6454, 6455, 6456, 6457, 6458, 6459, 6460, 6461, 6462, 6463, 6464, 6465, 6466, 6467, 6468, 6469, 6470, 6471, 6472, 6473, 6474, 6475, 6476, 6477, 6478, 6479, 6480, 6481, 6482, 6483, 6484, 6485, 6486, 6487, 6488, 6489, 6490, 6491, 6492, 6493, 6494, 6495, 6496, 6497, 6498, 6499, 6500, 6501, 6502, 6503, 6504, 6505, 6506, 6507, 6508, 6509, 6510, 6511, 6512, 6513, 6514, 6515, 6516, 6517, 6518, 6519, 6520, 6521, 6522, 6523, 6524, 6525, 6526, 6527, 6528, 6529, 6530, 6531, 6532, 6533, 6534, 6535, 6536, 6537, 6538, 6539, 6540, 6541, 6542, 6543, 6544, 6545, 6546, 6547, 6548, 6549, 6550, 6551, 6552, 6553, 6554, 6555, 6556, 6557, 6558, 6559, 6560, 6561, 6562, 6563, 6564, 6565, 6566, 6567, 6568, 6569, 6570, 6571, 6572, 6573, 6574, 6575, 6576, 6577, 6578, 6579, 6580, 6581, 6582, 6583, 6584, 6585, 6586, 6587, 6588, 6589, 6590, 6591, 6592, 6593, 6594, 6595, 6596, 6597, 6598, 6599, 6600, 6601, 6602, 6603, 6604, 6605, 6606, 6607, 6608, 6609, 6610, 6611, 6612, 6613, 6614, 6615, 6616, 6617, 6618, 6619, 6620, 6621, 6622, 6623, 6624, 6625, 6626, 6627, 6628, 6629, 6630, 6631, 6632, 6633, 6634, 6635, 6636, 6637, 6638, 6639, 6640, 6641, 6642, 
6643, 6644, 6645, 6646, 6647, 6648, 6649, 6650, 6651, 6652, 6653, 6654, 6655, 6656, 6657, 6658, 6659, 6660, 6661, 6662, 6663, 6664, 6665, 6666, 6667, 6668, 6669, 6670, 6671, 6672, 6673, 6674, 6675, 6676, 6677, 6678, 6679, 6680, 6681, 6682, 6683, 6684, 6685, 6686, 6687, 6688, 6689, 6690, 6691, 6692, 6693, 6694, 6695, 6696, 6697, 6698, 6699, 6700, 6701, 6702, 6703, 6704, 6705, 6706, 6707, 6708, 6709, 6710, 6711, 6712, 6713, 6714, 6715, 6716, 6717, 6718, 6719, 6720, 6721, 6722, 6723, 6724, 6725, 6726, 6727, 6728, 6729, 6730, 6731, 6732, 6733, 6734, 6735, 6736, 6737, 6738, 6739, 6740, 6741, 6742, 6743, 6744, 6745, 6746, 6747, 6748, 6749, 6750, 6751, 6752, 6753, 6754, 6755, 6756, 6757, 6758, 6759, 6760, 6761, 6762, 6763, 6764, 6765, 6766, 6767, 6768, 6769, 6770, 6771, 6772, 6773, 6774, 6775, 6776, 6777, 6778, 6779, 6780, 6781, 6782, 6783, 6784, 6785, 6786, 6787, 6788, 6789, 6790, 6791, 6792, 6793, 6794, 6795, 6796, 6797, 6798, 6799, 6800, 6801, 6802, 6803, 6804, 6805, 6806, 6807, 6808, 6809, 6810, 6811, 6812, 6813, 6814, 6815, 6816, 6817, 6818, 6819, 6820, 6821, 6822, 6823, 6824, 6825, 6826, 6827, 6828, 6829, 6830, 6831, 6832, 6833, 6834, 6835, 6836, 6837, 6838, 6839, 6840, 6841, 6842, 6843, 6844, 0 }; static const int sas_commands_start = 1094; static const int sas_commands_en_main = 1094; #line 13 "src/txt/readstat_sas_commands_read.rl" readstat_schema_t *readstat_parse_sas_commands(readstat_parser_t *parser, const char *filepath, void *user_ctx, readstat_error_t *outError) { if (parser->io->open(filepath, parser->io->io_ctx) == -1) { if (outError) *outError = READSTAT_ERROR_OPEN; return NULL; } readstat_schema_t *schema = NULL; unsigned char *bytes = NULL; readstat_error_t error = READSTAT_OK; ssize_t len = parser->io->seek(0, READSTAT_SEEK_END, parser->io->io_ctx); if (len == -1) { error = READSTAT_ERROR_SEEK; goto cleanup; } parser->io->seek(0, READSTAT_SEEK_SET, parser->io->io_ctx); bytes = malloc(len); parser->io->read(bytes, len, parser->io->io_ctx); unsigned 
char *p = bytes; unsigned char *pe = bytes + len; unsigned char *eof = pe; unsigned char *str_start = NULL; size_t str_len = 0; int cs; double double_value = NAN; uint64_t first_integer = 0; uint64_t integer = 0; int line_no = 0; unsigned char *line_start = p; char varname[32]; char argname[32]; char labelset[32]; char string_value[32]; char buf[1024]; readstat_type_t var_type = READSTAT_TYPE_DOUBLE; label_type_t label_type = LABEL_TYPE_DOUBLE; int var_row = 0, var_col = 0; int var_len = 0; if ((schema = calloc(1, sizeof(readstat_schema_t))) == NULL) { error = READSTAT_ERROR_MALLOC; goto cleanup; } schema->rows_per_observation = 1; #line 3220 "src/txt/readstat_sas_commands_read.c" { cs = (int)sas_commands_start; } #line 3225 "src/txt/readstat_sas_commands_read.c" { int _klen; unsigned int _trans = 0; const char * _keys; const signed char * _acts; unsigned int _nacts; _resume: {} if ( p == pe && p != eof ) goto _out; if ( p == eof ) { if ( _sas_commands_eof_trans[cs] > 0 ) { _trans = (unsigned int)_sas_commands_eof_trans[cs] - 1; } } else { _keys = ( _sas_commands_trans_keys + (_sas_commands_key_offsets[cs])); _trans = (unsigned int)_sas_commands_index_offsets[cs]; _klen = (int)_sas_commands_single_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + _klen - 1; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _keys += _klen; _trans += (unsigned int)_klen; break; } _mid = _lower + ((_upper-_lower) >> 1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 1; else if ( ( (*( p))) > (*( _mid)) ) _lower = _mid + 1; else { _trans += (unsigned int)(_mid - _keys); goto _match; } } } _klen = (int)_sas_commands_range_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + (_klen<<1) - 2; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _trans += (unsigned int)_klen; break; } _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 2; else if ( ( (*( p))) > (*( 
_mid + 1)) ) _lower = _mid + 2; else { _trans += (unsigned int)((_mid - _keys)>>1); break; } } } _match: {} } cs = (int)_sas_commands_cond_targs[_trans]; if ( _sas_commands_cond_actions[_trans] != 0 ) { _acts = ( _sas_commands_actions + (_sas_commands_cond_actions[_trans])); _nacts = (unsigned int)(*( _acts)); _acts += 1; while ( _nacts > 0 ) { switch ( (*( _acts)) ) { case 0: { { #line 72 "src/txt/readstat_sas_commands_read.rl" integer = 0; } #line 3310 "src/txt/readstat_sas_commands_read.c" break; } case 1: { { #line 76 "src/txt/readstat_sas_commands_read.rl" integer = 10 * integer + ((( (*( p)))) - '0'); } #line 3321 "src/txt/readstat_sas_commands_read.c" break; } case 2: { { #line 80 "src/txt/readstat_sas_commands_read.rl" int value = 0; if ((( (*( p)))) >= '0' && (( (*( p)))) <= '9') { value = (( (*( p)))) - '0'; } else if ((( (*( p)))) >= 'A' && (( (*( p)))) <= 'F') { value = (( (*( p)))) - 'A' + 10; } else if ((( (*( p)))) >= 'a' && (( (*( p)))) <= 'f') { value = (( (*( p)))) - 'a' + 10; } integer = 16 * integer + value; } #line 3340 "src/txt/readstat_sas_commands_read.c" break; } case 3: { { #line 92 "src/txt/readstat_sas_commands_read.rl" var_col = integer - 1; var_len = 1; } #line 3352 "src/txt/readstat_sas_commands_read.c" break; } case 4: { { #line 97 "src/txt/readstat_sas_commands_read.rl" var_len = integer - var_col; } #line 3363 "src/txt/readstat_sas_commands_read.c" break; } case 5: { { #line 101 "src/txt/readstat_sas_commands_read.rl" var_type = READSTAT_TYPE_STRING; } #line 3374 "src/txt/readstat_sas_commands_read.c" break; } case 6: { { #line 105 "src/txt/readstat_sas_commands_read.rl" var_type = READSTAT_TYPE_DOUBLE; } #line 3385 "src/txt/readstat_sas_commands_read.c" break; } case 7: { { #line 109 "src/txt/readstat_sas_commands_read.rl" readstat_copy(buf, sizeof(buf), (char *)str_start, str_len); } #line 3396 "src/txt/readstat_sas_commands_read.c" break; } case 8: { { #line 113 "src/txt/readstat_sas_commands_read.rl" readstat_copy(labelset, 
sizeof(labelset), (char *)str_start, str_len); } #line 3407 "src/txt/readstat_sas_commands_read.c" break; } case 9: { { #line 117 "src/txt/readstat_sas_commands_read.rl" readstat_copy(string_value, sizeof(string_value), (char *)str_start, str_len); } #line 3418 "src/txt/readstat_sas_commands_read.c" break; } case 10: { { #line 121 "src/txt/readstat_sas_commands_read.rl" readstat_copy(argname, sizeof(argname), (char *)str_start, str_len); } #line 3429 "src/txt/readstat_sas_commands_read.c" break; } case 11: { { #line 125 "src/txt/readstat_sas_commands_read.rl" readstat_copy_lower(varname, sizeof(varname), (char *)str_start, str_len); } #line 3440 "src/txt/readstat_sas_commands_read.c" break; } case 12: { { #line 129 "src/txt/readstat_sas_commands_read.rl" if (strcasecmp(argname, "firstobs") == 0) { schema->first_line = integer; } if (strcasecmp(argname, "dlm") == 0) { schema->field_delimiter = integer ? integer : buf[0]; } } #line 3456 "src/txt/readstat_sas_commands_read.c" break; } case 13: { { #line 138 "src/txt/readstat_sas_commands_read.rl" readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname); entry->variable.type = var_type; entry->row = var_row; entry->col = var_col; entry->len = var_len; } #line 3471 "src/txt/readstat_sas_commands_read.c" break; } case 14: { { #line 146 "src/txt/readstat_sas_commands_read.rl" readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname); entry->len = var_len; } #line 3483 "src/txt/readstat_sas_commands_read.c" break; } case 15: { { #line 151 "src/txt/readstat_sas_commands_read.rl" readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname); readstat_copy(entry->variable.label, sizeof(entry->variable.label), buf, sizeof(buf)); } #line 3495 "src/txt/readstat_sas_commands_read.c" break; } case 16: { { #line 156 "src/txt/readstat_sas_commands_read.rl" readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname); 
readstat_copy(entry->labelset, sizeof(entry->labelset), labelset, sizeof(labelset)); } #line 3507 "src/txt/readstat_sas_commands_read.c" break; } case 17: { { #line 161 "src/txt/readstat_sas_commands_read.rl" error = submit_value_label(parser, labelset, label_type, first_integer, integer, double_value, string_value, buf, user_ctx); if (error != READSTAT_OK) goto cleanup; } #line 3521 "src/txt/readstat_sas_commands_read.c" break; } case 18: { { #line 168 "src/txt/readstat_sas_commands_read.rl" str_start = p; } #line 3530 "src/txt/readstat_sas_commands_read.c" break; } case 19: { { #line 168 "src/txt/readstat_sas_commands_read.rl" str_len = p - str_start; } #line 3539 "src/txt/readstat_sas_commands_read.c" break; } case 20: { { #line 170 "src/txt/readstat_sas_commands_read.rl" str_start = p; } #line 3548 "src/txt/readstat_sas_commands_read.c" break; } case 21: { { #line 170 "src/txt/readstat_sas_commands_read.rl" str_len = p - str_start; } #line 3557 "src/txt/readstat_sas_commands_read.c" break; } case 22: { { #line 178 "src/txt/readstat_sas_commands_read.rl" line_no++; line_start = p; } #line 3566 "src/txt/readstat_sas_commands_read.c" break; } case 23: { { #line 182 "src/txt/readstat_sas_commands_read.rl" str_start = p; } #line 3575 "src/txt/readstat_sas_commands_read.c" break; } case 24: { { #line 182 "src/txt/readstat_sas_commands_read.rl" str_len = p - str_start; } #line 3584 "src/txt/readstat_sas_commands_read.c" break; } case 25: { { #line 221 "src/txt/readstat_sas_commands_read.rl" label_type = LABEL_TYPE_DOUBLE; double_value = -(double)integer; } #line 3593 "src/txt/readstat_sas_commands_read.c" break; } case 26: { { #line 222 "src/txt/readstat_sas_commands_read.rl" label_type = LABEL_TYPE_DOUBLE; double_value = integer; } #line 3602 "src/txt/readstat_sas_commands_read.c" break; } case 27: { { #line 223 "src/txt/readstat_sas_commands_read.rl" first_integer = integer; } #line 3611 "src/txt/readstat_sas_commands_read.c" break; } case 28: { { #line 223 
"src/txt/readstat_sas_commands_read.rl" label_type = LABEL_TYPE_RANGE; } #line 3620 "src/txt/readstat_sas_commands_read.c" break; } case 29: { { #line 224 "src/txt/readstat_sas_commands_read.rl" label_type = LABEL_TYPE_STRING; } #line 3629 "src/txt/readstat_sas_commands_read.c" break; } case 30: { { #line 225 "src/txt/readstat_sas_commands_read.rl" label_type = LABEL_TYPE_STRING; } #line 3638 "src/txt/readstat_sas_commands_read.c" break; } case 31: { { #line 226 "src/txt/readstat_sas_commands_read.rl" label_type = LABEL_TYPE_OTHER; } #line 3647 "src/txt/readstat_sas_commands_read.c" break; } case 32: { { #line 229 "src/txt/readstat_sas_commands_read.rl" var_len = integer; } #line 3656 "src/txt/readstat_sas_commands_read.c" break; } case 33: { { #line 328 "src/txt/readstat_sas_commands_read.rl" var_row = integer - 1; } #line 3665 "src/txt/readstat_sas_commands_read.c" break; } case 34: { { #line 332 "src/txt/readstat_sas_commands_read.rl" var_row = 0; } #line 3674 "src/txt/readstat_sas_commands_read.c" break; } } _nacts -= 1; _acts += 1; } } if ( p == eof ) { if ( cs >= 1094 ) goto _out; } else { if ( cs != 0 ) { p += 1; goto _resume; } } _out: {} } #line 378 "src/txt/readstat_sas_commands_read.rl" /* suppress warnings */ (void)sas_commands_en_main; if (cs < #line 3705 "src/txt/readstat_sas_commands_read.c" 1094 #line 383 "src/txt/readstat_sas_commands_read.rl" ) { char error_buf[1024]; if (p == pe) { snprintf(error_buf, sizeof(error_buf), "Error parsing SAS command file (end-of-file unexpectedly reached)"); } else { snprintf(error_buf, sizeof(error_buf), "Error parsing SAS command file around line #%d, col #%ld (%c)", line_no + 1, (long)(p - line_start + 1), *p); } if (parser->handlers.error) { parser->handlers.error(error_buf, user_ctx); } error = READSTAT_ERROR_PARSE; goto cleanup; } error = submit_columns(parser, schema, user_ctx); cleanup: parser->io->close(parser->io->io_ctx); free(bytes); if (error != READSTAT_OK) { if (outError) *outError = error; 
readstat_schema_free(schema); schema = NULL; } return schema; } ReadStat-1.1.7/src/txt/readstat_sas_commands_read.rl000066400000000000000000000346611410722155500225410ustar00rootroot00000000000000#include #include "../readstat.h" #include "../readstat_strings.h" #include "readstat_schema.h" #include "readstat_copy.h" #include "commands_util.h" %%{ machine sas_commands; write data noerror nofinal; }%% readstat_schema_t *readstat_parse_sas_commands(readstat_parser_t *parser, const char *filepath, void *user_ctx, readstat_error_t *outError) { if (parser->io->open(filepath, parser->io->io_ctx) == -1) { if (outError) *outError = READSTAT_ERROR_OPEN; return NULL; } readstat_schema_t *schema = NULL; unsigned char *bytes = NULL; readstat_error_t error = READSTAT_OK; ssize_t len = parser->io->seek(0, READSTAT_SEEK_END, parser->io->io_ctx); if (len == -1) { error = READSTAT_ERROR_SEEK; goto cleanup; } parser->io->seek(0, READSTAT_SEEK_SET, parser->io->io_ctx); bytes = malloc(len); parser->io->read(bytes, len, parser->io->io_ctx); unsigned char *p = bytes; unsigned char *pe = bytes + len; unsigned char *eof = pe; unsigned char *str_start = NULL; size_t str_len = 0; int cs; double double_value = NAN; uint64_t first_integer = 0; uint64_t integer = 0; int line_no = 0; unsigned char *line_start = p; char varname[32]; char argname[32]; char labelset[32]; char string_value[32]; char buf[1024]; readstat_type_t var_type = READSTAT_TYPE_DOUBLE; label_type_t label_type = LABEL_TYPE_DOUBLE; int var_row = 0, var_col = 0; int var_len = 0; if ((schema = calloc(1, sizeof(readstat_schema_t))) == NULL) { error = READSTAT_ERROR_MALLOC; goto cleanup; } schema->rows_per_observation = 1; %%{ action start_integer { integer = 0; } action incr_integer { integer = 10 * integer + (fc - '0'); } action incr_hex_integer { int value = 0; if (fc >= '0' && fc <= '9') { value = fc - '0'; } else if (fc >= 'A' && fc <= 'F') { value = fc - 'A' + 10; } else if (fc >= 'a' && fc <= 'f') { value = fc - 'a' + 10; } 
integer = 16 * integer + value; } action copy_pos { var_col = integer - 1; var_len = 1; } action set_len { var_len = integer - var_col; } action set_str { var_type = READSTAT_TYPE_STRING; } action set_dbl { var_type = READSTAT_TYPE_DOUBLE; } action copy_buf { readstat_copy(buf, sizeof(buf), (char *)str_start, str_len); } action copy_labelset { readstat_copy(labelset, sizeof(labelset), (char *)str_start, str_len); } action copy_string { readstat_copy(string_value, sizeof(string_value), (char *)str_start, str_len); } action copy_argname { readstat_copy(argname, sizeof(argname), (char *)str_start, str_len); } action copy_varname { readstat_copy_lower(varname, sizeof(varname), (char *)str_start, str_len); } action handle_arg { if (strcasecmp(argname, "firstobs") == 0) { schema->first_line = integer; } if (strcasecmp(argname, "dlm") == 0) { schema->field_delimiter = integer ? integer : buf[0]; } } action handle_var { readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname); entry->variable.type = var_type; entry->row = var_row; entry->col = var_col; entry->len = var_len; } action handle_var_len { readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname); entry->len = var_len; } action handle_var_label { readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname); readstat_copy(entry->variable.label, sizeof(entry->variable.label), buf, sizeof(buf)); } action handle_var_labelset { readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname); readstat_copy(entry->labelset, sizeof(entry->labelset), labelset, sizeof(labelset)); } action handle_value_label { error = submit_value_label(parser, labelset, label_type, first_integer, integer, double_value, string_value, buf, user_ctx); if (error != READSTAT_OK) goto cleanup; } single_quoted_string = "'" ( [^']* ) >{ str_start = fpc; } %{ str_len = fpc - str_start; } "'"; double_quoted_string = "\"" ( [^"]* ) >{ str_start 
= fpc; } %{ str_len = fpc - str_start; } "\""; unquoted_string = [A-Za-z] [_A-Za-z0-9\.]*; quoted_string = ( single_quoted_string | double_quoted_string ) %copy_buf; hex_string = "'" ( [0-9A-Fa-f]+ ) >start_integer $incr_hex_integer "'x"; newline = ( "\n" | "\r\n" ) %{ line_no++; line_start = p; }; missing_value = "." [A-Z]?; identifier = ( [$_A-Za-z] [_A-Za-z0-9]* ) >{ str_start = fpc; } %{ str_len = fpc - str_start; }; identifier_eval = "&"? identifier "."?; integer = [0-9]+ >start_integer $incr_integer; true_whitespace = [ \t] | newline; multiline_comment = "/*" ( any* - ( any* "*/" any* ) ) "*/"; comment = "*" ( any* - ( any* ";" true_whitespace* newline any* ) ) ";" true_whitespace* newline | multiline_comment; whitespace = true_whitespace | multiline_comment; var = identifier %copy_varname; labelset = identifier %copy_labelset; arg = identifier %copy_argname (whitespace* "=" whitespace* (identifier_eval | quoted_string | hex_string | integer) >start_integer %handle_arg)?; args = arg ( whitespace+ arg)*; options_cmd = "OPTIONS"i whitespace+ args whitespace* ";"; let_macro = "%LET"i whitespace+ identifier whitespace* "=" whitespace* (unquoted_string | quoted_string) whitespace* ";"; libname_cmd = "LIBNAME"i whitespace+ identifier whitespace+ ( quoted_string (whitespace+ args)? 
| "CLEAR"i | "_ALL_"i whitespace* "CLEAR"i | "LIST"i | "_ALL_"i whitespace* "LIST"i ) whitespace* ";"; footnote_cmd = "FOOTNOTE"i whitespace+ quoted_string whitespace* ";"; empty_cmd = ";"; value_label = ( "-" integer %{ label_type = LABEL_TYPE_DOUBLE; double_value = -(double)integer; } | integer %{ label_type = LABEL_TYPE_DOUBLE; double_value = integer; } | integer whitespace+ "-" whitespace+ %{ first_integer = integer; } integer %{ label_type = LABEL_TYPE_RANGE; } | unquoted_string %{ label_type = LABEL_TYPE_STRING; } %copy_string | quoted_string %{ label_type = LABEL_TYPE_STRING; } %copy_string | "other" %{ label_type = LABEL_TYPE_OTHER; } ) whitespace* "=" whitespace* quoted_string %handle_value_label; var_len = ("$" whitespace* integer %set_str | integer %set_dbl) %{ var_len = integer; }; value_cmd = "VALUE"i whitespace+ labelset whitespace+ ("(" args ")" whitespace*)? value_label (whitespace+ value_label)* whitespace* ";"; proc_format_cmd = "PROC"i whitespace+ "FORMAT"i whitespace* ( args whitespace* )? ";" ( whitespace | empty_cmd | value_cmd )+; filename_cmd = "FILENAME"i (whitespace+ args)? whitespace+ quoted_string whitespace* ";"; if_statement = "IF"i ( whitespace | identifier | "-"? integer | "(" | ")" | ".")+ ";"; data_cmd = "DATA"i (whitespace+ identifier_eval | unquoted_string | quoted_string )+ whitespace* ";"; missing_cmd = "MISSING"i whitespace+ identifier whitespace* ";"; # lrecl_option = "LRECL"i whitespace* "=" whitespace* integer %handle_info; infile_cmd = "INFILE"i (whitespace+ quoted_string)? (whitespace* args)? 
whitespace* ";"; length_spec = var whitespace+ var_len %handle_var_len; length_cmd = "LENGTH"i whitespace+ length_spec (whitespace+ length_spec)* whitespace* ";"; label_spec = var whitespace* "=" whitespace* quoted_string %handle_var_label; label_cmd = "LABEL"i whitespace+ label_spec (whitespace+ label_spec)* whitespace* ";"; date_separator = [SN]; date_format = ( "MMDDYY" integer | "DATE" | "DATE9" | "DATETIME" | "DAY" | "DDMMYY" date_separator? integer | "DOWNAME" | "JULDAY" | "JULIAN" | "MMDDYY" date_separator? integer | "MMYY" date_separator? | "MONNAME" | "MONTH" | "MONYY" | "PDFJULG" | "WEEKDATE" | "WEEKDAY" | "WORDDATE" | "WORDDATX" | "QTR" | "QTRR" | "TIME" | "TIMEAMPM" | "TOD" | "YEAR" | "YYMMDD" | "YYMM" date_separator? | "YYQ" date_separator? | "YYQR" date_separator? ); format_lbl_spec = labelset "." %handle_var_labelset; format_dbl_spec = integer "." integer?; format_date_spec = date_format "." integer?; var_format_spec = var whitespace+ ( format_lbl_spec | format_dbl_spec | format_date_spec ); format_cmd = "FORMAT"i whitespace+ var_format_spec (whitespace+ var_format_spec)* whitespace* ";"; var_attribute = ( "LENGTH"i whitespace* "=" whitespace* var_len %handle_var_len | "LABEL"i whitespace* "=" whitespace* quoted_string %handle_var_label | "FORMAT"i whitespace* "=" whitespace* format_dbl_spec ); var_attributes = var_attribute (whitespace+ var_attribute)*; attrib_spec = var whitespace+ var_attributes %handle_var; attrib_cmd = "ATTRIB"i whitespace+ attrib_spec (whitespace+ attrib_spec)* whitespace* ";"; input_format_spec = ("$CHAR" integer %set_str | identifier %set_dbl); input_int_spec = var whitespace+ integer %copy_pos "-" integer %set_len %set_dbl %handle_var; input_dbl_spec = "@" integer %copy_pos whitespace+ var whitespace+ (var_len | input_format_spec) "." 
%handle_var integer?; input_txt_spec = var whitespace+ "$" whitespace+ integer %copy_pos "-" integer %set_len %set_str %handle_var; row_spec = "#" integer %{ var_row = integer - 1; }; input_spec = (input_int_spec | input_dbl_spec | input_txt_spec | row_spec | var); input_cmd = "INPUT"i whitespace+ %{ var_row = 0; } input_spec (whitespace+ input_spec)* whitespace* ";"; invalue_missing_spec = single_quoted_string whitespace* "=" whitespace* missing_value; invalue_format_spec = format_dbl_spec | format_date_spec; invalue_other_spec = "OTHER" whitespace* "=" whitespace* "(|" invalue_format_spec "|)"; invalue_spec = invalue_missing_spec | invalue_other_spec; invalue_cmd = "INVALUE"i whitespace+ identifier whitespace+ invalue_spec (whitespace+ invalue_spec)* whitespace* ";"; proc_print_cmd = "PROC"i whitespace+ "PRINT"i (whitespace+ args) (whitespace+ "(" args ")")? whitespace* ";"; proc_contents_cmd = "PROC"i whitespace+ "CONTENTS"i (whitespace+ args) whitespace* ";"; run_cmd = "RUN"i whitespace* ";"; command = options_cmd | let_macro | libname_cmd | footnote_cmd | value_cmd | proc_format_cmd | filename_cmd | attrib_cmd | data_cmd | if_statement | missing_cmd | infile_cmd | format_cmd | label_cmd | length_cmd | input_cmd | invalue_cmd | proc_print_cmd | proc_contents_cmd | run_cmd; main := ( true_whitespace | comment | command )*; write init; write exec; }%% /* suppress warnings */ (void)sas_commands_en_main; if (cs < %%{ write first_final; }%%) { char error_buf[1024]; if (p == pe) { snprintf(error_buf, sizeof(error_buf), "Error parsing SAS command file (end-of-file unexpectedly reached)"); } else { snprintf(error_buf, sizeof(error_buf), "Error parsing SAS command file around line #%d, col #%ld (%c)", line_no + 1, (long)(p - line_start + 1), *p); } if (parser->handlers.error) { parser->handlers.error(error_buf, user_ctx); } error = READSTAT_ERROR_PARSE; goto cleanup; } error = submit_columns(parser, schema, user_ctx); cleanup: parser->io->close(parser->io->io_ctx); 
free(bytes); if (error != READSTAT_OK) { if (outError) *outError = error; readstat_schema_free(schema); schema = NULL; } return schema; } ReadStat-1.1.7/src/txt/readstat_schema.c000066400000000000000000000021131410722155500201270ustar00rootroot00000000000000#include #include "../readstat.h" #include "readstat_schema.h" #include "readstat_copy.h" void readstat_schema_free(readstat_schema_t *schema) { if (schema) { free(schema->entries); free(schema); } } readstat_schema_entry_t *readstat_schema_find_or_create_entry(readstat_schema_t *dct, const char *var_name) { readstat_schema_entry_t *entry = NULL; int i; /* linear search. this is shitty, but whatever */ for (i=0; ientry_count; i++) { if (strcmp(dct->entries[i].variable.name, var_name) == 0) { entry = &dct->entries[i]; break; } } if (!entry) { dct->entries = realloc(dct->entries, sizeof(readstat_schema_entry_t) * (dct->entry_count + 1)); entry = &dct->entries[dct->entry_count]; memset(entry, 0, sizeof(readstat_schema_entry_t)); readstat_copy(entry->variable.name, sizeof(entry->variable.name), var_name, strlen(var_name)); entry->decimal_separator = '.'; entry->variable.index = dct->entry_count++; } return entry; } ReadStat-1.1.7/src/txt/readstat_schema.h000066400000000000000000000001551410722155500201400ustar00rootroot00000000000000readstat_schema_entry_t *readstat_schema_find_or_create_entry(readstat_schema_t *dct, const char *var_name); ReadStat-1.1.7/src/txt/readstat_spss_commands_read.c000066400000000000000000002214141410722155500225420ustar00rootroot00000000000000#line 1 "src/txt/readstat_spss_commands_read.rl" #include #include #include "../readstat.h" #include "../readstat_strings.h" #include "readstat_schema.h" #include "readstat_copy.h" #include "commands_util.h" #line 14 "src/txt/readstat_spss_commands_read.c" static const signed char _spss_commands_actions[] = { 0, 1, 1, 1, 2, 1, 4, 1, 8, 1, 12, 1, 13, 1, 15, 1, 16, 1, 17, 1, 18, 1, 19, 1, 20, 1, 21, 1, 22, 1, 27, 1, 30, 1, 31, 1, 32, 1, 34, 2, 0, 1, 2, 
1, 0, 2, 2, 28, 2, 2, 29, 2, 3, 28, 2, 3, 29, 2, 4, 9, 2, 4, 12, 2, 4, 14, 2, 4, 20, 2, 8, 20, 2, 15, 16, 2, 17, 18, 2, 19, 13, 2, 19, 20, 2, 21, 6, 2, 21, 7, 2, 21, 12, 2, 21, 20, 2, 23, 8, 2, 24, 8, 2, 25, 8, 2, 26, 8, 3, 4, 0, 1, 3, 4, 14, 13, 3, 4, 35, 5, 3, 19, 0, 1, 3, 19, 8, 20, 3, 21, 0, 1, 3, 21, 1, 0, 3, 21, 6, 11, 3, 21, 12, 6, 3, 33, 0, 1, 4, 19, 33, 0, 1, 4, 21, 6, 10, 11, 4, 21, 6, 11, 10, 0 }; static const short _spss_commands_key_offsets[] = { 0, 0, 1, 2, 7, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 25, 27, 29, 35, 41, 47, 48, 50, 52, 54, 58, 68, 78, 79, 81, 85, 87, 92, 97, 98, 108, 118, 119, 120, 121, 126, 137, 148, 149, 155, 163, 171, 172, 183, 189, 195, 196, 203, 212, 221, 222, 226, 229, 233, 241, 249, 250, 261, 267, 273, 274, 283, 294, 305, 306, 310, 313, 319, 329, 339, 340, 344, 346, 350, 356, 362, 363, 365, 367, 369, 371, 375, 381, 387, 388, 390, 392, 394, 396, 398, 400, 405, 410, 411, 413, 415, 417, 419, 421, 425, 427, 429, 433, 439, 445, 446, 448, 450, 452, 454, 456, 460, 468, 476, 477, 488, 493, 498, 499, 503, 515, 520, 525, 526, 538, 550, 551, 552, 553, 559, 565, 571, 572, 573, 574, 582, 595, 597, 599, 601, 603, 605, 609, 617, 625, 626, 637, 646, 655, 656, 657, 659, 662, 664, 667, 672, 678, 684, 685, 687, 689, 693, 701, 709, 710, 712, 714, 716, 720, 725, 730, 731, 737, 749, 754, 759, 760, 772, 784, 785, 786, 787, 793, 799, 805, 806, 807, 808, 816, 829, 843, 857, 871, 885, 899, 913, 927, 941, 953, 958, 963, 964, 976, 988, 989, 1002, 1014, 1026, 1027, 1031, 1039, 1049, 1059, 1060, 1071, 1081, 1091, 1092, 1094, 1096, 1098, 1100, 1108, 1110, 1118, 1126, 1128, 1130, 1132, 1134, 1136, 1138, 1140, 1142, 1144, 1148, 1154, 1160, 1161, 1163, 1165, 1167, 1169, 1171, 1175, 1183, 1191, 1192, 1203, 1208, 1213, 1214, 1216, 1217, 1218, 1219, 1224, 1233, 1242, 1243, 1244, 1245, 1247, 1249, 1251, 1253, 1255, 1259, 1267, 1275, 1276, 1287, 1292, 1297, 1298, 1301, 1303, 1311, 1318, 1325, 1326, 1328, 1335, 1340, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 
1356, 1361, 1362, 1366, 1368, 1370, 1374, 1382, 1390, 1391, 1393, 1395, 1397, 1399, 1401, 1403, 1405, 1407, 1409, 1416, 1423, 1424, 1425, 1426, 1432, 1438, 1444, 1445, 1450, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1463, 1464, 1465, 1472, 1479, 1480, 1481, 1482, 1488, 1494, 1495, 1497, 1499, 1501, 1503, 1505, 1507, 1511, 1513, 1515, 1517, 1521, 1527, 1533, 1534, 1536, 1540, 1555, 1571, 1587, 1588, 1589, 1590, 1606, 1607, 1608, 1610, 1626, 1643, 1660, 1675, 1676, 1680, 1688, 1696, 1697, 1709, 1714, 1719, 1720, 1732, 1744, 1745, 1746, 1747, 1752, 1753, 1754, 1761, 1773, 1775, 1779, 1781, 1783, 1787, 1793, 1799, 1800, 1802, 1804, 1806, 1808, 1810, 1814, 1823, 1832, 1833, 1841, 1849, 1850, 1861, 1875, 1889, 1890, 1891, 1892, 1896, 1902, 1908, 1909, 1910, 1911, 1917, 1928, 1939, 1940, 1941, 1942, 1944, 1950, 1961, 1972, 1973, 1986, 1999, 2012, 2025, 2038, 2051, 2064, 2075, 2091, 2107, 2108, 2112, 2118, 2125, 2132, 2133, 2134, 2135, 2139, 2145, 2151, 2152, 2158, 2169, 2182, 2195, 2208, 2221, 2232, 2246, 2260, 2261, 2272, 2287, 2302, 2303, 2309, 2311, 2313, 2315, 2317, 2319, 2321, 2322, 2328, 2334, 2335, 2343, 2351, 2352, 2363, 2372, 2381, 2382, 2393, 2395, 2397, 2399, 2401, 2403, 2405, 2416, 2418, 2420, 2422, 2424, 2426, 2430, 2436, 2442, 2443, 2445, 2447, 2449, 2451, 2457, 2465, 2473, 2474, 2485, 2491, 2497, 2498, 2499, 2500, 2506, 2516, 2526, 2527, 2532, 2541, 2550, 2551, 2552, 2553, 2557, 2559, 2561, 2563, 2565, 2573, 2575, 2577, 2579, 2586, 2593, 2594, 2600, 2606, 2607, 2610, 2616, 2619, 2625, 2631, 2632, 2640, 2643, 2647, 2650, 2656, 2662, 2663, 2669, 2675, 2677, 2679, 2681, 2683, 2685, 2692, 2697, 2702, 2703, 2709, 2715, 2716, 2723, 2725, 2727, 2729, 2731, 2736, 2737, 2738, 2750, 2752, 2754, 2756, 2758, 2762, 2768, 2774, 2775, 2777, 2779, 2781, 2785, 2795, 2805, 2806, 2807, 2808, 2813, 2819, 2825, 2826, 2828, 2830, 2832, 2834, 2836, 2841, 2846, 2847, 2855, 2863, 2864, 2876, 2877, 2878, 2880, 2882, 2884, 2886, 2888, 2893, 2902, 2911, 2912, 2924, 2951, 2978, 
3005, 0 }; static const char _spss_commands_trans_keys[] = { 10, 46, 9, 10, 13, 32, 46, 10, 46, 42, 42, 42, 47, 79, 77, 77, 69, 78, 84, 32, 46, 65, 73, 97, 105, 84, 116, 65, 97, 9, 10, 13, 32, 83, 115, 9, 10, 13, 32, 76, 108, 9, 10, 13, 32, 76, 108, 10, 73, 105, 83, 115, 84, 116, 9, 10, 13, 32, 9, 10, 13, 32, 70, 82, 84, 102, 114, 116, 9, 10, 13, 32, 70, 82, 84, 102, 114, 116, 10, 73, 105, 76, 88, 108, 120, 69, 101, 9, 10, 13, 32, 61, 9, 10, 13, 32, 61, 10, 9, 10, 13, 32, 34, 39, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 47, 9, 10, 13, 32, 47, 70, 82, 84, 102, 114, 116, 9, 10, 13, 32, 47, 70, 82, 84, 102, 114, 116, 10, 9, 10, 13, 32, 48, 57, 9, 10, 13, 32, 65, 90, 97, 122, 9, 10, 13, 32, 65, 90, 97, 122, 10, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 48, 57, 9, 10, 13, 32, 48, 57, 10, 9, 10, 13, 32, 45, 48, 57, 9, 10, 13, 32, 40, 65, 90, 97, 122, 9, 10, 13, 32, 40, 65, 90, 97, 122, 10, 65, 97, 48, 57, 41, 48, 57, 9, 10, 13, 32, 9, 10, 13, 32, 65, 90, 97, 122, 9, 10, 13, 32, 65, 90, 97, 122, 10, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 48, 57, 9, 10, 13, 32, 48, 57, 10, 9, 10, 13, 32, 45, 46, 47, 48, 57, 9, 10, 13, 32, 40, 46, 47, 65, 90, 97, 122, 9, 10, 13, 32, 40, 46, 47, 65, 90, 97, 122, 10, 65, 97, 48, 57, 41, 48, 57, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 46, 47, 65, 90, 97, 122, 9, 10, 13, 32, 46, 47, 65, 90, 97, 122, 10, 78, 88, 110, 120, 68, 100, 9, 10, 13, 32, 9, 10, 13, 32, 73, 105, 9, 10, 13, 32, 73, 105, 10, 78, 110, 80, 112, 85, 117, 84, 116, 9, 10, 13, 32, 9, 10, 13, 32, 80, 112, 9, 10, 13, 32, 80, 112, 10, 82, 114, 79, 111, 71, 103, 82, 114, 65, 97, 77, 109, 9, 10, 13, 32, 46, 9, 10, 13, 32, 46, 10, 69, 101, 67, 99, 85, 117, 84, 116, 69, 101, 73, 79, 105, 111, 76, 108, 69, 101, 9, 10, 13, 32, 9, 10, 13, 32, 72, 104, 9, 10, 13, 32, 72, 104, 10, 65, 97, 78, 110, 68, 100, 76, 108, 69, 101, 9, 10, 13, 32, 9, 10, 13, 32, 65, 90, 97, 122, 9, 10, 13, 32, 65, 90, 97, 122, 10, 9, 10, 
13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 47, 9, 10, 13, 32, 47, 10, 65, 90, 97, 122, 9, 10, 13, 32, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 61, 9, 10, 13, 32, 61, 10, 9, 10, 13, 32, 34, 39, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 46, 47, 10, 39, 39, 9, 10, 13, 32, 46, 47, 48, 57, 9, 10, 13, 32, 46, 47, 95, 48, 57, 65, 90, 97, 122, 82, 114, 77, 109, 65, 97, 84, 116, 83, 115, 9, 10, 13, 32, 9, 10, 13, 32, 65, 90, 97, 122, 9, 10, 13, 32, 65, 90, 97, 122, 10, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 40, 65, 90, 97, 122, 9, 10, 13, 32, 40, 65, 90, 97, 122, 10, 70, 48, 57, 46, 48, 57, 48, 57, 41, 48, 57, 9, 10, 13, 32, 46, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 46, 47, 10, 69, 101, 84, 116, 9, 10, 13, 32, 9, 10, 13, 32, 68, 70, 100, 102, 9, 10, 13, 32, 68, 70, 100, 102, 10, 65, 97, 84, 116, 65, 97, 9, 10, 13, 32, 9, 10, 13, 32, 47, 9, 10, 13, 32, 47, 10, 86, 118, 65, 90, 97, 122, 9, 10, 13, 32, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 61, 9, 10, 13, 32, 61, 10, 9, 10, 13, 32, 34, 39, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 46, 47, 10, 39, 39, 9, 10, 13, 32, 46, 47, 48, 57, 9, 10, 13, 32, 46, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 61, 65, 95, 97, 48, 57, 66, 90, 98, 122, 9, 10, 13, 32, 61, 82, 95, 114, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 61, 73, 95, 105, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 61, 65, 95, 97, 48, 57, 66, 90, 98, 122, 9, 10, 13, 32, 61, 66, 95, 98, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 61, 76, 95, 108, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 61, 69, 95, 101, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 61, 83, 95, 115, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 61, 9, 10, 13, 32, 61, 10, 9, 10, 13, 32, 34, 39, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 
34, 39, 48, 57, 65, 90, 97, 122, 10, 9, 10, 13, 32, 46, 47, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 47, 65, 68, 70, 97, 100, 102, 9, 10, 13, 32, 46, 47, 65, 68, 70, 97, 100, 102, 10, 68, 100, 48, 57, 9, 10, 13, 32, 46, 47, 48, 57, 9, 10, 13, 32, 46, 47, 65, 90, 97, 122, 9, 10, 13, 32, 46, 47, 65, 90, 97, 122, 10, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 65, 68, 70, 97, 100, 102, 9, 10, 13, 32, 65, 68, 70, 97, 100, 102, 10, 65, 97, 84, 116, 69, 101, 48, 57, 9, 10, 13, 32, 46, 47, 48, 57, 48, 57, 9, 10, 13, 32, 46, 47, 48, 57, 9, 10, 13, 32, 46, 47, 48, 57, 73, 105, 83, 115, 84, 116, 73, 105, 83, 115, 83, 115, 73, 105, 78, 110, 71, 103, 9, 10, 13, 32, 9, 10, 13, 32, 86, 118, 9, 10, 13, 32, 86, 118, 10, 65, 97, 76, 108, 85, 117, 69, 101, 83, 115, 9, 10, 13, 32, 9, 10, 13, 32, 65, 90, 97, 122, 9, 10, 13, 32, 65, 90, 97, 122, 10, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 40, 9, 10, 13, 32, 40, 10, 34, 39, 34, 34, 41, 9, 10, 13, 32, 46, 9, 10, 13, 32, 46, 65, 90, 97, 122, 9, 10, 13, 32, 46, 65, 90, 97, 122, 10, 39, 39, 69, 101, 67, 99, 79, 111, 68, 100, 69, 101, 9, 10, 13, 32, 9, 10, 13, 32, 65, 90, 97, 122, 9, 10, 13, 32, 65, 90, 97, 122, 10, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 40, 9, 10, 13, 32, 40, 10, 45, 48, 57, 48, 57, 9, 10, 13, 32, 46, 61, 48, 57, 9, 10, 13, 32, 45, 48, 57, 9, 10, 13, 32, 45, 48, 57, 10, 48, 57, 9, 10, 13, 32, 61, 48, 57, 9, 10, 13, 32, 83, 9, 10, 13, 32, 83, 10, 89, 83, 77, 73, 83, 9, 10, 13, 32, 41, 9, 10, 13, 32, 41, 10, 65, 69, 97, 101, 86, 118, 69, 101, 9, 10, 13, 32, 9, 10, 13, 32, 68, 79, 100, 111, 9, 10, 13, 32, 68, 79, 100, 111, 10, 73, 105, 67, 99, 84, 116, 73, 105, 79, 111, 78, 110, 65, 97, 82, 114, 89, 121, 9, 10, 13, 32, 34, 39, 61, 9, 10, 13, 32, 34, 39, 61, 10, 34, 34, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 46, 47, 10, 9, 10, 13, 32, 67, 9, 10, 13, 32, 67, 10, 79, 77, 80, 82, 69, 83, 83, 69, 68, 9, 10, 13, 32, 46, 47, 67, 9, 10, 13, 32, 46, 47, 
67, 10, 39, 39, 9, 10, 13, 32, 34, 39, 9, 10, 13, 32, 34, 39, 10, 85, 117, 84, 116, 70, 102, 73, 105, 76, 108, 69, 101, 76, 84, 108, 116, 69, 101, 67, 99, 84, 116, 9, 10, 13, 32, 9, 10, 13, 32, 73, 105, 9, 10, 13, 32, 73, 105, 10, 70, 102, 9, 10, 13, 32, 9, 10, 13, 32, 34, 39, 45, 40, 41, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 46, 40, 41, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 46, 40, 41, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 34, 39, 45, 46, 40, 41, 48, 57, 65, 90, 97, 122, 39, 39, 48, 57, 9, 10, 13, 32, 34, 39, 45, 46, 40, 41, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 46, 95, 40, 41, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 46, 95, 40, 41, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 40, 41, 48, 57, 65, 90, 97, 122, 10, 9, 10, 13, 32, 9, 10, 13, 32, 65, 90, 97, 122, 9, 10, 13, 32, 65, 90, 97, 122, 10, 9, 10, 13, 32, 61, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 61, 9, 10, 13, 32, 61, 10, 9, 10, 13, 32, 34, 39, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 46, 39, 39, 9, 10, 13, 32, 46, 48, 57, 9, 10, 13, 32, 46, 95, 48, 57, 65, 90, 97, 122, 65, 97, 76, 82, 108, 114, 85, 117, 69, 101, 9, 10, 13, 32, 9, 10, 13, 32, 76, 108, 9, 10, 13, 32, 76, 108, 10, 65, 97, 66, 98, 69, 101, 76, 108, 83, 115, 9, 10, 13, 32, 9, 10, 13, 32, 47, 65, 90, 97, 122, 9, 10, 13, 32, 47, 65, 90, 97, 122, 10, 9, 10, 13, 32, 65, 90, 97, 122, 9, 10, 13, 32, 65, 90, 97, 122, 10, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 46, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 46, 48, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 9, 10, 13, 32, 34, 39, 9, 10, 13, 32, 34, 39, 10, 34, 34, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 34, 39, 45, 46, 47, 48, 57, 9, 10, 13, 32, 34, 39, 45, 46, 47, 48, 57, 10, 39, 39, 48, 57, 9, 10, 13, 32, 48, 57, 9, 10, 13, 32, 46, 86, 118, 65, 90, 97, 122, 9, 10, 13, 32, 46, 86, 118, 65, 90, 97, 122, 10, 9, 10, 13, 32, 
65, 95, 97, 48, 57, 66, 90, 98, 122, 9, 10, 13, 32, 82, 95, 114, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 73, 95, 105, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 65, 95, 97, 48, 57, 66, 90, 98, 122, 9, 10, 13, 32, 66, 95, 98, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 76, 95, 108, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 69, 95, 101, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 46, 76, 108, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 46, 76, 108, 48, 57, 65, 90, 97, 122, 10, 9, 10, 13, 32, 9, 10, 13, 32, 48, 57, 9, 10, 13, 32, 34, 39, 45, 9, 10, 13, 32, 34, 39, 45, 10, 39, 39, 9, 10, 13, 32, 9, 10, 13, 32, 48, 57, 9, 10, 13, 32, 48, 57, 10, 9, 10, 13, 32, 48, 57, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 69, 95, 101, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 86, 95, 118, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 69, 95, 101, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 76, 95, 108, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 46, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 45, 46, 48, 57, 65, 90, 97, 122, 10, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 40, 45, 46, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 40, 45, 46, 48, 57, 65, 90, 97, 122, 10, 78, 79, 83, 110, 111, 115, 79, 111, 77, 109, 73, 105, 78, 110, 65, 97, 76, 108, 41, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 46, 47, 10, 9, 10, 13, 32, 65, 90, 97, 122, 9, 10, 13, 32, 65, 90, 97, 122, 10, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 40, 65, 90, 97, 122, 9, 10, 13, 32, 40, 65, 90, 97, 122, 10, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 82, 114, 68, 100, 67, 99, 65, 97, 76, 108, 69, 101, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 73, 105, 65, 97, 66, 98, 76, 108, 69, 101, 9, 10, 13, 32, 9, 10, 13, 32, 76, 108, 9, 10, 13, 32, 76, 108, 10, 65, 97, 66, 98, 69, 101, 76, 108, 9, 10, 13, 32, 83, 115, 9, 10, 13, 32, 65, 90, 97, 122, 9, 10, 13, 32, 65, 90, 97, 
122, 10, 9, 10, 13, 32, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 9, 10, 13, 32, 34, 39, 10, 34, 34, 9, 10, 13, 32, 46, 47, 9, 10, 13, 32, 46, 47, 65, 90, 97, 122, 9, 10, 13, 32, 46, 47, 65, 90, 97, 122, 10, 9, 10, 13, 32, 46, 9, 10, 13, 32, 46, 65, 90, 97, 122, 9, 10, 13, 32, 46, 65, 90, 97, 122, 10, 39, 39, 9, 10, 13, 32, 65, 97, 84, 116, 69, 101, 48, 57, 9, 10, 13, 32, 46, 47, 48, 57, 73, 105, 76, 108, 69, 101, 9, 10, 13, 32, 34, 39, 61, 9, 10, 13, 32, 34, 39, 61, 10, 9, 10, 13, 32, 34, 39, 9, 10, 13, 32, 34, 39, 10, 41, 48, 57, 9, 10, 13, 32, 46, 47, 41, 48, 57, 9, 10, 13, 32, 48, 57, 9, 10, 13, 32, 48, 57, 10, 9, 10, 13, 32, 46, 47, 48, 57, 41, 48, 57, 9, 10, 13, 32, 41, 48, 57, 9, 10, 13, 32, 48, 57, 9, 10, 13, 32, 48, 57, 10, 9, 10, 13, 32, 48, 57, 9, 10, 13, 32, 48, 57, 69, 101, 67, 99, 79, 111, 82, 114, 68, 100, 9, 10, 13, 32, 61, 83, 115, 9, 10, 13, 32, 61, 9, 10, 13, 32, 61, 10, 9, 10, 13, 32, 48, 57, 9, 10, 13, 32, 48, 57, 10, 9, 10, 13, 32, 47, 48, 57, 65, 97, 66, 98, 76, 108, 69, 101, 9, 10, 13, 32, 47, 39, 39, 9, 10, 13, 32, 47, 95, 48, 57, 65, 90, 97, 122, 69, 101, 68, 100, 69, 101, 84, 116, 9, 10, 13, 32, 9, 10, 13, 32, 78, 110, 9, 10, 13, 32, 78, 110, 10, 65, 97, 77, 109, 69, 101, 9, 10, 13, 32, 9, 10, 13, 32, 34, 39, 65, 90, 97, 122, 9, 10, 13, 32, 34, 39, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 46, 9, 10, 13, 32, 87, 119, 9, 10, 13, 32, 87, 119, 10, 73, 105, 78, 110, 68, 100, 79, 111, 87, 119, 9, 10, 13, 32, 61, 9, 10, 13, 32, 61, 10, 9, 10, 13, 32, 65, 90, 97, 122, 9, 10, 13, 32, 65, 90, 97, 122, 10, 9, 10, 13, 32, 46, 95, 48, 57, 65, 90, 97, 122, 39, 39, 83, 115, 80, 112, 76, 108, 65, 97, 89, 121, 9, 10, 13, 32, 46, 9, 10, 13, 32, 46, 65, 90, 97, 122, 9, 10, 13, 32, 46, 65, 90, 97, 122, 10, 9, 10, 13, 32, 46, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 42, 47, 67, 68, 69, 70, 71, 73, 76, 77, 82, 83, 86, 100, 101, 102, 103, 105, 108, 109, 114, 115, 118, 9, 10, 13, 32, 42, 47, 67, 68, 69, 70, 71, 73, 76, 77, 82, 83, 86, 100, 101, 
102, 103, 105, 108, 109, 114, 115, 118, 9, 10, 13, 32, 42, 47, 67, 68, 69, 70, 71, 73, 76, 77, 82, 83, 86, 100, 101, 102, 103, 105, 108, 109, 114, 115, 118, 9, 10, 13, 32, 42, 47, 67, 68, 69, 70, 71, 73, 76, 77, 82, 83, 86, 100, 101, 102, 103, 105, 108, 109, 114, 115, 118, 48, 57, 0 }; static const signed char _spss_commands_single_lengths[] = { 0, 1, 1, 5, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 4, 2, 2, 6, 6, 6, 1, 2, 2, 2, 4, 10, 10, 1, 2, 4, 2, 5, 5, 1, 6, 6, 1, 1, 1, 5, 11, 11, 1, 4, 4, 4, 1, 5, 4, 4, 1, 5, 5, 5, 1, 2, 1, 4, 4, 4, 1, 5, 4, 4, 1, 7, 7, 7, 1, 2, 1, 6, 6, 6, 1, 4, 2, 4, 6, 6, 1, 2, 2, 2, 2, 4, 6, 6, 1, 2, 2, 2, 2, 2, 2, 5, 5, 1, 2, 2, 2, 2, 2, 4, 2, 2, 4, 6, 6, 1, 2, 2, 2, 2, 2, 4, 4, 4, 1, 5, 5, 5, 1, 0, 6, 5, 5, 1, 6, 6, 1, 1, 1, 6, 6, 6, 1, 1, 1, 6, 7, 2, 2, 2, 2, 2, 4, 4, 4, 1, 5, 5, 5, 1, 1, 0, 1, 0, 1, 5, 6, 6, 1, 2, 2, 4, 8, 8, 1, 2, 2, 2, 4, 5, 5, 1, 2, 6, 5, 5, 1, 6, 6, 1, 1, 1, 6, 6, 6, 1, 1, 1, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 6, 5, 5, 1, 6, 6, 1, 7, 12, 12, 1, 2, 6, 6, 6, 1, 5, 10, 10, 1, 2, 2, 2, 0, 6, 0, 6, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 6, 6, 1, 2, 2, 2, 2, 2, 4, 4, 4, 1, 5, 5, 5, 1, 2, 1, 1, 1, 5, 5, 5, 1, 1, 1, 2, 2, 2, 2, 2, 4, 4, 4, 1, 5, 5, 5, 1, 1, 0, 6, 5, 5, 1, 0, 5, 5, 5, 1, 1, 1, 1, 1, 1, 5, 5, 1, 4, 2, 2, 4, 8, 8, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 7, 7, 1, 1, 1, 6, 6, 6, 1, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 7, 1, 1, 1, 6, 6, 1, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, 4, 6, 6, 1, 2, 4, 7, 8, 8, 1, 1, 1, 8, 1, 1, 0, 8, 9, 9, 7, 1, 4, 4, 4, 1, 6, 5, 5, 1, 6, 6, 1, 1, 1, 5, 1, 1, 5, 6, 2, 4, 2, 2, 4, 6, 6, 1, 2, 2, 2, 2, 2, 4, 5, 5, 1, 4, 4, 1, 5, 8, 8, 1, 1, 1, 4, 6, 6, 1, 1, 1, 6, 9, 9, 1, 1, 1, 0, 4, 7, 7, 1, 7, 7, 7, 7, 7, 7, 7, 5, 10, 10, 1, 4, 4, 7, 7, 1, 1, 1, 4, 4, 4, 1, 4, 5, 7, 7, 7, 7, 5, 8, 8, 1, 5, 9, 9, 1, 6, 2, 2, 2, 2, 2, 2, 1, 6, 6, 1, 4, 4, 1, 5, 5, 5, 1, 5, 2, 2, 2, 2, 2, 2, 5, 2, 2, 2, 2, 2, 4, 6, 6, 1, 2, 2, 2, 2, 6, 4, 4, 1, 5, 6, 6, 1, 1, 1, 6, 6, 6, 1, 5, 5, 5, 1, 1, 1, 4, 2, 2, 2, 0, 6, 2, 2, 2, 7, 7, 1, 6, 6, 1, 
1, 6, 1, 4, 4, 1, 6, 1, 4, 1, 4, 4, 1, 4, 4, 2, 2, 2, 2, 2, 7, 5, 5, 1, 4, 4, 1, 5, 2, 2, 2, 2, 5, 1, 1, 6, 2, 2, 2, 2, 4, 6, 6, 1, 2, 2, 2, 4, 6, 6, 1, 1, 1, 5, 6, 6, 1, 2, 2, 2, 2, 2, 5, 5, 1, 4, 4, 1, 6, 1, 1, 2, 2, 2, 2, 2, 5, 5, 5, 1, 6, 27, 27, 27, 27, 0 }; static const signed char _spss_commands_range_lengths[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 1, 2, 2, 0, 3, 1, 1, 0, 1, 2, 2, 0, 1, 1, 0, 2, 2, 0, 3, 1, 1, 0, 1, 2, 2, 0, 1, 1, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 2, 3, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 2, 2, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 0, 0, 1, 1, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 0, 0, 0, 4, 0, 0, 1, 4, 4, 4, 4, 0, 0, 2, 2, 0, 3, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 2, 2, 0, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 2, 2, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 
1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 3, 0, 0, 0, 1, 0 }; static const short _spss_commands_index_offsets[] = { 0, 0, 2, 4, 10, 13, 15, 17, 20, 22, 24, 26, 28, 30, 32, 34, 36, 41, 44, 47, 54, 61, 68, 70, 73, 76, 79, 84, 95, 106, 108, 111, 116, 119, 125, 131, 133, 142, 151, 153, 155, 157, 163, 175, 187, 189, 195, 202, 209, 211, 220, 226, 232, 234, 241, 249, 257, 259, 263, 266, 271, 278, 285, 287, 296, 302, 308, 310, 319, 329, 339, 341, 345, 348, 355, 364, 373, 375, 380, 383, 388, 395, 402, 404, 407, 410, 413, 416, 421, 428, 435, 437, 440, 443, 446, 449, 452, 455, 461, 467, 469, 472, 475, 478, 481, 484, 489, 492, 495, 500, 507, 514, 516, 519, 522, 525, 528, 531, 536, 543, 550, 552, 561, 567, 573, 575, 578, 588, 594, 600, 602, 612, 622, 624, 626, 628, 635, 642, 649, 651, 653, 655, 663, 674, 677, 680, 683, 686, 689, 694, 701, 708, 710, 719, 727, 735, 737, 739, 741, 744, 746, 749, 755, 762, 769, 771, 774, 777, 782, 791, 800, 802, 805, 808, 811, 816, 822, 828, 830, 835, 845, 851, 857, 859, 869, 879, 881, 883, 885, 892, 899, 906, 908, 910, 912, 920, 931, 943, 955, 967, 979, 991, 1003, 1015, 1027, 1037, 1043, 1049, 1051, 1061, 1071, 1073, 1084, 1097, 1110, 1112, 1116, 1124, 1133, 1142, 1144, 1153, 1164, 1175, 1177, 1180, 1183, 1186, 1188, 1196, 1198, 1206, 1214, 1217, 1220, 1223, 1226, 1229, 1232, 1235, 1238, 1241, 1246, 1253, 1260, 1262, 1265, 1268, 1271, 1274, 1277, 1282, 1289, 1296, 1298, 1307, 1313, 1319, 1321, 1324, 1326, 1328, 1330, 1336, 1344, 1352, 1354, 1356, 1358, 1361, 1364, 1367, 1370, 1373, 1378, 1385, 1392, 1394, 1403, 1409, 1415, 1417, 1420, 1422, 1430, 1437, 1444, 1446, 1448, 1455, 1461, 1467, 1469, 1471, 1473, 1475, 1477, 1479, 1485, 1491, 1493, 1498, 1501, 1504, 1509, 1518, 1527, 1529, 1532, 1535, 1538, 1541, 1544, 1547, 1550, 1553, 1556, 1564, 1572, 1574, 1576, 1578, 1585, 1592, 1599, 
1601, 1607, 1613, 1615, 1617, 1619, 1621, 1623, 1625, 1627, 1629, 1631, 1633, 1641, 1649, 1651, 1653, 1655, 1662, 1669, 1671, 1674, 1677, 1680, 1683, 1686, 1689, 1694, 1697, 1700, 1703, 1708, 1715, 1722, 1724, 1727, 1732, 1744, 1757, 1770, 1772, 1774, 1776, 1789, 1791, 1793, 1795, 1808, 1822, 1836, 1848, 1850, 1855, 1862, 1869, 1871, 1881, 1887, 1893, 1895, 1905, 1915, 1917, 1919, 1921, 1927, 1929, 1931, 1938, 1948, 1951, 1956, 1959, 1962, 1967, 1974, 1981, 1983, 1986, 1989, 1992, 1995, 1998, 2003, 2011, 2019, 2021, 2028, 2035, 2037, 2046, 2058, 2070, 2072, 2074, 2076, 2081, 2088, 2095, 2097, 2099, 2101, 2108, 2119, 2130, 2132, 2134, 2136, 2138, 2144, 2154, 2164, 2166, 2177, 2188, 2199, 2210, 2221, 2232, 2243, 2252, 2266, 2280, 2282, 2287, 2293, 2301, 2309, 2311, 2313, 2315, 2320, 2326, 2332, 2334, 2340, 2349, 2360, 2371, 2382, 2393, 2402, 2414, 2426, 2428, 2437, 2450, 2463, 2465, 2472, 2475, 2478, 2481, 2484, 2487, 2490, 2492, 2499, 2506, 2508, 2515, 2522, 2524, 2533, 2541, 2549, 2551, 2560, 2563, 2566, 2569, 2572, 2575, 2578, 2587, 2590, 2593, 2596, 2599, 2602, 2607, 2614, 2621, 2623, 2626, 2629, 2632, 2635, 2642, 2649, 2656, 2658, 2667, 2674, 2681, 2683, 2685, 2687, 2694, 2703, 2712, 2714, 2720, 2728, 2736, 2738, 2740, 2742, 2747, 2750, 2753, 2756, 2758, 2766, 2769, 2772, 2775, 2783, 2791, 2793, 2800, 2807, 2809, 2812, 2819, 2822, 2828, 2834, 2836, 2844, 2847, 2852, 2855, 2861, 2867, 2869, 2875, 2881, 2884, 2887, 2890, 2893, 2896, 2904, 2910, 2916, 2918, 2924, 2930, 2932, 2939, 2942, 2945, 2948, 2951, 2957, 2959, 2961, 2971, 2974, 2977, 2980, 2983, 2988, 2995, 3002, 3004, 3007, 3010, 3013, 3018, 3027, 3036, 3038, 3040, 3042, 3048, 3055, 3062, 3064, 3067, 3070, 3073, 3076, 3079, 3085, 3091, 3093, 3100, 3107, 3109, 3119, 3121, 3123, 3126, 3129, 3132, 3135, 3138, 3144, 3152, 3160, 3162, 3172, 3200, 3228, 3256, 0 }; static const short _spss_commands_cond_targs[] = { 629, 0, 3, 2, 3, 629, 4, 3, 3, 2, 629, 3, 2, 6, 0, 7, 6, 7, 628, 6, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 
14, 0, 15, 0, 628, 15, 17, 618, 17, 618, 0, 18, 18, 0, 19, 19, 0, 20, 21, 22, 20, 585, 585, 0, 20, 21, 22, 20, 23, 23, 0, 20, 21, 22, 20, 23, 23, 0, 21, 0, 24, 24, 0, 25, 25, 0, 26, 26, 0, 27, 28, 29, 27, 0, 27, 28, 29, 27, 30, 562, 575, 30, 562, 575, 0, 27, 28, 29, 27, 30, 562, 575, 30, 562, 575, 0, 28, 0, 31, 31, 0, 32, 583, 32, 583, 0, 33, 33, 0, 33, 34, 35, 33, 36, 0, 33, 34, 35, 33, 36, 0, 34, 0, 36, 37, 38, 36, 39, 580, 582, 582, 0, 36, 37, 38, 36, 39, 580, 582, 582, 0, 37, 0, 41, 40, 41, 40, 42, 43, 44, 42, 45, 0, 42, 43, 44, 42, 45, 30, 562, 575, 30, 562, 575, 0, 42, 43, 44, 42, 45, 30, 562, 575, 30, 562, 575, 0, 43, 0, 46, 47, 48, 46, 561, 0, 46, 47, 48, 46, 49, 49, 0, 46, 47, 48, 46, 49, 49, 0, 47, 0, 50, 51, 52, 50, 49, 49, 49, 49, 0, 50, 51, 52, 50, 53, 0, 50, 51, 52, 50, 53, 0, 51, 0, 54, 55, 56, 54, 557, 53, 0, 54, 55, 56, 54, 57, 63, 63, 0, 54, 55, 56, 54, 57, 63, 63, 0, 55, 0, 554, 554, 58, 0, 59, 58, 0, 60, 61, 62, 60, 0, 60, 61, 62, 60, 63, 63, 0, 60, 61, 62, 60, 63, 63, 0, 61, 0, 64, 65, 66, 64, 63, 63, 63, 63, 0, 64, 65, 66, 64, 67, 0, 64, 65, 66, 64, 67, 0, 65, 0, 68, 69, 70, 68, 550, 630, 45, 67, 0, 68, 69, 70, 68, 71, 630, 45, 63, 63, 0, 68, 69, 70, 68, 71, 630, 45, 63, 63, 0, 69, 0, 547, 547, 72, 0, 73, 72, 0, 74, 75, 76, 74, 630, 45, 0, 74, 75, 76, 74, 630, 45, 63, 63, 0, 74, 75, 76, 74, 630, 45, 63, 63, 0, 75, 0, 78, 100, 78, 100, 0, 79, 79, 0, 80, 81, 82, 80, 0, 80, 81, 82, 80, 83, 83, 0, 80, 81, 82, 80, 83, 83, 0, 81, 0, 84, 84, 0, 85, 85, 0, 86, 86, 0, 87, 87, 0, 88, 89, 90, 88, 0, 88, 89, 90, 88, 91, 91, 0, 88, 89, 90, 88, 91, 91, 0, 89, 0, 92, 92, 0, 93, 93, 0, 94, 94, 0, 95, 95, 0, 96, 96, 0, 97, 97, 0, 97, 98, 99, 97, 628, 0, 97, 98, 99, 97, 628, 0, 98, 0, 101, 101, 0, 102, 102, 0, 103, 103, 0, 104, 104, 0, 97, 97, 0, 106, 143, 106, 143, 0, 107, 107, 0, 108, 108, 0, 109, 110, 111, 109, 0, 109, 110, 111, 109, 112, 112, 0, 109, 110, 111, 109, 112, 112, 0, 110, 0, 113, 113, 0, 114, 114, 0, 115, 115, 0, 116, 116, 0, 117, 117, 0, 118, 
119, 120, 118, 0, 118, 119, 120, 118, 121, 121, 0, 118, 119, 120, 118, 121, 121, 0, 119, 0, 122, 123, 124, 122, 121, 121, 121, 121, 0, 122, 123, 124, 122, 125, 0, 122, 123, 124, 122, 125, 0, 123, 0, 126, 126, 0, 127, 128, 129, 127, 130, 126, 126, 126, 126, 0, 127, 128, 129, 127, 130, 0, 127, 128, 129, 127, 130, 0, 128, 0, 130, 131, 132, 130, 133, 139, 141, 142, 142, 0, 130, 131, 132, 130, 133, 139, 141, 142, 142, 0, 131, 0, 135, 134, 135, 134, 136, 137, 138, 136, 628, 125, 0, 136, 137, 138, 136, 628, 125, 0, 136, 137, 138, 136, 628, 125, 0, 137, 0, 135, 140, 135, 140, 136, 137, 138, 136, 628, 125, 141, 0, 136, 137, 138, 136, 628, 125, 142, 142, 142, 142, 0, 144, 144, 0, 145, 145, 0, 146, 146, 0, 147, 147, 0, 148, 148, 0, 149, 150, 151, 149, 0, 149, 150, 151, 149, 152, 152, 0, 149, 150, 151, 149, 152, 152, 0, 150, 0, 153, 154, 155, 153, 152, 152, 152, 152, 0, 153, 154, 155, 153, 156, 152, 152, 0, 153, 154, 155, 153, 156, 152, 152, 0, 154, 0, 157, 0, 158, 0, 159, 158, 0, 160, 0, 161, 160, 0, 162, 163, 164, 162, 628, 0, 162, 163, 164, 162, 628, 148, 0, 162, 163, 164, 162, 628, 148, 0, 163, 0, 166, 166, 0, 167, 167, 0, 168, 169, 170, 168, 0, 168, 169, 170, 168, 171, 538, 171, 538, 0, 168, 169, 170, 168, 171, 538, 171, 538, 0, 169, 0, 172, 172, 0, 173, 173, 0, 174, 174, 0, 175, 176, 177, 175, 0, 175, 176, 177, 175, 178, 0, 175, 176, 177, 175, 178, 0, 176, 0, 196, 196, 179, 179, 0, 180, 181, 182, 180, 183, 179, 179, 179, 179, 0, 180, 181, 182, 180, 183, 0, 180, 181, 182, 180, 183, 0, 181, 0, 183, 184, 185, 183, 186, 192, 194, 195, 195, 0, 183, 184, 185, 183, 186, 192, 194, 195, 195, 0, 184, 0, 188, 187, 188, 187, 189, 190, 191, 189, 628, 178, 0, 189, 190, 191, 189, 628, 178, 0, 189, 190, 191, 189, 628, 178, 0, 190, 0, 188, 193, 188, 193, 189, 190, 191, 189, 628, 178, 194, 0, 189, 190, 191, 189, 628, 178, 195, 195, 195, 195, 0, 180, 181, 182, 180, 183, 197, 179, 197, 179, 179, 179, 0, 180, 181, 182, 180, 183, 198, 179, 198, 179, 179, 179, 0, 180, 181, 182, 180, 183, 199, 
179, 199, 179, 179, 179, 0, 180, 181, 182, 180, 183, 200, 179, 200, 179, 179, 179, 0, 180, 181, 182, 180, 183, 201, 179, 201, 179, 179, 179, 0, 180, 181, 182, 180, 183, 202, 179, 202, 179, 179, 179, 0, 180, 181, 182, 180, 183, 203, 179, 203, 179, 179, 179, 0, 180, 181, 182, 180, 183, 204, 179, 204, 179, 179, 179, 0, 205, 206, 207, 205, 208, 179, 179, 179, 179, 0, 205, 206, 207, 205, 208, 0, 205, 206, 207, 205, 208, 0, 206, 0, 208, 209, 210, 208, 186, 192, 194, 211, 211, 0, 208, 209, 210, 208, 186, 192, 194, 211, 211, 0, 209, 0, 212, 213, 214, 212, 628, 178, 211, 211, 211, 211, 0, 212, 213, 214, 212, 628, 178, 215, 224, 229, 215, 224, 229, 0, 212, 213, 214, 212, 628, 178, 215, 224, 229, 215, 224, 229, 0, 213, 0, 533, 533, 216, 0, 217, 218, 219, 217, 628, 178, 216, 0, 217, 218, 219, 217, 628, 178, 220, 220, 0, 217, 218, 219, 217, 628, 178, 220, 220, 0, 218, 0, 221, 222, 223, 221, 220, 220, 220, 220, 0, 221, 222, 223, 221, 215, 224, 229, 215, 224, 229, 0, 221, 222, 223, 221, 215, 224, 229, 215, 224, 229, 0, 222, 0, 225, 225, 0, 226, 226, 0, 227, 227, 0, 228, 0, 217, 218, 219, 217, 628, 178, 228, 0, 230, 0, 217, 218, 219, 217, 631, 178, 230, 0, 217, 218, 219, 217, 628, 178, 231, 0, 233, 233, 0, 234, 234, 0, 97, 97, 0, 236, 236, 0, 237, 237, 0, 238, 238, 0, 239, 239, 0, 240, 240, 0, 241, 241, 0, 242, 243, 244, 242, 0, 242, 243, 244, 242, 245, 245, 0, 242, 243, 244, 242, 245, 245, 0, 243, 0, 246, 246, 0, 247, 247, 0, 248, 248, 0, 249, 249, 0, 250, 250, 0, 251, 252, 253, 251, 0, 251, 252, 253, 251, 254, 254, 0, 251, 252, 253, 251, 254, 254, 0, 252, 0, 255, 256, 257, 255, 254, 254, 254, 254, 0, 255, 256, 257, 255, 258, 0, 255, 256, 257, 255, 258, 0, 256, 0, 259, 266, 0, 261, 260, 261, 260, 262, 0, 263, 264, 265, 263, 628, 0, 263, 264, 265, 263, 628, 254, 254, 0, 263, 264, 265, 263, 628, 254, 254, 0, 264, 0, 261, 267, 261, 267, 269, 269, 0, 270, 270, 0, 271, 271, 0, 272, 272, 0, 273, 273, 0, 274, 275, 276, 274, 0, 274, 275, 276, 274, 277, 277, 0, 274, 275, 276, 274, 277, 
277, 0, 275, 0, 278, 279, 280, 278, 277, 277, 277, 277, 0, 278, 279, 280, 278, 281, 0, 278, 279, 280, 278, 281, 0, 279, 0, 282, 283, 0, 283, 0, 284, 285, 286, 284, 287, 289, 283, 0, 284, 285, 286, 284, 282, 283, 0, 284, 285, 286, 284, 282, 283, 0, 285, 0, 288, 0, 284, 285, 286, 284, 289, 288, 0, 289, 290, 291, 289, 292, 0, 289, 290, 291, 289, 292, 0, 290, 0, 293, 0, 294, 0, 295, 0, 296, 0, 297, 0, 297, 298, 299, 297, 97, 0, 297, 298, 299, 297, 97, 0, 298, 0, 301, 351, 301, 351, 0, 302, 302, 0, 303, 303, 0, 304, 305, 306, 304, 0, 304, 305, 306, 304, 307, 345, 307, 345, 0, 304, 305, 306, 304, 307, 345, 307, 345, 0, 305, 0, 308, 308, 0, 309, 309, 0, 310, 310, 0, 311, 311, 0, 312, 312, 0, 313, 313, 0, 314, 314, 0, 315, 315, 0, 316, 316, 0, 316, 317, 318, 316, 319, 340, 342, 0, 316, 317, 318, 316, 319, 340, 342, 0, 317, 0, 321, 320, 321, 320, 322, 323, 324, 322, 628, 337, 0, 322, 323, 324, 322, 628, 325, 0, 322, 323, 324, 322, 628, 325, 0, 323, 0, 325, 326, 327, 325, 328, 0, 325, 326, 327, 325, 328, 0, 326, 0, 329, 0, 330, 0, 331, 0, 332, 0, 333, 0, 334, 0, 335, 0, 336, 0, 97, 0, 337, 338, 339, 337, 628, 325, 328, 0, 337, 338, 339, 337, 628, 325, 328, 0, 338, 0, 321, 341, 321, 341, 342, 343, 344, 342, 319, 340, 0, 342, 343, 344, 342, 319, 340, 0, 343, 0, 346, 346, 0, 347, 347, 0, 348, 348, 0, 349, 349, 0, 350, 350, 0, 316, 316, 0, 352, 376, 352, 376, 0, 353, 353, 0, 354, 354, 0, 355, 355, 0, 356, 357, 358, 356, 0, 356, 357, 358, 356, 359, 359, 0, 356, 357, 358, 356, 359, 359, 0, 357, 0, 360, 360, 0, 361, 374, 375, 361, 0, 362, 363, 364, 362, 365, 368, 370, 362, 371, 372, 372, 0, 362, 363, 364, 362, 365, 368, 370, 628, 362, 371, 372, 372, 0, 362, 363, 364, 362, 365, 368, 370, 628, 362, 371, 372, 372, 0, 363, 0, 367, 366, 367, 366, 362, 363, 364, 362, 365, 368, 370, 628, 362, 371, 372, 372, 0, 367, 369, 367, 369, 371, 0, 362, 363, 364, 362, 365, 368, 370, 628, 362, 371, 372, 372, 0, 362, 363, 364, 362, 365, 368, 370, 628, 372, 362, 373, 372, 372, 0, 362, 363, 364, 362, 
365, 368, 370, 628, 372, 362, 373, 372, 372, 0, 362, 363, 364, 362, 365, 368, 370, 362, 371, 372, 372, 0, 374, 0, 377, 378, 379, 377, 0, 377, 378, 379, 377, 380, 380, 0, 377, 378, 379, 377, 380, 380, 0, 378, 0, 381, 382, 383, 381, 384, 380, 380, 380, 380, 0, 381, 382, 383, 381, 384, 0, 381, 382, 383, 381, 384, 0, 382, 0, 384, 385, 386, 384, 387, 390, 392, 393, 393, 0, 384, 385, 386, 384, 387, 390, 392, 393, 393, 0, 385, 0, 389, 388, 389, 388, 97, 98, 99, 97, 628, 0, 389, 391, 389, 391, 97, 98, 99, 97, 628, 392, 0, 97, 98, 99, 97, 628, 393, 393, 393, 393, 0, 395, 395, 0, 396, 499, 396, 499, 0, 397, 397, 0, 398, 398, 0, 399, 400, 401, 399, 0, 399, 400, 401, 399, 402, 402, 0, 399, 400, 401, 399, 402, 402, 0, 400, 0, 403, 403, 0, 404, 404, 0, 405, 405, 0, 406, 406, 0, 407, 407, 0, 408, 409, 410, 408, 0, 408, 409, 410, 408, 411, 414, 414, 0, 408, 409, 410, 408, 411, 414, 414, 0, 409, 0, 411, 412, 413, 411, 414, 414, 0, 411, 412, 413, 411, 414, 414, 0, 412, 0, 415, 416, 417, 415, 414, 414, 414, 414, 0, 415, 416, 417, 415, 418, 430, 432, 448, 449, 460, 460, 0, 415, 416, 417, 415, 418, 430, 432, 448, 449, 460, 460, 0, 416, 0, 420, 419, 420, 419, 421, 422, 423, 421, 0, 421, 422, 423, 421, 424, 453, 0, 421, 422, 423, 421, 424, 453, 0, 422, 0, 426, 425, 426, 425, 427, 428, 429, 427, 628, 434, 0, 427, 428, 429, 427, 418, 430, 432, 628, 434, 449, 0, 427, 428, 429, 427, 418, 430, 432, 628, 434, 449, 0, 428, 0, 420, 431, 420, 431, 433, 0, 421, 422, 423, 421, 433, 0, 434, 435, 436, 434, 628, 437, 437, 414, 414, 0, 434, 435, 436, 434, 628, 437, 437, 414, 414, 0, 435, 0, 415, 416, 417, 415, 438, 414, 438, 414, 414, 414, 0, 415, 416, 417, 415, 439, 414, 439, 414, 414, 414, 0, 415, 416, 417, 415, 440, 414, 440, 414, 414, 414, 0, 415, 416, 417, 415, 441, 414, 441, 414, 414, 414, 0, 415, 416, 417, 415, 442, 414, 442, 414, 414, 414, 0, 415, 416, 417, 415, 443, 414, 443, 414, 414, 414, 0, 415, 416, 417, 415, 444, 414, 444, 414, 414, 414, 0, 445, 446, 447, 445, 414, 414, 414, 414, 0, 445, 
446, 447, 445, 418, 430, 432, 448, 461, 461, 449, 460, 460, 0, 445, 446, 447, 445, 418, 430, 432, 448, 461, 461, 449, 460, 460, 0, 446, 0, 421, 422, 423, 421, 0, 450, 451, 452, 450, 449, 0, 450, 451, 452, 450, 424, 453, 455, 0, 450, 451, 452, 450, 424, 453, 455, 0, 451, 0, 426, 454, 426, 454, 456, 457, 458, 456, 0, 456, 457, 458, 456, 459, 0, 456, 457, 458, 456, 459, 0, 457, 0, 421, 422, 423, 421, 459, 0, 415, 416, 417, 415, 460, 460, 460, 460, 0, 415, 416, 417, 415, 462, 460, 462, 460, 460, 460, 0, 415, 416, 417, 415, 463, 460, 463, 460, 460, 460, 0, 415, 416, 417, 415, 464, 460, 464, 460, 460, 460, 0, 415, 416, 417, 415, 465, 460, 465, 460, 460, 460, 0, 466, 467, 468, 466, 460, 460, 460, 460, 0, 466, 467, 468, 466, 418, 430, 432, 448, 449, 469, 469, 0, 466, 467, 468, 466, 418, 430, 432, 448, 449, 469, 469, 0, 467, 0, 470, 471, 472, 470, 469, 469, 469, 469, 0, 470, 471, 472, 470, 418, 430, 473, 432, 448, 449, 498, 498, 0, 470, 471, 472, 470, 418, 430, 473, 432, 448, 449, 498, 498, 0, 471, 0, 474, 492, 494, 474, 492, 494, 0, 475, 475, 0, 476, 476, 0, 477, 477, 0, 478, 478, 0, 479, 479, 0, 480, 480, 0, 481, 0, 481, 482, 483, 481, 628, 484, 0, 481, 482, 483, 481, 628, 484, 0, 482, 0, 484, 485, 486, 484, 487, 487, 0, 484, 485, 486, 484, 487, 487, 0, 485, 0, 488, 489, 490, 488, 487, 487, 487, 487, 0, 488, 489, 490, 488, 473, 491, 491, 0, 488, 489, 490, 488, 473, 491, 491, 0, 489, 0, 488, 489, 490, 488, 491, 491, 491, 491, 0, 493, 493, 0, 476, 476, 0, 495, 495, 0, 496, 496, 0, 497, 497, 0, 480, 480, 0, 470, 471, 472, 470, 498, 498, 498, 498, 0, 500, 500, 0, 501, 501, 0, 502, 502, 0, 503, 503, 0, 504, 504, 0, 505, 506, 507, 505, 0, 505, 506, 507, 505, 508, 508, 0, 505, 506, 507, 505, 508, 508, 0, 506, 0, 509, 509, 0, 510, 510, 0, 511, 511, 0, 512, 512, 0, 513, 514, 515, 513, 532, 532, 0, 513, 514, 515, 513, 516, 516, 0, 513, 514, 515, 513, 516, 516, 0, 514, 0, 517, 518, 519, 517, 516, 516, 516, 516, 0, 517, 518, 519, 517, 520, 530, 0, 517, 518, 519, 517, 520, 530, 0, 
518, 0, 522, 521, 522, 521, 523, 524, 525, 523, 628, 526, 0, 523, 524, 525, 523, 628, 526, 516, 516, 0, 523, 524, 525, 523, 628, 526, 516, 516, 0, 524, 0, 527, 528, 529, 527, 628, 0, 527, 528, 529, 527, 628, 516, 516, 0, 527, 528, 529, 527, 628, 516, 516, 0, 528, 0, 522, 531, 522, 531, 513, 514, 515, 513, 0, 534, 534, 0, 535, 535, 0, 536, 536, 0, 537, 0, 217, 218, 219, 217, 628, 178, 537, 0, 539, 539, 0, 540, 540, 0, 541, 541, 0, 541, 542, 543, 541, 387, 390, 544, 0, 541, 542, 543, 541, 387, 390, 544, 0, 542, 0, 544, 545, 546, 544, 387, 390, 0, 544, 545, 546, 544, 387, 390, 0, 545, 0, 548, 549, 0, 74, 75, 76, 74, 630, 45, 0, 548, 549, 0, 550, 551, 552, 550, 553, 0, 550, 551, 552, 550, 553, 0, 551, 0, 68, 69, 70, 68, 630, 45, 553, 0, 555, 556, 0, 60, 61, 62, 60, 0, 555, 556, 0, 557, 558, 559, 557, 560, 0, 557, 558, 559, 557, 560, 0, 558, 0, 54, 55, 56, 54, 560, 0, 46, 47, 48, 46, 561, 0, 563, 563, 0, 564, 564, 0, 565, 565, 0, 566, 566, 0, 567, 567, 0, 568, 569, 570, 568, 571, 568, 568, 0, 568, 569, 570, 568, 571, 0, 568, 569, 570, 568, 571, 0, 569, 0, 571, 572, 573, 571, 574, 0, 571, 572, 573, 571, 574, 0, 572, 0, 42, 43, 44, 42, 45, 574, 0, 576, 576, 0, 577, 577, 0, 578, 578, 0, 579, 579, 0, 42, 43, 44, 42, 45, 0, 41, 581, 41, 581, 42, 43, 44, 42, 45, 582, 582, 582, 582, 0, 584, 584, 0, 579, 579, 0, 586, 586, 0, 587, 587, 0, 588, 589, 590, 588, 0, 588, 589, 590, 588, 591, 591, 0, 588, 589, 590, 588, 591, 591, 0, 589, 0, 592, 592, 0, 593, 593, 0, 594, 594, 0, 595, 596, 597, 595, 0, 595, 596, 597, 595, 598, 616, 615, 615, 0, 595, 596, 597, 595, 598, 616, 615, 615, 0, 596, 0, 600, 599, 600, 599, 601, 602, 603, 601, 628, 0, 601, 602, 603, 601, 604, 604, 0, 601, 602, 603, 601, 604, 604, 0, 602, 0, 605, 605, 0, 606, 606, 0, 607, 607, 0, 608, 608, 0, 609, 609, 0, 609, 610, 611, 609, 612, 0, 609, 610, 611, 609, 612, 0, 610, 0, 612, 613, 614, 612, 615, 615, 0, 612, 613, 614, 612, 615, 615, 0, 613, 0, 601, 602, 603, 601, 628, 615, 615, 615, 615, 0, 600, 617, 600, 617, 619, 
619, 0, 620, 620, 0, 621, 621, 0, 622, 622, 0, 623, 623, 0, 624, 625, 626, 624, 628, 0, 624, 625, 626, 624, 628, 627, 627, 0, 624, 625, 626, 624, 628, 627, 627, 0, 625, 0, 624, 625, 626, 624, 628, 627, 627, 627, 627, 0, 628, 629, 1, 628, 2, 5, 8, 16, 77, 105, 165, 83, 232, 235, 268, 300, 394, 16, 77, 105, 165, 83, 232, 235, 268, 300, 394, 0, 628, 629, 1, 628, 2, 5, 8, 16, 77, 105, 165, 83, 232, 235, 268, 300, 394, 16, 77, 105, 165, 83, 232, 235, 268, 300, 394, 0, 628, 629, 1, 628, 2, 5, 8, 16, 77, 105, 165, 83, 232, 235, 268, 300, 394, 16, 77, 105, 165, 83, 232, 235, 268, 300, 394, 0, 628, 629, 1, 628, 2, 5, 8, 16, 77, 105, 165, 83, 232, 235, 268, 300, 394, 16, 77, 105, 165, 83, 232, 235, 268, 300, 394, 231, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 0 }; static const short _spss_commands_cond_actions[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 21, 21, 81, 81, 0, 0, 0, 75, 17, 19, 0, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 81, 81, 0, 0, 0, 84, 84, 84, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 21, 21, 21, 21, 120, 0, 0, 0, 45, 45, 45, 45, 3, 1, 0, 0, 0, 0, 0, 0, 69, 69, 0, 21, 21, 21, 21, 21, 124, 124, 0, 0, 0, 0, 0, 39, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 69, 0, 21, 21, 21, 21, 124, 124, 0, 0, 0, 84, 84, 84, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 21, 21, 21, 21, 120, 0, 0, 0, 48, 48, 48, 48, 3, 48, 48, 1, 0, 0, 0, 0, 0, 0, 0, 0, 69, 69, 0, 21, 21, 21, 21, 21, 21, 21, 124, 124, 0, 0, 0, 0, 0, 39, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69, 69, 0, 21, 21, 21, 21, 21, 21, 124, 124, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 81, 81, 0, 0, 0, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 23, 23, 0, 87, 87, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 23, 23, 0, 21, 21, 21, 21, 21, 21, 120, 81, 81, 0, 0, 0, 75, 17, 19, 0, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 72, 
13, 15, 0, 0, 0, 0, 0, 0, 0, 1, 0, 25, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 81, 81, 0, 0, 0, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 21, 81, 81, 0, 0, 0, 0, 0, 39, 0, 0, 1, 0, 39, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 23, 23, 23, 23, 0, 87, 87, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 23, 23, 0, 21, 21, 21, 21, 21, 21, 120, 81, 81, 0, 0, 0, 75, 17, 19, 0, 60, 60, 60, 60, 60, 60, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 72, 13, 15, 0, 9, 9, 9, 9, 9, 9, 1, 0, 90, 90, 90, 90, 90, 90, 0, 0, 0, 0, 0, 87, 87, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, 87, 87, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, 87, 87, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, 87, 87, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, 87, 87, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, 87, 87, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, 87, 87, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, 87, 87, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, 87, 87, 87, 87, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 23, 23, 0, 21, 21, 21, 21, 21, 21, 120, 81, 81, 0, 0, 0, 140, 140, 140, 140, 90, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 39, 0, 96, 96, 96, 96, 96, 96, 1, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 21, 21, 81, 81, 0, 0, 0, 84, 84, 84, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 102, 102, 102, 102, 102, 102, 1, 0, 39, 0, 99, 99, 99, 99, 99, 99, 1, 0, 7, 7, 7, 7, 7, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 81, 81, 0, 0, 0, 84, 84, 84, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 75, 17, 19, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 21, 81, 81, 0, 0, 0, 72, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 81, 81, 0, 0, 0, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 39, 0, 39, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 39, 0, 21, 21, 21, 21, 21, 120, 0, 0, 0, 39, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 0, 0, 0, 75, 17, 19, 0, 5, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 0, 0, 0, 72, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 23, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 23, 23, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 120, 81, 81, 0, 0, 0, 75, 17, 19, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 108, 66, 66, 0, 72, 13, 15, 0, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 23, 23, 0, 25, 25, 25, 25, 25, 25, 25, 25, 0, 25, 128, 93, 93, 0, 25, 25, 25, 25, 25, 25, 25, 25, 0, 25, 132, 93, 93, 0, 21, 21, 21, 21, 21, 
21, 21, 21, 120, 81, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 81, 81, 0, 0, 0, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 23, 23, 0, 21, 21, 21, 21, 21, 21, 120, 81, 81, 0, 0, 0, 75, 17, 19, 0, 5, 5, 5, 5, 5, 0, 72, 13, 15, 0, 0, 0, 0, 0, 0, 1, 0, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 21, 81, 81, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 81, 81, 0, 0, 0, 153, 153, 153, 153, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 23, 23, 0, 21, 21, 21, 21, 21, 21, 21, 21, 120, 81, 81, 0, 0, 0, 75, 17, 19, 0, 116, 116, 116, 116, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 75, 17, 19, 0, 63, 63, 63, 63, 112, 112, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 39, 0, 21, 21, 21, 21, 21, 21, 21, 78, 78, 120, 0, 0, 0, 72, 13, 15, 0, 39, 0, 33, 33, 33, 33, 1, 0, 0, 0, 0, 0, 0, 23, 23, 23, 23, 0, 21, 21, 21, 21, 21, 81, 81, 81, 81, 0, 0, 0, 153, 153, 153, 153, 0, 0, 0, 0, 0, 0, 0, 153, 153, 153, 153, 0, 0, 0, 0, 0, 0, 0, 153, 153, 153, 153, 0, 0, 0, 0, 0, 0, 0, 153, 153, 153, 153, 0, 0, 0, 0, 0, 0, 0, 153, 153, 153, 153, 0, 0, 0, 0, 0, 0, 0, 153, 153, 153, 153, 0, 0, 0, 0, 0, 0, 0, 153, 153, 153, 153, 0, 0, 0, 0, 0, 0, 0, 153, 153, 153, 153, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 39, 23, 23, 0, 21, 21, 21, 21, 21, 21, 21, 21, 81, 81, 120, 81, 81, 0, 0, 0, 31, 31, 31, 31, 0, 35, 35, 35, 35, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 0, 0, 0, 72, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 144, 0, 21, 21, 21, 21, 148, 0, 0, 0, 37, 37, 37, 37, 1, 0, 136, 136, 136, 136, 0, 0, 0, 0, 0, 136, 136, 136, 136, 0, 0, 0, 0, 0, 0, 0, 136, 136, 136, 136, 0, 0, 0, 0, 0, 0, 0, 136, 136, 136, 136, 0, 0, 0, 0, 0, 0, 0, 136, 136, 136, 136, 0, 0, 0, 0, 0, 0, 0, 136, 136, 136, 136, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 23, 23, 0, 21, 21, 21, 21, 21, 21, 21, 21, 120, 81, 81, 0, 0, 0, 158, 158, 158, 158, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 23, 23, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 120, 81, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 81, 81, 0, 0, 0, 153, 153, 153, 153, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 21, 81, 81, 0, 0, 0, 136, 136, 136, 136, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 136, 136, 136, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 81, 81, 0, 0, 0, 84, 84, 84, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 75, 17, 19, 0, 57, 57, 57, 57, 57, 57, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 21, 21, 81, 81, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 21, 81, 81, 0, 0, 0, 72, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 105, 105, 105, 105, 105, 105, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 39, 0, 27, 27, 27, 27, 27, 27, 0, 0, 1, 0, 0, 0, 0, 0, 39, 0, 21, 21, 21, 21, 120, 0, 0, 0, 54, 54, 54, 54, 54, 54, 1, 0, 0, 39, 0, 27, 27, 27, 27, 0, 0, 1, 0, 0, 0, 0, 0, 39, 0, 21, 21, 21, 21, 120, 0, 0, 0, 51, 51, 51, 51, 1, 0, 29, 29, 29, 29, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 39, 0, 21, 21, 21, 21, 120, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72, 13, 15, 0, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 21, 21, 81, 81, 0, 0, 0, 75, 17, 19, 0, 5, 5, 5, 5, 5, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 81, 81, 0, 0, 0, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 72, 13, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 0, 21, 21, 21, 21, 21, 81, 81, 0, 0, 0, 25, 25, 25, 25, 25, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21, 7, 0, 0 }; static const short _spss_commands_eof_trans[] = { 3286, 3287, 3288, 3289, 3290, 3291, 3292, 3293, 3294, 3295, 3296, 3297, 3298, 3299, 3300, 3301, 3302, 3303, 3304, 3305, 3306, 3307, 3308, 3309, 3310, 3311, 3312, 3313, 3314, 3315, 3316, 3317, 3318, 3319, 3320, 3321, 3322, 3323, 3324, 3325, 3326, 3327, 3328, 3329, 3330, 3331, 3332, 3333, 3334, 3335, 3336, 3337, 3338, 3339, 3340, 3341, 3342, 3343, 3344, 3345, 3346, 3347, 3348, 3349, 3350, 3351, 3352, 3353, 3354, 3355, 3356, 3357, 3358, 3359, 3360, 3361, 3362, 3363, 3364, 3365, 3366, 3367, 3368, 3369, 3370, 3371, 3372, 3373, 3374, 3375, 3376, 3377, 3378, 3379, 3380, 3381, 3382, 3383, 3384, 3385, 3386, 3387, 3388, 3389, 3390, 3391, 3392, 3393, 3394, 3395, 3396, 3397, 3398, 3399, 3400, 3401, 3402, 3403, 3404, 3405, 3406, 3407, 3408, 3409, 3410, 3411, 3412, 3413, 3414, 3415, 3416, 3417, 3418, 3419, 3420, 3421, 3422, 3423, 3424, 3425, 3426, 3427, 3428, 3429, 3430, 3431, 3432, 3433, 3434, 3435, 3436, 3437, 3438, 3439, 3440, 3441, 3442, 3443, 3444, 3445, 3446, 3447, 3448, 3449, 3450, 3451, 3452, 3453, 3454, 3455, 3456, 3457, 3458, 3459, 3460, 3461, 3462, 3463, 3464, 3465, 
3466, 3467, 3468, 3469, 3470, 3471, 3472, 3473, 3474, 3475, 3476, 3477, 3478, 3479, 3480, 3481, 3482, 3483, 3484, 3485, 3486, 3487, 3488, 3489, 3490, 3491, 3492, 3493, 3494, 3495, 3496, 3497, 3498, 3499, 3500, 3501, 3502, 3503, 3504, 3505, 3506, 3507, 3508, 3509, 3510, 3511, 3512, 3513, 3514, 3515, 3516, 3517, 3518, 3519, 3520, 3521, 3522, 3523, 3524, 3525, 3526, 3527, 3528, 3529, 3530, 3531, 3532, 3533, 3534, 3535, 3536, 3537, 3538, 3539, 3540, 3541, 3542, 3543, 3544, 3545, 3546, 3547, 3548, 3549, 3550, 3551, 3552, 3553, 3554, 3555, 3556, 3557, 3558, 3559, 3560, 3561, 3562, 3563, 3564, 3565, 3566, 3567, 3568, 3569, 3570, 3571, 3572, 3573, 3574, 3575, 3576, 3577, 3578, 3579, 3580, 3581, 3582, 3583, 3584, 3585, 3586, 3587, 3588, 3589, 3590, 3591, 3592, 3593, 3594, 3595, 3596, 3597, 3598, 3599, 3600, 3601, 3602, 3603, 3604, 3605, 3606, 3607, 3608, 3609, 3610, 3611, 3612, 3613, 3614, 3615, 3616, 3617, 3618, 3619, 3620, 3621, 3622, 3623, 3624, 3625, 3626, 3627, 3628, 3629, 3630, 3631, 3632, 3633, 3634, 3635, 3636, 3637, 3638, 3639, 3640, 3641, 3642, 3643, 3644, 3645, 3646, 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, 3657, 3658, 3659, 3660, 3661, 3662, 3663, 3664, 3665, 3666, 3667, 3668, 3669, 3670, 3671, 3672, 3673, 3674, 3675, 3676, 3677, 3678, 3679, 3680, 3681, 3682, 3683, 3684, 3685, 3686, 3687, 3688, 3689, 3690, 3691, 3692, 3693, 3694, 3695, 3696, 3697, 3698, 3699, 3700, 3701, 3702, 3703, 3704, 3705, 3706, 3707, 3708, 3709, 3710, 3711, 3712, 3713, 3714, 3715, 3716, 3717, 3718, 3719, 3720, 3721, 3722, 3723, 3724, 3725, 3726, 3727, 3728, 3729, 3730, 3731, 3732, 3733, 3734, 3735, 3736, 3737, 3738, 3739, 3740, 3741, 3742, 3743, 3744, 3745, 3746, 3747, 3748, 3749, 3750, 3751, 3752, 3753, 3754, 3755, 3756, 3757, 3758, 3759, 3760, 3761, 3762, 3763, 3764, 3765, 3766, 3767, 3768, 3769, 3770, 3771, 3772, 3773, 3774, 3775, 3776, 3777, 3778, 3779, 3780, 3781, 3782, 3783, 3784, 3785, 3786, 3787, 3788, 3789, 3790, 3791, 3792, 3793, 3794, 3795, 3796, 3797, 3798, 
3799, 3800, 3801, 3802, 3803, 3804, 3805, 3806, 3807, 3808, 3809, 3810, 3811, 3812, 3813, 3814, 3815, 3816, 3817, 3818, 3819, 3820, 3821, 3822, 3823, 3824, 3825, 3826, 3827, 3828, 3829, 3830, 3831, 3832, 3833, 3834, 3835, 3836, 3837, 3838, 3839, 3840, 3841, 3842, 3843, 3844, 3845, 3846, 3847, 3848, 3849, 3850, 3851, 3852, 3853, 3854, 3855, 3856, 3857, 3858, 3859, 3860, 3861, 3862, 3863, 3864, 3865, 3866, 3867, 3868, 3869, 3870, 3871, 3872, 3873, 3874, 3875, 3876, 3877, 3878, 3879, 3880, 3881, 3882, 3883, 3884, 3885, 3886, 3887, 3888, 3889, 3890, 3891, 3892, 3893, 3894, 3895, 3896, 3897, 3898, 3899, 3900, 3901, 3902, 3903, 3904, 3905, 3906, 3907, 3908, 3909, 3910, 3911, 3912, 3913, 3914, 3915, 3916, 3917, 0 }; static const int spss_commands_start = 628; static const int spss_commands_en_main = 628; #line 14 "src/txt/readstat_spss_commands_read.rl" readstat_schema_t *readstat_parse_spss_commands(readstat_parser_t *parser, const char *filepath, void *user_ctx, readstat_error_t *outError) { if (parser->io->open(filepath, parser->io->io_ctx) == -1) { if (outError) *outError = READSTAT_ERROR_OPEN; return NULL; } readstat_schema_t *schema = NULL; unsigned char *bytes = NULL; readstat_error_t error = READSTAT_OK; ssize_t len = parser->io->seek(0, READSTAT_SEEK_END, parser->io->io_ctx); if (len == -1) { error = READSTAT_ERROR_SEEK; goto cleanup; } parser->io->seek(0, READSTAT_SEEK_SET, parser->io->io_ctx); bytes = malloc(len); parser->io->read(bytes, len, parser->io->io_ctx); unsigned char *p = bytes; unsigned char *pe = bytes + len; unsigned char *eof = pe; unsigned char *str_start = NULL; size_t str_len = 0; int cs; int i; int line_no = 0; uint64_t first_integer = 0, integer = 0; double double_value = NAN; unsigned char *line_start = p; char varname[32]; char argname[32]; char string_value[32]; char buf[1024]; char var_list[1024][32]; long var_col = 0; long var_row = 0; long var_len = 0; long var_count = 0; readstat_type_t var_type = READSTAT_TYPE_DOUBLE; label_type_t 
label_type = LABEL_TYPE_DOUBLE; int labelset_count = 0; if ((schema = calloc(1, sizeof(readstat_schema_t))) == NULL) { error = READSTAT_ERROR_MALLOC; goto cleanup; } schema->rows_per_observation = 1; #line 1893 "src/txt/readstat_spss_commands_read.c" { cs = (int)spss_commands_start; } #line 1898 "src/txt/readstat_spss_commands_read.c" { int _klen; unsigned int _trans = 0; const char * _keys; const signed char * _acts; unsigned int _nacts; _resume: {} if ( p == pe && p != eof ) goto _out; if ( p == eof ) { if ( _spss_commands_eof_trans[cs] > 0 ) { _trans = (unsigned int)_spss_commands_eof_trans[cs] - 1; } } else { _keys = ( _spss_commands_trans_keys + (_spss_commands_key_offsets[cs])); _trans = (unsigned int)_spss_commands_index_offsets[cs]; _klen = (int)_spss_commands_single_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + _klen - 1; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _keys += _klen; _trans += (unsigned int)_klen; break; } _mid = _lower + ((_upper-_lower) >> 1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 1; else if ( ( (*( p))) > (*( _mid)) ) _lower = _mid + 1; else { _trans += (unsigned int)(_mid - _keys); goto _match; } } } _klen = (int)_spss_commands_range_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + (_klen<<1) - 2; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _trans += (unsigned int)_klen; break; } _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 2; else if ( ( (*( p))) > (*( _mid + 1)) ) _lower = _mid + 2; else { _trans += (unsigned int)((_mid - _keys)>>1); break; } } } _match: {} } cs = (int)_spss_commands_cond_targs[_trans]; if ( _spss_commands_cond_actions[_trans] != 0 ) { _acts = ( _spss_commands_actions + (_spss_commands_cond_actions[_trans])); _nacts = (unsigned int)(*( _acts)); _acts += 1; while ( _nacts > 0 ) { switch ( (*( _acts)) ) { case 0: { { #line 78 
"src/txt/readstat_spss_commands_read.rl" integer = 0; } #line 1983 "src/txt/readstat_spss_commands_read.c" break; } case 1: { { #line 82 "src/txt/readstat_spss_commands_read.rl" integer = 10 * integer + ((( (*( p)))) - '0'); } #line 1994 "src/txt/readstat_spss_commands_read.c" break; } case 2: { { #line 86 "src/txt/readstat_spss_commands_read.rl" var_col = integer - 1; var_len = 1; } #line 2006 "src/txt/readstat_spss_commands_read.c" break; } case 3: { { #line 91 "src/txt/readstat_spss_commands_read.rl" var_len = integer - var_col; } #line 2017 "src/txt/readstat_spss_commands_read.c" break; } case 4: { { #line 95 "src/txt/readstat_spss_commands_read.rl" readstat_copy_quoted(buf, sizeof(buf), (char *)str_start, str_len); } #line 2028 "src/txt/readstat_spss_commands_read.c" break; } case 5: { { #line 99 "src/txt/readstat_spss_commands_read.rl" readstat_copy_quoted(string_value, sizeof(string_value), (char *)str_start, str_len); } #line 2039 "src/txt/readstat_spss_commands_read.c" break; } case 6: { { #line 107 "src/txt/readstat_spss_commands_read.rl" readstat_copy(varname, sizeof(varname), (char *)str_start, str_len); } #line 2050 "src/txt/readstat_spss_commands_read.c" break; } case 7: { { #line 111 "src/txt/readstat_spss_commands_read.rl" readstat_copy(argname, sizeof(argname), (char *)str_start, str_len); } #line 2061 "src/txt/readstat_spss_commands_read.c" break; } case 8: { { #line 115 "src/txt/readstat_spss_commands_read.rl" readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname); entry->variable.type = var_type; entry->row = var_row; entry->col = var_col; entry->len = var_len; } #line 2076 "src/txt/readstat_spss_commands_read.c" break; } case 9: { { #line 123 "src/txt/readstat_spss_commands_read.rl" readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname); readstat_copy(entry->variable.label, sizeof(entry->variable.label), buf, sizeof(buf)); } #line 2088 "src/txt/readstat_spss_commands_read.c" break; 
} case 10: { { #line 128 "src/txt/readstat_spss_commands_read.rl" var_count = 0; } #line 2099 "src/txt/readstat_spss_commands_read.c" break; } case 11: { { #line 132 "src/txt/readstat_spss_commands_read.rl" if (var_count < sizeof(var_list)/sizeof(var_list[0])) { memcpy(var_list[var_count++], varname, sizeof(varname)); } } #line 2112 "src/txt/readstat_spss_commands_read.c" break; } case 12: { { #line 138 "src/txt/readstat_spss_commands_read.rl" if (strcasecmp(argname, "FIRSTCASE") == 0) { schema->first_line = integer; } if (strcasecmp(argname, "DELIMITERS") == 0) { schema->field_delimiter = buf[0]; } } #line 2128 "src/txt/readstat_spss_commands_read.c" break; } case 13: { { #line 147 "src/txt/readstat_spss_commands_read.rl" char labelset_name[256]; snprintf(labelset_name, sizeof(labelset_name), "labels%d", labelset_count++); for (i=0; ilabelset, sizeof(entry->labelset), labelset_name, sizeof(labelset_name)); } } #line 2144 "src/txt/readstat_spss_commands_read.c" break; } case 14: { { #line 156 "src/txt/readstat_spss_commands_read.rl" char labelset_name[256]; snprintf(labelset_name, sizeof(labelset_name), "labels%d", labelset_count); error = submit_value_label(parser, labelset_name, label_type, first_integer, integer, double_value, string_value, buf, user_ctx); if (error != READSTAT_OK) goto cleanup; } #line 2160 "src/txt/readstat_spss_commands_read.c" break; } case 15: { { #line 165 "src/txt/readstat_spss_commands_read.rl" str_start = p; } #line 2169 "src/txt/readstat_spss_commands_read.c" break; } case 16: { { #line 165 "src/txt/readstat_spss_commands_read.rl" str_len = p - str_start; } #line 2178 "src/txt/readstat_spss_commands_read.c" break; } case 17: { { #line 167 "src/txt/readstat_spss_commands_read.rl" str_start = p; } #line 2187 "src/txt/readstat_spss_commands_read.c" break; } case 18: { { #line 167 "src/txt/readstat_spss_commands_read.rl" str_len = p - str_start; } #line 2196 "src/txt/readstat_spss_commands_read.c" break; } case 19: { { #line 171 
"src/txt/readstat_spss_commands_read.rl" line_no++; line_start = p; } #line 2205 "src/txt/readstat_spss_commands_read.c" break; } case 20: { { #line 173 "src/txt/readstat_spss_commands_read.rl" str_start = p; } #line 2214 "src/txt/readstat_spss_commands_read.c" break; } case 21: { { #line 173 "src/txt/readstat_spss_commands_read.rl" str_len = p - str_start; } #line 2223 "src/txt/readstat_spss_commands_read.c" break; } case 22: { { #line 191 "src/txt/readstat_spss_commands_read.rl" var_type = READSTAT_TYPE_STRING; } #line 2232 "src/txt/readstat_spss_commands_read.c" break; } case 23: { { #line 194 "src/txt/readstat_spss_commands_read.rl" var_type = READSTAT_TYPE_STRING; } #line 2241 "src/txt/readstat_spss_commands_read.c" break; } case 24: { { #line 195 "src/txt/readstat_spss_commands_read.rl" var_type = READSTAT_TYPE_DOUBLE; } #line 2250 "src/txt/readstat_spss_commands_read.c" break; } case 25: { { #line 196 "src/txt/readstat_spss_commands_read.rl" var_type = READSTAT_TYPE_DOUBLE; } #line 2259 "src/txt/readstat_spss_commands_read.c" break; } case 26: { { #line 197 "src/txt/readstat_spss_commands_read.rl" var_type = READSTAT_TYPE_STRING; } #line 2268 "src/txt/readstat_spss_commands_read.c" break; } case 27: { { #line 218 "src/txt/readstat_spss_commands_read.rl" var_row = integer - 1; } #line 2277 "src/txt/readstat_spss_commands_read.c" break; } case 28: { { #line 219 "src/txt/readstat_spss_commands_read.rl" var_type = READSTAT_TYPE_DOUBLE; } #line 2286 "src/txt/readstat_spss_commands_read.c" break; } case 29: { { #line 220 "src/txt/readstat_spss_commands_read.rl" var_type = READSTAT_TYPE_DOUBLE; } #line 2295 "src/txt/readstat_spss_commands_read.c" break; } case 30: { { #line 253 "src/txt/readstat_spss_commands_read.rl" label_type = -1; } #line 2304 "src/txt/readstat_spss_commands_read.c" break; } case 31: { { #line 259 "src/txt/readstat_spss_commands_read.rl" label_type = LABEL_TYPE_DOUBLE; double_value = -(double)integer; } #line 2313 
"src/txt/readstat_spss_commands_read.c" break; } case 32: { { #line 260 "src/txt/readstat_spss_commands_read.rl" label_type = LABEL_TYPE_DOUBLE; double_value = integer; } #line 2322 "src/txt/readstat_spss_commands_read.c" break; } case 33: { { #line 261 "src/txt/readstat_spss_commands_read.rl" first_integer = integer; } #line 2331 "src/txt/readstat_spss_commands_read.c" break; } case 34: { { #line 261 "src/txt/readstat_spss_commands_read.rl" label_type = LABEL_TYPE_RANGE; } #line 2340 "src/txt/readstat_spss_commands_read.c" break; } case 35: { { #line 262 "src/txt/readstat_spss_commands_read.rl" label_type = LABEL_TYPE_STRING; } #line 2349 "src/txt/readstat_spss_commands_read.c" break; } } _nacts -= 1; _acts += 1; } } if ( p == eof ) { if ( cs >= 628 ) goto _out; } else { if ( cs != 0 ) { p += 1; goto _resume; } } _out: {} } #line 312 "src/txt/readstat_spss_commands_read.rl" /* suppress warnings */ (void)spss_commands_en_main; if (cs < #line 2380 "src/txt/readstat_spss_commands_read.c" 628 #line 317 "src/txt/readstat_spss_commands_read.rl" ) { char error_buf[1024]; if (p == pe) { snprintf(error_buf, sizeof(error_buf), "Error parsing SPSS command file (end-of-file unexpectedly reached)"); } else { snprintf(error_buf, sizeof(error_buf), "Error parsing SPSS command file around line #%d, col #%ld (%c)", line_no + 1, (long)(p - line_start + 1), *p); } if (parser->handlers.error) { parser->handlers.error(error_buf, user_ctx); } error = READSTAT_ERROR_PARSE; goto cleanup; } error = submit_columns(parser, schema, user_ctx); cleanup: parser->io->close(parser->io->io_ctx); free(bytes); if (error != READSTAT_OK) { if (outError) *outError = error; readstat_schema_free(schema); schema = NULL; } return schema; } ReadStat-1.1.7/src/txt/readstat_spss_commands_read.rl000066400000000000000000000330071410722155500227340ustar00rootroot00000000000000#include #include #include "../readstat.h" #include "../readstat_strings.h" #include "readstat_schema.h" #include "readstat_copy.h" 
#include "commands_util.h"

%%{
    machine spss_commands;
    write data noerror nofinal;
}%%

/*
 * Parse an SPSS command file into a schema.
 *
 * The file named by `filepath` is opened through parser->io, read into
 * memory in one piece, and run through the `spss_commands` state machine
 * defined below. Variable positions, types and labels are recorded in a
 * freshly calloc'ed readstat_schema_t; value labels are forwarded through
 * submit_value_label() while parsing and columns through submit_columns()
 * once parsing succeeds.
 *
 * Returns the schema on success. On any failure returns NULL and, when
 * `outError` is non-NULL, stores the error code there. Parse errors are
 * additionally reported through parser->handlers.error when it is set.
 * The returned schema is presumably released by the caller with
 * readstat_schema_free(), which is what this function uses on its own
 * error path.
 */
readstat_schema_t *readstat_parse_spss_commands(readstat_parser_t *parser,
        const char *filepath, void *user_ctx, readstat_error_t *outError) {
    if (parser->io->open(filepath, parser->io->io_ctx) == -1) {
        if (outError)
            *outError = READSTAT_ERROR_OPEN;
        return NULL;
    }
    readstat_schema_t *schema = NULL;
    unsigned char *bytes = NULL;
    readstat_error_t error = READSTAT_OK;
    ssize_t len = parser->io->seek(0, READSTAT_SEEK_END, parser->io->io_ctx);
    if (len == -1) {
        error = READSTAT_ERROR_SEEK;
        goto cleanup;
    }
    /* Rewinding can fail just like the seek to the end. */
    if (parser->io->seek(0, READSTAT_SEEK_SET, parser->io->io_ctx) == -1) {
        error = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    bytes = malloc(len);
    /* malloc(0) may legitimately return NULL, so only a non-empty request
     * counts as an allocation failure. */
    if (bytes == NULL && len > 0) {
        error = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    /* A short read would leave the tail of `bytes` uninitialised and the
     * state machine would scan garbage. */
    if (parser->io->read(bytes, len, parser->io->io_ctx) < len) {
        error = READSTAT_ERROR_READ;
        goto cleanup;
    }

    unsigned char *p = bytes;
    unsigned char *pe = bytes + len;

    unsigned char *eof = pe;

    unsigned char *str_start = NULL;

    size_t str_len = 0;

    int cs;
    int i;
    int line_no = 0;
    uint64_t first_integer = 0, integer = 0;
    double double_value = NAN;
    unsigned char *line_start = p;

    char varname[32];
    char argname[32];
    char string_value[32];
    char buf[1024];
    char var_list[1024][32];
    long var_col = 0;
    long var_row = 0;
    long var_len = 0;
    long var_count = 0;
    readstat_type_t var_type = READSTAT_TYPE_DOUBLE;
    label_type_t label_type = LABEL_TYPE_DOUBLE;
    int labelset_count = 0;

    if ((schema = calloc(1, sizeof(readstat_schema_t))) == NULL) {
        error = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    schema->rows_per_observation = 1;

    %%{
        action start_integer {
            integer = 0;
        }

        action incr_integer {
            integer = 10 * integer + (fc - '0');
        }

        action copy_pos {
            var_col = integer - 1;
            var_len = 1;
        }

        action set_len {
            var_len = integer - var_col;
        }

        action copy_quoted_buf {
            readstat_copy_quoted(buf, sizeof(buf), (char *)str_start, str_len);
        }

        action copy_quoted_string {
            readstat_copy_quoted(string_value, sizeof(string_value), (char *)str_start, str_len);
        }

        action copy_string {
            readstat_copy(string_value, sizeof(string_value), (char *)str_start, str_len);
        }

        action copy_varname {
            readstat_copy(varname, sizeof(varname), (char *)str_start, str_len);
        }

        action copy_argname {
            readstat_copy(argname, sizeof(argname), (char *)str_start, str_len);
        }

        action handle_var {
            readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname);
            entry->variable.type = var_type;
            entry->row = var_row;
            entry->col = var_col;
            entry->len = var_len;
        }

        action handle_var_label {
            readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, varname);
            readstat_copy(entry->variable.label, sizeof(entry->variable.label), buf, sizeof(buf));
        }

        action reset_variable_list {
            var_count = 0;
        }

        action add_variable_to_list {
            if (var_count < sizeof(var_list)/sizeof(var_list[0])) {
                memcpy(var_list[var_count++], varname, sizeof(varname));
            }
        }

        action handle_get_data_arg {
            if (strcasecmp(argname, "FIRSTCASE") == 0) {
                schema->first_line = integer;
            }
            if (strcasecmp(argname, "DELIMITERS") == 0) {
                schema->field_delimiter = buf[0];
            }
        }

        # Attach the label set that was just completed to every variable
        # named in the current list.
        action handle_labelset {
            char labelset_name[256];
            snprintf(labelset_name, sizeof(labelset_name), "labels%d", labelset_count++);
            for (i=0; i<var_count; i++) {
                readstat_schema_entry_t *entry = readstat_schema_find_or_create_entry(schema, var_list[i]);
                readstat_copy(entry->labelset, sizeof(entry->labelset), labelset_name, sizeof(labelset_name));
            }
        }

        action handle_value_label {
            char labelset_name[256];
            snprintf(labelset_name, sizeof(labelset_name), "labels%d", labelset_count);
            error = submit_value_label(parser, labelset_name, label_type,
                    first_integer, integer, double_value, string_value, buf, user_ctx);
            if (error != READSTAT_OK)
                goto cleanup;
        }

        single_quoted_string = "'" ( [^']* ) >{ str_start = fpc; } %{ str_len = fpc - str_start; } "'";

        double_quoted_string = "\"" ( [^"]* ) >{ str_start = fpc; } %{ str_len = fpc - str_start; } "\"";

        quoted_string = ( single_quoted_string | double_quoted_string ) %copy_quoted_buf;

        newline = ( "\n" | "\r\n" ) %{ line_no++; line_start = p; };

        identifier = ( [A-Za-z] [_A-Za-z0-9]* ) >{ str_start = fpc; } %{ str_len = fpc - str_start; };

        integer = [0-9]+ >start_integer $incr_integer;

        double_value = "-"? integer ("." integer)?;

        whitespace = [ \t] | newline;

        pos = ( integer %copy_pos ("-" whitespace* integer %set_len)? );

        multiline_comment = "/*" ( any* - ( any* "*/" any* ) ) "*/";

        comment = "*" ( any* - ( any* "." whitespace* newline any* ) ) "." whitespace* newline |
            multiline_comment |
            "COMMENT " [^\.]* ".";

        var = identifier %copy_varname;

        width = (whitespace+ "(A"i integer? ")" %{ var_type = READSTAT_TYPE_STRING; } |
            whitespace+ "(" integer ")" )?;

        type = ( "A"i integer %{ var_type = READSTAT_TYPE_STRING; } |
            "F"i integer %{ var_type = READSTAT_TYPE_DOUBLE; } ("." integer)? |
            "DATE"i integer %{ var_type = READSTAT_TYPE_DOUBLE; } |
            "ADATE"i integer %{ var_type = READSTAT_TYPE_STRING; }
            );

        slash_arg = "/" identifier %copy_argname whitespace* "=" whitespace* (identifier | quoted_string | integer);

        slash_args = slash_arg ( whitespace* slash_arg)*;

        select_cmd = "SELECT"i whitespace+ "IF"i whitespace+ (whitespace | identifier | "-"? integer | "(" | ")" | quoted_string )+ ".";

        file_handle_cmd = "FILE"i whitespace+ "HANDLE"i whitespace+ identifier whitespace+ slash_args whitespace* ".";

        save_cmd = "SAVE"i whitespace+ ( "OUTFILE"i | "DICTIONARY"i ) whitespace* "="? whitespace* quoted_string "/"? whitespace* ("/" whitespace* "COMPRESSED" whitespace*)? ".";

        data_list_arg = ( "RECORD"i ("S"i)? whitespace* "=" whitespace* integer |
            "FILE"i whitespace* "=" whitespace* (quoted_string | identifier) |
            "TABLE"i |
            "FIXED"i );

        data_list_args = data_list_arg (whitespace+ data_list_arg)*;

        data_list_cmd = "DATA"i whitespace+ "LIST"i whitespace+ data_list_args whitespace*
            ( "/" ( integer %{ var_row = integer - 1; } )?
              whitespace+ var whitespace+ pos %{ var_type = READSTAT_TYPE_DOUBLE; } width
              (whitespace+ var >handle_var whitespace+ pos %{ var_type = READSTAT_TYPE_DOUBLE; } width )+ whitespace*
            )+
            "." %handle_var;

        get_data_variable = var whitespace+ type %handle_var;

        get_data_variable_list = get_data_variable ( whitespace+ get_data_variable )*;

        get_data_arg = ( slash_arg %handle_get_data_arg | "/VARIABLES"i whitespace* "=" whitespace* get_data_variable_list );

        get_data_args = get_data_arg (whitespace* get_data_arg)*;

        get_data_cmd = "GET"i whitespace+ "DATA"i whitespace+ get_data_args whitespace* ".";

        get_file_cmd = "GET"i whitespace+ "FILE"i whitespace* ("=" whitespace*)? quoted_string whitespace* ".";

        dataset_cmd_arg = "WINDOW"i whitespace* "=" whitespace* identifier;

        dataset_cmd_args = dataset_cmd_arg (whitespace+ dataset_cmd_arg)*;

        dataset_cmd = "DATASET"i whitespace+ "NAME"i whitespace+ (identifier | quoted_string) (whitespace+ dataset_cmd_args)? ".";

        format_string = "F" integer "." integer;

        format_spec = identifier (whitespace+ identifier)* whitespace+ "(" format_string ")";

        formats_cmd = "FORMATS"i whitespace+ format_spec (whitespace+ "/" whitespace+ format_spec)* whitespace* ".";

        variable_labels_cmd = "VARIABLE"i whitespace+ "LABEL"i ("S"i)?
            (whitespace+ var whitespace+ quoted_string %handle_var_label (whitespace* "/")? )+
            whitespace* ".";

        variable_list = var %reset_variable_list %add_variable_to_list (whitespace+ var %add_variable_to_list)*;

        missing_value_label = "." %{ label_type = -1; } whitespace+ quoted_string %handle_value_label;

        missing_values_item = var whitespace+ "(" quoted_string ")";

        missing_values_list = missing_values_item (whitespace+ missing_values_item)*;

        value_label = ( "-" integer %{ label_type = LABEL_TYPE_DOUBLE; double_value = -(double)integer; } |
            integer %{ label_type = LABEL_TYPE_DOUBLE; double_value = integer; } |
            integer whitespace+ "-" whitespace+ %{ first_integer = integer; } integer %{ label_type = LABEL_TYPE_RANGE; } |
            quoted_string %{ label_type = LABEL_TYPE_STRING; } %copy_quoted_string
            ) whitespace+ quoted_string %handle_value_label;

        variable_value_labels = variable_list whitespace+ ( value_label | missing_value_label ) (whitespace+ value_label)* whitespace* %handle_labelset;

        variable_level = variable_list whitespace+ ( "(SCALE)"i | "(NOMINAL)"i | "(ORDINAL)"i );

        variable_level_subcmd = "VARIABLE"i whitespace+ "LEVEL"i whitespace+ variable_level ( whitespace* "/" whitespace* variable_level )*;

        value_labels_cmd = "VALUE"i whitespace+ "LABELS"i whitespace+ ("/" whitespace*)?
            variable_value_labels ( "/" whitespace* variable_value_labels )*
            ( "/" whitespace* ( variable_level_subcmd whitespace* )? )? ".";

        missing_values_cmd = "MISSING"i whitespace+ "VALUES"i whitespace+ missing_values_list whitespace* ".";

        recode_cmd = "RECODE"i whitespace+ identifier whitespace+ "(" double_value (whitespace+ double_value)* "=" whitespace* "SYSMIS" whitespace* ")" whitespace* ".";

        execute_cmd = "EXECUTE"i whitespace* ".";

        list_cmd = "LIST"i whitespace* ".";

        display_cmd = "DISPLAY"i (whitespace+ identifier)* whitespace* ".";

        input_program_cmd = "INPUT"i whitespace+ "PROGRAM"i whitespace* ".";

        end_input_program_cmd = "END"i whitespace+ input_program_cmd;

        set_cmd = "SET"i whitespace+ identifier whitespace* "=" whitespace* (identifier | integer | quoted_string) whitespace* ".";

        command = file_handle_cmd | data_list_cmd | get_data_cmd | get_file_cmd | dataset_cmd | display_cmd | formats_cmd | missing_values_cmd | variable_labels_cmd | value_labels_cmd | recode_cmd | select_cmd | save_cmd | list_cmd | input_program_cmd | end_input_program_cmd | set_cmd | execute_cmd;

        main := ( whitespace | comment | command )*;

        write init;
        write exec;
    }%%

    /* suppress warnings */
    (void)spss_commands_en_main;

    /* Any state below the first final state means the input was rejected. */
    if (cs < %%{ write first_final; }%%) {
        char error_buf[1024];
        if (p == pe) {
            snprintf(error_buf, sizeof(error_buf), "Error parsing SPSS command file (end-of-file unexpectedly reached)");
        } else {
            snprintf(error_buf, sizeof(error_buf), "Error parsing SPSS command file around line #%d, col #%ld (%c)",
                    line_no + 1, (long)(p - line_start + 1), *p);
        }
        if (parser->handlers.error) {
            parser->handlers.error(error_buf, user_ctx);
        }
        error = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    error = submit_columns(parser, schema, user_ctx);

cleanup:
    parser->io->close(parser->io->io_ctx);
    free(bytes);
    if (error != READSTAT_OK) {
        if (outError)
            *outError = error;
        /* schema is still NULL when we bail out before the calloc above
         * succeeds (seek, malloc or read failure). readstat_schema_free()
         * is defined elsewhere and is not known to accept NULL, so only
         * hand it a real schema. */
        if (schema)
            readstat_schema_free(schema);
        schema = NULL;
    }

    return schema;
}
ReadStat-1.1.7/src/txt/readstat_stata_dictionary_read.c000066400000000000000000000637631410722155500232450ustar00rootroot00000000000000#line 1 "src/txt/readstat_stata_dictionary_read.rl" #include
#include "../readstat.h" #include "readstat_schema.h" #include "readstat_copy.h" #line 11 "src/txt/readstat_stata_dictionary_read.c" static const signed char _stata_dictionary_actions[] = { 0, 1, 1, 1, 4, 1, 6, 1, 7, 1, 8, 1, 9, 1, 11, 1, 13, 1, 14, 1, 15, 1, 16, 1, 17, 1, 18, 1, 19, 1, 20, 1, 27, 2, 0, 1, 2, 2, 11, 2, 7, 8, 2, 10, 4, 2, 12, 5, 2, 13, 3, 2, 13, 9, 3, 13, 2, 11, 3, 14, 2, 11, 3, 15, 2, 11, 3, 16, 2, 11, 3, 17, 2, 11, 3, 18, 2, 11, 3, 19, 2, 11, 3, 20, 2, 11, 3, 21, 12, 5, 3, 22, 12, 5, 3, 23, 12, 5, 3, 24, 12, 5, 3, 25, 12, 5, 3, 26, 12, 5, 3, 28, 0, 1, 4, 13, 3, 2, 11, 0 }; static const short _stata_dictionary_key_offsets[] = { 0, 0, 4, 6, 10, 11, 12, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 27, 33, 39, 40, 41, 42, 43, 44, 48, 61, 74, 75, 76, 77, 81, 86, 91, 92, 110, 128, 129, 131, 132, 133, 135, 147, 153, 171, 175, 176, 177, 178, 179, 180, 181, 185, 190, 193, 211, 224, 225, 238, 251, 263, 273, 274, 275, 279, 283, 285, 293, 295, 299, 303, 308, 310, 323, 336, 349, 362, 375, 387, 400, 413, 426, 439, 451, 464, 477, 489, 502, 515, 528, 540, 553, 566, 578, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 609, 614, 617, 635, 637, 638, 639, 641, 645, 650, 653, 671, 672, 676, 681, 684, 702, 703, 704, 705, 706, 710, 715, 718, 736, 737, 738, 739, 740, 741, 742, 761, 765, 770, 773, 791, 803, 804, 805, 806, 807, 808, 812, 817, 822, 823, 824, 0 }; static const char _stata_dictionary_trans_keys[] = { 42, 47, 100, 105, 10, 13, 42, 47, 100, 105, 42, 42, 42, 47, 105, 99, 116, 105, 111, 110, 97, 114, 121, 9, 10, 13, 32, 9, 10, 13, 32, 117, 123, 9, 10, 13, 32, 117, 123, 10, 115, 105, 110, 103, 9, 10, 13, 32, 9, 10, 13, 32, 34, 92, 95, 45, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 92, 95, 45, 57, 65, 90, 97, 122, 10, 34, 34, 9, 10, 13, 32, 9, 10, 13, 32, 123, 9, 10, 13, 32, 123, 10, 9, 10, 13, 32, 42, 47, 95, 98, 100, 102, 105, 108, 115, 125, 65, 90, 97, 122, 9, 10, 13, 32, 42, 47, 95, 98, 100, 102, 105, 108, 115, 125, 65, 90, 97, 122, 10, 
10, 13, 42, 42, 42, 47, 9, 10, 13, 32, 46, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 37, 9, 10, 13, 32, 42, 47, 95, 98, 100, 102, 105, 108, 115, 125, 65, 90, 97, 122, 99, 102, 108, 110, 111, 108, 117, 109, 110, 40, 9, 32, 48, 57, 9, 32, 41, 48, 57, 9, 32, 41, 9, 10, 13, 32, 42, 47, 95, 98, 100, 102, 105, 108, 115, 125, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 121, 48, 57, 65, 90, 97, 122, 10, 9, 10, 13, 32, 46, 95, 116, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 101, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 34, 37, 65, 90, 97, 122, 34, 34, 9, 10, 13, 32, 9, 10, 13, 32, 48, 57, 44, 46, 83, 115, 48, 57, 101, 103, 48, 57, 48, 57, 101, 103, 9, 10, 13, 32, 9, 10, 13, 32, 34, 48, 57, 9, 10, 13, 32, 46, 95, 111, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 117, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 98, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 108, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 101, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 108, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 111, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 97, 48, 57, 65, 90, 98, 122, 9, 10, 13, 32, 46, 95, 116, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 110, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 116, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 111, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 110, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 103, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 116, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 114, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 48, 57, 65, 90, 97, 122, 9, 10, 13, 32, 46, 95, 48, 57, 65, 90, 97, 122, 105, 114, 115, 116, 108, 105, 110, 101, 111, 102, 102, 105, 108, 101, 40, 9, 32, 48, 57, 9, 32, 41, 48, 57, 9, 32, 41, 9, 
10, 13, 32, 42, 47, 95, 98, 100, 102, 105, 108, 115, 125, 65, 90, 97, 122, 105, 114, 110, 101, 40, 115, 9, 32, 48, 57, 9, 32, 41, 48, 57, 9, 32, 41, 9, 10, 13, 32, 42, 47, 95, 98, 100, 102, 105, 108, 115, 125, 65, 90, 97, 122, 40, 9, 32, 48, 57, 9, 32, 41, 48, 57, 9, 32, 41, 9, 10, 13, 32, 42, 47, 95, 98, 100, 102, 105, 108, 115, 125, 65, 90, 97, 122, 101, 99, 108, 40, 9, 32, 48, 57, 9, 32, 41, 48, 57, 9, 32, 41, 9, 10, 13, 32, 42, 47, 95, 98, 100, 102, 105, 108, 115, 125, 65, 90, 97, 122, 101, 119, 108, 105, 110, 101, 9, 10, 13, 32, 40, 42, 47, 95, 98, 100, 102, 105, 108, 115, 125, 65, 90, 97, 122, 9, 32, 48, 57, 9, 32, 41, 48, 57, 9, 32, 41, 9, 10, 13, 32, 42, 47, 95, 98, 100, 102, 105, 108, 115, 125, 65, 90, 97, 122, 9, 10, 13, 32, 92, 95, 45, 57, 65, 90, 97, 122, 110, 102, 105, 108, 101, 9, 10, 13, 32, 9, 10, 13, 32, 100, 9, 10, 13, 32, 100, 10, 10, 0 }; static const signed char _stata_dictionary_single_lengths[] = { 0, 4, 2, 4, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 6, 6, 1, 1, 1, 1, 1, 4, 7, 7, 1, 1, 1, 4, 5, 5, 1, 14, 14, 1, 2, 1, 1, 2, 6, 6, 14, 4, 1, 1, 1, 1, 1, 1, 2, 3, 3, 14, 7, 1, 7, 7, 6, 6, 1, 1, 4, 4, 0, 4, 0, 0, 4, 5, 0, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 6, 7, 7, 6, 7, 7, 7, 6, 7, 7, 6, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 14, 2, 1, 1, 2, 2, 3, 3, 14, 1, 2, 3, 3, 14, 1, 1, 1, 1, 2, 3, 3, 14, 1, 1, 1, 1, 1, 1, 15, 2, 3, 3, 14, 6, 1, 1, 1, 1, 1, 4, 5, 5, 1, 1, 0, 0 }; static const signed char _stata_dictionary_range_lengths[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 3, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 3, 0, 3, 3, 3, 2, 0, 0, 0, 0, 1, 2, 1, 2, 0, 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 1, 1, 0, 2, 0, 1, 1, 0, 2, 0, 0, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static const short 
_stata_dictionary_index_offsets[] = { 0, 0, 5, 8, 13, 15, 17, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 43, 50, 57, 59, 61, 63, 65, 67, 72, 83, 94, 96, 98, 100, 105, 111, 117, 119, 136, 153, 155, 158, 160, 162, 165, 175, 182, 199, 204, 206, 208, 210, 212, 214, 216, 220, 225, 229, 246, 257, 259, 270, 281, 291, 300, 302, 304, 309, 314, 316, 323, 325, 328, 333, 339, 341, 352, 363, 374, 385, 396, 406, 417, 428, 439, 450, 460, 471, 482, 492, 503, 514, 525, 535, 546, 557, 567, 577, 579, 581, 583, 585, 587, 589, 591, 593, 595, 597, 599, 601, 603, 605, 607, 611, 616, 620, 637, 640, 642, 644, 647, 651, 656, 660, 677, 679, 683, 688, 692, 709, 711, 713, 715, 717, 721, 726, 730, 747, 749, 751, 753, 755, 757, 759, 777, 781, 786, 790, 807, 817, 819, 821, 823, 825, 827, 832, 838, 844, 846, 848, 0 }; static const short _stata_dictionary_cond_targs[] = { 2, 4, 7, 146, 0, 3, 155, 2, 2, 4, 7, 146, 0, 5, 0, 6, 5, 6, 1, 5, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15, 0, 16, 0, 17, 18, 19, 17, 0, 17, 18, 19, 17, 20, 34, 0, 17, 18, 19, 17, 20, 34, 0, 18, 0, 21, 0, 22, 0, 23, 0, 24, 0, 25, 26, 27, 25, 0, 25, 26, 27, 25, 28, 145, 145, 145, 145, 145, 0, 25, 26, 27, 25, 28, 145, 145, 145, 145, 145, 0, 26, 0, 30, 29, 30, 29, 31, 32, 33, 31, 0, 31, 32, 33, 31, 34, 0, 31, 32, 33, 31, 34, 0, 32, 0, 34, 35, 36, 34, 37, 38, 44, 55, 72, 78, 83, 86, 90, 156, 41, 41, 0, 34, 35, 36, 34, 37, 38, 44, 55, 72, 78, 83, 86, 90, 156, 41, 41, 0, 35, 0, 35, 36, 37, 39, 0, 40, 39, 40, 34, 39, 42, 43, 56, 42, 41, 41, 41, 41, 41, 0, 42, 43, 56, 42, 61, 65, 0, 34, 35, 36, 34, 37, 38, 44, 55, 72, 78, 83, 86, 90, 156, 41, 41, 0, 45, 94, 113, 134, 0, 46, 0, 47, 0, 48, 0, 49, 0, 50, 0, 51, 0, 51, 51, 52, 0, 53, 53, 54, 52, 0, 53, 53, 54, 0, 34, 35, 36, 34, 37, 38, 44, 55, 72, 78, 83, 86, 90, 156, 41, 41, 0, 42, 43, 56, 42, 41, 41, 57, 41, 41, 41, 0, 43, 0, 42, 43, 56, 42, 41, 41, 58, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 59, 41, 41, 41, 0, 60, 43, 56, 60, 41, 41, 41, 41, 41, 0, 60, 43, 56, 60, 61, 65, 41, 41, 0, 
63, 62, 63, 62, 64, 43, 56, 64, 0, 64, 43, 56, 64, 0, 66, 0, 67, 71, 69, 69, 66, 69, 0, 68, 0, 68, 69, 0, 70, 43, 56, 70, 0, 70, 43, 56, 70, 61, 0, 68, 0, 42, 43, 56, 42, 41, 41, 73, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 74, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 75, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 76, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 77, 41, 41, 41, 0, 60, 43, 56, 60, 41, 41, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 79, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 80, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 81, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 82, 41, 41, 41, 0, 60, 43, 56, 60, 41, 41, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 84, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 85, 41, 41, 41, 0, 60, 43, 56, 60, 41, 41, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 87, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 88, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 89, 41, 41, 41, 0, 60, 43, 56, 60, 41, 41, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 91, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 92, 41, 41, 41, 0, 42, 43, 56, 42, 41, 41, 93, 41, 41, 0, 60, 43, 56, 60, 41, 41, 93, 41, 41, 0, 95, 0, 96, 0, 97, 0, 98, 0, 99, 0, 100, 0, 101, 0, 102, 0, 103, 0, 104, 0, 105, 0, 106, 0, 107, 0, 108, 0, 109, 0, 109, 109, 110, 0, 111, 111, 112, 110, 0, 111, 111, 112, 0, 34, 35, 36, 34, 37, 38, 44, 55, 72, 78, 83, 86, 90, 156, 41, 41, 0, 114, 126, 0, 115, 0, 116, 0, 117, 121, 0, 117, 117, 118, 0, 119, 119, 120, 118, 0, 119, 119, 120, 0, 34, 35, 36, 34, 37, 38, 44, 55, 72, 78, 83, 86, 90, 156, 41, 41, 0, 122, 0, 122, 122, 123, 0, 124, 124, 125, 123, 0, 124, 124, 125, 0, 34, 35, 36, 34, 37, 38, 44, 55, 72, 78, 83, 86, 90, 156, 41, 41, 0, 127, 0, 128, 0, 129, 0, 130, 0, 130, 130, 131, 0, 132, 132, 133, 131, 0, 132, 132, 133, 0, 34, 35, 36, 34, 37, 38, 44, 55, 72, 78, 83, 86, 90, 156, 41, 41, 0, 135, 0, 136, 0, 137, 0, 138, 0, 139, 0, 140, 0, 34, 35, 36, 34, 141, 37, 38, 44, 55, 72, 78, 83, 86, 90, 156, 41, 41, 0, 141, 141, 142, 0, 143, 143, 144, 142, 0, 143, 143, 144, 0, 34, 35, 36, 34, 37, 38, 44, 55, 72, 
78, 83, 86, 90, 156, 41, 41, 0, 31, 32, 33, 31, 145, 145, 145, 145, 145, 0, 147, 0, 148, 0, 149, 0, 150, 0, 151, 0, 152, 153, 154, 152, 0, 152, 153, 154, 152, 7, 0, 152, 153, 154, 152, 7, 0, 153, 0, 3, 0, 156, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 0 }; static const signed char _stata_dictionary_cond_actions[] = { 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 11, 11, 11, 0, 15, 15, 15, 15, 15, 51, 51, 51, 51, 51, 0, 0, 0, 39, 7, 9, 0, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 36, 36, 36, 36, 36, 0, 36, 36, 0, 15, 15, 15, 15, 15, 15, 15, 54, 54, 54, 54, 54, 54, 15, 54, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 48, 48, 48, 48, 48, 48, 114, 114, 114, 114, 114, 114, 48, 114, 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 66, 66, 66, 66, 66, 66, 21, 66, 66, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 86, 45, 45, 86, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 13, 0, 39, 7, 9, 0, 5, 5, 5, 5, 0, 
0, 0, 0, 0, 0, 33, 0, 31, 31, 31, 31, 1, 31, 0, 110, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 102, 45, 45, 102, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 98, 45, 45, 98, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 90, 45, 45, 90, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 94, 45, 45, 94, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 0, 0, 0, 0, 0, 45, 45, 45, 45, 0, 0, 33, 0, 0, 0, 106, 45, 45, 106, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 29, 29, 29, 29, 29, 29, 29, 82, 82, 82, 82, 82, 82, 29, 82, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 62, 62, 62, 62, 62, 62, 19, 62, 62, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 58, 58, 58, 58, 58, 58, 17, 58, 58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 27, 27, 27, 27, 27, 27, 27, 78, 78, 78, 78, 78, 78, 27, 78, 78, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 23, 23, 23, 23, 23, 23, 23, 70, 70, 70, 70, 70, 70, 23, 70, 70, 0, 0, 0, 33, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 74, 74, 74, 74, 74, 74, 25, 74, 74, 0, 42, 42, 42, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 15, 15, 15, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static const int stata_dictionary_start = 1; static const int stata_dictionary_en_main = 1; #line 11 "src/txt/readstat_stata_dictionary_read.rl" readstat_schema_t *readstat_parse_stata_dictionary(readstat_parser_t *parser, const char *filepath, void *user_ctx, readstat_error_t *outError) { if (parser->io->open(filepath, parser->io->io_ctx) == -1) { if (outError) *outError = READSTAT_ERROR_OPEN; return NULL; } readstat_schema_t *schema = NULL; unsigned char *bytes = NULL; int cb_return_value = READSTAT_HANDLER_OK; int total_entry_count = 0; int partial_entry_count = 0; readstat_error_t error = READSTAT_OK; ssize_t len = parser->io->seek(0, READSTAT_SEEK_END, parser->io->io_ctx); if (len == -1) { error = READSTAT_ERROR_SEEK; goto cleanup; } parser->io->seek(0, READSTAT_SEEK_SET, parser->io->io_ctx); bytes = malloc(len); parser->io->read(bytes, len, parser->io->io_ctx); unsigned char *p = bytes; unsigned char *pe = bytes + len; unsigned char *str_start = NULL; size_t str_len = 0; int cs; // u_char *eof = pe; int integer = 0; int current_row = 0; int current_col = 0; int line_no = 0; unsigned char *line_start = p; readstat_schema_entry_t current_entry; if ((schema = calloc(1, sizeof(readstat_schema_t))) == NULL) { error = READSTAT_ERROR_MALLOC; goto cleanup; } schema->rows_per_observation = 1; #line 545 "src/txt/readstat_stata_dictionary_read.c" { cs = (int)stata_dictionary_start; } #line 550 "src/txt/readstat_stata_dictionary_read.c" { int _klen; unsigned int _trans = 0; const char * _keys; const signed char * _acts; unsigned int _nacts; _resume: {} if ( p == pe ) goto _out; _keys = ( _stata_dictionary_trans_keys + (_stata_dictionary_key_offsets[cs])); _trans = 
(unsigned int)_stata_dictionary_index_offsets[cs]; _klen = (int)_stata_dictionary_single_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + _klen - 1; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _keys += _klen; _trans += (unsigned int)_klen; break; } _mid = _lower + ((_upper-_lower) >> 1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 1; else if ( ( (*( p))) > (*( _mid)) ) _lower = _mid + 1; else { _trans += (unsigned int)(_mid - _keys); goto _match; } } } _klen = (int)_stata_dictionary_range_lengths[cs]; if ( _klen > 0 ) { const char *_lower = _keys; const char *_upper = _keys + (_klen<<1) - 2; const char *_mid; while ( 1 ) { if ( _upper < _lower ) { _trans += (unsigned int)_klen; break; } _mid = _lower + (((_upper-_lower) >> 1) & ~1); if ( ( (*( p))) < (*( _mid)) ) _upper = _mid - 2; else if ( ( (*( p))) > (*( _mid + 1)) ) _lower = _mid + 2; else { _trans += (unsigned int)((_mid - _keys)>>1); break; } } } _match: {} cs = (int)_stata_dictionary_cond_targs[_trans]; if ( _stata_dictionary_cond_actions[_trans] != 0 ) { _acts = ( _stata_dictionary_actions + (_stata_dictionary_cond_actions[_trans])); _nacts = (unsigned int)(*( _acts)); _acts += 1; while ( _nacts > 0 ) { switch ( (*( _acts)) ) { case 0: { { #line 63 "src/txt/readstat_stata_dictionary_read.rl" integer = 0; } #line 628 "src/txt/readstat_stata_dictionary_read.c" break; } case 1: { { #line 67 "src/txt/readstat_stata_dictionary_read.rl" integer = 10 * integer + ((( (*( p)))) - '0'); } #line 639 "src/txt/readstat_stata_dictionary_read.c" break; } case 2: { { #line 71 "src/txt/readstat_stata_dictionary_read.rl" memset(¤t_entry, 0, sizeof(readstat_schema_entry_t)); current_entry.decimal_separator = '.'; current_entry.variable.type = READSTAT_TYPE_DOUBLE; current_entry.variable.index = total_entry_count; } #line 653 "src/txt/readstat_stata_dictionary_read.c" break; } case 3: { { #line 78 "src/txt/readstat_stata_dictionary_read.rl" current_entry.row = 
current_row; current_entry.col = current_col; current_col += current_entry.len; cb_return_value = READSTAT_HANDLER_OK; if (parser->handlers.variable) { current_entry.variable.index_after_skipping = partial_entry_count; cb_return_value = parser->handlers.variable(total_entry_count, ¤t_entry.variable, NULL, user_ctx); if (cb_return_value == READSTAT_HANDLER_ABORT) { error = READSTAT_ERROR_USER_ABORT; goto cleanup; } } if (cb_return_value == READSTAT_HANDLER_SKIP_VARIABLE) { current_entry.skip = 1; } else { partial_entry_count++; } schema->entries = realloc(schema->entries, sizeof(readstat_schema_entry_t) * (schema->entry_count+1)); memcpy(&schema->entries[schema->entry_count++], ¤t_entry, sizeof(readstat_schema_entry_t)); total_entry_count++; } #line 683 "src/txt/readstat_stata_dictionary_read.c" break; } case 4: { { #line 101 "src/txt/readstat_stata_dictionary_read.rl" readstat_copy(schema->filename, sizeof(schema->filename), (char *)str_start, str_len); } #line 694 "src/txt/readstat_stata_dictionary_read.c" break; } case 5: { { #line 105 "src/txt/readstat_stata_dictionary_read.rl" readstat_copy(current_entry.variable.name, sizeof(current_entry.variable.name), (char *)str_start, str_len); } #line 706 "src/txt/readstat_stata_dictionary_read.c" break; } case 6: { { #line 110 "src/txt/readstat_stata_dictionary_read.rl" readstat_copy(current_entry.variable.label, sizeof(current_entry.variable.label), (char *)str_start, str_len); } #line 718 "src/txt/readstat_stata_dictionary_read.c" break; } case 7: { { #line 115 "src/txt/readstat_stata_dictionary_read.rl" str_start = p; } #line 727 "src/txt/readstat_stata_dictionary_read.c" break; } case 8: { { #line 115 "src/txt/readstat_stata_dictionary_read.rl" str_len = p - str_start; } #line 736 "src/txt/readstat_stata_dictionary_read.c" break; } case 9: { { #line 117 "src/txt/readstat_stata_dictionary_read.rl" str_start = p; } #line 745 "src/txt/readstat_stata_dictionary_read.c" break; } case 10: { { #line 117 
"src/txt/readstat_stata_dictionary_read.rl" str_len = p - str_start; } #line 754 "src/txt/readstat_stata_dictionary_read.c" break; } case 11: { { #line 119 "src/txt/readstat_stata_dictionary_read.rl" str_start = p; } #line 763 "src/txt/readstat_stata_dictionary_read.c" break; } case 12: { { #line 119 "src/txt/readstat_stata_dictionary_read.rl" str_len = p - str_start; } #line 772 "src/txt/readstat_stata_dictionary_read.c" break; } case 13: { { #line 121 "src/txt/readstat_stata_dictionary_read.rl" line_no++; line_start = p; } #line 781 "src/txt/readstat_stata_dictionary_read.c" break; } case 14: { { #line 131 "src/txt/readstat_stata_dictionary_read.rl" schema->rows_per_observation = integer; } #line 790 "src/txt/readstat_stata_dictionary_read.c" break; } case 15: { { #line 133 "src/txt/readstat_stata_dictionary_read.rl" current_row = integer - 1; } #line 799 "src/txt/readstat_stata_dictionary_read.c" break; } case 16: { { #line 135 "src/txt/readstat_stata_dictionary_read.rl" current_col = integer - 1; } #line 808 "src/txt/readstat_stata_dictionary_read.c" break; } case 17: { { #line 137 "src/txt/readstat_stata_dictionary_read.rl" current_row++; } #line 817 "src/txt/readstat_stata_dictionary_read.c" break; } case 18: { { #line 137 "src/txt/readstat_stata_dictionary_read.rl" current_row += (integer - 1); } #line 826 "src/txt/readstat_stata_dictionary_read.c" break; } case 19: { { #line 141 "src/txt/readstat_stata_dictionary_read.rl" schema->cols_per_observation = integer; } #line 835 "src/txt/readstat_stata_dictionary_read.c" break; } case 20: { { #line 143 "src/txt/readstat_stata_dictionary_read.rl" schema->first_line = integer - 1; } #line 844 "src/txt/readstat_stata_dictionary_read.c" break; } case 21: { { #line 147 "src/txt/readstat_stata_dictionary_read.rl" current_entry.variable.type = READSTAT_TYPE_INT8; } #line 853 "src/txt/readstat_stata_dictionary_read.c" break; } case 22: { { #line 148 "src/txt/readstat_stata_dictionary_read.rl" current_entry.variable.type 
= READSTAT_TYPE_INT16; } #line 862 "src/txt/readstat_stata_dictionary_read.c" break; } case 23: { { #line 149 "src/txt/readstat_stata_dictionary_read.rl" current_entry.variable.type = READSTAT_TYPE_INT32; } #line 871 "src/txt/readstat_stata_dictionary_read.c" break; } case 24: { { #line 150 "src/txt/readstat_stata_dictionary_read.rl" current_entry.variable.type = READSTAT_TYPE_FLOAT; } #line 880 "src/txt/readstat_stata_dictionary_read.c" break; } case 25: { { #line 151 "src/txt/readstat_stata_dictionary_read.rl" current_entry.variable.type = READSTAT_TYPE_DOUBLE; } #line 889 "src/txt/readstat_stata_dictionary_read.c" break; } case 26: { { #line 152 "src/txt/readstat_stata_dictionary_read.rl" current_entry.variable.type = READSTAT_TYPE_STRING; current_entry.variable.storage_width = integer; } #line 899 "src/txt/readstat_stata_dictionary_read.c" break; } case 27: { { #line 159 "src/txt/readstat_stata_dictionary_read.rl" current_entry.len = integer; } #line 908 "src/txt/readstat_stata_dictionary_read.c" break; } case 28: { { #line 160 "src/txt/readstat_stata_dictionary_read.rl" current_entry.decimal_separator = ','; } #line 917 "src/txt/readstat_stata_dictionary_read.c" break; } } _nacts -= 1; _acts += 1; } } if ( cs != 0 ) { p += 1; goto _resume; } _out: {} } #line 174 "src/txt/readstat_stata_dictionary_read.rl" /* suppress warnings */ (void)stata_dictionary_en_main; if (cs < #line 942 "src/txt/readstat_stata_dictionary_read.c" 156 #line 179 "src/txt/readstat_stata_dictionary_read.rl" ) { char error_buf[1024]; if (p == pe) { snprintf(error_buf, sizeof(error_buf), "Error parsing .dct file (end-of-file unexpectedly reached)"); } else { snprintf(error_buf, sizeof(error_buf), "Error parsing .dct file around line #%d, col #%ld (%c)", line_no + 1, (long)(p - line_start + 1), *p); } if (parser->handlers.error) { parser->handlers.error(error_buf, user_ctx); } error = READSTAT_ERROR_PARSE; goto cleanup; } cleanup: parser->io->close(parser->io->io_ctx); free(bytes); if (error 
/* Tail of the generated readstat_stata_dictionary_read.c function (it begins on earlier lines), followed by the archive header of the .rl member; carried over unchanged. */
!= READSTAT_OK) { if (outError) *outError = error; readstat_schema_free(schema); schema = NULL; } return schema; } ReadStat-1.1.7/src/txt/readstat_stata_dictionary_read.rl000066400000000000000000000162751410722155500234340ustar00rootroot00000000000000
#include <stdlib.h>
#include "../readstat.h"
#include "readstat_schema.h"
#include "readstat_copy.h"

%%{
    machine stata_dictionary;
    write data noerror nofinal;
}%%

/*
 * Parse a Stata dictionary (.dct) file into a readstat_schema_t.
 *
 * The file is opened, sized, and read in full through parser->io, then fed to
 * the Ragel machine defined below. Every dictionary entry is reported to
 * parser->handlers.variable (when set) and appended to schema->entries.
 *
 * parser    I/O callbacks and handlers (variable, error) to use.
 * filepath  path handed to parser->io->open.
 * user_ctx  opaque pointer forwarded to the handlers.
 * outError  optional; written only when parsing fails.
 *
 * Returns the newly allocated schema, or NULL on failure. On every failure
 * after a successful open, the I/O handle is closed, the input buffer is
 * released and the partially built schema is freed with readstat_schema_free.
 */
readstat_schema_t *readstat_parse_stata_dictionary(readstat_parser_t *parser,
        const char *filepath, void *user_ctx, readstat_error_t *outError) {
    if (parser->io->open(filepath, parser->io->io_ctx) == -1) {
        if (outError)
            *outError = READSTAT_ERROR_OPEN;
        return NULL;
    }
    readstat_schema_t *schema = NULL;
    unsigned char *bytes = NULL;
    int cb_return_value = READSTAT_HANDLER_OK;
    int total_entry_count = 0;
    int partial_entry_count = 0;
    readstat_error_t error = READSTAT_OK;

    /* Seeking to the end yields the file size; rewind before reading. */
    ssize_t len = parser->io->seek(0, READSTAT_SEEK_END, parser->io->io_ctx);
    if (len == -1) {
        error = READSTAT_ERROR_SEEK;
        goto cleanup;
    }
    if (parser->io->seek(0, READSTAT_SEEK_SET, parser->io->io_ctx) == -1) {
        error = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    /* Ask for at least one byte so an empty file is not mistaken for an
     * allocation failure (malloc(0) may legitimately return NULL). */
    bytes = malloc(len > 0 ? (size_t)len : 1);
    if (bytes == NULL) {
        error = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    /* A short read would leave uninitialised bytes in front of the scanner. */
    if (parser->io->read(bytes, len, parser->io->io_ctx) < len) {
        error = READSTAT_ERROR_READ;
        goto cleanup;
    }

    unsigned char *p = bytes;
    unsigned char *pe = bytes + len;

    unsigned char *str_start = NULL;

    size_t str_len = 0;

    int cs;
    // u_char *eof = pe;

    int integer = 0;
    int current_row = 0;
    int current_col = 0;
    int line_no = 0;
    unsigned char *line_start = p;

    readstat_schema_entry_t current_entry;

    if ((schema = calloc(1, sizeof(readstat_schema_t))) == NULL) {
        error = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    schema->rows_per_observation = 1;

    %%{
        action start_integer {
            integer = 0;
        }

        action incr_integer {
            integer = 10 * integer + (fc - '0');
        }

        action start_entry {
            memset(&current_entry, 0, sizeof(readstat_schema_entry_t));
            current_entry.decimal_separator = '.';
            current_entry.variable.type = READSTAT_TYPE_DOUBLE;
            current_entry.variable.index = total_entry_count;
        }

        action end_entry {
            current_entry.row = current_row;
            current_entry.col = current_col;
            /* The next entry starts right after this one unless a marker moves it. */
            current_col += current_entry.len;
            cb_return_value = READSTAT_HANDLER_OK;
            if (parser->handlers.variable) {
                current_entry.variable.index_after_skipping = partial_entry_count;
                cb_return_value = parser->handlers.variable(total_entry_count, &current_entry.variable, NULL, user_ctx);
                if (cb_return_value == READSTAT_HANDLER_ABORT) {
                    error = READSTAT_ERROR_USER_ABORT;
                    goto cleanup;
                }
            }
            if (cb_return_value == READSTAT_HANDLER_SKIP_VARIABLE) {
                current_entry.skip = 1;
            } else {
                partial_entry_count++;
            }
            /* Grow through a temporary so the existing array is still owned by
             * the schema (and freed in cleanup) if realloc fails. */
            readstat_schema_entry_t *grown_entries = realloc(schema->entries,
                    sizeof(readstat_schema_entry_t) * (schema->entry_count+1));
            if (grown_entries == NULL) {
                error = READSTAT_ERROR_MALLOC;
                goto cleanup;
            }
            schema->entries = grown_entries;
            memcpy(&schema->entries[schema->entry_count++], &current_entry, sizeof(readstat_schema_entry_t));
            total_entry_count++;
        }

        action copy_filename {
            readstat_copy(schema->filename, sizeof(schema->filename), (char *)str_start, str_len);
        }

        action copy_varname {
            readstat_copy(current_entry.variable.name, sizeof(current_entry.variable.name),
                    (char *)str_start, str_len);
        }

        action copy_varlabel {
            readstat_copy(current_entry.variable.label, sizeof(current_entry.variable.label),
                    (char *)str_start, str_len);
        }

        # String-like tokens record their extent in str_start / str_len.
        quoted_string = "\"" ( [^"]* ) >{ str_start = fpc; } %{ str_len = fpc - str_start; } "\"";

        unquoted_string = [A-Za-z0-9_/\\\.\-]+ >{ str_start = fpc; } %{ str_len = fpc - str_start; };

        identifier = ( [A-Za-z] [_\.A-Za-z0-9]* ) >{ str_start = fpc; } %{ str_len = fpc - str_start; };

        # line_no / line_start are only used to build the parse-error message.
        newline = ( "\n" | "\r\n" ) %{ line_no++; line_start = p; };

        spacetab = [ \t];

        whitespace = spacetab | newline;

        filename = ( quoted_string | unquoted_string ) %copy_filename;

        integer = [0-9]+ >start_integer $incr_integer;

        # Markers take 1-based positions; rows and columns are stored 0-based.
        lines_marker = "_lines(" spacetab* integer spacetab* ")" %{ schema->rows_per_observation = integer; };

        line_marker = "_line(" spacetab* integer spacetab* ")" %{ current_row = integer - 1; };

        column_marker = "_column(" spacetab* integer spacetab* ")" %{ current_col = integer - 1; };

        newline_marker = "_newline" %{ current_row++; } ( "(" spacetab* integer spacetab* ")" %{ current_row += (integer - 1); } )?;

        # NOTE(review): skip_marker is defined but not listed in `marker` below,
        # so "_skip(...)" is not accepted by the grammar as written. The "- 1"
        # offset also looks copied from _column; confirm against Stata's infile
        # documentation before wiring it in.
        skip_marker = "_skip(" spacetab* integer spacetab* ")" %{ current_col += (integer - 1); };

        lrecl_marker = "_lrecl(" spacetab* integer spacetab* ")" %{ schema->cols_per_observation = integer; };

        firstlineoffile_marker = "_firstlineoffile(" spacetab* integer spacetab* ")" %{ schema->first_line = integer - 1; };

        marker = lrecl_marker | firstlineoffile_marker | lines_marker | line_marker | column_marker | newline_marker;

        type = "byte" %{ current_entry.variable.type = READSTAT_TYPE_INT8; }
            | "int" %{ current_entry.variable.type = READSTAT_TYPE_INT16; }
            | "long" %{ current_entry.variable.type = READSTAT_TYPE_INT32; }
            | "float" %{ current_entry.variable.type = READSTAT_TYPE_FLOAT; }
            | "double" %{ current_entry.variable.type = READSTAT_TYPE_DOUBLE; }
            | "str" integer %{ current_entry.variable.type = READSTAT_TYPE_STRING;
                               current_entry.variable.storage_width = integer; };

        varname = identifier %copy_varname;

        varlabel = quoted_string %copy_varlabel;

        # The width in "%<width>..." becomes the entry's field length.
        format = "%" integer %{ current_entry.len = integer; }
            ( "s" | "S" | ( ( ( "." | "," %{ current_entry.decimal_separator = ','; } ) integer )? ( "f" | "g" | "e" ) ) );

        entry = ( ( type spacetab+ )? varname ( spacetab+ format )? ( spacetab+ varlabel )? spacetab* newline ) >start_entry %end_entry;

        comment = "*" [^\r\n]* newline | "/*" ( any* - ( any* "*/" any* ) ) "*/";

        contents = ( whitespace* ( marker | entry | comment ) )* whitespace*;

        main := comment* ("infile" whitespace+)? "dictionary" whitespace+ ( "using" whitespace+ filename whitespace+ )? "{" contents "}" any*;

        write init;
        write exec;
    }%%

    /* suppress warnings */
    (void)stata_dictionary_en_main;

    /* Stopping below the first final state means the input was rejected. */
    if (cs < %%{ write first_final; }%%) {
        char error_buf[1024];
        if (p == pe) {
            snprintf(error_buf, sizeof(error_buf), "Error parsing .dct file (end-of-file unexpectedly reached)");
        } else {
            snprintf(error_buf, sizeof(error_buf), "Error parsing .dct file around line #%d, col #%ld (%c)",
                    line_no + 1, (long)(p - line_start + 1), *p);
        }
        if (parser->handlers.error) {
            parser->handlers.error(error_buf, user_ctx);
        }
        error = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

cleanup:
    parser->io->close(parser->io->io_ctx);
    free(bytes);
    if (error != READSTAT_OK) {
        if (outError)
            *outError = error;
        readstat_schema_free(schema);
        schema = NULL;
    }

    return schema;
}
/* Start of the next archive member (readstat_txt_read.c); handle_value continues on the following line. Carried over unchanged. */
ReadStat-1.1.7/src/txt/readstat_txt_read.c000066400000000000000000000203141410722155500205040ustar00rootroot00000000000000#include #include #include #include #include "../readstat.h" #include "../readstat_iconv.h" #include "../readstat_convert.h" #include "readstat_schema.h" #if defined _MSC_VER #define restrict __restrict #endif typedef struct txt_ctx_s { int rows; iconv_t converter; readstat_schema_t *schema; } txt_ctx_t; static readstat_error_t handle_value(readstat_parser_t *parser, iconv_t converter, int obs_index, readstat_schema_entry_t *entry, char *bytes, size_t len, void *ctx) { readstat_error_t error = READSTAT_OK; char *converted_value = malloc(4*len+1); readstat_variable_t *variable = &entry->variable; readstat_value_t value = { .type = variable->type }; if (readstat_type_class(variable->type) == READSTAT_TYPE_CLASS_STRING) { error = readstat_convert(converted_value, 4 * len + 1, bytes, len, converter); if (error != READSTAT_OK) goto cleanup; value.v.string_value = converted_value; } else { char *endptr = NULL; if (variable->type == READSTAT_TYPE_DOUBLE) { value.v.double_value = strtod(bytes, &endptr); } else if (variable->type == READSTAT_TYPE_FLOAT) { value.v.float_value = strtof(bytes, &endptr); } else {
value.v.i32_value = strtol(bytes, &endptr, 10); value.type = READSTAT_TYPE_INT32; } value.is_system_missing = (endptr == bytes); } if (parser->handlers.value(obs_index, variable, value, ctx) == READSTAT_HANDLER_ABORT) { error = READSTAT_ERROR_USER_ABORT; } cleanup: free(converted_value); return error; } static ssize_t txt_getdelim(char ** restrict linep, size_t * restrict linecapp, int delimiter, readstat_io_t *io) { char *value_buffer = *linep; size_t value_buffer_len = *linecapp; ssize_t i = 0; ssize_t bytes_read = 0; while ((bytes_read = io->read(&value_buffer[i], 1, io->io_ctx)) == 1 && value_buffer[i++] != delimiter) { if (i == value_buffer_len) { value_buffer = realloc(value_buffer, value_buffer_len *= 2); } } *linep = value_buffer; *linecapp = value_buffer_len; if (bytes_read == -1) return -1; return i; } static readstat_error_t txt_parse_delimited(readstat_parser_t *parser, txt_ctx_t *ctx, void *user_ctx) { size_t value_buffer_len = 4096; char *value_buffer = malloc(value_buffer_len); readstat_schema_t *schema = ctx->schema; readstat_error_t retval = READSTAT_OK; readstat_io_t *io = parser->io; int k=0; while (1) { for (int j=0; jentry_count; j++) { readstat_schema_entry_t *entry = &schema->entries[j]; int delimiter = (j == schema->entry_count-1) ? 
'\n' : schema->field_delimiter; ssize_t chars_read = txt_getdelim(&value_buffer, &value_buffer_len, delimiter, io); if (chars_read == 0) goto cleanup; if (chars_read == -1) { retval = READSTAT_ERROR_READ; goto cleanup; } if (parser->handlers.value && !entry->skip) { chars_read--; // delimiter if (chars_read > 0 && value_buffer[chars_read-1] == '\r') { chars_read--; // CRLF } value_buffer[chars_read] = '\0'; retval = handle_value(parser, ctx->converter, k, entry, value_buffer, chars_read, user_ctx); if (retval != READSTAT_OK) goto cleanup; } } if (++k == parser->row_limit) break; } cleanup: ctx->rows = k; if (value_buffer) free(value_buffer); return retval; } static readstat_error_t txt_parse_fixed_width(readstat_parser_t *parser, txt_ctx_t *ctx, void *user_ctx, const size_t *line_lens, char *line_buffer) { char value_buffer[4096]; readstat_schema_t *schema = ctx->schema; readstat_io_t *io = parser->io; readstat_error_t retval = READSTAT_OK; int k=0; while (1) { int j=0; for (int i=0; irows_per_observation; i++) { ssize_t bytes_read = io->read(line_buffer, line_lens[i], io->io_ctx); if (bytes_read == 0) goto cleanup; if (bytes_read < line_lens[i]) { retval = READSTAT_ERROR_READ; goto cleanup; } for (; jentry_count && schema->entries[j].row == i; j++) { readstat_schema_entry_t *entry = &schema->entries[j]; size_t field_len = schema->entries[j].len; size_t field_offset = schema->entries[j].col; if (field_len < sizeof(value_buffer) && parser->handlers.value && !entry->skip) { memcpy(value_buffer, &line_buffer[field_offset], field_len); value_buffer[field_len] = '\0'; retval = handle_value(parser, ctx->converter, k, entry, value_buffer, field_len, user_ctx); if (retval != READSTAT_OK) { goto cleanup; } } } if (schema->cols_per_observation == 0) { char throwaway = '\0'; while (io->read(&throwaway, 1, io->io_ctx) == 1 && throwaway != '\n'); } } if (++k == parser->row_limit) break; } cleanup: ctx->rows = k; return retval; } readstat_error_t 
readstat_parse_txt(readstat_parser_t *parser, const char *filename, readstat_schema_t *schema, void *user_ctx) { readstat_error_t retval = READSTAT_OK; readstat_io_t *io = parser->io; int i; size_t *line_lens = NULL; size_t line_buffer_len = 0; char *line_buffer = NULL; txt_ctx_t ctx = { .schema = schema }; if (parser->output_encoding && parser->input_encoding) { ctx.converter = iconv_open(parser->output_encoding, parser->input_encoding); if (ctx.converter == (iconv_t)-1) { ctx.converter = NULL; retval = READSTAT_ERROR_UNSUPPORTED_CHARSET; goto cleanup; } } if (io->open(filename, io->io_ctx) == -1) { retval = READSTAT_ERROR_OPEN; goto cleanup; } if ((line_lens = malloc(schema->rows_per_observation * sizeof(size_t))) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } for (i=0; irows_per_observation; i++) { line_lens[i] = schema->cols_per_observation; } for (i=0; ientry_count; i++) { readstat_schema_entry_t *entry = &schema->entries[i]; if (line_lens[entry->row] < entry->col + entry->len) line_lens[entry->row] = entry->col + entry->len; } for (i=0; irows_per_observation; i++) { if (line_buffer_len < line_lens[i]) line_buffer_len = line_lens[i]; } line_buffer_len += 2; /* CRLF */ if ((line_buffer = malloc(line_buffer_len)) == NULL) { retval = READSTAT_ERROR_MALLOC; goto cleanup; } if (schema->first_line > 1) { int throwaway_lines = schema->first_line - 1; char throwaway_char = '\0'; while (throwaway_lines--) { while (io->read(&throwaway_char, 1, io->io_ctx) == 1 && throwaway_char != '\n'); } } if (schema->field_delimiter) { retval = txt_parse_delimited(parser, &ctx, user_ctx); } else { retval = txt_parse_fixed_width(parser, &ctx, user_ctx, line_lens, line_buffer); } if (retval != READSTAT_OK) goto cleanup; if (parser->handlers.metadata) { readstat_metadata_t metadata = { .row_count = ctx.rows, .var_count = schema->entry_count }; int cb_retval = parser->handlers.metadata(&metadata, user_ctx); if (cb_retval == READSTAT_HANDLER_ABORT) retval = 
READSTAT_ERROR_USER_ABORT; } cleanup: io->close(io->io_ctx); if (line_buffer) free(line_buffer); if (line_lens) free(line_lens); if (ctx.converter) iconv_close(ctx.converter); return retval; } ReadStat-1.1.7/variablemetadata_schema.json000066400000000000000000000337541410722155500207460ustar00rootroot00000000000000{ "type": "object", "oneOf": [ { "$ref": "#/definitions/SPSS" }, { "$ref": "#/definitions/STATA" } ], "definitions": { "SPSS": { "type": "object", "properties": { "type": { "enum": [ "SPSS" ] }, "separator": { "enum": [ ",", ";", "\t" ] }, "variables": { "type": "array", "items": { "type": "object", "oneOf": [ { "$ref": "#/definitions/SPSS-NUMERIC" }, { "$ref": "#/definitions/SPSS-STRING" } ] } } }, "additionalProperties": false }, "SPSS-NUMERIC": { "properties": { "type": { "enum": [ "NUMERIC" ] }, "name": { "type": "string", "minLength": 1, "maxLength": 64 }, "label": { "type": "string", "minLength": 0, "maxLength": 255 }, "format": { "type": { "enum": [ "NUMBER", "PERCENT", "CURRENCY", "DATE", "TIME", "DATE_TIME", "UNSPECIFIED" ] } }, "pattern": { "type": "string", "minLength": 1 }, "decimals": { "type": "integer", "minimum": 0, "maximum": 16 }, "categories": { "type": "array", "items": { "type": "object", "properties": { "code": { "type": "number" }, "label": { "type": "string" } }, "required": [ "code", "label" ], "additionalProperties": false } }, "missing": { "type": "object", "oneOf": [ { "$ref": "#/definitions/NUMERIC-RANGE" }, { "$ref": "#/definitions/SPSS-NUMERIC-DISCRETE" } ] } }, "required": [ "type", "name" ], "additionalProperties": false }, "SPSS-STRING": { "properties": { "type": { "enum": [ "STRING" ] }, "name": { "type": "string", "minLength": 1, "maxLength": 64 }, "label": { "type": "string", "minLength": 0, "maxLength": 255 }, "categories": { "type": "array", "items": { "type": "object", "properties": { "code": { "type": "string" }, "label": { "type": "string" } }, "required": [ "code", "label" ], "additionalProperties": false } }, 
"missing": { "type": "object", "oneOf": [ { "$ref": "#/definitions/SPSS-STRING-DISCRETE" } ] } }, "required": [ "type", "name" ], "additionalProperties": false }, "SPSS-NUMERIC-DISCRETE": { "type": "object", "properties": { "type": { "enum": [ "DISCRETE" ] }, "values": { "type": "array", "items": { "type": "number" }, "minItems": 1, "maxItems": 3, "uniqueItems": true } }, "required": [ "type", "values" ], "additionalProperties": false }, "SPSS-STRING-DISCRETE": { "type": "object", "properties": { "type": { "enum": [ "DISCRETE" ] }, "values": { "type": "array", "items": { "type": "string" }, "minItems": 1, "maxItems": 3, "uniqueItems": true } }, "required": [ "type", "values" ], "additionalProperties": false }, "NUMERIC-RANGE": { "type": "object", "properties": { "type": { "enum": [ "RANGE" ] }, "low": { "type": "number" }, "high": { "type": "number" }, "discrete-value": { "type": "number" } }, "required": [ "type", "low", "high" ], "additionalProperties": false }, "DATE-RANGE": { "type": "object", "properties": { "type": { "enum": [ "RANGE" ] }, "low": { "type": "string", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" }, "high": { "type": "string", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" }, "discrete-value": { "type": "string", "pattern": "^[0-9]{4}-[0-9]{2}-[0-9]{2}$" } }, "required": [ "type", "low", "high" ], "additionalProperties": false }, "STATA": { "type": "object", "properties": { "type": { "enum": [ "STATA" ] }, "separator": { "enum": [ ",", ";", "\t" ] }, "variables": { "type": "array", "items": { "type": "object", "oneOf": [ { "$ref": "#/definitions/STATA-NUMERIC" }, { "$ref": "#/definitions/STATA-STRING" } ] } } }, "additionalProperties": false }, "STATA-NUMERIC": { "properties": { "type": { "enum": [ "NUMERIC" ] }, "name": { "type": "string", "minLength": 1, "maxLength": 32 }, "label": { "type": "string", "minLength": 0, "maxLength": 255 }, "format": { "type": { "enum": [ "NUMBER", "PERCENT", "CURRENCY", "DATE", "TIME", "DATE_TIME", "UNSPECIFIED" ] } }, 
"pattern": { "type": "string", "minLength": 1 }, "decimals": { "type": "integer", "minimum": 0, "maximum": 16 }, "categories": { "type": "array", "items": { "type": "object", "properties": { "code": { "type": "number" }, "label": { "type": "string" } }, "required": [ "code", "label" ], "additionalProperties": false } }, "missing": { "type": "object", "oneOf": [ { "$ref": "#/definitions/STATA-NUMERIC-DISCRETE" }, { "$ref": "#/definitions/NUMERIC-RANGE" } ] } }, "required": [ "type", "name" ], "additionalProperties": false }, "STATA-NUMERIC-DISCRETE": { "type": "object", "properties": { "type": { "enum": [ "DISCRETE" ] }, "values": { "type": "array", "items": { "type": "number" }, "minItems": 1, "maxItems": 26, "uniqueItems": true } }, "required": [ "type", "values" ], "additionalProperties": false }, "STATA-STRING": { "properties": { "type": { "enum": [ "STRING" ] }, "name": { "type": "string", "minLength": 1, "maxLength": 32 }, "label": { "type": "string", "minLength": 0, "maxLength": 255 }, "categories": { "type": "array", "items": { "type": "object", "properties": { "code": { "type": "string" }, "label": { "type": "string" } }, "required": [ "code", "label" ], "additionalProperties": false } }, "missing": { "type": "object", "oneOf": [ { "$ref": "#/definitions/STATA-STRING-DISCRETE" } ] } }, "required": [ "type", "name" ], "additionalProperties": false }, "STATA-STRING-DISCRETE": { "type": "object", "properties": { "type": { "enum": [ "DISCRETE" ] }, "values": { "type": "array", "items": { "type": "string" }, "minItems": 1, "maxItems": 3, "uniqueItems": true } }, "required": [ "type", "values" ], "additionalProperties": false } } }