librdata-0~20210223+git-85757dc6/000077500000000000000000000000001403031461700156575ustar00rootroot00000000000000librdata-0~20210223+git-85757dc6/.github/000077500000000000000000000000001403031461700172175ustar00rootroot00000000000000librdata-0~20210223+git-85757dc6/.github/workflows/000077500000000000000000000000001403031461700212545ustar00rootroot00000000000000librdata-0~20210223+git-85757dc6/.github/workflows/build.yml000066400000000000000000000026571403031461700231100ustar00rootroot00000000000000name: build on: [ push, pull_request ] jobs: linux: runs-on: ubuntu-latest strategy: matrix: compiler: [ clang, gcc ] steps: - name: Install packages run: sudo apt install gettext - uses: actions/checkout@v2 - name: Autoconf run: autoreconf -i -f - name: Configure run: ./configure env: CC: ${{ matrix.compiler }} - name: Make run: make - name: Tests run: make check macos: runs-on: macos-latest strategy: matrix: compiler: [ clang, gcc ] steps: - name: Install packages run: brew install automake - uses: actions/checkout@v2 - name: Autoconf run: autoreconf -i -f - name: Configure run: ./configure env: CC: ${{ matrix.compiler }} - name: Make run: make - name: Tests run: make check windows: runs-on: windows-latest env: MSYSTEM: MINGW64 steps: - uses: actions/checkout@v2 - name: Autoconf run: C:\msys64\usr\bin\bash -c -l 'cd "$GITHUB_WORKSPACE" && autoreconf -i -f' - name: Configure run: C:\msys64\usr\bin\bash -c -l 'cd "$GITHUB_WORKSPACE" && ./configure' - name: Make run: C:\msys64\usr\bin\bash -c -l 'cd "$GITHUB_WORKSPACE" && make' - name: Test run: C:\msys64\usr\bin\bash -c -l 'cd "$GITHUB_WORKSPACE" && make check' librdata-0~20210223+git-85757dc6/.gitignore000066400000000000000000000005461403031461700176540ustar00rootroot00000000000000.editorconfig **/.DS_Store **/.deps **/.libs **/.dirstamp **/*.lo **/*.o aclocal.m4 compile config.guess config.log config.status config.sub configure depcomp install-sh librdata.la libtool ltmain.sh Makefile Makefile.in missing readstat 
test-driver autom4te.cache/* output.* test-suite.log readEx writeEx fuzz_rdata test_rdata test_rdata.log test_rdata.trs librdata-0~20210223+git-85757dc6/LICENSE000066400000000000000000000021031403031461700166600ustar00rootroot00000000000000Copyright (c) 2013-2020 Evan Miller (except where otherwise noted) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
librdata-0~20210223+git-85757dc6/Makefile.am000066400000000000000000000025141403031461700177150ustar00rootroot00000000000000AUTOMAKE_OPTIONS = foreign dist-zip lib_LTLIBRARIES = librdata.la librdata_la_SOURCES = \ src/CKHashTable.c \ src/rdata_bits.c \ src/rdata_error.c \ src/rdata_io_unistd.c \ src/rdata_parser.c \ src/rdata_read.c \ src/rdata_write.c librdata_la_LIBADD = librdata_la_CFLAGS = -Os -Wall -Wstrict-prototypes librdata_la_LDFLAGS = @EXTRA_LDFLAGS@ if HAVE_BZIP2 librdata_la_LIBADD += -lbz2 librdata_la_CFLAGS += -DHAVE_BZIP2 endif if HAVE_ZLIB librdata_la_LIBADD += -lz librdata_la_CFLAGS += -DHAVE_ZLIB=1 endif if HAVE_APPLE_COMPRESSION librdata_la_LIBADD += -lcompression librdata_la_CFLAGS += -DHAVE_APPLE_COMPRESSION=1 else if HAVE_LZMA librdata_la_LIBADD += -llzma librdata_la_CFLAGS += -DHAVE_LZMA=1 endif endif include_HEADERS = src/rdata.h noinst_HEADERS = \ src/CKHashTable.h \ src/rdata_bits.h \ src/rdata_internal.h \ src/rdata_io_unistd.h \ src/test/test_buffer.h \ src/test/test_buffer_io.h check_PROGRAMS = test_rdata test_rdata_SOURCES = \ src/test/test_buffer.c \ src/test/test_buffer_io.c \ src/test/test_rdata.c test_rdata_LDADD = librdata.la TESTS = test_rdata if HAVE_FUZZER noinst_PROGRAMS = fuzz_rdata fuzz_rdata_SOURCES = src/fuzz/fuzz_rdata.c fuzz_rdata_LDADD = librdata.la -lstdc++ -lFuzzer fuzz_rdata_CFLAGS = -g -Wall @EXTRA_WARNINGS@ -Werror -pedantic-errors -std=c99 @SANITIZERS@ endif librdata-0~20210223+git-85757dc6/README.md000066400000000000000000000067051403031461700171460ustar00rootroot00000000000000# librdata - Read and write R data frames from C [![Build Status](https://github.com/WizardMac/librdata/workflows/build/badge.svg)](https://github.com/WizardMac/librdata/actions) [![Build status](https://ci.appveyor.com/api/projects/status/xrao0cdroh5xn950?svg=true)](https://ci.appveyor.com/project/evanmiller/librdata) Originally part of [ReadStat](https://github.com/WizardMac/ReadStat), librdata is a small C library for reading and writing 
R data frames.

Features:

* Read both RData and RDS formats
* Read compressed files (requires bzip2, zlib, and lzma)
* Write factors, timestamps, logical vectors, and more

## Installation

```
./autogen.sh
./configure
make
make install
```

If you're on Mac and see errors about `AM_ICONV` when you run `./autogen.sh`,
you'll need to install [gettext](https://www.gnu.org/software/gettext/).

## Language bindings

* Python: [pyreadr](https://github.com/ofajardo/pyreadr)

## Read API

Example usage:

```c
#include "rdata.h"

static int handle_table(const char *name, void *ctx) {
    printf("Read table: %s\n", name);

    return 0; /* non-zero to abort processing */
}

// Called once for each column, with the following caveats:
// * `name` is NULL for some columns (see handle_column_name below)
// * `data` is NULL for text columns (see handle_text_value below)
static int handle_column(const char *name, rdata_type_t type,
                         void *data, long count, void *ctx) {
    /* Do something... */
    return 0;
}

// Some column names appear in the file after the data
static int handle_column_name(const char *name, int index, void *ctx) {
    printf("Read column name: %s\n", name);

    /* Do something... */
    return 0;
}

// Called once per row for a text column
static int handle_text_value(const char *value, int index, void *ctx) {
    /* Do something... */
    return 0;
}

// Called for factor variables, once for each level
static int handle_value_label(const char *value, int index, void *ctx) {
    /* Do something... */
    return 0;
}

rdata_parser_t *parser = rdata_parser_init();

rdata_set_table_handler(parser, &handle_table);
rdata_set_column_handler(parser, &handle_column);
rdata_set_column_name_handler(parser, &handle_column_name);
rdata_set_text_value_handler(parser, &handle_text_value);
rdata_set_value_label_handler(parser, &handle_value_label);

rdata_parse(parser, "/path/to/something.rdata", NULL);
rdata_parser_free(parser);
```

See [`rdata.h`](src/rdata.h) for the full API.

## Write API

Example usage:

```c
#include "rdata.h"

static ssize_t write_data(const void *bytes, size_t len, void *ctx) {
    int fd = *(int *)ctx;
    return write(fd, bytes, len);
}

int row_count = 3;
int fd = open("/path/to/somewhere.rdata", O_CREAT | O_WRONLY, 0644);

rdata_writer_t *writer = rdata_writer_init(&write_data, RDATA_WORKSPACE);

rdata_column_t *col1 = rdata_add_column(writer, "column1", RDATA_TYPE_REAL);
rdata_column_t *col2 = rdata_add_column(writer, "column2", RDATA_TYPE_STRING);

rdata_begin_file(writer, &fd);
rdata_begin_table(writer, "my_table");

rdata_begin_column(writer, col1, row_count);
rdata_append_real_value(writer, 0.0);
rdata_append_real_value(writer, 100.0);
rdata_append_real_value(writer, NAN);
rdata_end_column(writer, col1);

rdata_begin_column(writer, col2, row_count);
rdata_append_string_value(writer, "hello");
rdata_append_string_value(writer, "goodbye");
rdata_append_string_value(writer, NULL);
rdata_end_column(writer, col2);

rdata_end_table(writer, row_count, "My data set");
rdata_end_file(writer);

rdata_writer_free(writer);
close(fd);
```

See [`rdata.h`](src/rdata.h) for the full API.

librdata-0~20210223+git-85757dc6/appveyor.yml000066400000000000000000000014041403031461700202460ustar00rootroot00000000000000version: 0.1.{build} os: Windows Server 2012 R2 platform: x64 environment: matrix: - TOOLCHAIN: "cygwin" branches: only: - master skip_tags: true install: - C:\cygwin64\setup-x86_64.exe -qnNdO -s http://cygwin.mirror.constant.com -l C:/cygwin64/var/cache/setup -P libbz2-devel -P zlib-devel -P liblzma-devel -P libiconv-devel build_script: - C:\cygwin64\bin\sh -lc "cd /cygdrive/c/projects/librdata && ./autogen.sh" - C:\cygwin64\bin\sh -lc "cd /cygdrive/c/projects/librdata && ./configure" - C:\cygwin64\bin\sh -lc "cd /cygdrive/c/projects/librdata && make" test_script: - C:\cygwin64\bin\sh -lc "cd /cygdrive/c/projects/librdata && make check" on_finish: - C:\cygwin64\bin\sh -lc "cd /cygdrive/c/projects/librdata && cat ./test-suite.log" librdata-0~20210223+git-85757dc6/autogen.sh000077500000000000000000000000441403031461700176560ustar00rootroot00000000000000#! /usr/bin/env bash autoreconf -i librdata-0~20210223+git-85757dc6/config.rpath000066400000000000000000000000001403031461700201520ustar00rootroot00000000000000librdata-0~20210223+git-85757dc6/configure.ac000066400000000000000000000040021403031461700201410ustar00rootroot00000000000000AC_INIT([rdata], [0.1.0-prerelease]) AM_INIT_AUTOMAKE([foreign subdir-objects]) AM_SILENT_RULES([yes]) AC_SUBST([LIBRDATA_VERSION], [0.1.0-prerelease]) LT_INIT([disable-static]) AC_PROG_CC AC_PROG_CC_STDC AC_ARG_ENABLE([code-coverage], AS_HELP_STRING([--enable-code-coverage], [Enable code coverage profiling]), [code_coverage=yes], [code_coverage=no]) AC_ARG_ENABLE([sanitizers], AS_HELP_STRING([--enable-sanitizers], [Enable address sanitizing]), [SANITIZERS="-fsanitize=address,bool,float-cast-overflow,integer-divide-by-zero,return,returns-nonnull-attribute,shift-exponent,signed-integer-overflow,unreachable,vla-bound -fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp"], [SANITIZERS=""]) AC_SUBST([SANITIZERS]) 
AM_ICONV AC_CANONICAL_HOST AS_CASE([$host], [*mingw*], [EXTRA_WARNINGS="-Wno-pedantic-ms-format -Wno-stringop-truncation"], [EXTRA_WARNINGS=""]) AC_SUBST([EXTRA_WARNINGS]) AS_CASE([$host], [*mingw*|*cygwin*], [EXTRA_LDFLAGS="$LTLIBICONV -no-undefined"], [EXTRA_LDFLAGS="$LTLIBICONV"]) AC_SUBST([EXTRA_LDFLAGS]) AC_CHECK_LIB([bz2], [BZ2_bzCompressInit], [true], [false]) AM_CONDITIONAL([HAVE_BZIP2], test "$ac_cv_lib_bz2_BZ2_bzCompressInit" = yes) AC_CHECK_LIB([compression], [compression_stream_init], [true], [false]) AM_CONDITIONAL([HAVE_APPLE_COMPRESSION], test "$ac_cv_lib_compression_compression_stream_init" = yes) AC_CHECK_LIB([lzma], [lzma_stream_decoder], [true], [false]) AM_CONDITIONAL([HAVE_LZMA], test "$ac_cv_lib_lzma_lzma_stream_decoder" = yes) AC_CHECK_LIB([z], [deflate], [true], [false]) AM_CONDITIONAL([HAVE_ZLIB], test "$ac_cv_lib_z_deflate" = yes) AM_CONDITIONAL([CODE_COVERAGE_ENABLED], test "x$code_coverage" = "xyes") AC_CHECK_LIB([Fuzzer], [__libfuzzer_is_present], [true], [false], [-lstdc++]) AM_CONDITIONAL([HAVE_FUZZER], test "$ac_cv_lib_Fuzzer___libfuzzer_is_present" = yes) AC_OUTPUT([Makefile]) AC_MSG_RESULT([ Configuration: C compiler: $CC CFLAGS: $CFLAGS Host: $host Extra warnings: $EXTRA_WARNINGS Extra libs: $EXTRA_LIBS Extra ld flags: $EXTRA_LDFLAGS]) librdata-0~20210223+git-85757dc6/readEx.c000066400000000000000000000056751403031461700172500ustar00rootroot00000000000000#include #include #include #include #include #include #include const int debug = 0; static int handle_table(const char *name, void *ctx) { if (debug) printf("Read table: %s\n", name); return 0; } char *rdata_type_text[] = { "String", "Integer", "Real", "Logical", "Timestamp" }; // Called once for all columns. "data" is NULL for text columns. 
static int handle_column(const char *name, rdata_type_t type, void *data, long count, void *ctx) { if (debug) printf("Read column: %s with %ld elements of type %s\n", name, count, rdata_type_text[type]); for (long i=0; i Copyright (c) 2012 Daniel J. Bernstein To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see . */ #include #include #include typedef uint64_t u64; typedef uint32_t u32; typedef uint8_t u8; #define ROTL(x,b) (u64)( ((x) << (b)) | ( (x) >> (64 - (b))) ) #define U32TO8_LE(p, v) \ (p)[0] = (u8)((v) ); (p)[1] = (u8)((v) >> 8); \ (p)[2] = (u8)((v) >> 16); (p)[3] = (u8)((v) >> 24); #define U64TO8_LE(p, v) \ U32TO8_LE((p), (u32)((v) )); \ U32TO8_LE((p) + 4, (u32)((v) >> 32)); #define U8TO64_LE(p) \ (((u64)((p)[0]) ) | \ ((u64)((p)[1]) << 8) | \ ((u64)((p)[2]) << 16) | \ ((u64)((p)[3]) << 24) | \ ((u64)((p)[4]) << 32) | \ ((u64)((p)[5]) << 40) | \ ((u64)((p)[6]) << 48) | \ ((u64)((p)[7]) << 56)) #define SIPROUND \ do { \ v0 += v1; v1=ROTL(v1,13); v1 ^= v0; v0=ROTL(v0,32); \ v2 += v3; v3=ROTL(v3,16); v3 ^= v2; \ v0 += v3; v3=ROTL(v3,21); v3 ^= v0; \ v2 += v1; v1=ROTL(v1,17); v1 ^= v2; v2=ROTL(v2,32); \ } while(0) /* SipHash-1-2 */ static int siphash( unsigned char *out, const unsigned char *in, unsigned long long inlen, const unsigned char *k ) { /* "somepseudorandomlygeneratedbytes" */ u64 v0 = 0x736f6d6570736575ULL; u64 v1 = 0x646f72616e646f6dULL; u64 v2 = 0x6c7967656e657261ULL; u64 v3 = 0x7465646279746573ULL; u64 b; u64 k0 = U8TO64_LE( k ); u64 k1 = U8TO64_LE( k + 8 ); u64 m; const u8 *end = in + inlen - ( inlen % sizeof( u64 ) ); const int left = inlen & 7; b = ( ( u64 )inlen ) << 56; v3 ^= k1; v2 ^= k0; v1 ^= k1; v0 ^= k0; for ( ; in != end; in += 8 ) { m = U8TO64_LE( in ); v3 ^= m; SIPROUND; 
v0 ^= m; } switch( left ) { case 7: b |= ( ( u64 )in[ 6] ) << 48; case 6: b |= ( ( u64 )in[ 5] ) << 40; case 5: b |= ( ( u64 )in[ 4] ) << 32; case 4: b |= ( ( u64 )in[ 3] ) << 24; case 3: b |= ( ( u64 )in[ 2] ) << 16; case 2: b |= ( ( u64 )in[ 1] ) << 8; case 1: b |= ( ( u64 )in[ 0] ); break; case 0: break; } v3 ^= b; SIPROUND; v0 ^= b; v2 ^= 0xff; SIPROUND; SIPROUND; b = v0 ^ v1 ^ v2 ^ v3; U64TO8_LE( out, b ); return 0; } inline uint64_t ck_hash_str(const char *str, size_t keylen) { uint64_t hash; unsigned char k[16] = { 0 }; siphash((unsigned char *)&hash, (const unsigned char *)str, keylen, k); return hash; } const void *ck_float_hash_lookup(float key, ck_hash_table_t *table) { return ck_str_n_hash_lookup((const char *)&key, sizeof(float), table); } int ck_float_hash_insert(float key, const void *value, ck_hash_table_t *table) { return ck_str_n_hash_insert((const char *)&key, sizeof(float), value, table); } const void *ck_double_hash_lookup(double key, ck_hash_table_t *table) { return ck_str_n_hash_lookup((const char *)&key, sizeof(double), table); } int ck_double_hash_insert(double key, const void *value, ck_hash_table_t *table) { return ck_str_n_hash_insert((const char *)&key, sizeof(double), value, table); } const void *ck_str_hash_lookup(const char *key, ck_hash_table_t *table) { size_t keylen = strlen(key); return ck_str_n_hash_lookup(key, keylen, table); } const void *ck_str_n_hash_lookup(const char *key, size_t keylen, ck_hash_table_t *table) { if (table->count == 0) return NULL; if (keylen == 0) return NULL; uint64_t hash_key = ck_hash_str(key, keylen); hash_key %= table->capacity; uint64_t end = hash_key; do { char *this_key = &table->keys[table->entries[hash_key].key_offset]; size_t this_keylen = table->entries[hash_key].key_length; if (this_keylen == 0) return NULL; if (this_keylen == keylen && memcmp(this_key, key, keylen) == 0) { return table->entries[hash_key].value; } hash_key++; hash_key %= table->capacity; } while (hash_key != end); return NULL; 
} int ck_str_hash_insert(const char *key, const void *value, ck_hash_table_t *table) { size_t keylen = strlen(key); return ck_str_n_hash_insert(key, keylen, value, table); } static int ck_hash_insert_nocopy(off_t key_offset, size_t keylen, uint64_t hash_key, const void *value, ck_hash_table_t *table) { if (table->capacity == 0) return 0; hash_key %= table->capacity; uint64_t end = (hash_key + table->capacity - 1) % table->capacity; while (hash_key != end) { ck_hash_entry_t *entry = &table->entries[hash_key]; if (table->entries[hash_key].key_length == 0) { table->count++; entry->key_offset = key_offset; entry->key_length = keylen; entry->value = value; return 1; } else if (entry->key_length == keylen && entry->key_offset == key_offset) { entry->value = value; return 1; } hash_key++; hash_key %= table->capacity; } return 0; } int ck_str_n_hash_insert(const char *key, size_t keylen, const void *value, ck_hash_table_t *table) { if (table->capacity == 0) return 0; if (keylen == 0) return 0; if (table->count >= 0.75 * table->capacity) { if (ck_hash_table_grow(table) == -1) { return 0; } } uint64_t hash_key = ck_hash_str(key, keylen); hash_key %= table->capacity; uint64_t end = hash_key; do { ck_hash_entry_t *entry = &table->entries[hash_key]; char *this_key = &table->keys[entry->key_offset]; if (entry->key_length == 0) { table->count++; while (table->keys_used + keylen > table->keys_capacity) { table->keys_capacity *= 2; table->keys = realloc(table->keys, table->keys_capacity); } memcpy(table->keys + table->keys_used, key, keylen); entry->key_offset = table->keys_used; entry->key_length = keylen; table->keys_used += keylen; entry->value = value; return 1; } else if (entry->key_length == keylen && memcmp(this_key, key, keylen) == 0) { table->entries[hash_key].value = value; return 1; } hash_key++; hash_key %= table->capacity; } while (hash_key != end); return 0; } ck_hash_table_t *ck_hash_table_init(size_t num_entries, size_t mean_key_length) { ck_hash_table_t *table; if 
((table = malloc(sizeof(ck_hash_table_t))) == NULL) return NULL; if ((table->keys = malloc(num_entries * mean_key_length)) == NULL) { free(table); return NULL; } table->keys_capacity = num_entries * mean_key_length; num_entries *= 2; if ((table->entries = malloc(num_entries * sizeof(ck_hash_entry_t))) == NULL) { free(table->keys); free(table); return NULL; } table->capacity = num_entries; ck_hash_table_wipe(table); return table; } void ck_hash_table_free(ck_hash_table_t *table) { free(table->entries); if (table->keys) free(table->keys); free(table); } void ck_hash_table_wipe(ck_hash_table_t *table) { table->keys_used = 0; table->count = 0; memset(table->entries, 0, table->capacity * sizeof(ck_hash_entry_t)); } int ck_hash_table_grow(ck_hash_table_t *table) { ck_hash_entry_t *old_entries = table->entries; uint64_t old_capacity = table->capacity; uint64_t new_capacity = 2 * table->capacity; if ((table->entries = calloc(new_capacity, sizeof(ck_hash_entry_t))) == NULL) { return -1; } table->capacity = new_capacity; table->count = 0; for (int i=0; ikeys[old_entries[i].key_offset]; uint64_t hash_key = ck_hash_str(this_key, old_entries[i].key_length); if (!ck_hash_insert_nocopy(old_entries[i].key_offset, old_entries[i].key_length, hash_key, old_entries[i].value, table)) return -1; } } free(old_entries); return 0; } librdata-0~20210223+git-85757dc6/src/CKHashTable.h000066400000000000000000000025461403031461700206770ustar00rootroot00000000000000// CKHashTable - A simple hash table // Copyright 2010-2020 Evan Miller (see LICENSE) #include #include typedef struct ck_hash_entry_s { off_t key_offset; size_t key_length; const void *value; } ck_hash_entry_t; typedef struct ck_hash_table_s { size_t capacity; size_t count; ck_hash_entry_t *entries; char *keys; size_t keys_used; size_t keys_capacity; } ck_hash_table_t; int ck_str_hash_insert(const char *key, const void *value, ck_hash_table_t *table); const void *ck_str_hash_lookup(const char *key, ck_hash_table_t *table); int 
ck_str_n_hash_insert(const char *key, size_t keylen, const void *value, ck_hash_table_t *table); const void *ck_str_n_hash_lookup(const char *key, size_t keylen, ck_hash_table_t *table); int ck_float_hash_insert(float key, const void *value, ck_hash_table_t *table); const void *ck_float_hash_lookup(float key, ck_hash_table_t *table); int ck_double_hash_insert(double key, const void *value, ck_hash_table_t *table); const void *ck_double_hash_lookup(double key, ck_hash_table_t *table); ck_hash_table_t *ck_hash_table_init(size_t num_entries, size_t mean_key_length); void ck_hash_table_wipe(ck_hash_table_t *table); int ck_hash_table_grow(ck_hash_table_t *table); void ck_hash_table_free(ck_hash_table_t *table); uint64_t ck_hash_str(const char *str, size_t keylen); librdata-0~20210223+git-85757dc6/src/fuzz/000077500000000000000000000000001403031461700174445ustar00rootroot00000000000000librdata-0~20210223+git-85757dc6/src/fuzz/fuzz_rdata.c000066400000000000000000000046061403031461700217670ustar00rootroot00000000000000#include #include #include "../rdata.h" typedef struct buffer { const unsigned char *data; size_t len; ssize_t pos; } buffer_t; int open_handler(const char *path, void *io_ctx) { return 0; } int close_handler(void *io_ctx) { return 0; } rdata_off_t seek_handler(rdata_off_t offset, rdata_io_flags_t whence, void *io_ctx) { buffer_t *buffer = (buffer_t *)io_ctx; rdata_off_t newpos = 0; if (whence == RDATA_SEEK_SET) { newpos = offset; } else if (whence == RDATA_SEEK_CUR) { newpos = buffer->pos + offset; } else if (whence == RDATA_SEEK_END) { newpos = buffer->len + offset; } if (newpos > buffer->len || newpos < 0) { return -1; } return (buffer->pos = newpos); } ssize_t read_handler(void *buf, size_t nbyte, void *io_ctx) { buffer_t *buffer = (buffer_t *)io_ctx; if (nbyte > buffer->len - buffer->pos) { nbyte = buffer->len - buffer->pos; } memcpy(buf, &buffer->data[buffer->pos], nbyte); buffer->pos += nbyte; return nbyte; } rdata_error_t update_handler(long 
file_size, rdata_progress_handler progress_handler, void *user_ctx, void *io_ctx) { return RDATA_OK; } int table_handler(const char *name, void *ctx) { return 0; } int column_handler(const char *name, rdata_type_t type, void *data, long count, void *ctx) { return 0; } int column_name_handler(const char *value, int index, void *ctx) { return 0; } int text_value_handler(const char *value, int index, void *ctx) { return 0; } int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { buffer_t buffer = { .data = Data, .len = Size }; rdata_parser_t *parser = rdata_parser_init(); rdata_set_table_handler(parser, &table_handler); rdata_set_column_handler(parser, &column_handler); rdata_set_column_name_handler(parser, &column_name_handler); rdata_set_text_value_handler(parser, &text_value_handler); rdata_set_value_label_handler(parser, &text_value_handler); rdata_set_open_handler(parser, &open_handler); rdata_set_close_handler(parser, &close_handler); rdata_set_seek_handler(parser, &seek_handler); rdata_set_read_handler(parser, &read_handler); rdata_set_update_handler(parser, &update_handler); rdata_set_io_ctx(parser, &buffer); rdata_parse(parser, NULL, NULL); rdata_parser_free(parser); return 0; } librdata-0~20210223+git-85757dc6/src/rdata.h000066400000000000000000000161051403031461700177150ustar00rootroot00000000000000 #include #include #include #include #ifdef __cplusplus extern "C" { #endif typedef enum rdata_type_e { RDATA_TYPE_STRING, RDATA_TYPE_INT32, RDATA_TYPE_REAL, RDATA_TYPE_LOGICAL, RDATA_TYPE_TIMESTAMP, RDATA_TYPE_DATE } rdata_type_t; typedef enum rdata_error_e { RDATA_OK, RDATA_ERROR_OPEN = 1, RDATA_ERROR_SEEK, RDATA_ERROR_READ, RDATA_ERROR_MALLOC, RDATA_ERROR_USER_ABORT, RDATA_ERROR_PARSE, RDATA_ERROR_WRITE, RDATA_ERROR_FACTOR, RDATA_ERROR_UNSUPPORTED_COMPRESSION, RDATA_ERROR_UNSUPPORTED_CHARSET, RDATA_ERROR_CONVERT, RDATA_ERROR_CONVERT_BAD_STRING, RDATA_ERROR_CONVERT_LONG_STRING, RDATA_ERROR_CONVERT_SHORT_STRING, RDATA_ERROR_UNSUPPORTED_S_EXPRESSION, 
RDATA_ERROR_UNSUPPORTED_STORAGE_CLASS } rdata_error_t; typedef enum rdata_file_format_e { RDATA_WORKSPACE, RDATA_SINGLE_OBJECT } rdata_file_format_t; const char *rdata_error_message(rdata_error_t error_code); typedef int (*rdata_column_handler)(const char *name, rdata_type_t type, void *data, long count, void *ctx); typedef int (*rdata_table_handler)(const char *name, void *ctx); typedef int (*rdata_text_value_handler)(const char *value, int index, void *ctx); typedef int (*rdata_column_name_handler)(const char *value, int index, void *ctx); typedef void (*rdata_error_handler)(const char *error_message, void *ctx); typedef int (*rdata_progress_handler)(double progress, void *ctx); #if defined(_MSC_VER) #include typedef SSIZE_T ssize_t; typedef __int64 rdata_off_t; #elif defined _WIN32 || defined __CYGWIN__ typedef _off64_t rdata_off_t; #elif defined _AIX typedef off64_t rdata_off_t; #else typedef off_t rdata_off_t; #endif typedef enum rdata_io_flags_e { RDATA_SEEK_SET, RDATA_SEEK_CUR, RDATA_SEEK_END } rdata_io_flags_t; typedef int (*rdata_open_handler)(const char *path, void *io_ctx); typedef int (*rdata_close_handler)(void *io_ctx); typedef rdata_off_t (*rdata_seek_handler)(rdata_off_t offset, rdata_io_flags_t whence, void *io_ctx); typedef ssize_t (*rdata_read_handler)(void *buf, size_t nbyte, void *io_ctx); typedef rdata_error_t (*rdata_update_handler)(long file_size, rdata_progress_handler progress_handler, void *user_ctx, void *io_ctx); typedef struct rdata_io_s { rdata_open_handler open; rdata_close_handler close; rdata_seek_handler seek; rdata_read_handler read; rdata_update_handler update; void *io_ctx; int external_io; } rdata_io_t; typedef struct rdata_parser_s { rdata_table_handler table_handler; rdata_column_handler column_handler; rdata_column_name_handler column_name_handler; rdata_column_name_handler row_name_handler; rdata_text_value_handler text_value_handler; rdata_text_value_handler value_label_handler; rdata_column_handler dim_handler; 
rdata_text_value_handler dim_name_handler; rdata_error_handler error_handler; rdata_io_t *io; } rdata_parser_t; rdata_parser_t *rdata_parser_init(void); void rdata_parser_free(rdata_parser_t *parser); rdata_error_t rdata_set_table_handler(rdata_parser_t *parser, rdata_table_handler table_handler); rdata_error_t rdata_set_column_handler(rdata_parser_t *parser, rdata_column_handler column_handler); rdata_error_t rdata_set_column_name_handler(rdata_parser_t *parser, rdata_column_name_handler column_name_handler); rdata_error_t rdata_set_row_name_handler(rdata_parser_t *parser, rdata_column_name_handler row_name_handler); rdata_error_t rdata_set_text_value_handler(rdata_parser_t *parser, rdata_text_value_handler text_value_handler); rdata_error_t rdata_set_value_label_handler(rdata_parser_t *parser, rdata_text_value_handler value_label_handler); rdata_error_t rdata_set_dim_handler(rdata_parser_t *parser, rdata_column_handler dim_handler); rdata_error_t rdata_set_dim_name_handler(rdata_parser_t *parser, rdata_text_value_handler dim_name_handler); rdata_error_t rdata_set_error_handler(rdata_parser_t *parser, rdata_error_handler error_handler); rdata_error_t rdata_set_open_handler(rdata_parser_t *parser, rdata_open_handler open_handler); rdata_error_t rdata_set_close_handler(rdata_parser_t *parser, rdata_close_handler close_handler); rdata_error_t rdata_set_seek_handler(rdata_parser_t *parser, rdata_seek_handler seek_handler); rdata_error_t rdata_set_read_handler(rdata_parser_t *parser, rdata_read_handler read_handler); rdata_error_t rdata_set_update_handler(rdata_parser_t *parser, rdata_update_handler update_handler); rdata_error_t rdata_set_io_ctx(rdata_parser_t *parser, void *io_ctx); /* rdata_parse works on RData and RDS. The table handler will be called once * per data frame in RData files, and zero times on RDS files. 
*/ rdata_error_t rdata_parse(rdata_parser_t *parser, const char *filename, void *user_ctx); // Write API typedef ssize_t (*rdata_data_writer)(const void *data, size_t len, void *ctx); typedef struct rdata_column_s { rdata_type_t type; int index; char name[256]; char label[1024]; int32_t factor_count; char **factor; } rdata_column_t; typedef struct rdata_writer_s { rdata_file_format_t file_format; rdata_data_writer data_writer; size_t bytes_written; rdata_error_handler error_handler; void *user_ctx; void *atom_table; int bswap; rdata_column_t **columns; int32_t columns_count; int32_t columns_capacity; } rdata_writer_t; rdata_writer_t *rdata_writer_init(rdata_data_writer write_callback, rdata_file_format_t format); void rdata_writer_free(rdata_writer_t *writer); rdata_column_t *rdata_add_column(rdata_writer_t *writer, const char *name, rdata_type_t type); rdata_error_t rdata_column_set_label(rdata_column_t *column, const char *label); rdata_error_t rdata_column_add_factor(rdata_column_t *column, const char *factor); rdata_column_t *rdata_get_column(rdata_writer_t *writer, int32_t j); rdata_error_t rdata_begin_file(rdata_writer_t *writer, void *ctx); rdata_error_t rdata_begin_table(rdata_writer_t *writer, const char *variable_name); rdata_error_t rdata_begin_column(rdata_writer_t *writer, rdata_column_t *column, int32_t row_count); rdata_error_t rdata_append_real_value(rdata_writer_t *writer, double value); rdata_error_t rdata_append_int32_value(rdata_writer_t *writer, int32_t value); rdata_error_t rdata_append_timestamp_value(rdata_writer_t *writer, time_t value); rdata_error_t rdata_append_date_value(rdata_writer_t *writer, struct tm *value); rdata_error_t rdata_append_logical_value(rdata_writer_t *writer, int value); rdata_error_t rdata_append_string_value(rdata_writer_t *writer, const char *value); rdata_error_t rdata_end_column(rdata_writer_t *writer, rdata_column_t *column); rdata_error_t rdata_end_table(rdata_writer_t *writer, int32_t row_count, const char 
*datalabel); rdata_error_t rdata_end_file(rdata_writer_t *writer); #ifdef __cplusplus } // extern c block #endif librdata-0~20210223+git-85757dc6/src/rdata_bits.c000066400000000000000000000022001403031461700207200ustar00rootroot00000000000000// // readstat_bits.c - Bit-twiddling utility functions // #include #include #include #include "rdata_bits.h" int machine_is_little_endian() { int test_byte_order = 1; return ((char *)&test_byte_order)[0]; } uint16_t byteswap2(uint16_t num) { return ((num & 0xFF00) >> 8) | ((num & 0x00FF) << 8); } uint32_t byteswap4(uint32_t num) { num = ((num & 0xFFFF0000) >> 16) | ((num & 0x0000FFFF) << 16); return ((num & 0xFF00FF00) >> 8) | ((num & 0x00FF00FF) << 8); } uint64_t byteswap8(uint64_t num) { num = ((num & 0xFFFFFFFF00000000) >> 32) | ((num & 0x00000000FFFFFFFF) << 32); num = ((num & 0xFFFF0000FFFF0000) >> 16) | ((num & 0x0000FFFF0000FFFF) << 16); return ((num & 0xFF00FF00FF00FF00) >> 8) | ((num & 0x00FF00FF00FF00FF) << 8); } float byteswap_float(float num) { uint32_t answer = 0; memcpy(&answer, &num, 4); answer = byteswap4(answer); memcpy(&num, &answer, 4); return num; } double byteswap_double(double num) { uint64_t answer = 0; memcpy(&answer, &num, 8); answer = byteswap8(answer); memcpy(&num, &answer, 8); return num; } librdata-0~20210223+git-85757dc6/src/rdata_bits.h000066400000000000000000000004131403031461700207310ustar00rootroot00000000000000// // rdata_bit.h - Bit-twiddling utility functions // int machine_is_little_endian(void); uint16_t byteswap2(uint16_t num); uint32_t byteswap4(uint32_t num); uint64_t byteswap8(uint64_t num); float byteswap_float(float num); double byteswap_double(double num); librdata-0~20210223+git-85757dc6/src/rdata_error.c000066400000000000000000000036751403031461700211310ustar00rootroot00000000000000 #include "rdata.h" const char *rdata_error_message(rdata_error_t error_code) { if (error_code == RDATA_OK) return NULL; if (error_code == RDATA_ERROR_OPEN) return "Unable to open file"; if (error_code 
== RDATA_ERROR_SEEK) return "Unable to seek within file"; if (error_code == RDATA_ERROR_READ) return "Unable to read from file"; if (error_code == RDATA_ERROR_MALLOC) return "Unable to allocate memory"; if (error_code == RDATA_ERROR_USER_ABORT) return "The parsing was aborted (callback returned non-zero value)"; if (error_code == RDATA_ERROR_PARSE) return "Invalid file, or file has unsupported features"; if (error_code == RDATA_ERROR_WRITE) return "Unable to write to file"; if (error_code == RDATA_ERROR_FACTOR) return "The provided column does not support factors"; if (error_code == RDATA_ERROR_UNSUPPORTED_COMPRESSION) return "The file is compressed using an unsupported compression scheme"; if (error_code == RDATA_ERROR_UNSUPPORTED_CHARSET) return "File has an unsupported character set"; if (error_code == RDATA_ERROR_CONVERT) return "Unable to convert string to the requested encoding"; if (error_code == RDATA_ERROR_CONVERT_BAD_STRING) return "Unable to convert string to the requested encoding (invalid byte sequence)"; if (error_code == RDATA_ERROR_CONVERT_SHORT_STRING) return "Unable to convert string to the requested encoding (incomplete byte sequence)"; if (error_code == RDATA_ERROR_CONVERT_LONG_STRING) return "Unable to convert string to the requested encoding (output buffer too small)"; if (error_code == RDATA_ERROR_UNSUPPORTED_S_EXPRESSION) return "The file contains an unrecognized object"; if (error_code == RDATA_ERROR_UNSUPPORTED_STORAGE_CLASS) return "The file contains an unrecognized object"; return "Unknown error"; } librdata-0~20210223+git-85757dc6/src/rdata_internal.h000066400000000000000000000053551403031461700216160ustar00rootroot00000000000000// // rdata_internal.h // #include "rdata_bits.h" #pragma pack(push, 1) typedef struct rdata_v2_header_s { char header[2]; uint32_t format_version; uint32_t writer_version; uint32_t reader_version; } rdata_v2_header_t; typedef struct rdata_sexptype_header_s { unsigned int type:8; unsigned int object:1; unsigned 
int attributes:1;
    unsigned int tag:1;
    unsigned int unused:1;
    unsigned int gp:16;
    unsigned int padding:4;
    /* The bit-fields above add up to 32 bits (8+1+1+1+1+16+4). The reader
     * fills this struct by memcpy() from a single 32-bit word, so the mapping
     * of bits to fields depends on the compiler's bit-field layout. */
} rdata_sexptype_header_t;

/* A decoded item header together with the extra words that may follow it. */
typedef struct rdata_sexptype_info_s {
    rdata_sexptype_header_t header;
    int32_t attributes;
    int32_t tag;
    int32_t ref; /* atom-table reference, consumed by atom_table_lookup() */
} rdata_sexptype_info_t;

#pragma pack(pop)

/* Type codes compared against the 8-bit `type` field of an item header. */
#define RDATA_SEXPTYPE_NIL 0
#define RDATA_SEXPTYPE_SYMBOL 1
#define RDATA_SEXPTYPE_PAIRLIST 2
#define RDATA_SEXPTYPE_CLOSURE 3
#define RDATA_SEXPTYPE_ENVIRONMENT 4
#define RDATA_SEXPTYPE_PROMISE 5
#define RDATA_SEXPTYPE_LANGUAGE_OBJECT 6
#define RDATA_SEXPTYPE_SPECIAL_FUNCTION 7
#define RDATA_SEXPTYPE_BUILTIN_FUNCTION 8
#define RDATA_SEXPTYPE_CHARACTER_STRING 9
#define RDATA_SEXPTYPE_LOGICAL_VECTOR 10
#define RDATA_SEXPTYPE_INTEGER_VECTOR 13
#define RDATA_SEXPTYPE_REAL_VECTOR 14
#define RDATA_SEXPTYPE_COMPLEX_VECTOR 15
#define RDATA_SEXPTYPE_CHARACTER_VECTOR 16
#define RDATA_SEXPTYPE_DOT_DOT_DOT 17
#define RDATA_SEXPTYPE_ANY 18
#define RDATA_SEXPTYPE_GENERIC_VECTOR 19
#define RDATA_SEXPTYPE_EXPRESSION_VECTOR 20
#define RDATA_SEXPTYPE_BYTE_CODE 21
#define RDATA_SEXPTYPE_EXTERNAL_POINTER 22
#define RDATA_SEXPTYPE_WEAK_REFERENCE 23
#define RDATA_SEXPTYPE_RAW_VECTOR 24
#define RDATA_SEXPTYPE_S4_CLASS 25
#define RDATA_SEXPTYPE_FUN 99

/* Pseudo type codes, counted down from 255; they share the `type` field with
 * the codes above. */
#define RDATA_PSEUDO_SXP_REF 255
#define RDATA_PSEUDO_SXP_NIL 254
#define RDATA_PSEUDO_SXP_GLOBAL_ENVIRONMENT 253
#define RDATA_PSEUDO_SXP_UNBOUND_VALUE 252
#define RDATA_PSEUDO_SXP_MISSING_ARGUMENT 251
#define RDATA_PSEUDO_SXP_BASE_NAMESPACE 250
#define RDATA_PSEUDO_SXP_NAMESPACE 249
#define RDATA_PSEUDO_SXP_PACKAGE 248
#define RDATA_PSEUDO_SXP_PERSIST 247
#define RDATA_PSEUDO_SXP_CLASS_REF 246
#define RDATA_PSEUDO_SXP_GENERIC_REF 245
#define RDATA_PSEUDO_SXP_BYTE_CODE_REP_DEF 244
#define RDATA_PSEUDO_SXP_BYTE_CODE_REP_REF 243
#define RDATA_PSEUDO_SXP_EMPTY_ENVIRONMENT 242
#define RDATA_PSEUDO_SXP_BASE_ENVIRONMENT 241
/* The two *_ATTR codes are rewritten by read_sexptype_header() to the plain
 * LANGUAGE_OBJECT / PAIRLIST code with header.attributes forced to 1. */
#define RDATA_SEXPTYPE_LANGUAGE_OBJECT_ATTR 240
#define RDATA_SEXPTYPE_PAIRLIST_ATTR 239
#define RDATA_PSEUDO_SXP_ALTREP 238
librdata-0~20210223+git-85757dc6/src/rdata_io_unistd.c000066400000000000000000000046321403031461700217670ustar00rootroot00000000000000 #include #include #include #include "rdata.h" #include "rdata_io_unistd.h" #if defined _WIN32 || defined __CYGWIN__ #define UNISTD_OPEN_OPTIONS O_RDONLY | O_BINARY #elif defined _AIX #define UNISTD_OPEN_OPTIONS O_RDONLY | O_LARGEFILE #else #define UNISTD_OPEN_OPTIONS O_RDONLY #endif #if defined _WIN32 || defined _AIX #define lseek lseek64 #endif int rdata_unistd_open_handler(const char *path, void *io_ctx) { int fd = open(path, UNISTD_OPEN_OPTIONS); ((rdata_unistd_io_ctx_t*) io_ctx)->fd = fd; return fd; } int rdata_unistd_close_handler(void *io_ctx) { int fd = ((rdata_unistd_io_ctx_t*) io_ctx)->fd; if (fd != -1) return close(fd); else return 0; } rdata_off_t rdata_unistd_seek_handler(rdata_off_t offset, rdata_io_flags_t whence, void *io_ctx) { int flag = 0; switch(whence) { case RDATA_SEEK_SET: flag = SEEK_SET; break; case RDATA_SEEK_CUR: flag = SEEK_CUR; break; case RDATA_SEEK_END: flag = SEEK_END; break; default: return -1; } int fd = ((rdata_unistd_io_ctx_t*) io_ctx)->fd; return lseek(fd, offset, flag); } ssize_t rdata_unistd_read_handler(void *buf, size_t nbyte, void *io_ctx) { int fd = ((rdata_unistd_io_ctx_t*) io_ctx)->fd; ssize_t out = read(fd, buf, nbyte); return out; } rdata_error_t rdata_unistd_update_handler(long file_size, rdata_progress_handler progress_handler, void *user_ctx, void *io_ctx) { if (!progress_handler) return RDATA_OK; int fd = ((rdata_unistd_io_ctx_t*) io_ctx)->fd; long current_offset = lseek(fd, 0, SEEK_CUR); if (current_offset == -1) return RDATA_ERROR_SEEK; if (progress_handler(1.0 * current_offset / file_size, user_ctx)) return RDATA_ERROR_USER_ABORT; return RDATA_OK; } void rdata_unistd_io_init(rdata_parser_t *parser) { rdata_set_open_handler(parser, rdata_unistd_open_handler); rdata_set_close_handler(parser, rdata_unistd_close_handler); rdata_set_seek_handler(parser, rdata_unistd_seek_handler); 
rdata_set_read_handler(parser, rdata_unistd_read_handler); rdata_set_update_handler(parser, rdata_unistd_update_handler); rdata_unistd_io_ctx_t *io_ctx = calloc(1, sizeof(rdata_unistd_io_ctx_t)); io_ctx->fd = -1; rdata_set_io_ctx(parser, (void*) io_ctx); } librdata-0~20210223+git-85757dc6/src/rdata_io_unistd.h000066400000000000000000000010531403031461700217660ustar00rootroot00000000000000 typedef struct rdata_unistd_io_ctx_s { int fd; } rdata_unistd_io_ctx_t; int rdata_unistd_open_handler(const char *path, void *io_ctx); int rdata_unistd_close_handler(void *io_ctx); rdata_off_t rdata_unistd_seek_handler(rdata_off_t offset, rdata_io_flags_t whence, void *io_ctx); ssize_t rdata_unistd_read_handler(void *buf, size_t nbytes, void *io_ctx); rdata_error_t rdata_unistd_update_handler(long file_size, rdata_progress_handler progress_handler, void *user_ctx, void *io_ctx); void rdata_unistd_io_init(rdata_parser_t *parser); librdata-0~20210223+git-85757dc6/src/rdata_parser.c000066400000000000000000000057751403031461700212770ustar00rootroot00000000000000 #include #include "rdata.h" #include "rdata_io_unistd.h" rdata_parser_t *rdata_parser_init() { rdata_parser_t *parser = calloc(1, sizeof(rdata_parser_t)); parser->io = calloc(1, sizeof(rdata_io_t)); rdata_unistd_io_init(parser); return parser; } void rdata_parser_free(rdata_parser_t *parser) { if (parser) { if (parser->io) free(parser->io); free(parser); } } rdata_error_t rdata_set_table_handler(rdata_parser_t *parser, rdata_table_handler table_handler) { parser->table_handler = table_handler; return RDATA_OK; } rdata_error_t rdata_set_column_handler(rdata_parser_t *parser, rdata_column_handler column_handler) { parser->column_handler = column_handler; return RDATA_OK; } rdata_error_t rdata_set_column_name_handler(rdata_parser_t *parser, rdata_column_name_handler column_name_handler) { parser->column_name_handler = column_name_handler; return RDATA_OK; } rdata_error_t rdata_set_row_name_handler(rdata_parser_t *parser, 
rdata_column_name_handler row_name_handler) {
    parser->row_name_handler = row_name_handler;
    return RDATA_OK;
}

/* Each setter below stores one callback on the parser and always returns
 * RDATA_OK. rdata_parse() copies the stored callbacks into its per-parse
 * context before reading the file. */

/* Callback for string values (passed to the string-vector readers). */
rdata_error_t rdata_set_text_value_handler(rdata_parser_t *parser, rdata_text_value_handler text_value_handler) {
    parser->text_value_handler = text_value_handler;
    return RDATA_OK;
}

/* Callback for the strings of a "levels" attribute. */
rdata_error_t rdata_set_value_label_handler(rdata_parser_t *parser, rdata_text_value_handler value_label_handler) {
    parser->value_label_handler = value_label_handler;
    return RDATA_OK;
}

/* Callback for the integer vector of a "dim" attribute. */
rdata_error_t rdata_set_dim_handler(rdata_parser_t *parser, rdata_column_handler dim_handler) {
    parser->dim_handler = dim_handler;
    return RDATA_OK;
}

/* Callback for dimension names. */
rdata_error_t rdata_set_dim_name_handler(rdata_parser_t *parser, rdata_text_value_handler dim_name_handler) {
    parser->dim_name_handler = dim_name_handler;
    return RDATA_OK;
}

/* Callback for error reporting. */
rdata_error_t rdata_set_error_handler(rdata_parser_t *parser, rdata_error_handler error_handler) {
    parser->error_handler = error_handler;
    return RDATA_OK;
}

/* The following setters replace individual entries of the parser's I/O
 * table (parser->io), which must already be allocated. */

rdata_error_t rdata_set_open_handler(rdata_parser_t *parser, rdata_open_handler open_handler) {
    parser->io->open = open_handler;
    return RDATA_OK;
}

rdata_error_t rdata_set_close_handler(rdata_parser_t *parser, rdata_close_handler close_handler) {
    parser->io->close = close_handler;
    return RDATA_OK;
}

rdata_error_t rdata_set_seek_handler(rdata_parser_t *parser, rdata_seek_handler seek_handler) {
    parser->io->seek = seek_handler;
    return RDATA_OK;
}

rdata_error_t rdata_set_read_handler(rdata_parser_t *parser, rdata_read_handler read_handler) {
    parser->io->read = read_handler;
    return RDATA_OK;
}

rdata_error_t rdata_set_update_handler(rdata_parser_t *parser, rdata_update_handler update_handler) {
    parser->io->update = update_handler;
    return RDATA_OK;
}

/* Install the opaque context that is handed to every I/O callback.
 * The previously installed context is freed unless it was flagged as
 * external; the new one is always flagged as external (caller-owned).
 *
 * NOTE(review): rdata_unistd_io_init() installs its own calloc'd context
 * through this function, so that context is flagged external as well and
 * rdata_parser_free() never releases it -- looks like a small leak per
 * parser; confirm and fix both places together. */
rdata_error_t rdata_set_io_ctx(rdata_parser_t *parser, void *io_ctx) {
    if (!parser->io->external_io)
        free(parser->io->io_ctx);

    parser->io->io_ctx = io_ctx;
    parser->io->external_io = 1;

    return RDATA_OK;
}
librdata-0~20210223+git-85757dc6/src/rdata_read.c000066400000000000000000001604501403031461700207060ustar00rootroot00000000000000 // // rdata_rdata.c // #include #include #include #include #include #include #include #include #include #include #if HAVE_BZIP2 #include #endif #if HAVE_APPLE_COMPRESSION #include #endif #if HAVE_ZLIB #include #endif #if HAVE_LZMA #include #endif #include "rdata.h" #include "rdata_internal.h" #define RDATA_CLASS_POSIXCT 0x01 #define RDATA_CLASS_DATE 0x02 #define STREAM_BUFFER_SIZE 65536 #define MAX_ARRAY_DIMENSIONS 3 /* ICONV_CONST defined by autotools during configure according * to the current platform. Some people copy-paste the source code, so * provide some fallback logic */ #ifndef ICONV_CONST #define ICONV_CONST #endif typedef struct rdata_atom_table_s { int count; char **data; } rdata_atom_table_t; typedef struct rdata_ctx_s { int machine_needs_byteswap; rdata_table_handler table_handler; rdata_column_handler column_handler; rdata_column_name_handler column_name_handler; rdata_column_name_handler row_name_handler; rdata_text_value_handler text_value_handler; rdata_text_value_handler value_label_handler; rdata_column_handler dim_handler; rdata_text_value_handler dim_name_handler; rdata_error_handler error_handler; void *user_ctx; #if HAVE_BZIP2 bz_stream *bz_strm; #endif #if HAVE_APPLE_COMPRESSION compression_stream *compression_strm; #endif #if HAVE_ZLIB z_stream *z_strm; #endif #if HAVE_LZMA lzma_stream *lzma_strm; #endif void *strm_buffer; rdata_io_t *io; size_t bytes_read; rdata_atom_table_t *atom_table; unsigned int column_class; iconv_t converter; int32_t dims[MAX_ARRAY_DIMENSIONS]; bool is_dimnames; } rdata_ctx_t; static int atom_table_add(rdata_atom_table_t *table, char *key); static char *atom_table_lookup(rdata_atom_table_t *table, int index); static rdata_error_t read_environment(const char *table_name, rdata_ctx_t *ctx); static rdata_error_t read_toplevel_object(const char *table_name, const char *key, rdata_ctx_t 
*ctx); static rdata_error_t read_sexptype_header(rdata_sexptype_info_t *header, rdata_ctx_t *ctx); static rdata_error_t read_length(int32_t *outLength, rdata_ctx_t *ctx); static rdata_error_t read_string_vector_n(int attributes, int32_t length, rdata_text_value_handler text_value_handler, void *callback_ctx, rdata_ctx_t *ctx); static rdata_error_t read_string_vector(int attributes, rdata_text_value_handler text_value_handler, void *callback_ctx, rdata_ctx_t *ctx); static rdata_error_t read_value_vector(rdata_sexptype_header_t header, const char *name, rdata_ctx_t *ctx); static rdata_error_t read_value_vector_cb(rdata_sexptype_header_t header, const char *name, rdata_column_handler column_handler, void *user_ctx, rdata_ctx_t *ctx); static rdata_error_t read_character_string(char **key, rdata_ctx_t *ctx); static rdata_error_t read_generic_list(int attributes, rdata_ctx_t *ctx); static rdata_error_t read_altrep_vector(const char *name, rdata_ctx_t *ctx); static rdata_error_t read_attributes(int (*handle_attribute)(char *key, rdata_sexptype_info_t val_info, rdata_ctx_t *ctx), rdata_ctx_t *ctx); static rdata_error_t recursive_discard(rdata_sexptype_header_t sexptype_header, rdata_ctx_t *ctx); static void *rdata_malloc(size_t len) { if (len == 0) return NULL; return malloc(len); } static void *rdata_realloc(void *buf, size_t len) { if (len == 0) return NULL; return realloc(buf, len); } static int atom_table_add(rdata_atom_table_t *table, char *key) { table->data = realloc(table->data, sizeof(char *) * (table->count + 1)); table->data[table->count++] = strdup(key); return table->count; } static char *atom_table_lookup(rdata_atom_table_t *table, int index) { if (index <= 0 || index > table->count) { return NULL; } return table->data[(index-1)]; } #if HAVE_BZIP2 static ssize_t read_st_bzip2(rdata_ctx_t *ctx, void *buffer, size_t len) { ssize_t bytes_written = 0; int error = 0; int result = BZ_OK; while (1) { ssize_t start_out = ctx->bz_strm->total_out_lo32 + 
((ssize_t)ctx->bz_strm->total_out_hi32 << 32LL); ctx->bz_strm->next_out = (char *)buffer + bytes_written; ctx->bz_strm->avail_out = len - bytes_written; result = BZ2_bzDecompress(ctx->bz_strm); if (result != BZ_OK && result != BZ_STREAM_END) { error = -1; break; } bytes_written += ctx->bz_strm->total_out_lo32 + ((ssize_t)ctx->bz_strm->total_out_hi32 << 32LL) - start_out; if (result == BZ_STREAM_END) break; if (ctx->bz_strm->avail_in == 0) { int bytes_read = 0; bytes_read = ctx->io->read(ctx->strm_buffer, STREAM_BUFFER_SIZE, ctx->io->io_ctx); if (bytes_read < 0) { error = bytes_read; break; } if (bytes_read == 0) break; ctx->bz_strm->next_in = ctx->strm_buffer; ctx->bz_strm->avail_in = bytes_read; } if (bytes_written == len) break; } if (error != 0) return error; return bytes_written; } #endif /* HAVE_BZIP2 */ #if HAVE_APPLE_COMPRESSION static ssize_t read_st_compression(rdata_ctx_t *ctx, void *buffer, size_t len) { ssize_t bytes_written = 0; int error = 0; compression_status result = COMPRESSION_STATUS_OK; size_t start_size = len; ctx->compression_strm->dst_ptr = (unsigned char *)buffer; ctx->compression_strm->dst_size = len; while (1) { start_size = ctx->compression_strm->dst_size; result = compression_stream_process(ctx->compression_strm, 0); if (result == COMPRESSION_STATUS_OK) { bytes_written += start_size - ctx->compression_strm->dst_size; } else { error = -1; break; } if (ctx->compression_strm->src_size == 0) { int bytes_read = 0; bytes_read = ctx->io->read(ctx->compression_strm, STREAM_BUFFER_SIZE, ctx->io->io_ctx); if (bytes_read < 0) { error = bytes_read; break; } if (bytes_read == 0) { start_size = ctx->compression_strm->dst_size; result = compression_stream_process(ctx->compression_strm, COMPRESSION_STREAM_FINALIZE); if (result == COMPRESSION_STATUS_END) { bytes_written += start_size - ctx->compression_strm->dst_size; } else { error = -1; } break; } ctx->compression_strm->src_ptr = ctx->strm_buffer; ctx->compression_strm->src_size = bytes_read; } if 
(bytes_written == len) break; } if (error != 0) return error; return bytes_written; } #endif /* HAVE_APPLE_COMPRESSION */ #if HAVE_ZLIB static ssize_t read_st_z(rdata_ctx_t *ctx, void *buffer, size_t len) { ssize_t bytes_written = 0; int error = 0; int result = Z_OK; while (1) { long start_out = ctx->z_strm->total_out; ctx->z_strm->next_out = (unsigned char *)buffer + bytes_written; ctx->z_strm->avail_out = len - bytes_written; result = inflate(ctx->z_strm, Z_SYNC_FLUSH); if (result != Z_OK && result != Z_STREAM_END) { error = -1; break; } bytes_written += ctx->z_strm->total_out - start_out; if (result == Z_STREAM_END) break; if (ctx->z_strm->avail_in == 0) { int bytes_read = 0; bytes_read = ctx->io->read(ctx->strm_buffer, STREAM_BUFFER_SIZE, ctx->io->io_ctx); if (bytes_read < 0) { error = bytes_read; break; } if (bytes_read == 0) break; ctx->z_strm->next_in = ctx->strm_buffer; ctx->z_strm->avail_in = bytes_read; } if (bytes_written == len) break; } if (error != 0) return error; return bytes_written; } #endif /* HAVE_ZLIB */ #if HAVE_LZMA static ssize_t read_st_lzma(rdata_ctx_t *ctx, void *buffer, size_t len) { ssize_t bytes_written = 0; int error = 0; int result = LZMA_OK; while (1) { long start_out = ctx->lzma_strm->total_out; ctx->lzma_strm->next_out = (unsigned char *)buffer + bytes_written; ctx->lzma_strm->avail_out = len - bytes_written; result = lzma_code(ctx->lzma_strm, LZMA_RUN); if (result != LZMA_OK && result != LZMA_STREAM_END) { error = -1; break; } bytes_written += ctx->lzma_strm->total_out - start_out; if (result == LZMA_STREAM_END) break; if (ctx->lzma_strm->avail_in == 0) { int bytes_read = 0; bytes_read = ctx->io->read(ctx->strm_buffer, STREAM_BUFFER_SIZE, ctx->io->io_ctx); if (bytes_read < 0) { error = bytes_read; break; } if (bytes_read == 0) break; ctx->lzma_strm->next_in = ctx->strm_buffer; ctx->lzma_strm->avail_in = bytes_read; } if (bytes_written == len) break; } if (error != 0) return error; return bytes_written; } #endif /* HAVE_LZMA */ 
static ssize_t read_st(rdata_ctx_t *ctx, void *buffer, size_t len) { ssize_t bytes_read = 0; if (len == 0) return 0; #if HAVE_BZIP2 if (ctx->bz_strm) { bytes_read = read_st_bzip2(ctx, buffer, len); } else #endif #if HAVE_APPLE_COMPRESSION if (ctx->compression_strm) { bytes_read = read_st_compression(ctx, buffer, len); } else #endif #if HAVE_ZLIB if (ctx->z_strm) { bytes_read = read_st_z(ctx, buffer, len); } else #endif #if HAVE_LZMA if (ctx->lzma_strm) { bytes_read = read_st_lzma(ctx, buffer, len); } else #endif { bytes_read = ctx->io->read(buffer, len, ctx->io->io_ctx); } if (bytes_read > 0) { ctx->bytes_read += bytes_read; } return bytes_read; } static int lseek_st(rdata_ctx_t *ctx, size_t len) { if (0 #if HAVE_BZIP2 || ctx->bz_strm #endif #if HAVE_APPLE_COMPRESSION || ctx->compression_strm #endif #if HAVE_ZLIB || ctx->z_strm #endif #if HAVE_LZMA || ctx->lzma_strm #endif ) { int retval = 0; char *buf = rdata_malloc(len); if (buf == NULL) { retval = -1; } else if (read_st(ctx, buf, len) != len) { retval = -1; } if (buf) free(buf); return retval; } return ctx->io->seek(len, SEEK_CUR, ctx->io->io_ctx); } static rdata_error_t init_bz_stream(rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; ctx->strm_buffer = malloc(STREAM_BUFFER_SIZE); int bytes_read = ctx->io->read(ctx->strm_buffer, STREAM_BUFFER_SIZE, ctx->io->io_ctx); if (bytes_read <= 0) { retval = RDATA_ERROR_READ; goto cleanup; } #if HAVE_BZIP2 ctx->bz_strm = calloc(1, sizeof(bz_stream)); ctx->bz_strm->next_in = ctx->strm_buffer; ctx->bz_strm->avail_in = bytes_read; if (BZ2_bzDecompressInit(ctx->bz_strm, 0, 0) != BZ_OK) { retval = RDATA_ERROR_MALLOC; goto cleanup; } #else retval = RDATA_ERROR_UNSUPPORTED_COMPRESSION; goto cleanup; #endif cleanup: return retval; } static rdata_error_t init_z_stream(rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; ctx->strm_buffer = malloc(STREAM_BUFFER_SIZE); int bytes_read = ctx->io->read(ctx->strm_buffer, STREAM_BUFFER_SIZE, ctx->io->io_ctx); if (bytes_read <= 0) { 
retval = RDATA_ERROR_READ; goto cleanup; } #if HAVE_ZLIB ctx->z_strm = calloc(1, sizeof(z_stream)); ctx->z_strm->next_in = ctx->strm_buffer; ctx->z_strm->avail_in = bytes_read; if (inflateInit2(ctx->z_strm, (15+32)) != Z_OK) { retval = RDATA_ERROR_MALLOC; goto cleanup; } #else retval = RDATA_ERROR_UNSUPPORTED_COMPRESSION; goto cleanup; #endif cleanup: return retval; } static rdata_error_t init_lzma_stream(rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; ctx->strm_buffer = malloc(STREAM_BUFFER_SIZE); int bytes_read = ctx->io->read(ctx->strm_buffer, STREAM_BUFFER_SIZE, ctx->io->io_ctx); if (bytes_read <= 0) { retval = RDATA_ERROR_READ; goto cleanup; } #if HAVE_APPLE_COMPRESSION ctx->compression_strm = calloc(1, sizeof(compression_stream)); if (compression_stream_init(ctx->compression_strm, COMPRESSION_STREAM_DECODE, COMPRESSION_LZMA) == COMPRESSION_STATUS_ERROR) { retval = RDATA_ERROR_MALLOC; goto cleanup; } ctx->compression_strm->src_ptr = ctx->strm_buffer; ctx->compression_strm->src_size = bytes_read; #elif HAVE_LZMA ctx->lzma_strm = calloc(1, sizeof(lzma_stream)); if (lzma_stream_decoder(ctx->lzma_strm, UINT64_MAX, 0) != LZMA_OK) { retval = RDATA_ERROR_MALLOC; goto cleanup; } ctx->lzma_strm->next_in = ctx->strm_buffer; ctx->lzma_strm->avail_in = bytes_read; #else retval = RDATA_ERROR_UNSUPPORTED_COMPRESSION; goto cleanup; #endif cleanup: return retval; } static rdata_error_t init_stream(rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; char header[5]; if (ctx->io->read(&header, sizeof(header), ctx->io->io_ctx) != sizeof(header)) { retval = RDATA_ERROR_READ; goto cleanup; } if (ctx->io->seek(0, SEEK_SET, ctx->io->io_ctx) == -1) { retval = RDATA_ERROR_SEEK; goto cleanup; } if (header[0] == 'B' && header[1] == 'Z' && header[2] == 'h' && header[3] >= '0' && header[3] <= '9') { return init_bz_stream(ctx); } if (header[0] == '\x1f' && header[1] == '\x8b') { return init_z_stream(ctx); } if (strncmp("\xFD" "7zXZ", header, sizeof(header)) == 0) { return 
init_lzma_stream(ctx); } cleanup: return retval; } static rdata_error_t reset_stream(rdata_ctx_t *ctx) { #if HAVE_BZIP2 if (ctx->bz_strm) { BZ2_bzDecompressEnd(ctx->bz_strm); free(ctx->bz_strm); ctx->bz_strm = NULL; } #endif #if HAVE_APPLE_COMPRESSION if (ctx->compression_strm) { compression_stream_destroy(ctx->compression_strm); free(ctx->compression_strm); ctx->compression_strm = NULL; } #endif #if HAVE_ZLIB if (ctx->z_strm) { inflateEnd(ctx->z_strm); free(ctx->z_strm); ctx->z_strm = NULL; } #endif #if HAVE_LZMA if (ctx->lzma_strm) { lzma_end(ctx->lzma_strm); free(ctx->lzma_strm); ctx->lzma_strm = NULL; } #endif if (ctx->io->seek(0, SEEK_SET, ctx->io->io_ctx) == -1) { return RDATA_ERROR_SEEK; } return init_stream(ctx); } static rdata_error_t rdata_convert(char *dst, size_t dst_len, const char *src, size_t src_len, iconv_t converter) { if (dst_len == 0) { return RDATA_ERROR_CONVERT_LONG_STRING; } else if (converter) { size_t dst_left = dst_len - 1; char *dst_end = dst; size_t status = iconv(converter, (ICONV_CONST char **)&src, &src_len, &dst_end, &dst_left); if (status == (size_t)-1) { if (errno == E2BIG) { return RDATA_ERROR_CONVERT_LONG_STRING; } else if (errno == EILSEQ) { return RDATA_ERROR_CONVERT_BAD_STRING; } else if (errno != EINVAL) { /* EINVAL indicates improper truncation; accept it */ return RDATA_ERROR_CONVERT; } } dst[dst_len - dst_left - 1] = '\0'; } else if (src_len + 1 > dst_len) { return RDATA_ERROR_CONVERT_LONG_STRING; } else { memcpy(dst, src, src_len); dst[src_len] = '\0'; } return RDATA_OK; } rdata_ctx_t *rdata_ctx_init(rdata_io_t *io, const char *filename) { int fd = io->open(filename, io->io_ctx); if (fd == -1) { return NULL; } rdata_ctx_t *ctx = calloc(1, sizeof(rdata_ctx_t)); rdata_atom_table_t *atom_table = malloc(sizeof(rdata_atom_table_t)); atom_table->count = 0; atom_table->data = NULL; ctx->atom_table = atom_table; ctx->machine_needs_byteswap = 0; if (machine_is_little_endian()) { ctx->machine_needs_byteswap = 1; } ctx->io = io; 
return ctx; } void free_rdata_ctx(rdata_ctx_t *ctx) { if (ctx->io) { ctx->io->close(ctx->io->io_ctx); } if (ctx->atom_table) { if (ctx->atom_table->data) { int i; for (i=0; iatom_table->count; i++) free(ctx->atom_table->data[i]); free(ctx->atom_table->data); } free(ctx->atom_table); } #if HAVE_BZIP2 if (ctx->bz_strm) { BZ2_bzDecompressEnd(ctx->bz_strm); free(ctx->bz_strm); } #endif #if HAVE_APPLE_COMPRESSION if (ctx->compression_strm) { compression_stream_destroy(ctx->compression_strm); free(ctx->compression_strm); } #endif #if HAVE_ZLIB if (ctx->z_strm) { inflateEnd(ctx->z_strm); free(ctx->z_strm); } #endif #if HAVE_LZMA if (ctx->lzma_strm) { lzma_end(ctx->lzma_strm); free(ctx->lzma_strm); } #endif if (ctx->strm_buffer) { free(ctx->strm_buffer); } if (ctx->converter) { iconv_close(ctx->converter); } free(ctx); } rdata_error_t rdata_parse(rdata_parser_t *parser, const char *filename, void *user_ctx) { int is_rdata = 0; rdata_error_t retval = RDATA_OK; rdata_v2_header_t v2_header; rdata_ctx_t *ctx = rdata_ctx_init(parser->io, filename); char *encoding = NULL; if (ctx == NULL) { retval = RDATA_ERROR_OPEN; goto cleanup; } ctx->user_ctx = user_ctx; ctx->table_handler = parser->table_handler; ctx->column_handler = parser->column_handler; ctx->column_name_handler = parser->column_name_handler; ctx->row_name_handler = parser->row_name_handler; ctx->text_value_handler = parser->text_value_handler; ctx->value_label_handler = parser->value_label_handler; ctx->dim_handler = parser->dim_handler; ctx->dim_name_handler = parser->dim_name_handler; ctx->error_handler = parser->error_handler; ctx->is_dimnames = false; if ((retval = init_stream(ctx)) != RDATA_OK) { goto cleanup; } char header_line[5]; if (read_st(ctx, &header_line, sizeof(header_line)) != sizeof(header_line)) { retval = RDATA_ERROR_READ; goto cleanup; } if (memcmp("RDX", header_line, 3) == 0 && header_line[4] == '\n') { is_rdata = 1; } else { reset_stream(ctx); } if (read_st(ctx, &v2_header, sizeof(v2_header)) != 
sizeof(v2_header)) {
        retval = RDATA_ERROR_READ;
        goto cleanup;
    }

    /* The three version words are byte-swapped on hosts that need it. */
    if (ctx->machine_needs_byteswap) {
        v2_header.format_version = byteswap4(v2_header.format_version);
        v2_header.writer_version = byteswap4(v2_header.writer_version);
        v2_header.reader_version = byteswap4(v2_header.reader_version);
    }

    /* For files with an "RDX?\n" prefix, the digit in the prefix must agree
     * with the format version in the header. */
    if (is_rdata && v2_header.format_version != header_line[3] - '0') {
        retval = RDATA_ERROR_PARSE;
        goto cleanup;
    }

    /* Format version 3 carries the name of the native encoding; a converter
     * to UTF-8 is opened unless the data is UTF-8 already. */
    if (v2_header.format_version == 3) {
        retval = read_character_string(&encoding, ctx);
        if (retval != RDATA_OK)
            goto cleanup;

        if (strcmp("UTF-8", encoding) != 0) {
            if ((ctx->converter = iconv_open("UTF-8", encoding)) == (iconv_t)-1) {
                /* reset so that cleanup does not iconv_close() an invalid handle */
                ctx->converter = NULL;
                retval = RDATA_ERROR_UNSUPPORTED_CHARSET;
                goto cleanup;
            }
        }
    }

    /* Files with the RDX prefix hold a list of named objects; anything else
     * is read as one unnamed object. */
    if (is_rdata) {
        retval = read_environment(NULL, ctx);
    } else {
        retval = read_toplevel_object(NULL, NULL, ctx);
    }
    if (retval != RDATA_OK)
        goto cleanup;

    /* Trailing bytes after the last object are treated as a parse error. */
    char test;

    if (read_st(ctx, &test, 1) == 1) {
        retval = RDATA_ERROR_PARSE;
        goto cleanup;
    }

cleanup:
    if (encoding)
        free(encoding);

    if (ctx) {
        free_rdata_ctx(ctx);
    }

    return retval;
}

/* Read one object and dispatch on its type.
 *
 * `key` is the object's name (NULL for an unnamed object). When `table_name`
 * is NULL the object is announced through the table handler before its
 * contents are read; a non-zero return from that handler aborts the parse.
 * Types not handled here are skipped with recursive_discard(). */
static rdata_error_t read_toplevel_object(const char *table_name, const char *key, rdata_ctx_t *ctx) {
    rdata_sexptype_info_t sexptype_info;
    rdata_error_t retval = RDATA_OK;

    if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK)
        goto cleanup;

    if (sexptype_info.header.type == RDATA_SEXPTYPE_REAL_VECTOR ||
            sexptype_info.header.type == RDATA_SEXPTYPE_INTEGER_VECTOR ||
            sexptype_info.header.type == RDATA_SEXPTYPE_LOGICAL_VECTOR) {
        /* numeric / logical vector */
        if (table_name == NULL && ctx->table_handler) {
            if (ctx->table_handler(key, ctx->user_ctx)) {
                retval = RDATA_ERROR_USER_ABORT;
                goto cleanup;
            }
        }

        if ((retval = read_value_vector(sexptype_info.header, key, ctx)) != RDATA_OK)
            goto cleanup;
    } else if (sexptype_info.header.type == RDATA_SEXPTYPE_CHARACTER_VECTOR) {
        /* string vector: announce the column first, then stream the values
         * through the text value handler */
        if (table_name == NULL && ctx->table_handler) {
            if (ctx->table_handler(key, ctx->user_ctx)) {
                retval = RDATA_ERROR_USER_ABORT;
                goto cleanup;
            }
        }
        int32_t length;

        if ((retval = read_length(&length, ctx)) != RDATA_OK)
            goto cleanup;

        if (ctx->column_handler) {
            if (ctx->column_handler(key, RDATA_TYPE_STRING, NULL, length, ctx->user_ctx)) {
                retval = RDATA_ERROR_USER_ABORT;
                goto cleanup;
            }
        }

        if ((retval = read_string_vector_n(sexptype_info.header.attributes, length,
                        ctx->text_value_handler, ctx->user_ctx, ctx)) != RDATA_OK)
            goto cleanup;
    } else if (sexptype_info.header.type == RDATA_PSEUDO_SXP_ALTREP) {
        /* ALTREP-encoded vector */
        if (table_name == NULL && ctx->table_handler) {
            if (ctx->table_handler(key, ctx->user_ctx)) {
                retval = RDATA_ERROR_USER_ABORT;
                goto cleanup;
            }
        }
        if ((retval = read_altrep_vector(key, ctx)) != RDATA_OK)
            goto cleanup;
    } else if (sexptype_info.header.type == RDATA_SEXPTYPE_GENERIC_VECTOR &&
            sexptype_info.header.object && sexptype_info.header.attributes) {
        /* generic vector flagged as an object with attributes: read as a
         * table, unless we are already inside one, in which case skip it */
        if (table_name != NULL) {
            retval = recursive_discard(sexptype_info.header, ctx);
        } else {
            if (ctx->table_handler) {
                if (ctx->table_handler(key, ctx->user_ctx)) {
                    retval = RDATA_ERROR_USER_ABORT;
                    goto cleanup;
                }
            }
            retval = read_generic_list(sexptype_info.header.attributes, ctx);
        }
        if (retval != RDATA_OK)
            goto cleanup;
    } else {
        if ((retval = recursive_discard(sexptype_info.header, ctx)) != RDATA_OK)
            goto cleanup;
    }

cleanup:

    return retval;
}

/* Read a sequence of named objects until the NIL pseudo-type is reached.
 * Each pairlist header supplies the atom-table reference of the object's
 * name; items that are not pairlists are discarded. The name pointer is
 * owned by the atom table and must not be freed here. */
static rdata_error_t read_environment(const char *table_name, rdata_ctx_t *ctx) {
    rdata_error_t retval = RDATA_OK;
    char *key = NULL;

    while (1) {
        rdata_sexptype_info_t sexptype_info;

        if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK)
            goto cleanup;

        if (sexptype_info.header.type == RDATA_PSEUDO_SXP_NIL)
            break;

        if (sexptype_info.header.type != RDATA_SEXPTYPE_PAIRLIST) {
            if ((retval = recursive_discard(sexptype_info.header, ctx)) != RDATA_OK)
                goto cleanup;
            continue;
        }

        /* an unresolved name reference means the file is malformed */
        if ((key = atom_table_lookup(ctx->atom_table, sexptype_info.ref)) == NULL) {
            retval = RDATA_ERROR_PARSE;
            goto cleanup;
        }

        if ((retval = read_toplevel_object(table_name, key, ctx)) != RDATA_OK)
            goto cleanup;
    }

cleanup:

    return retval;
}

static rdata_error_t
read_sexptype_header(rdata_sexptype_info_t *header_info, rdata_ctx_t *ctx) { uint32_t sexptype; rdata_sexptype_header_t header; rdata_error_t retval = RDATA_OK; if (read_st(ctx, &sexptype, sizeof(sexptype)) != sizeof(sexptype)) { retval = RDATA_ERROR_READ; goto cleanup; } if (ctx->machine_needs_byteswap) sexptype = byteswap4(sexptype); memcpy(&header, &sexptype, sizeof(sexptype)); uint32_t attributes = 0, tag = 0, ref = 0; if (header.type == RDATA_SEXPTYPE_PAIRLIST_ATTR) { header.attributes = 1; header.type = RDATA_SEXPTYPE_PAIRLIST; } if (header.type == RDATA_SEXPTYPE_LANGUAGE_OBJECT_ATTR) { header.attributes = 1; header.type = RDATA_SEXPTYPE_LANGUAGE_OBJECT; } if (header.type == RDATA_SEXPTYPE_PAIRLIST) { if (header.attributes) { if (read_st(ctx, &attributes, sizeof(attributes)) != sizeof(attributes)) { retval = RDATA_ERROR_READ; goto cleanup; } if (ctx->machine_needs_byteswap) header_info->attributes = byteswap4(header_info->attributes); } if (header.tag) { if (read_st(ctx, &tag, sizeof(tag)) != sizeof(tag)) { retval = RDATA_ERROR_READ; goto cleanup; } if (ctx->machine_needs_byteswap) tag = byteswap4(tag); } if (tag == 1) { rdata_sexptype_info_t key_info; if ((retval = read_sexptype_header(&key_info, ctx)) != RDATA_OK) goto cleanup; if (key_info.header.type != RDATA_SEXPTYPE_CHARACTER_STRING) { retval = RDATA_ERROR_PARSE; goto cleanup; } char *key = NULL; if ((retval = read_character_string(&key, ctx)) != RDATA_OK) goto cleanup; ref = atom_table_add(ctx->atom_table, key); free(key); } else if ((tag & 0xFF) == RDATA_PSEUDO_SXP_REF) { ref = (tag >> 8); } } if (header.type == RDATA_PSEUDO_SXP_REF) { ref = (sexptype >> 8); } header_info->header = header; header_info->attributes = attributes; header_info->tag = tag; header_info->ref = ref; cleanup: return retval; } static int handle_class_name(const char *buf, int i, void *ctx) { unsigned int *column_class = (unsigned int *)ctx; if (buf) { if (strcmp(buf, "POSIXct") == 0) { *column_class |= RDATA_CLASS_POSIXCT; } if 
(strcmp(buf, "Date") == 0) { *column_class |= RDATA_CLASS_DATE; } } return RDATA_OK; } static int handle_vector_attribute(char *key, rdata_sexptype_info_t val_info, rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; if (strcmp(key, "levels") == 0) { retval = read_string_vector(val_info.header.attributes, ctx->value_label_handler, ctx->user_ctx, ctx); } else if (strcmp(key, "class") == 0) { ctx->column_class = 0; retval = read_string_vector(val_info.header.attributes, &handle_class_name, &ctx->column_class, ctx); } else if (strcmp(key, "dim") == 0) { if (val_info.header.type == RDATA_SEXPTYPE_INTEGER_VECTOR) { int32_t length; if ((retval = read_length(&length, ctx)) != RDATA_OK) goto cleanup; if (length <= sizeof(ctx->dims)/sizeof(ctx->dims[0])) { int buf_len = length * sizeof(int32_t); if (read_st(ctx, ctx->dims, buf_len) != buf_len) { retval = RDATA_ERROR_READ; goto cleanup; } if (ctx->machine_needs_byteswap) { int i; for (i=0; idims[i] = byteswap4(ctx->dims[i]); } } if (ctx->dim_handler) { if (ctx->dim_handler(key, RDATA_TYPE_INT32, ctx->dims, length, ctx->user_ctx)) { retval = RDATA_ERROR_USER_ABORT; } } } } } else if (strcmp(key, "dimnames") == 0) { ctx->is_dimnames = true; retval = read_generic_list(val_info.header.attributes, ctx); } else { retval = recursive_discard(val_info.header, ctx); } cleanup: return retval; } static rdata_error_t read_character_string(char **key, rdata_ctx_t *ctx) { uint32_t length; char *string = NULL; char *utf8_string = NULL; rdata_error_t retval = RDATA_OK; if (read_st(ctx, &length, sizeof(length)) != sizeof(length)) { retval = RDATA_ERROR_READ; goto cleanup; } if (ctx->machine_needs_byteswap) length = byteswap4(length); if (length == -1 || length == 0) { *key = strdup(""); return RDATA_OK; } if (length < 0) { return RDATA_ERROR_PARSE; } if ((string = rdata_malloc(length)) == NULL) { retval = RDATA_ERROR_MALLOC; goto cleanup; } if (read_st(ctx, string, length) != length) { retval = RDATA_ERROR_READ; goto cleanup; } if 
((utf8_string = rdata_malloc(4*length+1)) == NULL) { retval = RDATA_ERROR_MALLOC; goto cleanup; } retval = rdata_convert(utf8_string, 4*length+1, string, length, ctx->converter); if (retval != RDATA_OK) goto cleanup; cleanup: if (string) free(string); if (retval == RDATA_OK) { *key = utf8_string; } else if (utf8_string) { free(utf8_string); } return retval; } static int handle_data_frame_attribute(char *key, rdata_sexptype_info_t val_info, rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; if (strcmp(key, "names") == 0 && val_info.header.type == RDATA_SEXPTYPE_CHARACTER_VECTOR) { retval = read_string_vector(val_info.header.attributes, ctx->column_name_handler, ctx->user_ctx, ctx); } else if (strcmp(key, "row.names") == 0 && val_info.header.type == RDATA_SEXPTYPE_CHARACTER_VECTOR) { retval = read_string_vector(val_info.header.attributes, ctx->row_name_handler, ctx->user_ctx, ctx); } else if (strcmp(key, "label.table") == 0) { retval = recursive_discard(val_info.header, ctx); } else { retval = recursive_discard(val_info.header, ctx); } return retval; } static rdata_error_t read_attributes(int (*handle_attribute)(char *key, rdata_sexptype_info_t val_info, rdata_ctx_t *ctx), rdata_ctx_t *ctx) { rdata_sexptype_info_t pairlist_info, val_info; rdata_error_t retval = RDATA_OK; char *key = NULL; retval = read_sexptype_header(&pairlist_info, ctx); if (retval != RDATA_OK) goto cleanup; while (pairlist_info.header.type == RDATA_SEXPTYPE_PAIRLIST) { /* value */ if ((retval = read_sexptype_header(&val_info, ctx)) != RDATA_OK) goto cleanup; if (handle_attribute) { if ((key = atom_table_lookup(ctx->atom_table, pairlist_info.ref)) == NULL) { retval = RDATA_ERROR_PARSE; goto cleanup; } if ((retval = handle_attribute(key, val_info, ctx)) != RDATA_OK) goto cleanup; } else { if ((retval = recursive_discard(val_info.header, ctx)) != RDATA_OK) goto cleanup; } /* next */ if ((retval = read_sexptype_header(&pairlist_info, ctx)) != RDATA_OK) goto cleanup; } cleanup: return retval; } 
/*
 * Read an ALTREP "wrap_real" payload: a pairlist node, the wrapped value
 * vector (reported under `name`), an alternate representation that is
 * discarded, and a terminating NIL.
 */
static rdata_error_t read_wrap_real(const char *name, rdata_ctx_t *ctx) {
    rdata_sexptype_info_t info;
    rdata_error_t status;

    /* pairlist */
    status = read_sexptype_header(&info, ctx);
    if (status != RDATA_OK)
        return status;
    if (info.header.type != RDATA_SEXPTYPE_PAIRLIST)
        return RDATA_ERROR_PARSE;

    /* representation */
    status = read_sexptype_header(&info, ctx);
    if (status != RDATA_OK)
        return status;
    status = read_value_vector(info.header, name, ctx);
    if (status != RDATA_OK)
        return status;

    /* alt representation */
    status = read_sexptype_header(&info, ctx);
    if (status != RDATA_OK)
        return status;
    status = recursive_discard(info.header, ctx);
    if (status != RDATA_OK)
        return status;

    /* nil */
    status = read_sexptype_header(&info, ctx);
    if (status != RDATA_OK)
        return status;
    if (info.header.type != RDATA_PSEUDO_SXP_NIL)
        return RDATA_ERROR_PARSE;

    return status;
}

static rdata_error_t read_compact_intseq(const char *name, rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; rdata_sexptype_info_t sexptype_info; if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK) goto cleanup; int32_t length; if ((retval = read_length(&length, ctx)) != RDATA_OK) goto cleanup; if (length != 3) { retval = RDATA_ERROR_PARSE; goto cleanup; } double vals[3]; if (read_st(ctx, vals, sizeof(vals)) != sizeof(vals)) { retval = RDATA_ERROR_READ; goto cleanup; } if (ctx->machine_needs_byteswap) { vals[0] = byteswap_double(vals[0]); vals[1] = byteswap_double(vals[1]); vals[2] = byteswap_double(vals[2]); } if (sexptype_info.header.attributes) { if ((retval = read_attributes(&handle_vector_attribute, ctx)) != RDATA_OK) goto cleanup; } if (ctx->column_handler) { int32_t *integers = rdata_malloc(vals[0] * sizeof(int32_t)); int32_t val = vals[1]; for (int i=0; icolumn_handler(name, RDATA_TYPE_INT32, integers, vals[0], ctx->user_ctx); free(integers); if (cb_retval) { retval = RDATA_ERROR_USER_ABORT;
goto cleanup; } } /* nil */ if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK) goto cleanup; if (sexptype_info.header.type != RDATA_PSEUDO_SXP_NIL) { retval = RDATA_ERROR_PARSE; goto cleanup; } cleanup: return retval; } static int deferred_string_handler(const char *name, enum rdata_type_e type, void *vals, long length, void *user_ctx) { rdata_ctx_t *ctx = (rdata_ctx_t *)user_ctx; if (ctx->column_handler) ctx->column_handler(name, RDATA_TYPE_STRING, NULL, length, ctx->user_ctx); if (ctx->text_value_handler) { for (int i=0; itext_value_handler(buf, i, ctx->user_ctx); } } return 0; } static rdata_error_t read_deferred_string(const char *name, rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; rdata_sexptype_info_t sexptype_info; /* pairlist */ if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK) goto cleanup; if (sexptype_info.header.type != RDATA_SEXPTYPE_PAIRLIST) { retval = RDATA_ERROR_PARSE; goto cleanup; } /* representation */ if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK) goto cleanup; if ((retval = read_value_vector_cb(sexptype_info.header, name, &deferred_string_handler, ctx, ctx)) != RDATA_OK) goto cleanup; /* alt representation */ if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK) goto cleanup; if ((retval = recursive_discard(sexptype_info.header, ctx)) != RDATA_OK) goto cleanup; /* nil */ if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK) goto cleanup; if (sexptype_info.header.type != RDATA_PSEUDO_SXP_NIL) { retval = RDATA_ERROR_PARSE; goto cleanup; } cleanup: return retval; } static rdata_error_t read_altrep_vector(const char *name, rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; rdata_sexptype_info_t sexptype_info; /* pairlist */ if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK) goto cleanup; if (sexptype_info.header.type != RDATA_SEXPTYPE_PAIRLIST) { retval = RDATA_ERROR_PARSE; goto cleanup; } /* class name */ char *class = NULL; 
if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK) goto cleanup; if (sexptype_info.header.type == RDATA_SEXPTYPE_SYMBOL) { if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK) goto cleanup; if (sexptype_info.header.type != RDATA_SEXPTYPE_CHARACTER_STRING) { retval = RDATA_ERROR_PARSE; goto cleanup; } if ((retval = read_character_string(&class, ctx)) != RDATA_OK) goto cleanup; atom_table_add(ctx->atom_table, class); } else if (sexptype_info.header.type == RDATA_PSEUDO_SXP_REF) { if ((class = atom_table_lookup(ctx->atom_table, sexptype_info.ref)) == NULL) { retval = RDATA_ERROR_PARSE; goto cleanup; } } else { retval = RDATA_ERROR_PARSE; goto cleanup; } /* package and class ID */ if ((retval = read_sexptype_header(&sexptype_info, ctx)) != RDATA_OK) goto cleanup; if (sexptype_info.header.type != RDATA_SEXPTYPE_PAIRLIST) { retval = RDATA_ERROR_PARSE; goto cleanup; } if ((retval = recursive_discard(sexptype_info.header, ctx)) != RDATA_OK) goto cleanup; if (strcmp(class, "wrap_real") == 0) { if ((retval = read_wrap_real(name, ctx)) != RDATA_OK) goto cleanup; } else if (strcmp(class, "compact_intseq") == 0) { if ((retval = read_compact_intseq(name, ctx)) != RDATA_OK) goto cleanup; } else if (strcmp(class, "deferred_string") == 0) { if ((retval = read_deferred_string(name, ctx)) != RDATA_OK) goto cleanup; } else { if (ctx->error_handler) { char error_buf[1024]; snprintf(error_buf, sizeof(error_buf), "Unrecognized ALTREP class: %s\n", class); ctx->error_handler(error_buf, ctx->user_ctx); } retval = RDATA_ERROR_UNSUPPORTED_STORAGE_CLASS; } cleanup: return retval; } static rdata_error_t read_generic_list(int attributes, rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; int32_t length; int i; rdata_sexptype_info_t sexptype_info; if ((retval = read_length(&length, ctx)) != RDATA_OK) goto cleanup; for (i=0; iis_dimnames) { retval = read_string_vector_n(sexptype_info.header.attributes, vec_length, ctx->dim_name_handler, ctx->user_ctx, ctx); } 
else { if (ctx->column_handler) { if (ctx->column_handler(NULL, RDATA_TYPE_STRING, NULL, vec_length, ctx->user_ctx)) { retval = RDATA_ERROR_USER_ABORT; goto cleanup; } } retval = read_string_vector_n(sexptype_info.header.attributes, vec_length, ctx->text_value_handler, ctx->user_ctx, ctx); } } else if (sexptype_info.header.type == RDATA_PSEUDO_SXP_ALTREP) { retval = read_altrep_vector(NULL, ctx); } else if (sexptype_info.header.type == RDATA_PSEUDO_SXP_NIL) { if (ctx->is_dimnames && ctx->dim_name_handler && i < sizeof(ctx->dims)/sizeof(ctx->dims[0])) { int j; for (j=0; jdims[i]; j++) { ctx->dim_name_handler(NULL, j, ctx->user_ctx); } } } else { retval = read_value_vector(sexptype_info.header, NULL, ctx); } if (retval != RDATA_OK) goto cleanup; } if (attributes) { if ((retval = read_attributes(&handle_data_frame_attribute, ctx)) != RDATA_OK) goto cleanup; } cleanup: if (ctx->is_dimnames) ctx->is_dimnames = false; return retval; } static rdata_error_t read_length(int32_t *outLength, rdata_ctx_t *ctx) { int32_t length; rdata_error_t retval = RDATA_OK; if (read_st(ctx, &length, sizeof(length)) != sizeof(length)) { retval = RDATA_ERROR_READ; goto cleanup; } if (ctx->machine_needs_byteswap) length = byteswap4(length); if (outLength) *outLength = length; cleanup: return retval; } static rdata_error_t read_string_vector_n(int attributes, int32_t length, rdata_text_value_handler text_value_handler, void *callback_ctx, rdata_ctx_t *ctx) { int32_t string_length; rdata_error_t retval = RDATA_OK; rdata_sexptype_info_t info; size_t buffer_size = 4096; char *buffer = NULL; size_t utf8_buffer_size = 16384; char *utf8_buffer = NULL; int i; buffer = rdata_malloc(buffer_size); if (ctx->converter) utf8_buffer = rdata_malloc(utf8_buffer_size); for (i=0; i buffer_size) { buffer_size = string_length + 1; if ((buffer = rdata_realloc(buffer, buffer_size)) == NULL) { retval = RDATA_ERROR_MALLOC; goto cleanup; } } if (string_length >= 0) { if (read_st(ctx, buffer, string_length) != 
string_length) { retval = RDATA_ERROR_READ; goto cleanup; } buffer[string_length] = '\0'; } if (text_value_handler) { int cb_retval = 0; if (string_length < 0) { cb_retval = text_value_handler(NULL, i, callback_ctx); } else if (!ctx->converter) { cb_retval = text_value_handler(buffer, i, callback_ctx); } else { if (4*string_length + 1 > utf8_buffer_size) { utf8_buffer_size = 4*string_length + 1; if ((utf8_buffer = rdata_realloc(utf8_buffer, utf8_buffer_size)) == NULL) { retval = RDATA_ERROR_MALLOC; goto cleanup; } } retval = rdata_convert(utf8_buffer, utf8_buffer_size, buffer, string_length, ctx->converter); if (retval != RDATA_OK) goto cleanup; cb_retval = text_value_handler(utf8_buffer, i, callback_ctx); } if (cb_retval) { retval = RDATA_ERROR_USER_ABORT; goto cleanup; } } } if (attributes) { if ((retval = read_attributes(&handle_vector_attribute, ctx)) != RDATA_OK) goto cleanup; } cleanup: if (buffer) free(buffer); if (utf8_buffer) free(utf8_buffer); return retval; } static rdata_error_t read_string_vector(int attributes, rdata_text_value_handler text_value_handler, void *callback_ctx, rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; int32_t length; if ((retval = read_length(&length, ctx)) != RDATA_OK) return retval; return read_string_vector_n(attributes, length, text_value_handler, callback_ctx, ctx); } static rdata_error_t read_value_vector_cb(rdata_sexptype_header_t header, const char *name, rdata_column_handler column_handler, void *user_ctx, rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; int32_t length; size_t input_elem_size = 0; void *vals = NULL; size_t buf_len = 0; enum rdata_type_e output_data_type; int i; switch (header.type) { case RDATA_SEXPTYPE_REAL_VECTOR: input_elem_size = sizeof(double); output_data_type = RDATA_TYPE_REAL; break; case RDATA_SEXPTYPE_INTEGER_VECTOR: input_elem_size = sizeof(int32_t); output_data_type = RDATA_TYPE_INT32; break; case RDATA_SEXPTYPE_LOGICAL_VECTOR: input_elem_size = sizeof(int32_t); output_data_type = 
RDATA_TYPE_LOGICAL; break; default: retval = RDATA_ERROR_PARSE; break; } if (retval != RDATA_OK) goto cleanup; if ((retval = read_length(&length, ctx)) != RDATA_OK) goto cleanup; buf_len = length * input_elem_size; if (buf_len) { vals = rdata_malloc(buf_len); if (vals == NULL) { retval = RDATA_ERROR_MALLOC; goto cleanup; } if (read_st(ctx, vals, buf_len) != buf_len) { retval = RDATA_ERROR_READ; goto cleanup; } if (ctx->machine_needs_byteswap) { if (input_elem_size == sizeof(double)) { double *d_vals = (double *)vals; for (i=0; icolumn_class = 0; if (header.attributes) { if ((retval = read_attributes(&handle_vector_attribute, ctx)) != RDATA_OK) goto cleanup; } if (ctx->column_class == RDATA_CLASS_POSIXCT) output_data_type = RDATA_TYPE_TIMESTAMP; if (ctx->column_class == RDATA_CLASS_DATE) output_data_type = RDATA_TYPE_DATE; if (column_handler) { if (column_handler(name, output_data_type, vals, length, user_ctx)) { retval = RDATA_ERROR_USER_ABORT; goto cleanup; } } cleanup: if (vals) free(vals); return retval; } static rdata_error_t read_value_vector(rdata_sexptype_header_t header, const char *name, rdata_ctx_t *ctx) { return read_value_vector_cb(header, name, ctx->column_handler, ctx->user_ctx, ctx); } static rdata_error_t discard_vector(rdata_sexptype_header_t sexptype_header, size_t element_size, rdata_ctx_t *ctx) { int32_t length; rdata_error_t retval = RDATA_OK; if ((retval = read_length(&length, ctx)) != RDATA_OK) goto cleanup; if (length > 0) { if (lseek_st(ctx, length * element_size) == -1) { return RDATA_ERROR_SEEK; } } else if (ctx->error_handler) { char error_buf[1024]; snprintf(error_buf, sizeof(error_buf), "Vector with non-positive length: %d\n", length); ctx->error_handler(error_buf, ctx->user_ctx); } if (sexptype_header.attributes) { rdata_sexptype_info_t temp_info; if ((retval = read_sexptype_header(&temp_info, ctx)) != RDATA_OK) goto cleanup; retval = recursive_discard(temp_info.header, ctx); } cleanup: return retval; } static rdata_error_t 
discard_character_string(int add_to_table, rdata_ctx_t *ctx) { rdata_error_t retval = RDATA_OK; char *key = NULL; if ((retval = read_character_string(&key, ctx)) != RDATA_OK) goto cleanup; if (strlen(key) > 0 && add_to_table) { atom_table_add(ctx->atom_table, key); } free(key); cleanup: return retval; } static rdata_error_t discard_pairlist(rdata_sexptype_header_t sexptype_header, rdata_ctx_t *ctx) { rdata_sexptype_info_t temp_info; rdata_error_t error = 0; while (1) { switch (sexptype_header.type) { case RDATA_SEXPTYPE_PAIRLIST: /* value */ if ((error = read_sexptype_header(&temp_info, ctx)) != RDATA_OK) return error; if ((error = recursive_discard(temp_info.header, ctx)) != RDATA_OK) return error; /* tail */ if ((error = read_sexptype_header(&temp_info, ctx)) != RDATA_OK) return error; sexptype_header = temp_info.header; break; case RDATA_PSEUDO_SXP_NIL: goto done; default: return RDATA_ERROR_PARSE; } } done: return 0; } static rdata_error_t recursive_discard(rdata_sexptype_header_t sexptype_header, rdata_ctx_t *ctx) { uint32_t length; rdata_sexptype_info_t info; rdata_sexptype_info_t prot, tag; rdata_error_t error = 0; int i; switch (sexptype_header.type) { case RDATA_SEXPTYPE_SYMBOL: if ((error = read_sexptype_header(&info, ctx)) != RDATA_OK) goto cleanup; if ((error = recursive_discard(info.header, ctx)) != RDATA_OK) goto cleanup; break; case RDATA_PSEUDO_SXP_PERSIST: case RDATA_PSEUDO_SXP_NAMESPACE: case RDATA_PSEUDO_SXP_PACKAGE: if ((error = read_sexptype_header(&info, ctx)) != RDATA_OK) goto cleanup; if ((error = recursive_discard(info.header, ctx)) != RDATA_OK) goto cleanup; break; case RDATA_SEXPTYPE_BUILTIN_FUNCTION: case RDATA_SEXPTYPE_SPECIAL_FUNCTION: error = discard_character_string(0, ctx); break; case RDATA_SEXPTYPE_PAIRLIST: error = discard_pairlist(sexptype_header, ctx); break; case RDATA_SEXPTYPE_CHARACTER_STRING: error = discard_character_string(1, ctx); break; case RDATA_SEXPTYPE_RAW_VECTOR: error = discard_vector(sexptype_header, 1, ctx); 
break; case RDATA_SEXPTYPE_LOGICAL_VECTOR: error = discard_vector(sexptype_header, 4, ctx); break; case RDATA_SEXPTYPE_INTEGER_VECTOR: error = discard_vector(sexptype_header, 4, ctx); break; case RDATA_SEXPTYPE_REAL_VECTOR: error = discard_vector(sexptype_header, 8, ctx); break; case RDATA_SEXPTYPE_COMPLEX_VECTOR: error = discard_vector(sexptype_header, 16, ctx); break; case RDATA_SEXPTYPE_CHARACTER_VECTOR: case RDATA_SEXPTYPE_GENERIC_VECTOR: case RDATA_SEXPTYPE_EXPRESSION_VECTOR: if (read_st(ctx, &length, sizeof(length)) != sizeof(length)) { return RDATA_ERROR_READ; } if (ctx->machine_needs_byteswap) length = byteswap4(length); for (i=0; ifd, sizeof(uint32_t), SEEK_CUR) == -1) { return RDATA_ERROR_SEEK; } } */ break; case RDATA_PSEUDO_SXP_REF: case RDATA_PSEUDO_SXP_NIL: case RDATA_PSEUDO_SXP_GLOBAL_ENVIRONMENT: case RDATA_PSEUDO_SXP_UNBOUND_VALUE: case RDATA_PSEUDO_SXP_MISSING_ARGUMENT: case RDATA_PSEUDO_SXP_BASE_NAMESPACE: case RDATA_PSEUDO_SXP_EMPTY_ENVIRONMENT: case RDATA_PSEUDO_SXP_BASE_ENVIRONMENT: break; case RDATA_PSEUDO_SXP_ALTREP: /* class, package, type */ if ((error = read_sexptype_header(&info, ctx)) != RDATA_OK) goto cleanup; if ((error = recursive_discard(info.header, ctx)) != RDATA_OK) goto cleanup; while (1) { if ((error = read_sexptype_header(&info, ctx)) != RDATA_OK) goto cleanup; if (info.header.type == RDATA_SEXPTYPE_PAIRLIST) continue; if (info.header.type == RDATA_PSEUDO_SXP_NIL) break; if ((error = recursive_discard(info.header, ctx)) != RDATA_OK) goto cleanup; } break; default: if (ctx->error_handler) { char error_buf[1024]; snprintf(error_buf, sizeof(error_buf), "Unhandled S-Expression: %d", sexptype_header.type); ctx->error_handler(error_buf, ctx->user_ctx); } return RDATA_ERROR_UNSUPPORTED_S_EXPRESSION; } cleanup: return error; } librdata-0~20210223+git-85757dc6/src/rdata_write.c000066400000000000000000000405071403031461700211250ustar00rootroot00000000000000 #include #include #include #include "CKHashTable.h" #include "rdata.h" #include 
"rdata_internal.h" #define R_TAG 0x01 #define R_OBJECT 0x02 #define R_ATTRIBUTES 0x04 #define INITIAL_COLUMNS_CAPACITY 100 #ifdef _WIN32 #define timegm _mkgmtime #endif rdata_writer_t *rdata_writer_init(rdata_data_writer write_callback, rdata_file_format_t format) { rdata_writer_t *writer = calloc(1, sizeof(rdata_writer_t)); writer->file_format = format; writer->bswap = machine_is_little_endian(); writer->atom_table = ck_hash_table_init(100, 24); writer->data_writer = write_callback; writer->columns_capacity = INITIAL_COLUMNS_CAPACITY; writer->columns = malloc(writer->columns_capacity * sizeof(rdata_column_t *)); return writer; } void rdata_writer_free(rdata_writer_t *writer) { ck_hash_table_free(writer->atom_table); int i, j; for (i=0; icolumns_count; i++) { rdata_column_t *column = writer->columns[i]; for (j=0; jfactor_count; j++) { free(column->factor[j]); } free(column->factor); free(column); } free(writer->columns); free(writer); } rdata_column_t *rdata_add_column(rdata_writer_t *writer, const char *name, rdata_type_t type) { if (writer->columns_count == writer->columns_capacity) { writer->columns_capacity *= 2; writer->columns = realloc(writer->columns, writer->columns_capacity * sizeof(rdata_column_t *)); } rdata_column_t *new_column = calloc(1, sizeof(rdata_column_t)); new_column->index = writer->columns_count++; writer->columns[new_column->index] = new_column; new_column->type = type; if (name) { snprintf(new_column->name, sizeof(new_column->name), "%s", name); } return new_column; } rdata_column_t *rdata_get_column(rdata_writer_t *writer, int32_t j) { return writer->columns[j]; } rdata_error_t rdata_column_set_label(rdata_column_t *column, const char *label) { snprintf(column->label, sizeof(column->label), "%s", label); return RDATA_OK; } rdata_error_t rdata_column_add_factor(rdata_column_t *column, const char *factor) { if (column->type != RDATA_TYPE_INT32) return RDATA_ERROR_FACTOR; char *factor_copy = malloc(strlen(factor)+1); strcpy(factor_copy, 
factor); column->factor_count++; column->factor = realloc(column->factor, sizeof(char *) * column->factor_count); column->factor[column->factor_count-1] = factor_copy; return RDATA_OK; } static rdata_error_t rdata_write_bytes(rdata_writer_t *writer, const void *data, size_t len) { size_t bytes_written = writer->data_writer(data, len, writer->user_ctx); if (bytes_written < len) { return RDATA_ERROR_WRITE; } writer->bytes_written += bytes_written; return RDATA_OK; } static rdata_error_t rdata_write_integer(rdata_writer_t *writer, int32_t val) { if (writer->bswap) { val = byteswap4(val); } return rdata_write_bytes(writer, &val, sizeof(val)); } static rdata_error_t rdata_write_double(rdata_writer_t *writer, double val) { if (writer->bswap) { val = byteswap_double(val); } return rdata_write_bytes(writer, &val, sizeof(val)); } static rdata_error_t rdata_write_header(rdata_writer_t *writer, int type, int flags) { rdata_sexptype_header_t header; memset(&header, 0, sizeof(header)); header.type = type; header.object = !!(flags & R_OBJECT); header.tag = !!(flags & R_TAG); header.attributes = !!(flags & R_ATTRIBUTES); uint32_t sexp_int; memcpy(&sexp_int, &header, sizeof(header)); return rdata_write_integer(writer, sexp_int); } static rdata_error_t rdata_write_string(rdata_writer_t *writer, const char *string) { rdata_error_t retval = RDATA_OK; retval = rdata_write_header(writer, RDATA_SEXPTYPE_CHARACTER_STRING, 0); if (retval != RDATA_OK) goto cleanup; ssize_t len = string ? 
strlen(string) : -1; retval = rdata_write_integer(writer, len); if (retval != RDATA_OK) goto cleanup; if (len > 0) return rdata_write_bytes(writer, string, len); cleanup: return retval; } static rdata_error_t rdata_write_pairlist_key(rdata_writer_t *writer, const char *key) { rdata_error_t retval = RDATA_OK; ck_hash_table_t *atom_table = (ck_hash_table_t *)writer->atom_table; uint64_t ref = (uint64_t)ck_str_hash_lookup(key, atom_table); if (ref == 0) { ck_str_hash_insert(key, (void *)(atom_table->count + 1), atom_table); retval = rdata_write_integer(writer, 1); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_string(writer, key); } else { retval = rdata_write_integer(writer, (ref << 8) | 0xFF); } cleanup: return retval; } static rdata_error_t rdata_write_pairlist_header(rdata_writer_t *writer, const char *key) { rdata_error_t retval = RDATA_OK; retval = rdata_write_header(writer, RDATA_SEXPTYPE_PAIRLIST, R_TAG); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_pairlist_key(writer, key); if (retval != RDATA_OK) goto cleanup; cleanup: return retval; } static rdata_error_t rdata_write_attributed_vector_header(rdata_writer_t *writer, int type, int32_t size) { rdata_error_t retval = RDATA_OK; retval = rdata_write_header(writer, type, R_OBJECT | R_ATTRIBUTES); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_integer(writer, size); if (retval != RDATA_OK) goto cleanup; cleanup: return retval; } static rdata_error_t rdata_write_simple_vector_header(rdata_writer_t *writer, int type, int32_t size) { rdata_error_t retval = RDATA_OK; retval = rdata_write_header(writer, type, 0); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_integer(writer, size); if (retval != RDATA_OK) goto cleanup; cleanup: return retval; } static rdata_error_t rdata_write_class_pairlist(rdata_writer_t *writer, const char *class) { rdata_error_t retval = RDATA_OK; retval = rdata_write_pairlist_header(writer, "class"); if (retval != RDATA_OK) goto cleanup; retval = 
rdata_write_simple_vector_header(writer, RDATA_SEXPTYPE_CHARACTER_VECTOR, 1); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_string(writer, class); if (retval != RDATA_OK) goto cleanup; cleanup: return retval; } rdata_error_t rdata_begin_file(rdata_writer_t *writer, void *user_ctx) { rdata_error_t retval = RDATA_OK; writer->user_ctx = user_ctx; if (writer->file_format == RDATA_WORKSPACE) { retval = rdata_write_bytes(writer, "RDX2\n", 5); if (retval != RDATA_OK) goto cleanup; } rdata_v2_header_t v2_header; memcpy(v2_header.header, "X\n", sizeof("X\n")-1); v2_header.format_version = 2; v2_header.reader_version = 131840; v2_header.writer_version = 131840; if (writer->bswap) { v2_header.format_version = byteswap4(v2_header.format_version); v2_header.reader_version = byteswap4(v2_header.reader_version); v2_header.writer_version = byteswap4(v2_header.writer_version); } retval = rdata_write_bytes(writer, &v2_header, sizeof(v2_header)); if (retval != RDATA_OK) goto cleanup; cleanup: return retval; } rdata_error_t rdata_begin_table(rdata_writer_t *writer, const char *variable_name) { rdata_error_t retval = RDATA_OK; if (writer->file_format == RDATA_WORKSPACE) { retval = rdata_write_pairlist_header(writer, variable_name); if (retval != RDATA_OK) goto cleanup; } retval = rdata_write_attributed_vector_header(writer, RDATA_SEXPTYPE_GENERIC_VECTOR, writer->columns_count); if (retval != RDATA_OK) goto cleanup; cleanup: return retval; } static rdata_error_t rdata_begin_factor_column(rdata_writer_t *writer, rdata_column_t *column, int32_t row_count) { return rdata_write_attributed_vector_header(writer, RDATA_SEXPTYPE_INTEGER_VECTOR, row_count); } static rdata_error_t rdata_end_factor_column(rdata_writer_t *writer, rdata_column_t *column) { int i; rdata_error_t retval = RDATA_OK; retval = rdata_write_pairlist_header(writer, "levels"); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_simple_vector_header(writer, RDATA_SEXPTYPE_CHARACTER_VECTOR, 
column->factor_count); if (retval != RDATA_OK) goto cleanup; for (i=0; ifactor_count; i++) { retval = rdata_write_string(writer, column->factor[i]); if (retval != RDATA_OK) goto cleanup; } retval = rdata_write_class_pairlist(writer, "factor"); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_header(writer, RDATA_PSEUDO_SXP_NIL, 0); if (retval != RDATA_OK) goto cleanup; cleanup: return retval; } static rdata_error_t rdata_begin_real_column(rdata_writer_t *writer,rdata_column_t *column, int32_t row_count) { return rdata_write_simple_vector_header(writer, RDATA_SEXPTYPE_REAL_VECTOR, row_count); } static rdata_error_t rdata_end_real_column(rdata_writer_t *writer, rdata_column_t *column) { return RDATA_OK; } static rdata_error_t rdata_begin_timestamp_column(rdata_writer_t *writer, rdata_column_t *column, int32_t row_count) { return rdata_write_attributed_vector_header(writer, RDATA_SEXPTYPE_REAL_VECTOR, row_count); } static rdata_error_t rdata_end_timestamp_column(rdata_writer_t *writer, rdata_column_t *column) { rdata_error_t retval = RDATA_OK; retval = rdata_write_class_pairlist(writer, "POSIXct"); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_header(writer, RDATA_PSEUDO_SXP_NIL, 0); if (retval != RDATA_OK) goto cleanup; cleanup: return retval; } static rdata_error_t rdata_begin_date_column(rdata_writer_t *writer, rdata_column_t *column, int32_t row_count) { return rdata_write_attributed_vector_header(writer, RDATA_SEXPTYPE_REAL_VECTOR, row_count); } static rdata_error_t rdata_end_date_column(rdata_writer_t *writer, rdata_column_t *column) { rdata_error_t retval = RDATA_OK; retval = rdata_write_class_pairlist(writer, "Date"); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_header(writer, RDATA_PSEUDO_SXP_NIL, 0); if (retval != RDATA_OK) goto cleanup; cleanup: return retval; } static rdata_error_t rdata_begin_integer_column(rdata_writer_t *writer, rdata_column_t *column, int32_t row_count) { return 
rdata_write_simple_vector_header(writer, RDATA_SEXPTYPE_INTEGER_VECTOR, row_count); } static rdata_error_t rdata_end_integer_column(rdata_writer_t *writer, rdata_column_t *column) { return RDATA_OK; } static rdata_error_t rdata_begin_logical_column(rdata_writer_t *writer, rdata_column_t *column, int32_t row_count) { return rdata_write_simple_vector_header(writer, RDATA_SEXPTYPE_LOGICAL_VECTOR, row_count); } static rdata_error_t rdata_end_logical_column(rdata_writer_t *writer, rdata_column_t *column) { return RDATA_OK; } static rdata_error_t rdata_begin_string_column(rdata_writer_t *writer, rdata_column_t *column, int32_t row_count) { return rdata_write_simple_vector_header(writer, RDATA_SEXPTYPE_CHARACTER_VECTOR, row_count); } static rdata_error_t rdata_end_string_column(rdata_writer_t *writer, rdata_column_t *column) { return RDATA_OK; } rdata_error_t rdata_begin_column(rdata_writer_t *writer, rdata_column_t *column, int32_t row_count) { rdata_type_t type = column->type; if (type == RDATA_TYPE_INT32) { if (column->factor_count) return rdata_begin_factor_column(writer, column, row_count); return rdata_begin_integer_column(writer, column, row_count); } if (type == RDATA_TYPE_REAL) return rdata_begin_real_column(writer, column, row_count); if (type == RDATA_TYPE_TIMESTAMP) return rdata_begin_timestamp_column(writer, column, row_count); if (type == RDATA_TYPE_DATE) return rdata_begin_date_column(writer, column, row_count); if (type == RDATA_TYPE_LOGICAL) return rdata_begin_logical_column(writer, column, row_count); if (type == RDATA_TYPE_STRING) return rdata_begin_string_column(writer, column, row_count); return RDATA_OK; } rdata_error_t rdata_append_real_value(rdata_writer_t *writer, double value) { return rdata_write_double(writer, value); } rdata_error_t rdata_append_int32_value(rdata_writer_t *writer, int32_t value) { return rdata_write_integer(writer, value); } rdata_error_t rdata_append_timestamp_value(rdata_writer_t *writer, time_t value) { return 
rdata_write_double(writer, value); } rdata_error_t rdata_append_date_value(rdata_writer_t *writer, struct tm *value) { return rdata_write_double(writer, timegm(value) / 86400); } rdata_error_t rdata_append_logical_value(rdata_writer_t *writer, int value) { if (value < 0) return rdata_write_integer(writer, INT32_MIN); return rdata_write_integer(writer, (value > 0)); } rdata_error_t rdata_append_string_value(rdata_writer_t *writer, const char *value) { return rdata_write_string(writer, value); } rdata_error_t rdata_end_column(rdata_writer_t *writer, rdata_column_t *column) { rdata_type_t type = column->type; if (type == RDATA_TYPE_INT32) { if (column->factor_count) return rdata_end_factor_column(writer, column); return rdata_end_integer_column(writer, column); } if (type == RDATA_TYPE_REAL) return rdata_end_real_column(writer, column); if (type == RDATA_TYPE_TIMESTAMP) return rdata_end_timestamp_column(writer, column); if (type == RDATA_TYPE_DATE) return rdata_end_date_column(writer, column); if (type == RDATA_TYPE_LOGICAL) return rdata_end_logical_column(writer, column); if (type == RDATA_TYPE_STRING) return rdata_end_string_column(writer, column); return RDATA_OK; } rdata_error_t rdata_end_table(rdata_writer_t *writer, int32_t row_count, const char *datalabel) { int i; rdata_error_t retval = RDATA_OK; retval = rdata_write_pairlist_header(writer, "datalabel"); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_simple_vector_header(writer, RDATA_SEXPTYPE_CHARACTER_VECTOR, 1); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_string(writer, datalabel); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_pairlist_header(writer, "names"); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_simple_vector_header(writer, RDATA_SEXPTYPE_CHARACTER_VECTOR, writer->columns_count); if (retval != RDATA_OK) goto cleanup; for (i=0; icolumns_count; i++) { retval = rdata_write_string(writer, writer->columns[i]->name); if (retval != RDATA_OK) goto 
cleanup; } retval = rdata_write_pairlist_header(writer, "var.labels"); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_simple_vector_header(writer, RDATA_SEXPTYPE_CHARACTER_VECTOR, writer->columns_count); if (retval != RDATA_OK) goto cleanup; for (i=0; icolumns_count; i++) { retval = rdata_write_string(writer, writer->columns[i]->label); if (retval != RDATA_OK) goto cleanup; } retval = rdata_write_class_pairlist(writer, "data.frame"); if (retval != RDATA_OK) goto cleanup; if (row_count > 0) { retval = rdata_write_pairlist_header(writer, "row.names"); if (retval != RDATA_OK) goto cleanup; retval = rdata_write_simple_vector_header(writer, RDATA_SEXPTYPE_CHARACTER_VECTOR, row_count); if (retval != RDATA_OK) goto cleanup; char buf[128]; for (i=0; ifile_format == RDATA_WORKSPACE) return rdata_write_header(writer, RDATA_PSEUDO_SXP_NIL, 0); return RDATA_OK; } librdata-0~20210223+git-85757dc6/src/test/000077500000000000000000000000001403031461700174255ustar00rootroot00000000000000librdata-0~20210223+git-85757dc6/src/test/CMakeLists.txt000066400000000000000000000011041403031461700221610ustar00rootroot00000000000000#=========================================================== #add_executable(test_c tests/test_c.cpp) #target_link_libraries(test_c alglib) #INSTALL(TARGETS test_c DESTINATION ${CMAKE_INSTALL_BINDIR}/) #=========================================================== #add_executable(test_i tests/test_i.cpp) #target_link_libraries(test_i alglibstatic) #install(TARGETS test_i DESTINATION ${CMAKE_INSTALL_BINDIR}/) #=========================================================== #enable_testing() #add_test( test_c ${CMAKE_BUILD_DIR}/test_c) #add_test( test_i ${CMAKE_BUILD_DIR}/test_i) librdata-0~20210223+git-85757dc6/src/test/test_buffer.c000066400000000000000000000015511403031461700221030ustar00rootroot00000000000000#include #include "test_buffer.h" rt_buffer_t *buffer_init() { rt_buffer_t *buffer = calloc(1, sizeof(rt_buffer_t)); buffer->size = 1024; buffer->bytes 
= malloc(buffer->size); return buffer; } void buffer_reset(rt_buffer_t *buffer) { buffer->used = 0; } void buffer_grow(rt_buffer_t *buffer, size_t len) { while (len > buffer->size - buffer->used) { buffer->size *= 2; } buffer->bytes = realloc(buffer->bytes, buffer->size); } void buffer_free(rt_buffer_t *buffer) { free(buffer->bytes); free(buffer); } rt_buffer_ctx_t *buffer_ctx_init(rt_buffer_t *buffer) { rt_buffer_ctx_t *buffer_ctx = calloc(1, sizeof(rt_buffer_ctx_t)); buffer_ctx->buffer = buffer; return buffer_ctx; } void buffer_ctx_reset(rt_buffer_ctx_t *buffer_ctx) { buffer_reset(buffer_ctx->buffer); buffer_ctx->pos = 0; } librdata-0~20210223+git-85757dc6/src/test/test_buffer.h000066400000000000000000000007401403031461700221070ustar00rootroot00000000000000 typedef struct rt_buffer_s { size_t used; size_t size; char *bytes; } rt_buffer_t; typedef struct rt_buffer_ctx_s { rt_buffer_t *buffer; size_t pos; } rt_buffer_ctx_t; rt_buffer_t *buffer_init(); void buffer_reset(rt_buffer_t *buffer); void buffer_grow(rt_buffer_t *buffer, size_t len); void buffer_free(rt_buffer_t *buffer); rt_buffer_ctx_t *buffer_ctx_init(rt_buffer_t *buffer); void buffer_ctx_reset(rt_buffer_ctx_t *buffer_ctx); librdata-0~20210223+git-85757dc6/src/test/test_buffer_io.c000066400000000000000000000034171403031461700225750ustar00rootroot00000000000000#include #include #include "../rdata.h" #include "test_buffer.h" #include "test_buffer_io.h" int rt_open_handler(const char *path, void *io_ctx) { return 0; } int rt_close_handler(void *io_ctx) { return 0; } rdata_off_t rt_seek_handler(rdata_off_t offset, rdata_io_flags_t whence, void *io_ctx) { rt_buffer_ctx_t *buffer_ctx = (rt_buffer_ctx_t *)io_ctx; rdata_off_t newpos = -1; if (whence == RDATA_SEEK_SET) { newpos = offset; } else if (whence == RDATA_SEEK_CUR) { newpos = buffer_ctx->pos + offset; } else if (whence == RDATA_SEEK_END) { newpos = buffer_ctx->buffer->used + offset; } if (newpos < 0) return -1; if (newpos > buffer_ctx->buffer->used) 
return -1; buffer_ctx->pos = newpos; return newpos; } ssize_t rt_read_handler(void *buf, size_t nbytes, void *io_ctx) { rt_buffer_ctx_t *buffer_ctx = (rt_buffer_ctx_t *)io_ctx; ssize_t bytes_copied = 0; ssize_t bytes_left = buffer_ctx->buffer->used - buffer_ctx->pos; if (nbytes <= bytes_left) { memcpy(buf, buffer_ctx->buffer->bytes + buffer_ctx->pos, nbytes); bytes_copied = nbytes; } else if (bytes_left > 0) { memcpy(buf, buffer_ctx->buffer->bytes + buffer_ctx->pos, bytes_left); bytes_copied = bytes_left; } buffer_ctx->pos += bytes_copied; return bytes_copied; } rdata_error_t rt_update_handler(long file_size, rdata_progress_handler progress_handler, void *user_ctx, void *io_ctx) { if (!progress_handler) return RDATA_OK; rt_buffer_ctx_t *buffer_ctx = (rt_buffer_ctx_t *)io_ctx; if (progress_handler(1.0 * buffer_ctx->pos / buffer_ctx->buffer->used, user_ctx)) return RDATA_ERROR_USER_ABORT; return RDATA_OK; } librdata-0~20210223+git-85757dc6/src/test/test_buffer_io.h000066400000000000000000000005731403031461700226020ustar00rootroot00000000000000 int rt_open_handler(const char *path, void *io_ctx); int rt_close_handler(void *io_ctx); rdata_off_t rt_seek_handler(rdata_off_t offset, rdata_io_flags_t whence, void *io_ctx); ssize_t rt_read_handler(void *buf, size_t nbytes, void *io_ctx); rdata_error_t rt_update_handler(long file_size, rdata_progress_handler progress_handler, void *user_ctx, void *io_ctx); librdata-0~20210223+git-85757dc6/src/test/test_rdata.c000066400000000000000000000143721403031461700217320ustar00rootroot00000000000000#include #include #include #include #include #include #include #include "../rdata.h" #include "test_buffer.h" #include "test_buffer_io.h" #ifdef _WIN32 #define timegm _mkgmtime #endif typedef struct test_rdata_ctx_s { int column_count; int row_count; const char *table_name; time_t timestamp; struct tm date; } test_rdata_ctx_t; static void handle_error(const char *error_message, void *ctx) { printf("%s\n", error_message); } static ssize_t 
write_data(const void *bytes, size_t len, void *ctx) { rt_buffer_t *buffer = (rt_buffer_t *)ctx; buffer_grow(buffer, len); if (buffer->bytes == NULL) { return -1; } memcpy(buffer->bytes + buffer->used, bytes, len); buffer->used += len; return len; } static int handle_table(const char *name, void *ctx) { test_rdata_ctx_t *test_ctx = (test_rdata_ctx_t *)ctx; return (name != NULL && strcmp(name, test_ctx->table_name) != 0); } static int handle_column_name(const char *name, int index, void *ctx) { if (index == 0) return strcmp(name, "column1"); if (index == 1) return strcmp(name, "column2"); if (index == 2) return strcmp(name, "column3"); if (index == 3) return strcmp(name, "column4"); return 1; } static int handle_column(const char *name, rdata_type_t type, void *data, long count, void *ctx) { test_rdata_ctx_t *test_ctx = (test_rdata_ctx_t *)ctx; if (name != NULL && strcmp(name, "column1") == 0) { if (type != RDATA_TYPE_REAL) return 1; } else if (name != NULL && strcmp(name, "column2") == 0) { return (type != RDATA_TYPE_STRING); } else if (name != NULL && strcmp(name, "column3") == 0) { if (type != RDATA_TYPE_TIMESTAMP) return 1; } else if (name != NULL && strcmp(name, "column4") == 0) { if (type != RDATA_TYPE_DATE) return 1; } else if (name != NULL) { return 1; } if (data == NULL) return 0; double *dp = data; if (count != test_ctx->row_count) { printf("Unexpected row count: %ld\n", count); return 1; } if (type == RDATA_TYPE_REAL) { if (dp[0] != 0.0) { printf("Unexpected real value[0]: %lf\n", dp[0]); return 1; } if (dp[1] != 100.0) { printf("Unexpected real value[1]: %lf\n", dp[1]); return 1; } if (!isnan(dp[2])) { printf("Unexpected real value[2]: %lf\n", dp[2]); return 1; } } if (type == RDATA_TYPE_TIMESTAMP) { int i; for (i=0; i<3; i++) { if (dp[i] != test_ctx->timestamp) { printf("Unexpected timestamp value[%d]: %lf\n", i, dp[i]); return 1; } } } if (type == RDATA_TYPE_DATE) { int i; for (i=0; i<3; i++) { if (dp[i] * 86400 != timegm(&test_ctx->date)) { 
printf("Unexpected date value[%d]: %lf\n", i, dp[i]); return 1; } } } return 0; } static int handle_text_value(const char *value, int index, void *ctx) { if (index == 0) return strcmp(value, "hello"); if (index == 1) return strcmp(value, "goodbye"); if (index == 2) return value != NULL; return 1; } int main(int argc, char *argv[]) { struct timeval time; gettimeofday(&time, NULL); test_rdata_ctx_t ctx = { .row_count = 3, .table_name = "table1", .timestamp = time.tv_sec, .date = { .tm_year = 95, .tm_mon = 7, .tm_mday = 15 } }; rt_buffer_t *buffer = buffer_init(); rdata_writer_t *writer = rdata_writer_init(&write_data, RDATA_WORKSPACE); rdata_column_t *col1 = rdata_add_column(writer, "column1", RDATA_TYPE_REAL); rdata_column_t *col2 = rdata_add_column(writer, "column2", RDATA_TYPE_STRING); rdata_column_t *col3 = rdata_add_column(writer, "column3", RDATA_TYPE_TIMESTAMP); rdata_column_t *col4 = rdata_add_column(writer, "column4", RDATA_TYPE_DATE); rdata_begin_file(writer, buffer); rdata_begin_table(writer, ctx.table_name); rdata_begin_column(writer, col1, ctx.row_count); rdata_append_real_value(writer, 0.0); rdata_append_real_value(writer, 100.0); rdata_append_real_value(writer, NAN); rdata_end_column(writer, col1); rdata_begin_column(writer, col2, ctx.row_count); rdata_append_string_value(writer, "hello"); rdata_append_string_value(writer, "goodbye"); rdata_append_string_value(writer, NULL); rdata_end_column(writer, col2); rdata_begin_column(writer, col3, ctx.row_count); rdata_append_timestamp_value(writer, ctx.timestamp); rdata_append_timestamp_value(writer, ctx.timestamp); rdata_append_timestamp_value(writer, ctx.timestamp); rdata_end_column(writer, col3); rdata_begin_column(writer, col4, ctx.row_count); rdata_append_date_value(writer, &ctx.date); rdata_append_date_value(writer, &ctx.date); rdata_append_date_value(writer, &ctx.date); rdata_end_column(writer, col4); rdata_end_table(writer, ctx.row_count, "My data set"); rdata_end_file(writer); rt_buffer_ctx_t 
*buffer_ctx = buffer_ctx_init(buffer); rdata_parser_t *parser = rdata_parser_init(); rdata_set_open_handler(parser, rt_open_handler); rdata_set_close_handler(parser, rt_close_handler); rdata_set_seek_handler(parser, rt_seek_handler); rdata_set_read_handler(parser, rt_read_handler); rdata_set_update_handler(parser, rt_update_handler); rdata_set_io_ctx(parser, buffer_ctx); rdata_set_table_handler(parser, &handle_table); rdata_set_column_handler(parser, &handle_column); rdata_set_column_name_handler(parser, &handle_column_name); rdata_set_text_value_handler(parser, &handle_text_value); rdata_error_t err = rdata_parse(parser, "example.RData", &ctx); if (err != RDATA_OK) { printf("Returned: %s\n", rdata_error_message(err)); const char *path = "/tmp/rdata_test.RData"; int fd = open("/tmp/rdata_test.RData", O_CREAT | O_TRUNC | O_WRONLY, 0644); write(fd, buffer->bytes, buffer->used); close(fd); printf("Wrote test file out to %s\n", path); } return (err != RDATA_OK); } librdata-0~20210223+git-85757dc6/writeEx.c000066400000000000000000000035451403031461700174610ustar00rootroot00000000000000#include #include #include #include #include #include #include static ssize_t write_data(const void *bytes, size_t len, void *ctx) { int fd = *(int *)ctx; return write(fd, bytes, len); } void writeRData() { int row_count = 3; int fd = open("example.RData", O_CREAT | O_WRONLY, 0644); rdata_writer_t *writer = rdata_writer_init(&write_data, RDATA_WORKSPACE); rdata_column_t *col1 = rdata_add_column(writer, "column1", RDATA_TYPE_REAL); rdata_column_t *col2 = rdata_add_column(writer, "column2", RDATA_TYPE_STRING); rdata_begin_file(writer, &fd); rdata_begin_table(writer, "my_table"); rdata_begin_column(writer, col1, row_count); rdata_append_real_value(writer, 0.0); rdata_append_real_value(writer, 100.0); rdata_append_real_value(writer, NAN); rdata_end_column(writer, col1); rdata_begin_column(writer, col2, row_count); rdata_append_string_value(writer, "hello"); rdata_append_string_value(writer, 
"goodbye"); rdata_append_string_value(writer, NULL); rdata_end_column(writer, col2); rdata_end_table(writer, row_count, "My data set"); rdata_end_file(writer); close(fd); } void writeRDS() { int row_count = 3; int fd = open("example.rds", O_CREAT | O_WRONLY, 0644); rdata_writer_t *writer = rdata_writer_init(&write_data, RDATA_SINGLE_OBJECT); rdata_column_t *col = rdata_add_column(writer, "column1", RDATA_TYPE_REAL); rdata_begin_file(writer, &fd); rdata_begin_column(writer, col, row_count); rdata_append_real_value(writer, 42.0); rdata_append_real_value(writer, -7.0); rdata_append_real_value(writer, NAN); rdata_end_column(writer, col); rdata_end_file(writer); close(fd); } int main() { writeRData(); writeRDS(); printf("Done\n"); exit(0); }