pax_global_header00006660000000000000000000000064134637411360014522gustar00rootroot0000000000000052 comment=8d2028804cbb3be56cbf2bab47eb55012fd325a7 undeaD-1.0.10/000077500000000000000000000000001346374113600130015ustar00rootroot00000000000000undeaD-1.0.10/.editorconfig000066400000000000000000000002411346374113600154530ustar00rootroot00000000000000root = true [*.{c,h,d,di,dd}] end_of_line = lf insert_final_newline = true indent_style = space indent_size = 4 trim_trailing_whitespace = true charset = utf-8 undeaD-1.0.10/.gitignore000066400000000000000000000004461346374113600147750ustar00rootroot00000000000000.B* *.bak *.lst # Object files *.o *.ko *.obj *.elf # Precompiled Headers *.gch *.pch # Libraries *.lib *.a *.la *.lo # Shared objects (inc. Windows DLLs) *.dll *.so *.so.* *.dylib # Executables *.exe *.out *.app *.i*86 *.x86_64 *.hex # Extra dirs/build files bin obj .dub .vs undead.sln undeaD-1.0.10/.travis.yml000066400000000000000000000001561346374113600151140ustar00rootroot00000000000000language: d os: - linux - osx d: - dmd - dmd-beta - dmd-nightly - ldc - ldc-beta sudo: false undeaD-1.0.10/LICENSE000066400000000000000000000024721346374113600140130ustar00rootroot00000000000000Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of 
machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. undeaD-1.0.10/LICENSE.txt000066400000000000000000000024721346374113600146310ustar00rootroot00000000000000Boost Software License - Version 1.0 - August 17th, 2003 Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
undeaD-1.0.10/README.md000066400000000000000000000011201346374113600142520ustar00rootroot00000000000000[![Build Status](https://travis-ci.org/dlang/undeaD.svg?branch=master)](https://travis-ci.org/dlang/undeaD) undeaD ====== Need an obsolete Phobos module? Here they are, back from the dead and upgraded to work with the latest D Current modules included: * std.bitarray * std.date * std.datebase * std.dateparse * std.regexp * std.stream and friends Some individual functions have been moved here rather than full Phobos modules. They are * undead.doformat: Contains the `doFormat` function from std.format * undead.string: Contains regex style pattern matching functions from std.string undeaD-1.0.10/dub.json000066400000000000000000000003731346374113600144510ustar00rootroot00000000000000{ "name": "undead", "description": "Obsolete Phobos modules, back from the dead", "authors": ["various"], "homepage": "https://github.com/dlang/undeaD", "license": "BSL-1.0", "targetType": "library", "targetPath": "bin", } undeaD-1.0.10/posix.mak000066400000000000000000000016701346374113600146410ustar00rootroot00000000000000#_ posix.mak # Build posix version of undead # Needs Digital Mars D compiler to build, available free from: # http://www.digitalmars.com/d/ DMD=dmd DEL=rm S=src/undead O=obj B=bin TARGET=undead DFLAGS=-g -Isrc/ LFLAGS=-L/map/co #DFLAGS= #LFLAGS= .d.obj : $(DMD) -c $(DFLAGS) $* SRC= $S/bitarray.d $S/regexp.d $S/datebase.d $S/date.d $S/dateparse.d \ $S/cstream.d $S/stream.d $S/socketstream.d $S/doformat.d $S/string.d \ $S/internal/file.d SOURCE= $(SRC) win32.mak posix.mak LICENSE README.md dub.json all: $B/$(TARGET).a ################################################# $B/$(TARGET).a : $(SRC) $(DMD) -lib -of$B/$(TARGET).a $(SRC) $(DFLAGS) unittest : $(DMD) -unittest -main -cov -of$O/unittest $(SRC) $(DFLAGS) $O/unittest clean: $(DEL) $O/unittest *.lst tolf: tolf $(SOURCE) detab: detab $(SRC) zip: detab tolf $(SOURCE) $(DEL) undead.zip zip undead $(SOURCE) gitzip: git 
archive --format=zip HEAD > undead.zip undeaD-1.0.10/src/000077500000000000000000000000001346374113600135705ustar00rootroot00000000000000undeaD-1.0.10/src/undead/000077500000000000000000000000001346374113600150305ustar00rootroot00000000000000undeaD-1.0.10/src/undead/bitarray.d000066400000000000000000000476051346374113600170260ustar00rootroot00000000000000/*********************** * Source: $(PHOBOSSRC std/_bitarray.d) * Macros: * WIKI = StdBitarray */ module undead.bitarray; //debug = bitarray; // uncomment to turn on debugging printf's private import core.bitop; /** * An array of bits. */ struct BitArray { size_t len; size_t* ptr; size_t dim() { return (len + 31) / 32; } size_t length() const pure nothrow { return len; } void length(size_t newlen) { if (newlen != len) { size_t olddim = dim(); size_t newdim = (newlen + 31) / 32; if (newdim != olddim) { // Create a fake array so we can use D's realloc machinery auto b = ptr[0 .. olddim]; b.length = newdim; // realloc ptr = b.ptr; if (newdim & 31) { // Set any pad bits to 0 ptr[newdim - 1] &= ~(~0 << (newdim & 31)); } } len = newlen; } } /********************************************** * Support for [$(I index)] operation for BitArray. */ bool opIndex(size_t i) in { assert(i < len); } body { return cast(bool)bt(ptr, i); } /** ditto */ bool opIndexAssign(bool b, size_t i) in { assert(i < len); } body { if (b) bts(ptr, i); else btr(ptr, i); return b; } /********************************************** * Support for array.dup property for BitArray. */ BitArray dup() { BitArray ba; auto b = ptr[0 .. dim].dup; ba.len = len; ba.ptr = b.ptr; return ba; } unittest { BitArray a; BitArray b; debug(bitarray) printf("BitArray.dup.unittest\n"); a.length = 3; a[0] = 1; a[1] = 0; a[2] = 1; b = a.dup; assert(b.length == 3); for (int i = 0; i < 3; i++) { debug(bitarray) printf("b[%d] = %d\n", i, b[i]); assert(b[i] == (((i ^ 1) & 1) ? 
true : false)); } } /********************************************** * Support for foreach loops for BitArray. */ int opApply(int delegate(ref bool) dg) { int result; for (size_t i = 0; i < len; i++) { bool b = opIndex(i); result = dg(b); (this)[i] = b; if (result) break; } return result; } /** ditto */ int opApply(int delegate(ref size_t, ref bool) dg) { int result; for (size_t i = 0; i < len; i++) { bool b = opIndex(i); result = dg(i, b); (this)[i] = b; if (result) break; } return result; } unittest { debug(bitarray) printf("BitArray.opApply unittest\n"); static bool[] ba = [1,0,1]; BitArray a; a.init(ba); int i; foreach (b;a) { switch (i) { case 0: assert(b == true); break; case 1: assert(b == false); break; case 2: assert(b == true); break; default: assert(0); } i++; } foreach (j,b;a) { switch (j) { case 0: assert(b == true); break; case 1: assert(b == false); break; case 2: assert(b == true); break; default: assert(0); } } } /********************************************** * Support for array.reverse property for BitArray. */ BitArray reverse() out (result) { assert(result == this); } body { if (len >= 2) { bool t; size_t lo, hi; lo = 0; hi = len - 1; for (; lo < hi; lo++, hi--) { t = (this)[lo]; (this)[lo] = (this)[hi]; (this)[hi] = t; } } return this; } unittest { debug(bitarray) printf("BitArray.reverse.unittest\n"); BitArray b; static bool[5] data = [1,0,1,1,0]; int i; b.init(data); b.reverse; for (i = 0; i < data.length; i++) { assert(b[i] == data[4 - i]); } } /********************************************** * Support for array.sort property for BitArray. 
*/ BitArray sort() out (result) { assert(result == this); } body { if (len >= 2) { size_t lo, hi; lo = 0; hi = len - 1; while (1) { while (1) { if (lo >= hi) goto Ldone; if ((this)[lo] == true) break; lo++; } while (1) { if (lo >= hi) goto Ldone; if ((this)[hi] == false) break; hi--; } (this)[lo] = false; (this)[hi] = true; lo++; hi--; } Ldone: ; } return this; } unittest { debug(bitarray) printf("BitArray.sort.unittest\n"); __gshared size_t x = 0b1100011000; __gshared BitArray ba = { 10, &x }; ba.sort; for (size_t i = 0; i < 6; i++) assert(ba[i] == false); for (size_t i = 6; i < 10; i++) assert(ba[i] == true); } /*************************************** * Support for operators == and != for bit arrays. */ bool opEquals(const ref BitArray a2) const pure nothrow { size_t i; if (this.length != a2.length) return false; // not equal byte *p1 = cast(byte*)this.ptr; byte *p2 = cast(byte*)a2.ptr; auto n = this.length / 8; for (i = 0; i < n; i++) { if (p1[i] != p2[i]) return false; // not equal } n = this.length & 7; auto mask = cast(ubyte)((1 << n) - 1); //printf("i = %d, n = %d, mask = %x, %x, %x\n", i, n, mask, p1[i], p2[i]); return (mask == 0) || (p1[i] & mask) == (p2[i] & mask); } unittest { debug(bitarray) printf("BitArray.opEquals unittest\n"); static bool[] ba = [1,0,1,0,1]; static bool[] bb = [1,0,1]; static bool[] bc = [1,0,1,0,1,0,1]; static bool[] bd = [1,0,1,1,1]; static bool[] be = [1,0,1,0,1]; BitArray a; a.init(ba); BitArray b; b.init(bb); BitArray c; c.init(bc); BitArray d; d.init(bd); BitArray e; e.init(be); assert(a != b); assert(a != c); assert(a != d); assert(a == e); } /*************************************** * Implement comparison operators. 
*/ int opCmp(const ref BitArray a2) const pure nothrow { size_t i; auto len = this.length; if (a2.length < len) len = a2.length; auto p1 = cast(ubyte*)this.ptr; auto p2 = cast(ubyte*)a2.ptr; auto n = len / 8; for (i = 0; i < n; i++) { if (p1[i] != p2[i]) break; // not equal } for (auto j = i * 8; j < len; j++) { auto mask = cast(ubyte)(1 << j); auto c = cast(int)(p1[i] & mask) - cast(int)(p2[i] & mask); if (c) return c; } version (D_LP64) { long c = this.len - a2.length; if (c < 0) return -1; else return c != 0; } else return cast(int)this.len - cast(int)a2.length; } unittest { debug(bitarray) printf("BitArray.opCmp unittest\n"); static bool[] ba = [1,0,1,0,1]; static bool[] bb = [1,0,1]; static bool[] bc = [1,0,1,0,1,0,1]; static bool[] bd = [1,0,1,1,1]; static bool[] be = [1,0,1,0,1]; BitArray a; a.init(ba); BitArray b; b.init(bb); BitArray c; c.init(bc); BitArray d; d.init(bd); BitArray e; e.init(be); assert(a > b); assert(a >= b); assert(a < c); assert(a <= c); assert(a < d); assert(a <= d); assert(a == e); assert(a <= e); assert(a >= e); } /*************************************** * Set BitArray to contents of ba[] */ void init(bool[] ba) { length = ba.length; foreach (i, b; ba) { (this)[i] = b; } } /*************************************** * Map BitArray onto v[], with numbits being the number of bits * in the array. Does not copy the data. * * This is the inverse of opCast. */ void init(void[] v, size_t numbits) in { assert(numbits <= v.length * 8); assert((v.length & 3) == 0); } body { ptr = cast(typeof(ptr))v.ptr; len = numbits; } unittest { debug(bitarray) printf("BitArray.init unittest\n"); static bool[] ba = [1,0,1,0,1]; BitArray a; a.init(ba); BitArray b; void[] v; v = cast(void[])a; b.init(v, a.length); assert(b[0] == 1); assert(b[1] == 0); assert(b[2] == 1); assert(b[3] == 0); assert(b[4] == 1); a[0] = 0; assert(b[0] == 0); assert(a == b); } /*************************************** * Convert to void[]. */ void[] opCast() { return cast(void[])ptr[0 .. 
dim]; } unittest { debug(bitarray) printf("BitArray.opCast unittest\n"); static bool[] ba = [1,0,1,0,1]; BitArray a; a.init(ba); void[] v = cast(void[])a; assert(v.length == a.dim * size_t.sizeof); } /*************************************** * Support for unary operator ~ for bit arrays. */ BitArray opCom() { auto dim = this.dim(); BitArray result; result.length = len; for (size_t i = 0; i < dim; i++) result.ptr[i] = ~this.ptr[i]; if (len & 31) result.ptr[dim - 1] &= ~(~0 << (len & 31)); return result; } unittest { debug(bitarray) printf("BitArray.opCom unittest\n"); static bool[] ba = [1,0,1,0,1]; BitArray a; a.init(ba); BitArray b = ~a; assert(b[0] == 0); assert(b[1] == 1); assert(b[2] == 0); assert(b[3] == 1); assert(b[4] == 0); } /*************************************** * Support for binary operator & for bit arrays. */ BitArray opAnd(BitArray e2) in { assert(len == e2.length); } body { auto dim = this.dim(); BitArray result; result.length = len; for (size_t i = 0; i < dim; i++) result.ptr[i] = this.ptr[i] & e2.ptr[i]; return result; } unittest { debug(bitarray) printf("BitArray.opAnd unittest\n"); static bool[] ba = [1,0,1,0,1]; static bool[] bb = [1,0,1,1,0]; BitArray a; a.init(ba); BitArray b; b.init(bb); BitArray c = a & b; assert(c[0] == 1); assert(c[1] == 0); assert(c[2] == 1); assert(c[3] == 0); assert(c[4] == 0); } /*************************************** * Support for binary operator | for bit arrays. 
*/ BitArray opOr(BitArray e2) in { assert(len == e2.length); } body { auto dim = this.dim(); BitArray result; result.length = len; for (size_t i = 0; i < dim; i++) result.ptr[i] = this.ptr[i] | e2.ptr[i]; return result; } unittest { debug(bitarray) printf("BitArray.opOr unittest\n"); static bool[] ba = [1,0,1,0,1]; static bool[] bb = [1,0,1,1,0]; BitArray a; a.init(ba); BitArray b; b.init(bb); BitArray c = a | b; assert(c[0] == 1); assert(c[1] == 0); assert(c[2] == 1); assert(c[3] == 1); assert(c[4] == 1); } /*************************************** * Support for binary operator ^ for bit arrays. */ BitArray opXor(BitArray e2) in { assert(len == e2.length); } body { auto dim = this.dim(); BitArray result; result.length = len; for (size_t i = 0; i < dim; i++) result.ptr[i] = this.ptr[i] ^ e2.ptr[i]; return result; } unittest { debug(bitarray) printf("BitArray.opXor unittest\n"); static bool[] ba = [1,0,1,0,1]; static bool[] bb = [1,0,1,1,0]; BitArray a; a.init(ba); BitArray b; b.init(bb); BitArray c = a ^ b; assert(c[0] == 0); assert(c[1] == 0); assert(c[2] == 0); assert(c[3] == 1); assert(c[4] == 1); } /*************************************** * Support for binary operator - for bit arrays. * * $(I a - b) for BitArrays means the same thing as $(I a & ~b). */ BitArray opSub(BitArray e2) in { assert(len == e2.length); } body { auto dim = this.dim(); BitArray result; result.length = len; for (size_t i = 0; i < dim; i++) result.ptr[i] = this.ptr[i] & ~e2.ptr[i]; return result; } unittest { debug(bitarray) printf("BitArray.opSub unittest\n"); static bool[] ba = [1,0,1,0,1]; static bool[] bb = [1,0,1,1,0]; BitArray a; a.init(ba); BitArray b; b.init(bb); BitArray c = a - b; assert(c[0] == 0); assert(c[1] == 0); assert(c[2] == 0); assert(c[3] == 0); assert(c[4] == 1); } /*************************************** * Support for operator &= bit arrays. 
*/ BitArray opAndAssign(BitArray e2) in { assert(len == e2.length); } body { auto dim = this.dim(); for (size_t i = 0; i < dim; i++) ptr[i] &= e2.ptr[i]; return this; } unittest { debug(bitarray) printf("BitArray.opAndAssign unittest\n"); static bool[] ba = [1,0,1,0,1]; static bool[] bb = [1,0,1,1,0]; BitArray a; a.init(ba); BitArray b; b.init(bb); a &= b; assert(a[0] == 1); assert(a[1] == 0); assert(a[2] == 1); assert(a[3] == 0); assert(a[4] == 0); } /*************************************** * Support for operator |= for bit arrays. */ BitArray opOrAssign(BitArray e2) in { assert(len == e2.length); } body { auto dim = this.dim(); for (size_t i = 0; i < dim; i++) ptr[i] |= e2.ptr[i]; return this; } unittest { debug(bitarray) printf("BitArray.opOrAssign unittest\n"); static bool[] ba = [1,0,1,0,1]; static bool[] bb = [1,0,1,1,0]; BitArray a; a.init(ba); BitArray b; b.init(bb); a |= b; assert(a[0] == 1); assert(a[1] == 0); assert(a[2] == 1); assert(a[3] == 1); assert(a[4] == 1); } /*************************************** * Support for operator ^= for bit arrays. */ BitArray opXorAssign(BitArray e2) in { assert(len == e2.length); } body { auto dim = this.dim(); for (size_t i = 0; i < dim; i++) ptr[i] ^= e2.ptr[i]; return this; } unittest { debug(bitarray) printf("BitArray.opXorAssign unittest\n"); static bool[] ba = [1,0,1,0,1]; static bool[] bb = [1,0,1,1,0]; BitArray a; a.init(ba); BitArray b; b.init(bb); a ^= b; assert(a[0] == 0); assert(a[1] == 0); assert(a[2] == 0); assert(a[3] == 1); assert(a[4] == 1); } /*************************************** * Support for operator -= for bit arrays. * * $(I a -= b) for BitArrays means the same thing as $(I a &= ~b). 
*/ BitArray opSubAssign(BitArray e2) in { assert(len == e2.length); } body { auto dim = this.dim(); for (size_t i = 0; i < dim; i++) ptr[i] &= ~e2.ptr[i]; return this; } unittest { debug(bitarray) printf("BitArray.opSubAssign unittest\n"); static bool[] ba = [1,0,1,0,1]; static bool[] bb = [1,0,1,1,0]; BitArray a; a.init(ba); BitArray b; b.init(bb); a -= b; assert(a[0] == 0); assert(a[1] == 0); assert(a[2] == 0); assert(a[3] == 0); assert(a[4] == 1); } /*************************************** * Support for operator ~= for bit arrays. */ BitArray opCatAssign(bool b) { length = len + 1; (this)[len - 1] = b; return this; } unittest { debug(bitarray) printf("BitArray.opCatAssign unittest\n"); static bool[] ba = [1,0,1,0,1]; BitArray a; a.init(ba); BitArray b; b = (a ~= true); assert(a[0] == 1); assert(a[1] == 0); assert(a[2] == 1); assert(a[3] == 0); assert(a[4] == 1); assert(a[5] == 1); assert(b == a); } /*************************************** * ditto */ BitArray opCatAssign(BitArray b) { auto istart = len; length = len + b.length; for (auto i = istart; i < len; i++) (this)[i] = b[i - istart]; return this; } unittest { debug(bitarray) printf("BitArray.opCatAssign unittest\n"); static bool[] ba = [1,0]; static bool[] bb = [0,1,0]; BitArray a; a.init(ba); BitArray b; b.init(bb); BitArray c; c = (a ~= b); assert(a.length == 5); assert(a[0] == 1); assert(a[1] == 0); assert(a[2] == 0); assert(a[3] == 1); assert(a[4] == 0); assert(c == a); } /*************************************** * Support for binary operator ~ for bit arrays. 
*/ BitArray opCat(bool b) { auto r = this.dup; r.length = len + 1; r[len] = b; return r; } /** ditto */ BitArray opCat_r(bool b) { BitArray r; r.length = len + 1; r[0] = b; for (size_t i = 0; i < len; i++) r[1 + i] = (this)[i]; return r; } /** ditto */ BitArray opCat(BitArray b) { BitArray r; r = this.dup(); r ~= b; return r; } unittest { debug(bitarray) printf("BitArray.opCat unittest\n"); static bool[] ba = [1,0]; static bool[] bb = [0,1,0]; BitArray a; a.init(ba); BitArray b; b.init(bb); BitArray c; c = (a ~ b); assert(c.length == 5); assert(c[0] == 1); assert(c[1] == 0); assert(c[2] == 0); assert(c[3] == 1); assert(c[4] == 0); c = (a ~ true); assert(c.length == 3); assert(c[0] == 1); assert(c[1] == 0); assert(c[2] == 1); c = (false ~ a); assert(c.length == 3); assert(c[0] == 0); assert(c[1] == 1); assert(c[2] == 0); } } undeaD-1.0.10/src/undead/cstream.d000066400000000000000000000142571346374113600166440ustar00rootroot00000000000000// Written in the D programming language. /** * $(RED Deprecated: This module is considered out-dated and not up to Phobos' * current standards.) * * The std.cstream module bridges core.stdc.stdio (or std.stdio) and std.stream. * Both core.stdc.stdio and std.stream are publicly imported by std.cstream. * * Macros: * WIKI=Phobos/StdCstream * * Copyright: Copyright Ben Hinkle 2007 - 2009. * License: $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0). * Authors: Ben Hinkle * Source: $(PHOBOSSRC std/_cstream.d) */ /* Copyright Ben Hinkle 2007 - 2009. * Distributed under the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or copy at * http://www.boost.org/LICENSE_1_0.txt) */ module undead.cstream; public import core.stdc.stdio; public import undead.stream; version(unittest) import std.stdio; import std.algorithm; /** * A Stream wrapper for a C file of type FILE*. */ class CFile : Stream { protected FILE* cfile; /** * Create the stream wrapper for the given C file. 
* Params: * cfile = a valid C $(B FILE) pointer to wrap. * mode = a bitwise combination of $(B FileMode.In) for a readable file * and $(B FileMode.Out) for a writeable file. * seekable = indicates if the stream should be _seekable. */ this(FILE* cfile, FileMode mode, bool seekable = false) { super(); this.file = cfile; readable = cast(bool)(mode & FileMode.In); writeable = cast(bool)(mode & FileMode.Out); this.seekable = seekable; } /** * Closes the stream. */ ~this() { close(); } /** * Property to get or set the underlying file for this stream. * Setting the file marks the stream as open. */ @property FILE* file() { return cfile; } /** * Ditto */ @property void file(FILE* cfile) { this.cfile = cfile; isopen = true; } /** * Overrides of the $(B Stream) methods to call the underlying $(B FILE*) * C functions. */ override void flush() { fflush(cfile); } /** * Ditto */ override void close() { if (isopen) fclose(cfile); isopen = readable = writeable = seekable = false; } /** * Ditto */ override bool eof() { return cast(bool)(readEOF || feof(cfile)); } /** * Ditto */ override char getc() { return cast(char)fgetc(cfile); } /** * Ditto */ override char ungetc(char c) { return cast(char)core.stdc.stdio.ungetc(c,cfile); } /** * Ditto */ override size_t readBlock(void* buffer, size_t size) { size_t n = fread(buffer,1,size,cfile); readEOF = cast(bool)(n == 0); return n; } /** * Ditto */ override size_t writeBlock(const void* buffer, size_t size) { return fwrite(buffer,1,size,cfile); } /** * Ditto */ override ulong seek(long offset, SeekPos rel) { readEOF = false; if (fseek(cfile,cast(int)offset,rel) != 0) throw new SeekException("unable to move file pointer"); return ftell(cfile); } /** * Ditto */ override void writeLine(const(char)[] s) { writeString(s); writeString("\n"); } /** * Ditto */ override void writeLineW(const(wchar)[] s) { writeStringW(s); writeStringW("\n"); } // run a few tests unittest { import undead.internal.file; import std.internal.cstring : tempCString; 
auto stream_file = (undead.internal.file.deleteme ~ "-stream.txt").tempCString(); FILE* f = fopen(stream_file,"w"); assert(f !is null); CFile file = new CFile(f,FileMode.Out); int i = 666; // should be ok to write assert(file.writeable); file.writeLine("Testing stream.d:"); file.writeString("Hello, world!"); file.write(i); // string#1 + string#2 + int should give exacly that version (Windows) assert(file.position == 19 + 13 + 4); version (Posix) assert(file.position == 18 + 13 + 4); file.close(); // no operations are allowed when file is closed assert(!file.readable && !file.writeable && !file.seekable); f = fopen(stream_file,"r"); file = new CFile(f,FileMode.In,true); // should be ok to read assert(file.readable); auto line = file.readLine(); auto exp = "Testing stream.d:"; assert(line[0] == 'T'); assert(line.length == exp.length); assert(!std.algorithm.cmp(line, "Testing stream.d:")); // jump over "Hello, " file.seek(7, SeekPos.Current); version (Windows) assert(file.position == 19 + 7); version (Posix) assert(file.position == 18 + 7); assert(!std.algorithm.cmp(file.readString(6), "world!")); i = 0; file.read(i); assert(i == 666); // string#1 + string#2 + int should give exacly that version (Windows) assert(file.position == 19 + 13 + 4); version (Posix) assert(file.position == 18 + 13 + 4); // we must be at the end of file file.close(); f = fopen(stream_file,"w+"); file = new CFile(f,FileMode.In|FileMode.Out,true); file.writeLine("Testing stream.d:"); file.writeLine("Another line"); file.writeLine(""); file.writeLine("That was blank"); file.position = 0; char[][] lines; foreach(char[] line; file) { lines ~= line.dup; } assert( lines.length == 5 ); assert( lines[0] == "Testing stream.d:"); assert( lines[1] == "Another line"); assert( lines[2] == ""); assert( lines[3] == "That was blank"); file.position = 0; lines = new char[][5]; foreach(ulong n, char[] line; file) { lines[cast(size_t)(n-1)] = line.dup; } assert( lines[0] == "Testing stream.d:"); assert( lines[1] 
== "Another line"); assert( lines[2] == ""); assert( lines[3] == "That was blank"); file.close(); remove(stream_file); } } /** * CFile wrapper of core.stdc.stdio.stdin (not seekable). */ __gshared CFile din; /** * CFile wrapper of core.stdc.stdio.stdout (not seekable). */ __gshared CFile dout; /** * CFile wrapper of core.stdc.stdio.stderr (not seekable). */ __gshared CFile derr; shared static this() { // open standard I/O devices din = new CFile(core.stdc.stdio.stdin,FileMode.In); dout = new CFile(core.stdc.stdio.stdout,FileMode.Out); derr = new CFile(core.stdc.stdio.stderr,FileMode.Out); } undeaD-1.0.10/src/undead/date.d000066400000000000000000000760461346374113600161270ustar00rootroot00000000000000// Written in the D programming language. /** * $(RED Deprecated. It will be removed in February 2012. * Please use std.datetime instead.) * * Dates are represented in several formats. The date implementation * revolves around a central type, $(D d_time), from which other * formats are converted to and from. Dates are calculated using the * Gregorian calendar. * * References: $(WEB wikipedia.org/wiki/Gregorian_calendar, Gregorian * calendar (Wikipedia)) * * Macros: WIKI = Phobos/StdDate * * Copyright: Copyright Digital Mars 2000 - 2009. * License: Boost License 1.0. * Authors: $(WEB digitalmars.com, Walter Bright) * Source: $(PHOBOSSRC std/_date.d) */ /* Copyright Digital Mars 2000 - 2009. * Distributed under the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or copy at * http://www.boost.org/LICENSE_1_0.txt) */ module undead.date; import std.conv, std.exception, std.stdio; import core.stdc.stdlib; import undead.datebase; import undead.dateparse; /+ pragma(msg, "Notice: As of Phobos 2.055, std.date and std.dateparse have been " ~ "deprecated. They will be removed in February 2012. " ~ "Please use std.datetime instead."); deprecated: +/ /** * $(D d_time) is a signed arithmetic type giving the time elapsed * since January 1, 1970. 
Negative values are for dates preceding * 1970. The time unit used is Ticks. Ticks are milliseconds or * smaller intervals. * * The usual arithmetic operations can be performed on d_time, such as adding, * subtracting, etc. Elapsed time in Ticks can be computed by subtracting a * starting d_time from an ending d_time. */ alias long d_time; /** * A value for d_time that does not represent a valid time. */ enum d_time d_time_nan = long.min; /** * Time broken down into its components. */ struct Date { int year = int.min; /// use int.min as "nan" year value int month; /// 1..12 int day; /// 1..31 int hour; /// 0..23 int minute; /// 0..59 int second; /// 0..59 int ms; /// 0..999 int weekday; /// 0: not specified, 1..7: Sunday..Saturday int tzcorrection = int.min; /// -1200..1200 correction in hours /// Parse date out of string s[] and store it in this Date instance. void parse(string s) { DateParse dp; dp.parse(s, this); } } enum { hoursPerDay = 24, minutesPerHour = 60, msPerMinute = 60 * 1000, msPerHour = 60 * msPerMinute, msPerDay = 86_400_000, ticksPerMs = 1, ticksPerSecond = 1000, /// Will be at least 1000 ticksPerMinute = ticksPerSecond * 60, ticksPerHour = ticksPerMinute * 60, ticksPerDay = ticksPerHour * 24, } deprecated alias ticksPerSecond TicksPerSecond; deprecated alias ticksPerMs TicksPerMs; deprecated alias ticksPerMinute TicksPerMinute; deprecated alias ticksPerHour TicksPerHour; deprecated alias ticksPerDay TicksPerDay; deprecated unittest { assert(ticksPerSecond == TicksPerSecond); } __gshared d_time localTZA = 0; private immutable char[] daystr = "SunMonTueWedThuFriSat"; private immutable char[] monstr = "JanFebMarAprMayJunJulAugSepOctNovDec"; private immutable int[12] mdays = [ 0,31,59,90,120,151,181,212,243,273,304,334 ]; /******************************** * Compute year and week [1..53] from t. The ISO 8601 week 1 is the first week * of the year that includes January 4. Monday is the first day of the week. 
* References: * $(LINK2 http://en.wikipedia.org/wiki/ISO_8601, ISO 8601 (Wikipedia)) */ void toISO8601YearWeek(d_time t, out int year, out int week) { year = yearFromTime(t); auto yday = day(t) - dayFromYear(year); /* Determine day of week Jan 4 falls on. * Weeks begin on a Monday. */ auto d = dayFromYear(year); auto w = (d + 3/*Jan4*/ + 3) % 7; if (w < 0) w += 7; /* Find yday of beginning of ISO 8601 year */ auto ydaybeg = 3/*Jan4*/ - w; /* Check if yday is actually the last week of the previous year */ if (yday < ydaybeg) { year -= 1; week = 53; return; } /* Check if yday is actually the first week of the next year */ if (yday >= 362) // possible { int d2; int ydaybeg2; d2 = dayFromYear(year + 1); w = (d2 + 3/*Jan4*/ + 3) % 7; if (w < 0) w += 7; //printf("w = %d\n", w); ydaybeg2 = 3/*Jan4*/ - w; if (d + yday >= d2 + ydaybeg2) { year += 1; week = 1; return; } } week = (yday - ydaybeg) / 7 + 1; } /* *********************************** * Divide time by divisor. Always round down, even if d is negative. */ pure d_time floor(d_time d, int divisor) { return (d < 0 ? d - divisor - 1 : d) / divisor; } int dmod(d_time n, d_time d) { d_time r; r = n % d; if (r < 0) r += d; assert(cast(int)r == r); return cast(int)r; } /******************************** * Calculates the hour from time. * * Params: * time = The time to compute the hour from. * Returns: * The calculated hour, 0..23. */ int hourFromTime(d_time time) { return dmod(floor(time, msPerHour), hoursPerDay); } /******************************** * Calculates the minute from time. * * Params: * time = The time to compute the minute from. * Returns: * The calculated minute, 0..59. */ int minFromTime(d_time time) { return dmod(floor(time, msPerMinute), minutesPerHour); } /******************************** * Calculates the second from time. * * Params: * time = The time to compute the second from. * Returns: * The calculated second, 0..59. 
*/ int secFromTime(d_time time) { return dmod(floor(time, ticksPerSecond), 60); } /******************************** * Calculates the milisecond from time. * * Params: * time = The time to compute the milisecond from. * Returns: * The calculated milisecond, 0..999. */ int msFromTime(d_time time) { return dmod(time / (ticksPerSecond / 1000), 1000); } int timeWithinDay(d_time t) { return dmod(t, msPerDay); } d_time toInteger(d_time n) { return n; } int day(d_time t) { return cast(int)floor(t, msPerDay); } pure bool leapYear(uint y) { return (y % 4) == 0 && (y % 100 || (y % 400) == 0); } unittest { assert(!leapYear(1970)); assert(leapYear(1984)); assert(leapYear(2000)); assert(!leapYear(2100)); } /******************************** * Calculates the number of days that exists in a year. * * Leap years have 366 days, while other years have 365. * * Params: * year = The year to compute the number of days from. * Returns: * The number of days in the year, 365 or 366. */ pure uint daysInYear(uint year) { return (leapYear(year) ? 366 : 365); } /******************************** * Calculates the number of days elapsed since 1 January 1970 * until 1 January of the given year. * * Params: * year = The year to compute the number of days from. * Returns: * The number of days elapsed. * * Example: * ---------- * writeln(dayFromYear(1970)); // writes '0' * writeln(dayFromYear(1971)); // writes '365' * writeln(dayFromYear(1972)); // writes '730' * ---------- */ pure int dayFromYear(int year) { return cast(int) (365 * (year - 1970) + floor((year - 1969), 4) - floor((year - 1901), 100) + floor((year - 1601), 400)); } pure d_time timeFromYear(int y) { return cast(d_time)msPerDay * dayFromYear(y); } /***************************** * Calculates the year from the d_time t. 
*
 * Returns: the year containing t, or 0 if t is d_time_nan.
 */
pure int yearFromTime(d_time t)
{
    if (t == d_time_nan)
        return 0;

    // Hazard a guess: divide by the average year length (365.2425 days)
    // using integer-only math.
    // The divisor is about 3.2e10 and does not fit in 32 bits, so it is
    // computed in d_time. NOTE(review): msPerDay is presumably a 32-bit int
    // constant (timeFromYear casts it to d_time); if so the old int product
    // wrapped, the guess was far off, and the loops below had to walk
    // hundreds of years to correct it.
    enum d_time ticksPerAverageYear = 3652425 * cast(d_time)(msPerDay / 10000);
    int y = 1970 + cast(int) (t / ticksPerAverageYear);

    // Correct the guess so that timeFromYear(y) <= t < timeFromYear(y + 1).
    if (timeFromYear(y) <= t)
    {
        while (timeFromYear(y + 1) <= t)
            y++;
    }
    else
    {
        do
        {
            y--;
        }
        while (timeFromYear(y) > t);
    }
    return y;
}

/*******************************
 * Determines if d_time t is a leap year.
 *
 * A leap year is every 4 years except years ending in 00 that are not
 * divisible by 400.
 *
 * Returns: true if it is a leap year.
 *
 * References:
 *      $(LINK2 http://en.wikipedia.org/wiki/Leap_year, Wikipedia)
 */
pure bool inLeapYear(d_time t)
{
    return leapYear(yearFromTime(t));
}

/*****************************
 * Calculates the month from the d_time t.
 *
 * Returns: Integer in the range 0..11, where
 *      0 represents January and 11 represents December.
 */
int monthFromTime(d_time t)
{
    // Day-of-year (non-leap numbering) on which each month March..December
    // ends, exclusive; index 0 corresponds to month 2 (March).
    static immutable int[10] monthEnd =
        [90, 120, 151, 181, 212, 243, 273, 304, 334, 365];

    auto year = yearFromTime(t);
    auto yday = day(t) - dayFromYear(year);     // 0-based day within the year

    if (yday < 59)
    {
        if (yday < 31)
        {
            assert(yday >= 0);
            return 0;
        }
        return 1;
    }

    // From here on use non-leap numbering; Feb 29 collapses onto day 58.
    yday -= leapYear(year);
    if (yday < 59)
        return 1;

    foreach (i, end; monthEnd)
    {
        if (yday < end)
            return cast(int) i + 2;
    }
    assert(0);
}

/*******************************
 * Compute which day in a month a d_time t is.
* Returns:
 *      Integer in the range 1..31
 */
int dateFromTime(d_time t)
{
    auto year = yearFromTime(t);
    auto yday = day(t) - dayFromYear(year);     // 0-based day within the year
    auto leap = leapYear(year);
    auto month = monthFromTime(t);
    int date;

    // Subtract the number of days preceding the month; months after
    // February additionally skip the leap day.
    switch (month)
    {
        case 0:  date = yday + 1;           break;
        case 1:  date = yday - 30;          break;
        case 2:  date = yday - 58 - leap;   break;
        case 3:  date = yday - 89 - leap;   break;
        case 4:  date = yday - 119 - leap;  break;
        case 5:  date = yday - 150 - leap;  break;
        case 6:  date = yday - 180 - leap;  break;
        case 7:  date = yday - 211 - leap;  break;
        case 8:  date = yday - 242 - leap;  break;
        case 9:  date = yday - 272 - leap;  break;
        case 10: date = yday - 303 - leap;  break;
        case 11: date = yday - 333 - leap;  break;
        default:
            assert(0);
    }
    return date;
}

/*******************************
 * Compute which day of the week a d_time t is.
 * Returns:
 *      Integer in the range 0..6, where 0 represents Sunday
 *      and 6 represents Saturday.
 */
int weekDay(d_time t)
{
    // Day 0 (1 January 1970) was a Thursday, hence the offset of 4.
    auto w = (cast(int)day(t) + 4) % 7;
    if (w < 0)
        w += 7;
    return w;
}

/***********************************
 * Convert from UTC to local time.
 * Returns d_time_nan if t is d_time_nan.
 */
d_time UTCtoLocalTime(d_time t)
{
    return (t == d_time_nan)
        ? d_time_nan
        : t + localTZA + daylightSavingTA(t);
}

/***********************************
 * Convert from local time to UTC.
 * Returns d_time_nan if t is d_time_nan.
 */
d_time localTimetoUTC(d_time t)
{
    return (t == d_time_nan)
        ? d_time_nan
/* BUGZILLA 1752 says this line should be:
 *      : t - localTZA - daylightSavingTA(t);
 */
        : t - localTZA - daylightSavingTA(t - localTZA);
}

/***********************************
 * Combine hour, minute, second and millisecond into a tick count
 * (the time-within-day part of a d_time).
 */
d_time makeTime(d_time hour, d_time min, d_time sec, d_time ms)
{
    return hour * ticksPerHour +
           min * ticksPerMinute +
           sec * ticksPerSecond +
           ms * ticksPerMs;
}

/* *****************************
 * Params:
 *      year = year
 *      month = 0..11; values outside that range are normalised by
 *              carrying whole years into year
 *      date = day of month, 1..31
 * Returns:
 *      number of days since start of epoch, or d_time_nan if any argument
 *      is d_time_nan or the first of the month cannot be represented
 */
d_time makeDay(d_time year, d_time month, d_time date)
{
    if (year == d_time_nan || month == d_time_nan || date == d_time_nan)
        return d_time_nan;

    const y = cast(int)(year + floor(month, 12));
    const m = dmod(month, 12);

    const leap = leapYear(y);
    auto t = timeFromYear(y) + cast(d_time) mdays[m] * msPerDay;
    // Use the normalised month m here: testing the raw `month` added (or
    // skipped) the leap day wrongly whenever month was outside 0..11.
    if (leap && m >= 2)
        t += msPerDay;

    // Sanity check: t must be the first day of month m in year y.
    if (yearFromTime(t) != y ||
        monthFromTime(t) != m ||
        dateFromTime(t) != 1)
    {
        return d_time_nan;
    }

    return day(t) + date - 1;
}

/***********************************
 * Combine a day number (see makeDay) and a time within the day (see makeTime)
 * into a d_time. Returns d_time_nan if either argument is d_time_nan.
 */
d_time makeDate(d_time day, d_time time)
{
    if (day == d_time_nan || time == d_time_nan)
        return d_time_nan;

    return day * ticksPerDay + time;
}

/// Passes time through toInteger; currently no range clipping is performed.
d_time timeClip(d_time time)
{
    //printf("TimeClip(%g) = %g\n", time, toInteger(time));

    return toInteger(time);
}

/***************************************
 * Determine the date in the month, 1..31, of the nth
 * weekday.
* Params:
 *      year = year
 *      month = month, 1..12
 *      weekday = day of week 0..6 representing Sunday..Saturday
 *      n = nth occurrence of that weekday in the month, 1..5, where
 *          5 also means "the last occurrence in the month"
 * Returns:
 *      the date in the month, 1..31, of the nth weekday
 */
int dateFromNthWeekdayOfMonth(int year, int month, int weekday, int n)
in
{
    assert(1 <= month && month <= 12);
    assert(0 <= weekday && weekday <= 6);
    assert(1 <= n && n <= 5);
}
body
{
    // Weekday (0..6) on which the first of the month falls
    auto firstOfMonth = makeDay(year, month - 1, 1);
    auto firstWeekday = weekDay(makeDate(firstOfMonth, 0));

    // Day of month of the first occurrence of `weekday`
    auto result = weekday - firstWeekday + 1;
    if (result < 1)
        result += 7;

    // Advance to the nth occurrence
    result += (n - 1) * 7;

    // Past the end of the month? Then n == 5 meant "last occurrence":
    // step back one week.
    if (result > 28 && result > daysInMonth(year, month))
    {
        assert(n == 5);
        result -= 7;
    }
    return result;
}

unittest
{
    assert(dateFromNthWeekdayOfMonth(2003, 3, 0, 5) == 30);
    assert(dateFromNthWeekdayOfMonth(2003, 10, 0, 5) == 26);
    assert(dateFromNthWeekdayOfMonth(2004, 3, 0, 5) == 28);
    assert(dateFromNthWeekdayOfMonth(2004, 10, 0, 5) == 31);
}

/**************************************
 * Determine the number of days in a month, 28..31.
 * Params:
 *      year = year, used to decide whether February has 29 days
 *      month = 1..12
 * Throws:
 *      via enforce if month is outside 1..12
 */
int daysInMonth(int year, int month)
{
    switch (month)
    {
        case 2:
            return 28 + leapYear(year);

        case 4:
        case 6:
        case 9:
        case 11:
            return 30;

        case 1:
        case 3:
        case 5:
        case 7:
        case 8:
        case 10:
        case 12:
            return 31;

        default:
            // enforce(false, ...) always throws
            return enforce(false, "Invalid month passed to daysInMonth");
    }
}

unittest
{
    assert(daysInMonth(2003, 2) == 28);
    assert(daysInMonth(2004, 2) == 29);
}

/*************************************
 * Converts UTC time into a text string of the form:
 * "Www Mmm dd hh:mm:ss GMT+-TZ yyyy".
 * For example, "Tue Apr 02 02:04:57 GMT-0800 1996".
 * If time is invalid, i.e. is d_time_nan,
 * the string "Invalid Date" is returned.
*
 * Example:
 * ------------------------------------
  d_time lNow;
  char[] lNowString;

  // Grab the date and time relative to UTC
  lNow = std.date.getUTCtime();
  // Convert this into the local date and time for display.
  lNowString = std.date.UTCtoString(lNow);
 * ------------------------------------
 */
string UTCtoString(d_time time)
{
    // Check first so no buffer is allocated for the invalid case.
    if (time == d_time_nan)
        return "Invalid Date";

    // Years are supposed to be -285616 .. 285616, or 7 digits
    // "Tue Apr 02 02:04:57 GMT-0800 1996"
    auto buffer = new char[29 + 7 + 1];

    auto dst = daylightSavingTA(time);
    auto offset = localTZA + dst;
    auto t = time + offset;
    auto sign = '+';
    if (offset < 0)
    {
        sign = '-';
        // offset = -offset;
        offset = -(localTZA + dst);
    }

    auto mn = cast(int)(offset / msPerMinute);
    auto hr = mn / 60;
    mn %= 60;

    //printf("hr = %d, offset = %g, localTZA = %g, dst = %g, + = %g\n", hr, offset, localTZA, dst, localTZA + dst);

    // yearFromTime returns int, matching %d; a cast to long here would not
    // match the conversion specifier.
    auto len = sprintf(buffer.ptr,
            "%.3s %.3s %02d %02d:%02d:%02d GMT%c%02d%02d %d",
            &daystr[weekDay(t) * 3],
            &monstr[monthFromTime(t) * 3],
            dateFromTime(t),
            hourFromTime(t), minFromTime(t), secFromTime(t),
            sign, hr, mn,
            yearFromTime(t));

    // Ensure no buggy buffer overflows
    //printf("len = %d, buffer.length = %d\n", len, buffer.length);
    assert(len < buffer.length);
    buffer = buffer[0 .. len];
    return assumeUnique(buffer);
}

/// Alias for UTCtoString (deprecated).
deprecated alias UTCtoString toString;

/***********************************
 * Converts t into a text string of the form: "Www, dd Mmm yyyy hh:mm:ss UTC".
 * If t is invalid, "Invalid Date" is returned.
 */
string toUTCString(d_time t)
{
    if (t == d_time_nan)
        return "Invalid Date";

    // Years are supposed to be -285616 .. 285616, or 7 digits
    // "Tue, 02 Apr 1996 02:04:57 UTC"
    auto buffer = new char[25 + 7 + 1];

    auto len = sprintf(buffer.ptr, "%.3s, %02d %.3s %d %02d:%02d:%02d UTC",
            &daystr[weekDay(t) * 3], dateFromTime(t),
            &monstr[monthFromTime(t) * 3],
            yearFromTime(t),
            hourFromTime(t), minFromTime(t), secFromTime(t));

    // Ensure no buggy buffer overflows
    assert(len < buffer.length);

    return cast(string) buffer[0 .. len];
}

/************************************
 * Converts the date portion of time into a text string of the form: "Www Mmm dd
 * yyyy", for example, "Tue Apr 02 1996".
 * If time is invalid, "Invalid Date" is returned.
 * The input is UTC; the local time zone and DST offsets are applied first.
 */
string toDateString(d_time time)
{
    if (time == d_time_nan)
        return "Invalid Date";

    // Years are supposed to be -285616 .. 285616, or 7 digits
    // "Tue Apr 02 1996"
    auto buffer = new char[29 + 7 + 1];

    auto dst = daylightSavingTA(time);
    auto offset = localTZA + dst;
    auto t = time + offset;

    // Pass the year as int so it matches %d.
    auto len = sprintf(buffer.ptr, "%.3s %.3s %02d %d",
        &daystr[weekDay(t) * 3],
        &monstr[monthFromTime(t) * 3],
        dateFromTime(t),
        yearFromTime(t));

    // Ensure no buggy buffer overflows
    assert(len < buffer.length);

    return cast(string) buffer[0 .. len];
}

/******************************************
 * Converts the time portion of t into a text string of the form: "hh:mm:ss
 * GMT+-TZ", for example, "02:04:57 GMT-0800".
 * If t is invalid, "Invalid date" is returned.
 * The input must be in UTC, and the output is in local time.
*/
string toTimeString(d_time time)
{
    // Check first so no buffer is allocated for the invalid case.
    if (time == d_time_nan)
        return "Invalid Date";

    // "02:04:57 GMT-0800"
    auto buffer = new char[17 + 1];

    auto dst = daylightSavingTA(time);
    auto offset = localTZA + dst;
    auto t = time + offset;
    auto sign = '+';
    if (offset < 0)
    {
        sign = '-';
        // offset = -offset;
        offset = -(localTZA + dst);
    }

    auto mn = cast(int)(offset / msPerMinute);
    auto hr = mn / 60;
    mn %= 60;

    //printf("hr = %d, offset = %g, localTZA = %g, dst = %g, + = %g\n", hr, offset, localTZA, dst, localTZA + dst);

    auto len = sprintf(buffer.ptr, "%02d:%02d:%02d GMT%c%02d%02d",
        hourFromTime(t), minFromTime(t), secFromTime(t),
        sign, hr, mn);

    // Ensure no buggy buffer overflows
    assert(len < buffer.length);

    // Lop off terminating 0
    return cast(string) buffer[0 .. len];
}

/******************************************
 * Parses s as a textual date string, and returns it as a d_time.  If
 * the string is not a valid date, $(D d_time_nan) is returned.
 */
d_time parse(string s)
{
    // NOTE(review): only Exception is caught here. If the parser signals bad
    // input with a Throwable that is not an Exception, it propagates instead
    // of yielding d_time_nan -- confirm against the exception type thrown by
    // Date.parse.
    try
    {
        Date dp;
        dp.parse(s);
        auto time = makeTime(dp.hour, dp.minute, dp.second, dp.ms);
        // Assume UTC if no tzcorrection is set (runnable/testdate).
        if (dp.tzcorrection != int.min)
        {
            // tzcorrection is encoded as +-hhmm
            time += cast(d_time)(dp.tzcorrection / 100) * msPerHour +
                    cast(d_time)(dp.tzcorrection % 100) * msPerMinute;
        }
        auto day = makeDay(dp.year, dp.month - 1, dp.day);
        auto result = makeDate(day,time);
        return timeClip(result);
    }
    catch (Exception e)
    {
        return d_time_nan;              // erroneous date string
    }
}

/// Initialises localTZA; invoked from the module constructor of the
/// companion module through its extern(C) declaration.
extern(C) void std_date_static_this()
{
    localTZA = getLocalTZA();
}

version (Windows)
{
    private import core.sys.windows.windows;
    //import c.time;

    /******
     * Get current UTC time.
     */
    d_time getUTCtime()
    {
        SYSTEMTIME st;
        GetSystemTime(&st);             // get time in UTC
        return SYSTEMTIME2d_time(&st, 0);
        //return c.time.time(null) * ticksPerSecond;
    }

    /// Converts a FILETIME to d_time; returns d_time_nan if the conversion
    /// to SYSTEMTIME fails.
    static d_time FILETIME2d_time(const FILETIME *ft)
    {
        SYSTEMTIME st = void;
        if (!FileTimeToSystemTime(ft, &st))
            return d_time_nan;
        return SYSTEMTIME2d_time(&st, 0);
    }

    /// Converts a d_time to a FILETIME (100 ns units counted from 1601).
    FILETIME d_time2FILETIME(d_time dt)
    {
        static assert(10_000_000 >= ticksPerSecond);
        static assert(10_000_000 % ticksPerSecond == 0);
        enum ulong ticksFrom1601To1970 = 11_644_473_600UL * ticksPerSecond;
        ulong t = (dt + ticksFrom1601To1970) * (10_000_000 / ticksPerSecond);
        FILETIME result = void;
        result.dwLowDateTime = cast(uint) (t & uint.max);
        result.dwHighDateTime = cast(uint) (t >> 32);
        return result;
    }

    unittest
    {
        auto dt = getUTCtime();
        auto ft = d_time2FILETIME(dt);
        auto dt1 = FILETIME2d_time(&ft);
        assert(dt == dt1, text(dt, " != ", dt1));
    }

    /// Converts a SYSTEMTIME to d_time. When st.wYear is 0 the structure
    /// holds a "day-in-month" rule, which is resolved for the year of t.
    static d_time SYSTEMTIME2d_time(const SYSTEMTIME *st, d_time t)
    {
        /* More info: http://delphicikk.atw.hu/listaz.php?id=2667&oldal=52
         */
        d_time day = void;
        d_time time = void;

        if (st.wYear)
        {
            time = makeTime(st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
            day = makeDay(st.wYear, st.wMonth - 1, st.wDay);
        }
        else
        {   /* wYear being 0 is a flag to indicate relative time:
             * wMonth is the month 1..12
             * wDayOfWeek is weekday 0..6 corresponding to Sunday..Saturday
             * wDay is the nth time, 1..5, that wDayOfWeek occurs
             */

            auto year = yearFromTime(t);
            // Parameter order is (year, month, weekday, n): the weekday is
            // wDayOfWeek and the occurrence count is wDay.
            auto mday = dateFromNthWeekdayOfMonth(year,
                    st.wMonth, st.wDayOfWeek, st.wDay);
            day = makeDay(year, st.wMonth - 1, mday);
            time = makeTime(st.wHour, st.wMinute, 0, 0);
        }
        auto n = makeDate(day,time);
        return timeClip(n);
    }

    /// Returns the local time zone adjustment (local standard time minus
    /// UTC) in ticks, or 0 if the time zone information is unavailable.
    d_time getLocalTZA()
    {
        TIME_ZONE_INFORMATION tzi = void;

        /* http://msdn.microsoft.com/library/en-us/sysinfo/base/gettimezoneinformation.asp
         * http://msdn2.microsoft.com/en-us/library/ms725481.aspx
         */
        auto r = GetTimeZoneInformation(&tzi);
        //printf("bias = %d\n", tzi.Bias);
        //printf("standardbias = %d\n", tzi.StandardBias);
        //printf("daylightbias = %d\n", tzi.DaylightBias);
        switch (r)
        {
            case TIME_ZONE_ID_STANDARD:
                return -(tzi.Bias + tzi.StandardBias) * cast(d_time)(60 * ticksPerSecond);
            case TIME_ZONE_ID_DAYLIGHT:
                // fallthrough
                //t = -(tzi.Bias + tzi.DaylightBias) * cast(d_time)(60 * ticksPerSecond);
                //break;
            case TIME_ZONE_ID_UNKNOWN:
                return -(tzi.Bias) * cast(d_time)(60 * ticksPerSecond);

            default:
                return 0;
        }
    }

    /*
     * Get daylight savings time adjust for time dt.
     */
    int daylightSavingTA(d_time dt)
    {
        TIME_ZONE_INFORMATION tzi = void;
        d_time ts;
        d_time td;

        /* http://msdn.microsoft.com/library/en-us/sysinfo/base/gettimezoneinformation.asp
         */
        auto r = GetTimeZoneInformation(&tzi);
        auto t = 0;
        switch (r)
        {
            case TIME_ZONE_ID_STANDARD:
            case TIME_ZONE_ID_DAYLIGHT:
                if (tzi.StandardDate.wMonth == 0 ||
                    tzi.DaylightDate.wMonth == 0)
                    break;

                ts = SYSTEMTIME2d_time(&tzi.StandardDate, dt);
                td = SYSTEMTIME2d_time(&tzi.DaylightDate, dt);

                // When daylight time starts before standard time within the
                // year, DST is the interval [td, ts). When it starts after
                // (southern hemisphere), DST wraps around the new year.
                immutable inDst = (td <= ts) ? (td <= dt && dt < ts)
                                             : (dt >= td || dt < ts);
                if (inDst)
                {
                    t = -tzi.DaylightBias * (60 * ticksPerSecond);
                    //printf("DST is in effect, %d\n", t);
                }
                else
                {
                    //printf("no DST\n");
                }
                break;

            case TIME_ZONE_ID_UNKNOWN:
                // Daylight savings time not used in this time zone
                break;

            default:
                assert(0);
        }
        return t;
    }
}

version (Posix)
{
    private import core.sys.posix.time;
    private import core.sys.posix.sys.time;

    /******
     * Get current UTC time.
     */
    d_time getUTCtime()
    {
        timeval tv;

        //printf("getUTCtime()\n");
        if (gettimeofday(&tv, null))
        {   // Some error happened - try time() instead
            return time(null) * ticksPerSecond;
        }

        return tv.tv_sec * cast(d_time)ticksPerSecond +
                (tv.tv_usec / (1000000 / cast(d_time)ticksPerSecond));
    }

    /// Returns the local time zone adjustment (local time minus UTC) in ticks.
    d_time getLocalTZA()
    {
        time_t t;

        time(&t);
        version (OSX)
        {
            tm result;
            localtime_r(&t, &result);
            return result.tm_gmtoff * ticksPerSecond;
        }
        else version (FreeBSD)
        {
            tm result;
            localtime_r(&t, &result);
            return result.tm_gmtoff * ticksPerSecond;
        }
        else
        {
            localtime(&t);  // this will set timezone
            return -(timezone * ticksPerSecond);
        }
    }

    /*
     * Get daylight savings time adjust for time dt.
     */
    int daylightSavingTA(d_time dt)
    {
        tm *tmp;
        time_t t;
        int dst = 0;

        if (dt != d_time_nan)
        {
            d_time seconds = dt / ticksPerSecond;
            t = cast(time_t) seconds;
            if (t == seconds)   // if in range
            {
                tmp = localtime(&t);
                // localtime may return null on failure; treat that as "no DST"
                if (tmp !is null && tmp.tm_isdst > 0)
                    dst = ticksPerHour; // BUG: Assume daylight savings time is plus one hour.
            }
            else // out of range for system time, use our own calculation
            {
                /* BUG: this works for the US, but not other timezones.
                 */

                dt -= localTZA;

                int year = yearFromTime(dt);

                /* Compute time given year, month 1..12,
                 * week in month, weekday, hour
                 */
                d_time dstt(int year, int month, int week, int weekday, int hour)
                {
                    auto mday = dateFromNthWeekdayOfMonth(year, month, weekday, week);
                    return timeClip(makeDate(
                        makeDay(year, month - 1, mday),
                        makeTime(hour, 0, 0, 0)));
                }

                d_time start;
                d_time end;
                if (year < 2007)
                {   // Daylight savings time goes from 2 AM the first Sunday
                    // in April through 2 AM the last Sunday in October
                    start = dstt(year, 4, 1, 0, 2);
                    end = dstt(year, 10, 5, 0, 2);
                }
                else
                {
                    // the second Sunday of March to
                    // the first Sunday in November
                    start = dstt(year, 3, 2, 0, 2);
                    end = dstt(year, 11, 1, 0, 2);
                }

                if (start <= dt && dt < end)
                    dst = ticksPerHour;
                //writefln("start = %s, dt = %s, end = %s, dst = %s", start, dt, end, dst);
            }
        }
        return dst;
    }
}

/+ DOS File Time +/

/***
 * Type representing the DOS file date/time format.
 */
alias uint DosFileTime;

/************************************
 * Convert from DOS file date/time to d_time.
*
 * The DOS value is interpreted as local time and converted to UTC.
 * Returns d_time_nan if time is 0.
 */
d_time toDtime(DosFileTime time)
{
    uint dt = cast(uint)time;

    if (dt == 0)
        return d_time_nan;

    // Bit layout: yyyyyyym mmmddddd hhhhhmmm mmmsssss
    int year = ((dt >> 25) & 0x7F) + 1980;
    int month = ((dt >> 21) & 0x0F) - 1;        // 0..12
    int dayofmonth = ((dt >> 16) & 0x1F);       // 0..31
    int hour = (dt >> 11) & 0x1F;               // 0..23
    int minute = (dt >> 5) & 0x3F;              // 0..59
    int second = (dt << 1) & 0x3E;              // 0..58 (in 2 second increments)

    d_time t;

    t = undead.date.makeDate(undead.date.makeDay(year, month, dayofmonth),
            undead.date.makeTime(hour, minute, second, 0));

    assert(yearFromTime(t) == year);
    assert(monthFromTime(t) == month);
    assert(dateFromTime(t) == dayofmonth);
    assert(hourFromTime(t) == hour);
    assert(minFromTime(t) == minute);
    assert(secFromTime(t) == second);

    t -= localTZA + daylightSavingTA(t);

    return t;
}

/****************************************
 * Convert from d_time to DOS file date/time.
 *
 * t is UTC; the result is expressed in local time.
 * Returns 0 if t is d_time_nan or if the local year cannot be stored in the
 * 7-bit DOS year field (1980..2107).
 */
DosFileTime toDosFileTime(d_time t)
{
    uint dt;

    if (t == d_time_nan)
        return cast(DosFileTime)0;

    t += localTZA + daylightSavingTA(t);

    // The DOS year field is 7 bits wide, counted from 1980. Anything outside
    // that range would wrap (unsigned underflow / shifted-out bits) into a
    // bogus timestamp, so report it the same way as an invalid time.
    immutable int fullYear = yearFromTime(t);
    if (fullYear < 1980 || fullYear > 1980 + 0x7F)
        return cast(DosFileTime)0;

    uint year = fullYear;
    uint month = monthFromTime(t);
    uint dayofmonth = dateFromTime(t);
    uint hour = hourFromTime(t);
    uint minute = minFromTime(t);
    uint second = secFromTime(t);

    dt = (year - 1980) << 25;
    dt |= ((month + 1) & 0x0F) << 21;
    dt |= (dayofmonth & 0x1F) << 16;
    dt |= (hour & 0x1F) << 11;
    dt |= (minute & 0x3F) << 5;
    dt |= (second >> 1) & 0x1F;         // stored in 2 second increments

    return cast(DosFileTime)dt;
}

/**
Benchmarks code for speed assessment and comparison.

Params:

fun = aliases of callable objects (e.g. function names). Each should
take no arguments.

times = The number of times each function is to be executed.

result = The optional store for the return value. If $(D null) is
passed in, new store is allocated appropriately.

Returns:

An array of $(D n) $(D ulong)s. Element at slot $(D i) contains the
number of milliseconds spent in calling the $(D i)th function $(D
times) times.
Example:
----
int a;
void f0() { }
void f1() { auto b = a; }
void f2() { auto b = to!(string)(a); }
auto r = benchmark!(f0, f1, f2)(10_000_000);
----
 */
ulong[] benchmark(fun...)(uint times, ulong[] result = null)
{
    // Size the store once and fill it by index. Shrinking the slice to zero
    // and appending does not reliably reuse a caller-supplied buffer.
    result.length = fun.length;
    foreach (i, Unused; fun)
    {
        immutable start = getUTCtime();
        foreach (j; 0 .. times)
        {
            fun[i]();
        }
        immutable elapsed = getUTCtime() - start;
        // Convert ticks to milliseconds without truncating to 32 bits first.
        result[i] = cast(ulong) elapsed * 1000 / ticksPerSecond;
    }
    return result;
}

unittest
{
    int a;
    void f0() { }
    //void f1() { auto b = to!(string)(a); }
    void f2() { auto b = (a); }
    auto r = benchmark!(f0, f2)(100);
    //writeln(r);
}
undeaD-1.0.10/src/undead/datebase.d000066400000000000000000000013561346374113600167520ustar00rootroot00000000000000// Written in the D programming language.

/**
 * The only purpose of this module is to do the static construction for
 * std.date, to eliminate cyclic construction errors.
 *
 * Copyright: Copyright Digital Mars 2000 - 2009.
 * License:   Boost License 1.0.
 * Authors:   $(WEB digitalmars.com, Walter Bright)
 * Source:    $(PHOBOSSRC std/_datebase.d)
 */
/*
 *          Copyright Digital Mars 2000 - 2009.
 * Distributed under the Boost Software License, Version 1.0.
 *    (See accompanying file LICENSE_1_0.txt or copy at
 *          http://www.boost.org/LICENSE_1_0.txt)
 */
module undead.datebase;

// Defined with extern(C) linkage in the date module; declared here so this
// module does not have to import it.
extern(C) void std_date_static_this();

shared static this()
{
    std_date_static_this();
}
undeaD-1.0.10/src/undead/dateparse.d000066400000000000000000000576171346374113600171650ustar00rootroot00000000000000// Written in the D programming language.

/**
 * $(RED Deprecated. It will be removed in February 2012.
 *       Please use std.datetime instead.)
 *
 * dateparse module.
 *
 * Copyright: Copyright Digital Mars 2000 - 2009.
 * License:   Boost License 1.0.
 * Authors:   $(WEB digitalmars.com, Walter Bright)
 * Source:    $(PHOBOSSRC std/_dateparse.d)
 */
/*
 *          Copyright Digital Mars 2000 - 2009.
 * Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
 *          http://www.boost.org/LICENSE_1_0.txt)
 */
module undead.dateparse;

private
{
    import std.algorithm, std.string;
    import core.stdc.stdlib;
    import undead.date;
}

//deprecated:

//debug=dateparse;

/// Thrown by DateParse.parse when the input is not a valid date string.
/// NOTE(review): derives from Error, not Exception, so `catch (Exception)`
/// handlers will not intercept it.
class DateParseError : Error
{
    this(string s)
    {
        super("Invalid date string: " ~ s);
    }
}

/// Tokenizing parser for free-form textual dates. Call parse(); the other
/// members hold the intermediate state of one parse.
struct DateParse
{
    /// Parses s and stores the recognised fields in date.
    /// Throws DateParseError if s cannot be parsed or a field is out of range.
    void parse(string s, out Date date)
    {
        this = DateParse.init;

        // Scratch space for identifier tokens; it lives on this call's stack
        // frame, so `buffer` must not be used after parse() returns.
        //version (Win32)
            buffer = (cast(char *)alloca(s.length))[0 .. s.length];
        //else
            //buffer = new char[s.length];

        debug(dateparse) printf("DateParse.parse('%.*s')\n", s);
        if (!parseString(s))
        {
            goto Lerror;
        }

    /+
        if (year == year.init)
            year = 0;
        else
    +/
        debug(dateparse)
            printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
                year, month, day,
                hours, minutes, seconds, ms,
                weekday, tzcorrection);
        // Range-check every field; a year is mandatory.
        if (
            year == year.init ||
            (month < 1 || month > 12) ||
            (day < 1 || day > 31) ||
            (hours < 0 || hours > 23) ||
            (minutes < 0 || minutes > 59) ||
            (seconds < 0 || seconds > 59) ||
            (tzcorrection != int.min &&
             ((tzcorrection < -2300 || tzcorrection > 2300) ||
              (tzcorrection % 10)))
            )
        {
         Lerror:
            throw new DateParseError(s);
        }

        // Fold a 12-hour clock reading into 0..23.
        if (ampm)
        {
            if (hours > 12)
                goto Lerror;
            if (hours < 12)
            {
                if (ampm == 2)                  // if P.M.
                    hours += 12;
            }
            else if (ampm == 1)                 // if 12am
            {
                hours = 0;                      // which is midnight
            }
        }

//      if (tzcorrection != tzcorrection.init)
//          tzcorrection /= 100;

        // Two-digit years are taken to be 19xx.
        if (year >= 0 && year <= 99)
            year += 1900;

        date.year = year;
        date.month = month;
        date.day = day;
        date.hour = hours;
        date.minute = minutes;
        date.second = seconds;
        date.ms = ms;
        date.weekday = weekday;
        date.tzcorrection = tzcorrection;
    }


private:
    int year = int.min; // our "nan" Date value
    int month;          // 1..12
    int day;            // 1..31
    int hours;          // 0..23
    int minutes;        // 0..59
    int seconds;        // 0..59
    int ms;             // 0..999
    int weekday;        // 1..7
    int ampm;           // 0: not specified
                        // 1: AM
                        // 2: PM
    int tzcorrection = int.min; // -1200..1200 correction in hours

    string s;           // input being parsed
    int si;             // index of the next unread character in s
    int number;         // value attached to the most recent token
    char[] buffer;      // scratch space for lower-cased identifier tokens

    /// Token kinds produced by nextToken().
    enum DP : byte
    {
        err,
        weekday,
        month,
        number,
        end,
        colon,
        minus,
        slash,
        ampm,
        plus,
        tz,
        dst,
        dsttz,
    }

    /// Scans the next token starting at s[si], advancing si past it.
    /// For DP.number and identifier tokens the value is left in `number`.
    DP nextToken()
    {   int nest;
        uint c;
        int bi;
        DP result = DP.err;

        //printf("DateParse::nextToken()\n");
        for (;;)
        {
            assert(si <= s.length);
            if (si == s.length)
            {   result = DP.end;
                goto Lret;
            }
            //printf("\ts[%d] = '%c'\n", si, s[si]);
            switch (s[si])
            {
                case ':':       result = DP.colon; goto ret_inc;
                case '+':       result = DP.plus;  goto ret_inc;
                case '-':       result = DP.minus; goto ret_inc;
                case '/':       result = DP.slash; goto ret_inc;
                case '.':
                    version(DATE_DOT_DELIM)
                    {
                        result = DP.slash;
                        goto ret_inc;
                    }
                    else
                    {
                        si++;
                        break;
                    }

                ret_inc:
                    si++;
                    goto Lret;

                // Whitespace and commas are skipped.
                case ' ':
                case '\n':
                case '\r':
                case '\t':
                case ',':
                    si++;
                    break;

                case '(':               // comment
                    nest = 1;
                    for (;;)
                    {
                        si++;
                        if (si == s.length)
                            goto Lret;          // error
                        switch (s[si])
                        {
                            case '(':
                                nest++;
                                break;

                            case ')':
                                if (--nest == 0)
                                    goto Lendofcomment;
                                break;

                            default:
                                break;
                        }
                    }

                Lendofcomment:
                    si++;
                    break;

                default:
                    // First try to read a run of decimal digits.
                    number = 0;
                    for (;;)
                    {
                        if (si == s.length)
                            // c cannot be undefined here
                            break;
                        c = s[si];
                        if (!(c >= '0' && c <= '9'))
                            break;
                        result = DP.number;
                        number = number * 10 + (c - '0');
                        si++;
                    }
                    if (result == DP.number)
                        goto Lret;

                    // Otherwise collect a lower-cased alphabetic word and
                    // look it up.
                    bi = 0;
            bufloop:
                    while (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z')
                    {
                        if (c < 'a')                // if upper case
                            c += cast(uint)'a' - cast(uint)'A'; // to lower case
                        buffer[bi] = cast(char)c;
                        bi++;
                        do
                        {
                            si++;
                            if (si == s.length)
                                break bufloop;
                            c = s[si];
                        } while (c == '.');     // ignore embedded '.'s
                    }
                    result = classify(buffer[0 .. bi].idup);
                    goto Lret;
            }
        }
    Lret:
        //printf("-DateParse::nextToken()\n");
        return result;
    }

    /// Looks up a lower-cased word in the keyword table. On a match, stores
    /// the associated value in `number` and returns its token kind;
    /// otherwise returns DP.err.
    DP classify(string buf)
    {
        struct DateID
        {
            string name;
            DP tok;
            short value;
        }

        static immutable DateID[] dateidtab =
        [
            {   "january",      DP.month,       1},
            {   "february",     DP.month,       2},
            {   "march",        DP.month,       3},
            {   "april",        DP.month,       4},
            {   "may",          DP.month,       5},
            {   "june",         DP.month,       6},
            {   "july",         DP.month,       7},
            {   "august",       DP.month,       8},
            {   "september",    DP.month,       9},
            {   "october",      DP.month,       10},
            {   "november",     DP.month,       11},
            {   "december",     DP.month,       12},
            {   "jan",          DP.month,       1},
            {   "feb",          DP.month,       2},
            {   "mar",          DP.month,       3},
            {   "apr",          DP.month,       4},
            {   "jun",          DP.month,       6},
            {   "jul",          DP.month,       7},
            {   "aug",          DP.month,       8},
            {   "sep",          DP.month,       9},
            {   "sept",         DP.month,       9},
            {   "oct",          DP.month,       10},
            {   "nov",          DP.month,       11},
            {   "dec",          DP.month,       12},

            {   "sunday",       DP.weekday,     1},
            {   "monday",       DP.weekday,     2},
            {   "tuesday",      DP.weekday,     3},
            {   "tues",         DP.weekday,     3},
            {   "wednesday",    DP.weekday,     4},
            {   "wednes",       DP.weekday,     4},
            {   "thursday",     DP.weekday,     5},
            {   "thur",         DP.weekday,     5},
            {   "thurs",        DP.weekday,     5},
            {   "friday",       DP.weekday,     6},
            {   "saturday",     DP.weekday,     7},

            {   "sun",          DP.weekday,     1},
            {   "mon",          DP.weekday,     2},
            {   "tue",          DP.weekday,     3},
            {   "wed",          DP.weekday,     4},
            {   "thu",          DP.weekday,     5},
            {   "fri",          DP.weekday,     6},
            {   "sat",          DP.weekday,     7},

            {   "am",           DP.ampm,                1},
            {   "pm",           DP.ampm,                2},

            {   "gmt",          DP.tz,          +000},
            {   "ut",           DP.tz,          +000},
            {   "utc",          DP.tz,          +000},
            {   "wet",          DP.tz,          +000},
            {   "z",            DP.tz,          +000},
            {   "wat",          DP.tz,          +100},
            {   "a",            DP.tz,          +100},
            {   "at",           DP.tz,          +200},
            {   "b",            DP.tz,          +200},
            {   "c",            DP.tz,          +300},
            {   "ast",          DP.tz,          +400},
            {   "d",            DP.tz,          +400},
            {   "est",          DP.tz,          +500},
            {   "e",            DP.tz,          +500},
            {   "cst",          DP.tz,          +600},
            {   "f",            DP.tz,          +600},
            {   "mst",          DP.tz,          +700},
            {   "g",            DP.tz,          +700},
            {   "pst",          DP.tz,          +800},
            {   "h",            DP.tz,          +800},
            {   "yst",          DP.tz,          +900},
            {   "i",            DP.tz,          +900},
            {   "ahst",         DP.tz,          +1000},
            {   "cat",          DP.tz,          +1000},
            {   "hst",          DP.tz,          +1000},
            {   "k",            DP.tz,          +1000},
            {   "nt",           DP.tz,          +1100},
            {   "l",            DP.tz,          +1100},
            {   "idlw",         DP.tz,          +1200},
            {   "m",            DP.tz,          +1200},

            {   "cet",          DP.tz,          -100},
            {   "fwt",          DP.tz,          -100},
            {   "met",          DP.tz,          -100},
            {   "mewt",         DP.tz,          -100},
            {   "swt",          DP.tz,          -100},
            {   "n",            DP.tz,          -100},
            {   "eet",          DP.tz,          -200},
            {   "o",            DP.tz,          -200},
            {   "bt",           DP.tz,          -300},
            {   "p",            DP.tz,          -300},
            {   "zp4",          DP.tz,          -400},
            {   "q",            DP.tz,          -400},
            {   "zp5",          DP.tz,          -500},
            {   "r",            DP.tz,          -500},
            {   "zp6",          DP.tz,          -600},
            {   "s",            DP.tz,          -600},
            {   "wast",         DP.tz,          -700},
            {   "t",            DP.tz,          -700},
            {   "cct",          DP.tz,          -800},
            {   "u",            DP.tz,          -800},
            {   "jst",          DP.tz,          -900},
            {   "v",            DP.tz,          -900},
            {   "east",         DP.tz,          -1000},
            {   "gst",          DP.tz,          -1000},
            {   "w",            DP.tz,          -1000},
            {   "x",            DP.tz,          -1100},
            {   "idle",         DP.tz,          -1200},
            {   "nzst",         DP.tz,          -1200},
            {   "nzt",          DP.tz,          -1200},
            {   "y",            DP.tz,          -1200},

            {   "bst",          DP.dsttz,       000},
            {   "adt",          DP.dsttz,       +400},
            {   "edt",          DP.dsttz,       +500},
            {   "cdt",          DP.dsttz,       +600},
            {   "mdt",          DP.dsttz,       +700},
            {   "pdt",          DP.dsttz,       +800},
            {   "ydt",          DP.dsttz,       +900},
            {   "hdt",          DP.dsttz,       +1000},
            {   "mest",         DP.dsttz,       -100},
            {   "mesz",         DP.dsttz,       -100},
            {   "sst",          DP.dsttz,       -100},
            {   "fst",          DP.dsttz,       -100},
            {   "wadt",         DP.dsttz,       -700},
            {   "eadt",         DP.dsttz,       -1000},
            {   "nzdt",         DP.dsttz,       -1200},

            {   "dst",          DP.dst,         0},
        ];

        //message(DTEXT("DateParse::classify('%s')\n"), buf);

        // Do a linear search. Yes, it would be faster with a binary
        // one.
        for (uint i = 0; i < dateidtab.length; i++)
        {
            if (cmp(dateidtab[i].name, buf) == 0)
            {
                number = dateidtab[i].value;
                return dateidtab[i].tok;
            }
        }
        return DP.err;
    }

    /// Top-level parse loop: consumes tokens from s and fills in the date
    /// fields. Returns 1 on success, 0 on a syntax error.
    int parseString(string s)
    {
        int n1;
        int dp;
        int sisave;
        int result;

        //message(DTEXT("DateParse::parseString('%ls')\n"), s);
        this.s = s;
        si = 0;
        dp = nextToken();
        for (;;)
        {
            //message(DTEXT("\tdp = %d\n"), dp);
            // A case that ends with `break` fetches a fresh token below;
            // one that ends with `continue` has already read ahead into dp.
            switch (dp)
            {
                case DP.end:
                    result = 1;
                Lret:
                    return result;

                case DP.err:
                case_error:
                    //message(DTEXT("\terror\n"));
                default:
                    result = 0;
                    goto Lret;

                case DP.minus:
                    break;                  // ignore spurious '-'

                case DP.weekday:
                    weekday = number;
                    break;

                case DP.month:              // month day, [year]
                    month = number;
                    dp = nextToken();
                    if (dp == DP.number)
                    {
                        day = number;
                        sisave = si;
                        dp = nextToken();
                        if (dp == DP.number)
                        {
                            n1 = number;
                            dp = nextToken();
                            if (dp == DP.colon)
                            {   // back up, not a year
                                si = sisave;
                            }
                            else
                            {   year = n1;
                                continue;
                            }
                            break;
                        }
                    }
                    continue;

                case DP.number:
                    n1 = number;
                    dp = nextToken();
                    switch (dp)
                    {
                        case DP.end:
                            year = n1;
                            break;

                        case DP.minus:
                        case DP.slash:  // n1/ ? ? ?
                            dp = parseCalendarDate(n1);
                            if (dp == DP.err)
                                goto case_error;
                            break;

                        case DP.colon:  // hh:mm [:ss] [am | pm]
                            dp = parseTimeOfDay(n1);
                            if (dp == DP.err)
                                goto case_error;
                            break;

                        case DP.ampm:
                            hours = n1;
                            minutes = 0;
                            seconds = 0;
                            ampm = number;
                            break;

                        case DP.month:
                            day = n1;
                            month = number;
                            dp = nextToken();
                            if (dp == DP.number)
                            {   // day month year
                                year = number;
                                dp = nextToken();
                            }
                            break;

                        default:
                            year = n1;
                            break;
                    }
                    continue;
            }
            dp = nextToken();
        }
        // @@@ bug in the compiler: this is never reachable
        assert(0);
    }

    /// Parses the remainder of a calendar date after "n1/" or "n1-".
    /// Returns the token following the date, or DP.err on a syntax error.
    int parseCalendarDate(int n1)
    {
        int n2;
        int n3;
        int dp;

        debug(dateparse) printf("DateParse.parseCalendarDate(%d)\n", n1);
        dp = nextToken();
        if (dp == DP.month)     // day/month
        {
            day = n1;
            month = number;
            dp = nextToken();
            if (dp == DP.number)
            {   // day/month year
                year = number;
                dp = nextToken();
            }
            else if (dp == DP.minus || dp == DP.slash)
            {   // day/month/year
                dp = nextToken();
                if (dp != DP.number)
                    goto case_error;
                year = number;
                dp = nextToken();
            }
            return dp;
        }
        if (dp != DP.number)
            goto case_error;
        n2 = number;
        //message(DTEXT("\tn2 = %d\n"), n2);
        dp = nextToken();
        if (dp == DP.minus || dp == DP.slash)
        {
            dp = nextToken();
            if (dp != DP.number)
                goto case_error;
            n3 = number;
            //message(DTEXT("\tn3 = %d\n"), n3);
            dp = nextToken();

            // case1: year/month/day
            // case2: month/day/year
            int case1, case2;

            case1 = (n1 > 12 ||
                     (n2 >= 1 && n2 <= 12) &&
                     (n3 >= 1 && n3 <= 31));
            case2 = ((n1 >= 1 && n1 <= 12) &&
                     (n2 >= 1 && n2 <= 31) ||
                     n3 > 31);
            // Reject input that fits both interpretations or neither.
            if (case1 == case2)
                goto case_error;
            if (case1)
            {
                year = n1;
                month = n2;
                day = n3;
            }
            else
            {
                month = n1;
                day = n2;
                year = n3;
            }
        }
        else
        {   // must be month/day
            month = n1;
            day = n2;
        }
        return dp;

    case_error:
        return DP.err;
    }

    /// Parses the remainder of a time of day after "n1:", including an
    /// optional am/pm marker or time zone. Returns the token following the
    /// time, or DP.err on a syntax error.
    int parseTimeOfDay(int n1)
    {
        int dp;
        int sign;

        // 12am is midnight
        // 12pm is noon

        //message(DTEXT("DateParse::parseTimeOfDay(%d)\n"), n1);
        hours = n1;
        dp = nextToken();
        if (dp != DP.number)
            goto case_error;
        minutes = number;
        dp = nextToken();
        if (dp == DP.colon)
        {
            dp = nextToken();
            if (dp != DP.number)
                goto case_error;
            seconds = number;
            dp = nextToken();
        }
        else
            seconds = 0;

        if (dp == DP.ampm)
        {
            ampm = number;
            dp = nextToken();
        }
        else if (dp == DP.plus || dp == DP.minus)
        {
        Loffset:
            // A written offset "+hhmm" is stored negated.
            sign = (dp == DP.minus) ? -1 : 1;
            dp = nextToken();
            if (dp != DP.number)
                goto case_error;
            tzcorrection = -sign * number;
            dp = nextToken();
        }
        else if (dp == DP.tz)
        {
            tzcorrection = number;
            dp = nextToken();
            // A zero-offset zone name may be followed by an explicit offset.
            if (number == 0 && (dp == DP.plus || dp == DP.minus))
                goto Loffset;
            if (dp == DP.dst)
            {   tzcorrection += 100;
                dp = nextToken();
            }
        }
        else if (dp == DP.dsttz)
        {
            tzcorrection = number;
            dp = nextToken();
        }

        return dp;

    case_error:
        return DP.err;
    }

}

unittest
{
    DateParse dp;
    Date d;

    dp.parse("March 10, 1959 12:00 -800", d);
    assert(d.year         == 1959);
    assert(d.month        == 3);
    assert(d.day          == 10);
    assert(d.hour         == 12);
    assert(d.minute       == 0);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == 800);

    dp.parse("Tue Apr 02 02:04:57 GMT-0800 1996", d);
    assert(d.year         == 1996);
    assert(d.month        == 4);
    assert(d.day          == 2);
    assert(d.hour         == 2);
    assert(d.minute       == 4);
    assert(d.second       == 57);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == 800);

    dp.parse("March 14, -1980 21:14:50", d);
    assert(d.year         == 1980);
    assert(d.month        == 3);
    assert(d.day          == 14);
    assert(d.hour         == 21);
    assert(d.minute       == 14);
    assert(d.second       == 50);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == int.min);

    dp.parse("Tue Apr 02 02:04:57 1996", d);
    assert(d.year         == 1996);
    assert(d.month        == 4);
    assert(d.day          == 2);
    assert(d.hour         == 2);
    assert(d.minute       == 4);
    assert(d.second       == 57);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == int.min);

    dp.parse("Tue, 02 Apr 1996 02:04:57 G.M.T.", d);
    assert(d.year         == 1996);
    assert(d.month        == 4);
    assert(d.day          == 2);
    assert(d.hour         == 2);
    assert(d.minute       == 4);
    assert(d.second       == 57);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == 0);

    dp.parse("December 31, 3000", d);
    assert(d.year         == 3000);
    assert(d.month        == 12);
    assert(d.day          == 31);
    assert(d.hour         == 0);
    assert(d.minute       == 0);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == int.min);

    dp.parse("Wed, 31 Dec 1969 16:00:00 GMT", d);
    assert(d.year         == 1969);
    assert(d.month        == 12);
    assert(d.day          == 31);
    assert(d.hour         == 16);
    assert(d.minute       == 0);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 4);
    assert(d.tzcorrection == 0);

    dp.parse("1/1/1999 12:30 AM", d);
    assert(d.year         == 1999);
    assert(d.month        == 1);
    assert(d.day          == 1);
    assert(d.hour         == 0);
    assert(d.minute       == 30);
    assert(d.second       == 0);
    assert(d.ms           == 0);
    assert(d.weekday      == 0);
    assert(d.tzcorrection == int.min);

    dp.parse("Tue, 20 May 2003 15:38:58 +0530", d);
    assert(d.year         == 2003);
    assert(d.month        == 5);
    assert(d.day          == 20);
    assert(d.hour         == 15);
    assert(d.minute       == 38);
    assert(d.second       == 58);
    assert(d.ms           == 0);
    assert(d.weekday      == 3);
    assert(d.tzcorrection == -530);

    debug(dateparse) printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
        d.year, d.month, d.day,
        d.hour, d.minute, d.second, d.ms,
        d.weekday, d.tzcorrection);
}
undeaD-1.0.10/src/undead/doformat.d000066400000000000000000001173041346374113600170160ustar00rootroot00000000000000// Written in the D programming language.

/**
   Copyright: Copyright Digital Mars 2000-2013.

   License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors: $(HTTP walterbright.com, Walter Bright), $(HTTP erdani.com,
   Andrei Alexandrescu), and Kenji Hara

   Source: $(PHOBOSSRC std/_format.d)
 */
module undead.doformat;

//debug=format;                // uncomment to turn on debugging printf's

import core.vararg;
import std.exception;
import std.meta;
import std.range.primitives;
import std.traits;
import std.format;

version(CRuntime_DigitalMars)
{
    version = DigitalMarsC;
}

version (DigitalMarsC)
{
    // This is DMC's internal floating point formatting function
    extern (C)
    {
        extern shared char* function(int c, int flags, int precision,
                in real* pdval,
                char* buf, size_t* psl, int width) __pfloatfmt;
    }
}

/**********************************************************************
 * Signals a mismatch between a format and its corresponding argument.
 */
class FormatException : Exception
{
    /// Constructs the exception with the fixed message "format error".
    @safe pure nothrow
    this()
    {
        super("format error");
    }

    /// Constructs the exception with a custom message; the remaining
    /// arguments are forwarded unchanged to Exception's constructor.
    @safe pure nothrow
    this(string msg, string fn = __FILE__, size_t ln = __LINE__, Throwable next = null)
    {
        super(msg, fn, ln, next);
    }
}


// Legacy implementation

// Single-character codes identifying types.
// NOTE(review): presumably these mirror the compiler's name-mangling
// characters -- confirm against the D ABI documentation.
enum Mangle : char
{
    Tvoid     = 'v',
    Tbool     = 'b',
    Tbyte     = 'g',
    Tubyte    = 'h',
    Tshort    = 's',
    Tushort   = 't',
    Tint      = 'i',
    Tuint     = 'k',
    Tlong     = 'l',
    Tulong    = 'm',
    Tfloat    = 'f',
    Tdouble   = 'd',
    Treal     = 'e',

    Tifloat   = 'o',
    Tidouble  = 'p',
    Tireal    = 'j',
    Tcfloat   = 'q',
    Tcdouble  = 'r',
    Tcreal    = 'c',

    Tchar     = 'a',
    Twchar    = 'u',
    Tdchar    = 'w',

    Tarray    = 'A',
    Tsarray   = 'G',
    Taarray   = 'H',
    Tpointer  = 'P',
    Tfunction = 'F',
    Tident    = 'I',
    Tclass    = 'C',
    Tstruct   = 'S',
    Tenum     = 'E',
    Ttypedef  = 'T',
    Tdelegate = 'D',

    Tconst    = 'x',
    Timmutable = 'y',
}

// return the TypeInfo for a primitive type and null otherwise.  This
// is required since for arrays of ints we only have the mangled char
// to work from. If arrays always subclassed TypeInfo_Array this
// routine could go away.
/*
 * Maps the mangle character of a primitive type to that type's TypeInfo.
 *
 * Params:
 *  m = mangle character to look up
 *
 * Returns:
 *  the TypeInfo of the matching primitive type, or null when m is not one
 *  of the primitive codes handled below (arrays, classes, structs, ...).
 *
 * The lookup is a plain switch, so there is no lazily initialised shared
 * table and nothing that concurrent callers could observe half-built.
 */
private TypeInfo primitiveTypeInfo(Mangle m)
{
    switch (m)
    {
        case Mangle.Tvoid:    return typeid(void);
        case Mangle.Tbool:    return typeid(bool);
        case Mangle.Tbyte:    return typeid(byte);
        case Mangle.Tubyte:   return typeid(ubyte);
        case Mangle.Tshort:   return typeid(short);
        case Mangle.Tushort:  return typeid(ushort);
        case Mangle.Tint:     return typeid(int);
        case Mangle.Tuint:    return typeid(uint);
        case Mangle.Tlong:    return typeid(long);
        case Mangle.Tulong:   return typeid(ulong);
        case Mangle.Tfloat:   return typeid(float);
        case Mangle.Tdouble:  return typeid(double);
        case Mangle.Treal:    return typeid(real);
        case Mangle.Tifloat:  return typeid(ifloat);
        case Mangle.Tidouble: return typeid(idouble);
        case Mangle.Tireal:   return typeid(ireal);
        case Mangle.Tcfloat:  return typeid(cfloat);
        case Mangle.Tcdouble: return typeid(cdouble);
        case Mangle.Tcreal:   return typeid(creal);
        case Mangle.Tchar:    return typeid(char);
        case Mangle.Twchar:   return typeid(wchar);
        case Mangle.Tdchar:   return typeid(dchar);
        default:              return null;
    }
}

// This stuff has been removed from the docs and is planned for deprecation.
/*
 * Interprets variadic argument list pointed to by argptr whose types
 * are given by arguments[], formats them according to embedded format
 * strings in the variadic argument list, and sends the resulting
 * characters to putc.
 *
 * The variadic arguments are consumed in order.  Each is formatted
 * into a sequence of chars, using the default format specification
 * for its type, and the characters are sequentially passed to putc.
 * If a $(D char[]), $(D wchar[]), or $(D dchar[]) argument is
 * encountered, it is interpreted as a format string. As many
 * arguments as specified in the format string are consumed and
 * formatted according to the format specifications in that string and
 * passed to putc. If there are too few remaining arguments, a
 * $(D FormatException) is thrown. If there are more remaining arguments than
 * needed by the format specification, the default processing of
 * arguments resumes until they are all consumed.
* * Params: * putc = Output is sent to this delegate, character by character. * arguments = Array of $(D TypeInfo)s, one for each argument to be formatted. * ap = Points to variadic argument list. * * Throws: * Mismatched arguments and formats result in a $(D FormatException) being thrown. * * Format_String: * $(I Format strings) * consist of characters interspersed with * $(I format specifications). Characters are simply copied * to the output (such as putc) after any necessary conversion * to the corresponding UTF-8 sequence. * * A $(I format specification) starts with a '%' character, * and has the following grammar: $(CONSOLE $(I FormatSpecification): $(B '%%') $(B '%') $(I Flags) $(I Width) $(I Precision) $(I FormatChar) $(I Flags): $(I empty) $(B '-') $(I Flags) $(B '+') $(I Flags) $(B '#') $(I Flags) $(B '0') $(I Flags) $(B ' ') $(I Flags) $(I Width): $(I empty) $(I Integer) $(B '*') $(I Precision): $(I empty) $(B '.') $(B '.') $(I Integer) $(B '.*') $(I Integer): $(I Digit) $(I Digit) $(I Integer) $(I Digit): $(B '0') $(B '1') $(B '2') $(B '3') $(B '4') $(B '5') $(B '6') $(B '7') $(B '8') $(B '9') $(I FormatChar): $(B 's') $(B 'b') $(B 'd') $(B 'o') $(B 'x') $(B 'X') $(B 'e') $(B 'E') $(B 'f') $(B 'F') $(B 'g') $(B 'G') $(B 'a') $(B 'A') ) $(DL $(DT $(I Flags)) $(DL $(DT $(B '-')) $(DD Left justify the result in the field. It overrides any $(B 0) flag.) $(DT $(B '+')) $(DD Prefix positive numbers in a signed conversion with a $(B +). It overrides any $(I space) flag.) $(DT $(B '#')) $(DD Use alternative formatting: $(DL $(DT For $(B 'o'):) $(DD Add to precision as necessary so that the first digit of the octal formatting is a '0', even if both the argument and the $(I Precision) are zero.) $(DT For $(B 'x') ($(B 'X')):) $(DD If non-zero, prefix result with $(B 0x) ($(B 0X)).) $(DT For floating point formatting:) $(DD Always insert the decimal point.) $(DT For $(B 'g') ($(B 'G')):) $(DD Do not elide trailing zeros.) 
)) $(DT $(B '0')) $(DD For integer and floating point formatting when not nan or infinity, use leading zeros to pad rather than spaces. Ignore if there's a $(I Precision).) $(DT $(B ' ')) $(DD Prefix positive numbers in a signed conversion with a space.) ) $(DT $(I Width)) $(DD Specifies the minimum field width. If the width is a $(B *), the next argument, which must be of type $(B int), is taken as the width. If the width is negative, it is as if the $(B -) was given as a $(I Flags) character.) $(DT $(I Precision)) $(DD Gives the precision for numeric conversions. If the precision is a $(B *), the next argument, which must be of type $(B int), is taken as the precision. If it is negative, it is as if there was no $(I Precision).) $(DT $(I FormatChar)) $(DD $(DL $(DT $(B 's')) $(DD The corresponding argument is formatted in a manner consistent with its type: $(DL $(DT $(B bool)) $(DD The result is 'true' or 'false'.) $(DT integral types) $(DD The $(B %d) format is used.) $(DT floating point types) $(DD The $(B %g) format is used.) $(DT string types) $(DD The result is the string converted to UTF-8. A $(I Precision) specifies the maximum number of characters to use in the result.) $(DT classes derived from $(B Object)) $(DD The result is the string returned from the class instance's $(B .toString()) method. A $(I Precision) specifies the maximum number of characters to use in the result.) $(DT non-string static and dynamic arrays) $(DD The result is [s0, s1, ...] where sk is the kth element formatted with the default format.) )) $(DT $(B 'b','d','o','x','X')) $(DD The corresponding argument must be an integral type and is formatted as an integer. If the argument is a signed type and the $(I FormatChar) is $(B d) it is converted to a signed string of characters, otherwise it is treated as unsigned. An argument of type $(B bool) is formatted as '1' or '0'. The base used is binary for $(B b), octal for $(B o), decimal for $(B d), and hexadecimal for $(B x) or $(B X). 
$(B x) formats using lower case letters, $(B X) uppercase. If there are fewer resulting digits than the $(I Precision), leading zeros are used as necessary. If the $(I Precision) is 0 and the number is 0, no digits result.) $(DT $(B 'e','E')) $(DD A floating point number is formatted as one digit before the decimal point, $(I Precision) digits after, the $(I FormatChar), ±, followed by at least a two digit exponent: $(I d.dddddd)e$(I ±dd). If there is no $(I Precision), six digits are generated after the decimal point. If the $(I Precision) is 0, no decimal point is generated.) $(DT $(B 'f','F')) $(DD A floating point number is formatted in decimal notation. The $(I Precision) specifies the number of digits generated after the decimal point. It defaults to six. At least one digit is generated before the decimal point. If the $(I Precision) is zero, no decimal point is generated.) $(DT $(B 'g','G')) $(DD A floating point number is formatted in either $(B e) or $(B f) format for $(B g); $(B E) or $(B F) format for $(B G). The $(B f) format is used if the exponent for an $(B e) format is greater than -5 and less than the $(I Precision). The $(I Precision) specifies the number of significant digits, and defaults to six. Trailing zeros are elided after the decimal point, if the fractional part is zero then no decimal point is generated.) $(DT $(B 'a','A')) $(DD A floating point number is formatted in hexadecimal exponential notation 0x$(I h.hhhhhh)p$(I ±d). There is one hexadecimal digit before the decimal point, and as many after as specified by the $(I Precision). If the $(I Precision) is zero, no decimal point is generated. If there is no $(I Precision), as many hexadecimal digits as necessary to exactly represent the mantissa are generated. The exponent is written in as few digits as possible, but at least one, is in decimal, and represents a power of 2 as in $(I h.hhhhhh)*2$(I ±d). The exponent for zero is zero. 
The hexadecimal digits, x and p are in upper case if the $(I FormatChar) is upper case.) ) Floating point NaN's are formatted as $(B nan) if the $(I FormatChar) is lower case, or $(B NAN) if upper. Floating point infinities are formatted as $(B inf) or $(B infinity) if the $(I FormatChar) is lower case, or $(B INF) or $(B INFINITY) if upper. )) Example: ------------------------- import core.stdc.stdio; import std.format; void myPrint(...) { void putc(dchar c) { fputc(c, stdout); } std.format.doFormat(&putc, _arguments, _argptr); } void main() { int x = 27; // prints 'The answer is 27:6' myPrint("The answer is %s:", x, 6); } ------------------------ */ void doFormat()(scope void delegate(dchar) putc, TypeInfo[] arguments, va_list ap) { import std.utf : encode, toUCSindex, isValidDchar, UTFException, toUTF8; import core.stdc.string : strlen; import core.stdc.stdlib : alloca, malloc, realloc, free; import core.stdc.stdio : snprintf; size_t bufLength = 1024; void* argBuffer = malloc(bufLength); scope(exit) free(argBuffer); size_t bufUsed = 0; foreach (ti; arguments) { // Ensure the required alignment bufUsed += ti.talign - 1; bufUsed -= (cast(size_t)argBuffer + bufUsed) & (ti.talign - 1); auto pos = bufUsed; // Align to next word boundary bufUsed += ti.tsize + size_t.sizeof - 1; bufUsed -= (cast(size_t)argBuffer + bufUsed) & (size_t.sizeof - 1); // Resize buffer if necessary while (bufUsed > bufLength) { bufLength *= 2; argBuffer = realloc(argBuffer, bufLength); } // Copy argument into buffer va_arg(ap, ti, argBuffer + pos); } auto argptr = argBuffer; void* skipArg(TypeInfo ti) { // Ensure the required alignment argptr += ti.talign - 1; argptr -= cast(size_t)argptr & (ti.talign - 1); auto p = argptr; // Align to next word boundary argptr += ti.tsize + size_t.sizeof - 1; argptr -= cast(size_t)argptr & (size_t.sizeof - 1); return p; } auto getArg(T)() { return *cast(T*)skipArg(typeid(T)); } TypeInfo ti; Mangle m; uint flags; int field_width; int precision; enum : uint { 
FLdash = 1, FLplus = 2, FLspace = 4, FLhash = 8, FLlngdbl = 0x20, FL0pad = 0x40, FLprecision = 0x80, } static TypeInfo skipCI(TypeInfo valti) { for (;;) { if (typeid(valti).name.length == 18 && typeid(valti).name[9..18] == "Invariant") valti = (cast(TypeInfo_Invariant)valti).base; else if (typeid(valti).name.length == 14 && typeid(valti).name[9..14] == "Const") valti = (cast(TypeInfo_Const)valti).base; else break; } return valti; } void formatArg(char fc) { bool vbit; ulong vnumber; char vchar; dchar vdchar; Object vobject; real vreal; creal vcreal; Mangle m2; int signed = 0; uint base = 10; int uc; char[ulong.sizeof * 8] tmpbuf; // long enough to print long in binary const(char)* prefix = ""; string s; void putstr(const char[] s) { //printf("putstr: s = %.*s, flags = x%x\n", s.length, s.ptr, flags); ptrdiff_t padding = field_width - (strlen(prefix) + toUCSindex(s, s.length)); ptrdiff_t prepad = 0; ptrdiff_t postpad = 0; if (padding > 0) { if (flags & FLdash) postpad = padding; else prepad = padding; } if (flags & FL0pad) { while (*prefix) putc(*prefix++); while (prepad--) putc('0'); } else { while (prepad--) putc(' '); while (*prefix) putc(*prefix++); } foreach (dchar c; s) putc(c); while (postpad--) putc(' '); } void putreal(real v) { //printf("putreal %Lg\n", vreal); switch (fc) { case 's': fc = 'g'; break; case 'f', 'F', 'e', 'E', 'g', 'G', 'a', 'A': break; default: //printf("fc = '%c'\n", fc); Lerror: throw new FormatException("incompatible format character for floating point type"); } version (DigitalMarsC) { uint sl; char[] fbuf = tmpbuf; if (!(flags & FLprecision)) precision = 6; while (1) { sl = fbuf.length; prefix = (*__pfloatfmt)(fc, flags | FLlngdbl, precision, &v, cast(char*)fbuf, &sl, field_width); if (sl != -1) break; sl = fbuf.length * 2; fbuf = (cast(char*)alloca(sl * char.sizeof))[0 .. sl]; } putstr(fbuf[0 .. 
sl]); } else { ptrdiff_t sl; char[] fbuf = tmpbuf; char[12] format; format[0] = '%'; int i = 1; if (flags & FLdash) format[i++] = '-'; if (flags & FLplus) format[i++] = '+'; if (flags & FLspace) format[i++] = ' '; if (flags & FLhash) format[i++] = '#'; if (flags & FL0pad) format[i++] = '0'; format[i + 0] = '*'; format[i + 1] = '.'; format[i + 2] = '*'; format[i + 3] = 'L'; format[i + 4] = fc; format[i + 5] = 0; if (!(flags & FLprecision)) precision = -1; while (1) { sl = fbuf.length; int n; version (CRuntime_Microsoft) { import std.math : isNaN, isInfinity; if (isNaN(v)) // snprintf writes 1.#QNAN n = snprintf(fbuf.ptr, sl, "nan"); else if (isInfinity(v)) // snprintf writes 1.#INF n = snprintf(fbuf.ptr, sl, v < 0 ? "-inf" : "inf"); else n = snprintf(fbuf.ptr, sl, format.ptr, field_width, precision, cast(double)v); } else n = snprintf(fbuf.ptr, sl, format.ptr, field_width, precision, v); //printf("format = '%s', n = %d\n", cast(char*)format, n); if (n >= 0 && n < sl) { sl = n; break; } if (n < 0) sl = sl * 2; else sl = n + 1; fbuf = (cast(char*)alloca(sl * char.sizeof))[0 .. sl]; } putstr(fbuf[0 .. sl]); } return; } static Mangle getMan(TypeInfo ti) { auto m = cast(Mangle)typeid(ti).name[9]; if (typeid(ti).name.length == 20 && typeid(ti).name[9..20] == "StaticArray") m = cast(Mangle)'G'; return m; } /* p = pointer to the first element in the array * len = number of elements in the array * valti = type of the elements */ void putArray(void* p, size_t len, TypeInfo valti) { //printf("\nputArray(len = %u), tsize = %u\n", len, valti.tsize); putc('['); valti = skipCI(valti); size_t tsize = valti.tsize; auto argptrSave = argptr; auto tiSave = ti; auto mSave = m; ti = valti; //printf("\n%.*s\n", typeid(valti).name.length, typeid(valti).name.ptr); m = getMan(valti); while (len--) { //doFormat(putc, (&valti)[0 .. 
1], p); argptr = p; formatArg('s'); p += tsize; if (len > 0) putc(','); } m = mSave; ti = tiSave; argptr = argptrSave; putc(']'); } void putAArray(ubyte[long] vaa, TypeInfo valti, TypeInfo keyti) { putc('['); bool comma=false; auto argptrSave = argptr; auto tiSave = ti; auto mSave = m; valti = skipCI(valti); keyti = skipCI(keyti); foreach (ref fakevalue; vaa) { if (comma) putc(','); comma = true; void *pkey = &fakevalue; version (D_LP64) pkey -= (long.sizeof + 15) & ~(15); else pkey -= (long.sizeof + size_t.sizeof - 1) & ~(size_t.sizeof - 1); // the key comes before the value auto keysize = keyti.tsize; version (D_LP64) auto keysizet = (keysize + 15) & ~(15); else auto keysizet = (keysize + size_t.sizeof - 1) & ~(size_t.sizeof - 1); void* pvalue = pkey + keysizet; //doFormat(putc, (&keyti)[0..1], pkey); m = getMan(keyti); argptr = pkey; ti = keyti; formatArg('s'); putc(':'); //doFormat(putc, (&valti)[0..1], pvalue); m = getMan(valti); argptr = pvalue; ti = valti; formatArg('s'); } m = mSave; ti = tiSave; argptr = argptrSave; putc(']'); } //printf("formatArg(fc = '%c', m = '%c')\n", fc, m); int mi; switch (m) { case Mangle.Tbool: vbit = getArg!(bool)(); if (fc != 's') { vnumber = vbit; goto Lnumber; } putstr(vbit ? "true" : "false"); return; case Mangle.Tchar: vchar = getArg!(char)(); if (fc != 's') { vnumber = vchar; goto Lnumber; } L2: putstr((&vchar)[0 .. 1]); return; case Mangle.Twchar: vdchar = getArg!(wchar)(); goto L1; case Mangle.Tdchar: vdchar = getArg!(dchar)(); L1: if (fc != 's') { vnumber = vdchar; goto Lnumber; } if (vdchar <= 0x7F) { vchar = cast(char)vdchar; goto L2; } else { if (!isValidDchar(vdchar)) throw new UTFException("invalid dchar in format"); char[4] vbuf; putstr(vbuf[0 .. 
encode(vbuf, vdchar)]); } return; case Mangle.Tbyte: signed = 1; vnumber = getArg!(byte)(); goto Lnumber; case Mangle.Tubyte: vnumber = getArg!(ubyte)(); goto Lnumber; case Mangle.Tshort: signed = 1; vnumber = getArg!(short)(); goto Lnumber; case Mangle.Tushort: vnumber = getArg!(ushort)(); goto Lnumber; case Mangle.Tint: signed = 1; vnumber = getArg!(int)(); goto Lnumber; case Mangle.Tuint: Luint: vnumber = getArg!(uint)(); goto Lnumber; case Mangle.Tlong: signed = 1; vnumber = cast(ulong)getArg!(long)(); goto Lnumber; case Mangle.Tulong: Lulong: vnumber = getArg!(ulong)(); goto Lnumber; case Mangle.Tclass: vobject = getArg!(Object)(); if (vobject is null) s = "null"; else s = vobject.toString(); goto Lputstr; case Mangle.Tpointer: vnumber = cast(ulong)getArg!(void*)(); if (fc != 'x') uc = 1; flags |= FL0pad; if (!(flags & FLprecision)) { flags |= FLprecision; precision = (void*).sizeof; } base = 16; goto Lnumber; case Mangle.Tfloat: case Mangle.Tifloat: if (fc == 'x' || fc == 'X') goto Luint; vreal = getArg!(float)(); goto Lreal; case Mangle.Tdouble: case Mangle.Tidouble: if (fc == 'x' || fc == 'X') goto Lulong; vreal = getArg!(double)(); goto Lreal; case Mangle.Treal: case Mangle.Tireal: vreal = getArg!(real)(); goto Lreal; case Mangle.Tcfloat: vcreal = getArg!(cfloat)(); goto Lcomplex; case Mangle.Tcdouble: vcreal = getArg!(cdouble)(); goto Lcomplex; case Mangle.Tcreal: vcreal = getArg!(creal)(); goto Lcomplex; case Mangle.Tsarray: putArray(argptr, (cast(TypeInfo_StaticArray)ti).len, (cast(TypeInfo_StaticArray)ti).next); return; case Mangle.Tarray: mi = 10; if (typeid(ti).name.length == 14 && typeid(ti).name[9..14] == "Array") { // array of non-primitive types TypeInfo tn = (cast(TypeInfo_Array)ti).next; tn = skipCI(tn); switch (cast(Mangle)typeid(tn).name[9]) { case Mangle.Tchar: goto LarrayChar; case Mangle.Twchar: goto LarrayWchar; case Mangle.Tdchar: goto LarrayDchar; default: break; } void[] va = getArg!(void[])(); putArray(va.ptr, va.length, tn); return; 
} if (typeid(ti).name.length == 25 && typeid(ti).name[9..25] == "AssociativeArray") { // associative array ubyte[long] vaa = getArg!(ubyte[long])(); putAArray(vaa, (cast(TypeInfo_AssociativeArray)ti).next, (cast(TypeInfo_AssociativeArray)ti).key); return; } while (1) { m2 = cast(Mangle)typeid(ti).name[mi]; switch (m2) { case Mangle.Tchar: LarrayChar: s = getArg!(string)(); goto Lputstr; case Mangle.Twchar: LarrayWchar: wchar[] sw = getArg!(wchar[])(); s = toUTF8(sw); goto Lputstr; case Mangle.Tdchar: LarrayDchar: s = toUTF8(getArg!(dstring)()); Lputstr: if (fc != 's') throw new FormatException("string"); if (flags & FLprecision && precision < s.length) s = s[0 .. precision]; putstr(s); break; case Mangle.Tconst: case Mangle.Timmutable: mi++; continue; default: TypeInfo ti2 = primitiveTypeInfo(m2); if (!ti2) goto Lerror; void[] va = getArg!(void[])(); putArray(va.ptr, va.length, ti2); } return; } assert(0); case Mangle.Tenum: ti = (cast(TypeInfo_Enum)ti).base; m = cast(Mangle)typeid(ti).name[9]; formatArg(fc); return; case Mangle.Tstruct: { TypeInfo_Struct tis = cast(TypeInfo_Struct)ti; if (tis.xtoString is null) throw new FormatException("Can't convert " ~ tis.toString() ~ " to string: \"string toString()\" not defined"); s = tis.xtoString(skipArg(tis)); goto Lputstr; } default: goto Lerror; } Lnumber: switch (fc) { case 's': case 'd': if (signed) { if (cast(long)vnumber < 0) { prefix = "-"; vnumber = -vnumber; } else if (flags & FLplus) prefix = "+"; else if (flags & FLspace) prefix = " "; } break; case 'b': signed = 0; base = 2; break; case 'o': signed = 0; base = 8; break; case 'X': uc = 1; if (flags & FLhash && vnumber) prefix = "0X"; signed = 0; base = 16; break; case 'x': if (flags & FLhash && vnumber) prefix = "0x"; signed = 0; base = 16; break; default: goto Lerror; } if (!signed) { switch (m) { case Mangle.Tbyte: vnumber &= 0xFF; break; case Mangle.Tshort: vnumber &= 0xFFFF; break; case Mangle.Tint: vnumber &= 0xFFFFFFFF; break; default: break; } } if 
(flags & FLprecision && fc != 'p') flags &= ~FL0pad; if (vnumber < base) { if (vnumber == 0 && precision == 0 && flags & FLprecision && !(fc == 'o' && flags & FLhash)) { putstr(null); return; } if (precision == 0 || !(flags & FLprecision)) { vchar = cast(char)('0' + vnumber); if (vnumber < 10) vchar = cast(char)('0' + vnumber); else vchar = cast(char)((uc ? 'A' - 10 : 'a' - 10) + vnumber); goto L2; } } { ptrdiff_t n = tmpbuf.length; char c; int hexoffset = uc ? ('A' - ('9' + 1)) : ('a' - ('9' + 1)); while (vnumber) { c = cast(char)((vnumber % base) + '0'); if (c > '9') c += hexoffset; vnumber /= base; tmpbuf[--n] = c; } if (tmpbuf.length - n < precision && precision < tmpbuf.length) { ptrdiff_t m = tmpbuf.length - precision; tmpbuf[m .. n] = '0'; n = m; } else if (flags & FLhash && fc == 'o') prefix = "0"; putstr(tmpbuf[n .. tmpbuf.length]); return; } Lreal: putreal(vreal); return; Lcomplex: putreal(vcreal.re); if (vcreal.im >= 0) { putc('+'); } putreal(vcreal.im); putc('i'); return; Lerror: throw new FormatException("formatArg"); } for (int j = 0; j < arguments.length; ) { ti = arguments[j++]; //printf("arg[%d]: '%.*s' %d\n", j, typeid(ti).name.length, typeid(ti).name.ptr, typeid(ti).name.length); //ti.print(); flags = 0; precision = 0; field_width = 0; ti = skipCI(ti); int mi = 9; do { if (typeid(ti).name.length <= mi) goto Lerror; m = cast(Mangle)typeid(ti).name[mi++]; } while (m == Mangle.Tconst || m == Mangle.Timmutable); if (m == Mangle.Tarray) { if (typeid(ti).name.length == 14 && typeid(ti).name[9..14] == "Array") { TypeInfo tn = (cast(TypeInfo_Array)ti).next; tn = skipCI(tn); switch (cast(Mangle)typeid(tn).name[9]) { case Mangle.Tchar: case Mangle.Twchar: case Mangle.Tdchar: ti = tn; mi = 9; break; default: break; } } L1: Mangle m2 = cast(Mangle)typeid(ti).name[mi]; string fmt; // format string wstring wfmt; dstring dfmt; /* For performance reasons, this code takes advantage of the * fact that most format strings will be ASCII, and that the * format 
specifiers are always ASCII. This means we only need * to deal with UTF in a couple of isolated spots. */ switch (m2) { case Mangle.Tchar: fmt = getArg!(string)(); break; case Mangle.Twchar: wfmt = getArg!(wstring)(); fmt = toUTF8(wfmt); break; case Mangle.Tdchar: dfmt = getArg!(dstring)(); fmt = toUTF8(dfmt); break; case Mangle.Tconst: case Mangle.Timmutable: mi++; goto L1; default: formatArg('s'); continue; } for (size_t i = 0; i < fmt.length; ) { dchar c = fmt[i++]; dchar getFmtChar() { // Valid format specifier characters will never be UTF if (i == fmt.length) throw new FormatException("invalid specifier"); return fmt[i++]; } int getFmtInt() { int n; while (1) { n = n * 10 + (c - '0'); if (n < 0) // overflow throw new FormatException("int overflow"); c = getFmtChar(); if (c < '0' || c > '9') break; } return n; } int getFmtStar() { Mangle m; TypeInfo ti; if (j == arguments.length) throw new FormatException("too few arguments"); ti = arguments[j++]; m = cast(Mangle)typeid(ti).name[9]; if (m != Mangle.Tint) throw new FormatException("int argument expected"); return getArg!(int)(); } if (c != '%') { if (c > 0x7F) // if UTF sequence { i--; // back up and decode UTF sequence import std.utf : decode; c = decode(fmt, i); } Lputc: putc(c); continue; } // Get flags {-+ #} flags = 0; while (1) { c = getFmtChar(); switch (c) { case '-': flags |= FLdash; continue; case '+': flags |= FLplus; continue; case ' ': flags |= FLspace; continue; case '#': flags |= FLhash; continue; case '0': flags |= FL0pad; continue; case '%': if (flags == 0) goto Lputc; break; default: break; } break; } // Get field width field_width = 0; if (c == '*') { field_width = getFmtStar(); if (field_width < 0) { flags |= FLdash; field_width = -field_width; } c = getFmtChar(); } else if (c >= '0' && c <= '9') field_width = getFmtInt(); if (flags & FLplus) flags &= ~FLspace; if (flags & FLdash) flags &= ~FL0pad; // Get precision precision = 0; if (c == '.') { flags |= FLprecision; //flags &= ~FL0pad; c = 
getFmtChar(); if (c == '*') { precision = getFmtStar(); if (precision < 0) { precision = 0; flags &= ~FLprecision; } c = getFmtChar(); } else if (c >= '0' && c <= '9') precision = getFmtInt(); } if (j == arguments.length) goto Lerror; ti = arguments[j++]; ti = skipCI(ti); mi = 9; do { m = cast(Mangle)typeid(ti).name[mi++]; } while (m == Mangle.Tconst || m == Mangle.Timmutable); if (c > 0x7F) // if UTF sequence goto Lerror; // format specifiers can't be UTF formatArg(cast(char)c); } } else { formatArg('s'); } } return; Lerror: throw new FormatException(); } private bool needToSwapEndianess(Char)(ref FormatSpec!Char f) { import std.system : endian, Endian; return endian == Endian.littleEndian && f.flPlus || endian == Endian.bigEndian && f.flDash; } unittest { string res; void putc(dchar c) { res ~= c; } void myPrint(...) { undead.doformat.doFormat(&putc, _arguments, _argptr); } myPrint("The answer is %s:", 27, 6); assert(res == "The answer is 27:6"); } undeaD-1.0.10/src/undead/internal/000077500000000000000000000000001346374113600166445ustar00rootroot00000000000000undeaD-1.0.10/src/undead/internal/file.d000066400000000000000000000012341346374113600177300ustar00rootroot00000000000000// Written in the D programming language module undead.internal.file; // Copied from std.file. undead doesn't have access to it, but some modules // in undead used std.file.deleteme when they were in Phobos, so this gives // them access to a version of it. public @property string deleteme() @safe { import std.conv : to; import std.file : tempDir; import std.path : buildPath; import std.process : thisProcessID; static _deleteme = "deleteme.dmd.unittest.pid"; static _first = true; if(_first) { _deleteme = buildPath(tempDir(), _deleteme) ~ to!string(thisProcessID); _first = false; } return _deleteme; } undeaD-1.0.10/src/undead/metastrings.d000066400000000000000000000113061346374113600175360ustar00rootroot00000000000000// Written in the D programming language. 
/**
Templates with which to do compile-time manipulation of strings.

Macros:
 WIKI = Phobos/StdMetastrings

Copyright: Copyright Digital Mars 2007 - 2009.
License:   Boost License 1.0.
Authors:   $(WEB digitalmars.com, Walter Bright),
           Don Clugston
Source:    $(PHOBOSSRC std/_metastrings.d)
*/
/*
         Copyright Digital Mars 2007 - 2009.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
         http://www.boost.org/LICENSE_1_0.txt)
*/
module undead.metastrings;

/**
Formats constants into a string at compile time.  Analogous to $(XREF
string,format).

Parameters:

A = tuple of constants, which can be strings, characters, or integral
    values.

Formats:
 *    The formats supported are %s, which is replaced by the next
 *    constant converted with $(D toStringNow), and %%
 *    for the % character.
 *    A string constant that appears in argument position is itself
 *    treated as a format string; constants left over after the format
 *    string is exhausted are appended in order.
Example:
---
import undead.metastrings;
import std.stdio;

void main()
{
  string s = Format!("Arg %s = %s", "foo", 27);
  writefln(s); // "Arg foo = 27"
}
 * ---
 */

template Format(A...)
{
    static if (A.length == 0)
        enum Format = "";
    else static if (is(typeof(A[0]) : const(char)[]))
        enum Format = FormatString!(A[0], A[1..$]);
    else
        enum Format = toStringNow!(A[0]) ~ Format!(A[1..$]);
}

// Expands the format string F against the constants A, one character or one
// two-character specifier at a time.
template FormatString(const(char)[] F, A...)
{
    static if (F.length == 0)
        enum FormatString = Format!(A);
    else static if (F.length == 1)
        enum FormatString = F[0] ~ Format!(A);
    else static if (F[0..2] == "%s")
    {
        // Report a missing argument explicitly instead of letting A[0] fail
        // with a tuple index error.
        static assert(A.length > 0, "too few arguments for format %s");
        enum FormatString
            = toStringNow!(A[0]) ~ FormatString!(F[2..$],A[1..$]);
    }
    else static if (F[0..2] == "%%")
        enum FormatString = "%" ~ FormatString!(F[2..$],A);
    else
    {
        static assert(F[0] != '%', "unrecognized format %" ~ F[1]);
        enum FormatString = F[0] ~ FormatString!(F[1..$],A);
    }
}

unittest
{
    auto s = Format!("hel%slo", "world", -138, 'c', true);
    assert(s == "helworldlo-138ctrue", "[" ~ s ~ "]");
}

/**
 * Convert constant argument to a string.
*/

// Unsigned case: peel off the decimal digits recursively, most significant
// digits first.
template toStringNow(ulong v)
{
    static if (v >= 10)
        enum toStringNow = toStringNow!(v / 10) ~ toStringNow!(v % 10);
    else
        enum toStringNow = "" ~ cast(char)(v + '0');
}

unittest
{
    static assert(toStringNow!(1uL << 62) == "4611686018427387904");
}

/// ditto
template toStringNow(long v)
{
    // Non-negative values go straight to the unsigned overload; negative
    // ones get a leading '-' in front of their magnitude.
    static if (v >= 0)
        enum toStringNow = toStringNow!(cast(ulong) v);
    else
        enum toStringNow = "-" ~ toStringNow!(cast(ulong) -v);
}

unittest
{
    static assert(toStringNow!(0x100000000) == "4294967296");
    static assert(toStringNow!(-138L) == "-138");
}

/// ditto
template toStringNow(uint U)
{
    enum toStringNow = toStringNow!(cast(ulong)U);
}

/// ditto
template toStringNow(int I)
{
    enum toStringNow = toStringNow!(cast(long)I);
}

/// ditto
template toStringNow(bool B)
{
    enum toStringNow = B ? "true" : "false";
}

/// ditto
template toStringNow(string S)
{
    enum toStringNow = S;
}

/// ditto
template toStringNow(char C)
{
    enum toStringNow = "" ~ C;
}


/********
 * Parse unsigned integer literal from the start of string s.
 * returns:
 *    .value = the integer literal as a string,
 *    .rest = the string following the integer literal
 * Otherwise:
 *    .value = null,
 *    .rest = s
 */

template parseUinteger(const(char)[] s)
{
    static if (s.length != 0 && s[0] >= '0' && s[0] <= '9')
    {
        // leading digit: keep it and continue with the remainder
        enum value = s[0] ~ parseUinteger!(s[1..$]).value;
        enum rest = parseUinteger!(s[1..$]).rest;
    }
    else static if (s.length == 0)
    {
        // nothing left to parse
        enum value = "";
        enum rest = "";
    }
    else
    {
        // first character is not a digit: no literal, s is returned whole
        enum value = "";
        enum rest = s;
    }
}

/********
Parse integer literal optionally preceded by $(D '-') from the start
of string $(D s).
Returns:
    .value = the integer literal as a string,
    .rest = the string following the integer literal

Otherwise:
    .value = null,
    .rest = s
*/

template parseInteger(const(char)[] s)
{
    static if (s.length == 0)
    {
        enum value = "";
        enum rest = "";
    }
    else static if (s[0] >= '0' && s[0] <= '9')
    {
        // No sign: the result is exactly what parseUinteger produces for
        // the same input.
        enum value = parseUinteger!(s).value;
        enum rest = parseUinteger!(s).rest;
    }
    else static if (s.length >= 2 &&
                    s[0] == '-' && s[1] >= '0' && s[1] <= '9')
    {
        // '-' directly followed by a digit: keep both, then parse the
        // remaining digits.
        enum value = s[0..2] ~ parseUinteger!(s[2..$]).value;
        enum rest = parseUinteger!(s[2..$]).rest;
    }
    else
    {
        // no integer literal at the start of s
        enum value = "";
        enum rest = s;
    }
}

unittest
{
    assert(parseUinteger!("1234abc").value == "1234");
    assert(parseUinteger!("1234abc").rest == "abc");
    assert(parseInteger!("-1234abc").value == "-1234");
    assert(parseInteger!("-1234abc").rest == "abc");
}
undeaD-1.0.10/src/undead/regexp.d000066400000000000000000003036721346374113600165020ustar00rootroot00000000000000// Written in the D programming language.
// Regular Expressions.

/**
 * $(RED Deprecated. It will be removed in February 2012.
 *       Please use $(LINK2 std_regex.html, std.regex) instead.)
 *
 * $(LINK2 http://www.digitalmars.com/ctg/regular.html, Regular
 * expressions) are a powerful method of string pattern matching.  The
 * regular expression language used in this library is the same as
 * that commonly used, however, some of the very advanced forms may
 * behave slightly differently. The standard observed is the $(WEB
 * www.ecma-international.org/publications/standards/Ecma-262.htm,
 * ECMA standard) for regular expressions.
 *
 * undead.regexp is designed to work only with valid UTF strings as input.
 * To validate untrusted input, use std.utf.validate().
 *
 * In the following guide, $(I pattern)[] refers to a
 * $(LINK2 http://www.digitalmars.com/ctg/regular.html, regular expression).
 * The $(I attributes)[] refers to
 * a string controlling the interpretation
 * of the regular expression.
 * It consists of a sequence of one or more
 * of the following characters:
 *
 * <table>
 * <caption>Attribute Characters</caption>
 * $(TR $(TH Attribute) $(TH Action))
 * <tr>
 * $(TD $(B g))
 * $(TD global; repeat over the whole input string)
 * </tr>
 * <tr>
 * $(TD $(B i))
 * $(TD case insensitive)
 * </tr>
 * <tr>
 * $(TD $(B m))
 * $(TD treat as multiple lines separated by newlines)
 * </tr>
 * </table>
 *
 * The $(I format)[] string has the formatting characters:
 *
 * <table>
 * <caption>Formatting Characters</caption>
 * $(TR $(TH Format) $(TH Replaced With))
 * $(TR
 * $(TD $(B $$)) $(TD $)
 * )
 * $(TR
 * $(TD $(B $&)) $(TD The matched substring.)
 * )
 * $(TR
 * $(TD $(B $`)) $(TD The portion of string that precedes the matched substring.)
 * )
 * $(TR
 * $(TD $(B $')) $(TD The portion of string that follows the matched substring.)
 * )
 * $(TR
 * $(TD $(B $(DOLLAR))$(I n)) $(TD The $(I n)th capture, where $(I n)
 * is a single digit 1-9
 * and $$(I n) is not followed by a decimal digit.)
 * )
 * $(TR
 * $(TD $(B $(DOLLAR))$(I nn)) $(TD The $(I nn)th capture, where $(I nn)
 * is a two-digit decimal
 * number 01-99.
 * If $(I nn)th capture is undefined or more than the number
 * of parenthesized subexpressions, use the empty
 * string instead.)
 * )
 * </table>
* * Any other $ are left as is. * * References: * $(LINK2 http://en.wikipedia.org/wiki/Regular_expressions, Wikipedia) * Macros: * WIKI = StdRegexp * DOLLAR = $ * * Copyright: Copyright Digital Mars 2000 - 2011. * License: Boost License 1.0. * Authors: $(WEB digitalmars.com, Walter Bright) * Source: $(PHOBOSSRC std/_regexp.d) */ /* Copyright Digital Mars 2000 - 2011. * Distributed under the Boost Software License, Version 1.0. * (See accompanying file LICENSE_1_0.txt or copy at * http://www.boost.org/LICENSE_1_0.txt) */ /* Escape sequences: \nnn starts out a 1, 2 or 3 digit octal sequence, where n is an octal digit. If nnn is larger than 0377, then the 3rd digit is not part of the sequence and is not consumed. For maximal portability, use exactly 3 digits. \xXX starts out a 1 or 2 digit hex sequence. X is a hex character. If the first character after the \x is not a hex character, the value of the sequence is 'x' and the XX are not consumed. For maximal portability, use exactly 2 digits. \uUUUU is a unicode sequence. There are exactly 4 hex characters after the \u, if any are not, then the value of the sequence is 'u', and the UUUU are not consumed. Character classes: [a-b], where a is greater than b, will produce an error. References: http://www.unicode.org/unicode/reports/tr18/ */ module undead.regexp; //pragma(msg, "Notice: As of Phobos 2.055, std.regexp has been deprecated. " ~ // "It will be removed in February 2012. Please use std.regex instead."); //debug = regexp; // uncomment to turn on debugging printf's private { import core.stdc.stdio; import core.stdc.stdlib; import core.stdc.string; import std.array; import std.stdio; import std.string; import std.ascii; import std.outbuffer; import std.bitmanip; import std.utf; import std.algorithm; import std.array; import std.traits; } //deprecated: /** Regular expression to extract an _email address. 
* References:
 *  $(LINK2 http://www.regular-expressions.info/email.html, How to Find or Validate an Email Address)$(BR)
 *  $(LINK2 http://tools.ietf.org/html/rfc2822#section-3.4.1, RFC 2822 Internet Message Format)
 */
// NOTE: declared as a plain mutable module-level `string`, not a constant.
string email =
    r"[a-zA-Z]([.]?([[a-zA-Z0-9_]-]+)*)?@([[a-zA-Z0-9_]\-_]+\.)+[a-zA-Z]{2,6}";

/** Regular expression to extract a _url */
// NOTE: declared as a plain mutable module-level `string`, not a constant.
string url = r"(([h|H][t|T]|[f|F])[t|T][p|P]([s|S]?)\:\/\/|~/|/)?([\w]+:\w+@)?(([a-zA-Z]{1}([\w\-]+\.)+([\w]{2,5}))(:[\d]{1,5})?)?((/?\w+/)+|/?)(\w+\.[\w]{3,4})?([,]\w+)*((\?\w+=\w+)?(&\w+=\w+)*([,]\w*)*)?";

/************************************
 * One of these gets thrown on compilation errors
 *
 * Carries only the message passed to its constructor, which is forwarded
 * unchanged to the Exception base class.
 */

class RegExpException : Exception
{
    /// Params: msg = description of the error.
    this(string msg)
    {
        super(msg);
    }
}

/// Pair of offsets delimiting one match (or one captured subexpression).
/// The range is half-open: rm_so is the first index, rm_eo is one past the last.
struct regmatch_t
{
    sizediff_t rm_so; // index of start of match
    sizediff_t rm_eo; // index past end of match
}

private alias char rchar; // so we can make a wchar version

/******************************************************
 * Search string for matches with regular expression
 * pattern with attributes.
 * Replace each match with string generated from format.
 * Params:
 *  s = String to search.
 *  pattern = Regular expression pattern.
 *  format = Replacement string format.
 *  attributes = Regular expression attributes.
 * Returns:
 *  the resulting string
 * Example:
 *  Replace the letters 'a' with the letters 'ZZ'.
 * ---
 * s = "Strap a rocket engine on a chicken."
 * sub(s, "a", "ZZ")        // result: StrZZp a rocket engine on a chicken.
 * sub(s, "a", "ZZ", "g")   // result: StrZZp ZZ rocket engine on ZZ chicken.
 * ---
 *  The replacement format can reference the matches using
 *  the $&, $$, $', $`, $0 ..
$99 notation:
 * ---
 * sub(s, "[ar]", "[$&]", "g") // result: St[r][a]p [a] [r]ocket engine on [a] chicken.
 * ---
 */

string sub(string s, string pattern, string format, string attributes = null)
{
    auto r = new RegExp(pattern, attributes);
    auto result = r.replace(s, format);
    r.destroy();
    return result;
}

unittest
{
    debug(regexp) printf("regexp.sub.unittest\n");

    string r = sub("hello", "ll", "ss");
    assert(r == "hesso");
}

/*******************************************************
 * Search string for matches with regular expression
 * pattern with attributes.
 * Pass each match to delegate dg.
 * Replace each match with the return value from dg.
 *
 * The delegate is invoked once for every match that gets replaced, so it
 * may inspect the match (m[0], m.pre, m.post) and return a different string
 * each time. Without the "g" attribute only the first match is replaced.
 * Params:
 *  s = String to search.
 *  pattern = Regular expression pattern.
 *  dg = Delegate
 *  attributes = Regular expression attributes.
 * Returns: the resulting string.
 * Example:
 * Capitalize the letters 'a' and 'r':
 * ---
 * s = "Strap a rocket engine on a chicken.";
 * sub(s, "[ar]",
 *    delegate string (RegExp m)
 *    {
 *         return toUpper(m[0]);
 *    },
 *    "g");    // result: StRAp A Rocket engine on A chicken.
 * ---
 */

string sub(string s, string pattern, string delegate(RegExp) dg, string attributes = null)
{
    auto r = new RegExp(pattern, attributes);
    // dg is caller code and may throw; release the RegExp either way
    // (same pattern as find(string, string, string)).
    scope(exit) r.destroy();

    string result = s;
    size_t lastindex = 0;   // where the next search starts, in s
    size_t offset = 0;      // cumulative length shift between s and result

    // No literal-text shortcut here: a bulk std.array.replace would call dg
    // only once and would treat patterns such as "a.c" as plain text
    // whenever the first match happens to equal the pattern.
    while (r.test(s, lastindex))
    {
        auto so = r.pmatch[0].rm_so;
        auto eo = r.pmatch[0].rm_eo;

        string replacement = dg(r);

        // so/eo index into s; offset translates them into result.
        result = replaceSlice(result, result[offset + so .. offset + eo], replacement);

        if (r.attributes & RegExp.REA.global)
        {
            offset += replacement.length - (eo - so);

            if (lastindex == eo)
                lastindex++;        // always consume some source
            else
                lastindex = eo;
        }
        else
            break;
    }

    return result;
}

unittest
{
    debug(regexp) printf("regexp.sub.unittest\n");

    string foo(RegExp r) { return "ss"; }

    auto r = sub("hello", "ll", delegate string(RegExp r) { return "ss"; });
    assert(r == "hesso");

    r = sub("hello", "l", delegate string(RegExp r) { return "l"; }, "g");
    assert(r == "hello");

    auto s = sub("Strap a rocket engine on a chicken.", "[ar]",
           delegate string (RegExp m)
           {
                return std.string.toUpper(m[0]);
           },
           "g");
    assert(s == "StRAp A Rocket engine on A chicken.");

    // The delegate runs once per match, even for a plain literal pattern.
    int calls;
    auto t = sub("a-a-a", "a", delegate string (RegExp m) { calls++; return "b"; }, "g");
    assert(t == "b-b-b");
    assert(calls == 3);

    // A first match that equals the pattern text must not turn the
    // operation into a literal replace.
    auto u = sub("a.c abc", "a.c", delegate string (RegExp m) { return "X"; }, "g");
    assert(u == "X X");
}


/*************************************************
 * Search $(D_PARAM s[]) for first match with $(D_PARAM pattern).
 * Params:
 *  s = String to search.
 *  pattern = Regular expression pattern.
 * Returns:
 *  index into s[] of match if found, -1 if no match.
 * Example:
 * ---
 * auto s = "abcabcabab";
 * find(s, RegExp("b"));    // match, returns 1
 * find(s, RegExp("f"));    // no match, returns -1
 * ---
 */

sizediff_t find(string s, RegExp pattern)
{
    return pattern.test(s)
        ? pattern.pmatch[0].rm_so
        : -1;
}

unittest
{
    debug(regexp) printf("regexp.find.unittest\n");

    auto i = find("xabcy", RegExp("abc"));
    assert(i == 1);
    i = find("cba", RegExp("abc"));
    assert(i == -1);
}

/**
   Returns:

   Same as $(D_PARAM find(s, RegExp(pattern, attributes))).

   WARNING:

   This function is scheduled for deprecation due to unnecessary
   ambiguity with the homonym function in std.string. Instead of
   $(D_PARAM undead.regexp.find(s, p, a)), you may want to use $(D_PARAM
   find(s, RegExp(p, a))).
*/

sizediff_t
find(string s, string pattern, string attributes = null)
{
    auto r = new RegExp(pattern, attributes);
    scope(exit) r.destroy();
    return r.test(s) ? r.pmatch[0].rm_so : -1;
}

unittest
{
    debug(regexp) printf("regexp.find.unittest\n");

    auto i = find("xabcy", "abc");
    assert(i == 1);
    i = find("cba", "abc");
    assert(i == -1);
}

/*************************************************
 * Search $(D_PARAM s[]) for last match with $(D_PARAM pattern).
 * Params:
 *  s = String to search.
 *  pattern = Regular expression pattern.
 * Returns:
 *  index into s[] of match if found, -1 if no match.
 * Example:
 * ---
 * auto s = "abcabcabab";
 * rfind(s, RegExp("b"));   // match, returns 9
 * rfind(s, RegExp("f"));   // no match, returns -1
 * ---
 */

sizediff_t rfind(string s, RegExp pattern)
{
    sizediff_t i = -1, lastindex = 0;

    // Walk forward through every match and remember the start of the last one.
    while (pattern.test(s, lastindex))
    {
        auto eo = pattern.pmatch[0].rm_eo;
        i = pattern.pmatch[0].rm_so;
        if (lastindex == eo)
            lastindex++;        // always consume some source
        else
            lastindex = eo;
    }
    return i;
}

unittest
{
    sizediff_t i;

    debug(regexp) printf("regexp.rfind.unittest\n");
    i = rfind("abcdefcdef", RegExp("c"));
    assert(i == 6);
    i = rfind("abcdefcdef", RegExp("cd"));
    assert(i == 6);
    i = rfind("abcdefcdef", RegExp("x"));
    assert(i == -1);
    i = rfind("abcdefcdef", RegExp("xy"));
    assert(i == -1);
    i = rfind("abcdefcdef", RegExp(""));
    assert(i == 10);
}

/*************************************************
Returns:

  Same as $(D_PARAM rfind(s, RegExp(pattern, attributes))).
WARNING:

This function is scheduled for deprecation due to unnecessary
ambiguity with the homonym function in std.string. Instead of
$(D_PARAM undead.regexp.rfind(s, p, a)), you may want to use $(D_PARAM
rfind(s, RegExp(p, a))).
*/

sizediff_t
rfind(string s, string pattern, string attributes = null)
{
    typeof(return) lastStart = -1;  // start of the most recent match seen
    typeof(return) cursor = 0;      // where the next search begins
    auto re = new RegExp(pattern, attributes);

    for (;;)
    {
        if (!re.test(s, cursor))
            break;

        lastStart = re.pmatch[0].rm_so;
        immutable matchEnd = re.pmatch[0].rm_eo;

        // An empty match ending at the cursor would never advance;
        // step one position past it instead.
        cursor = (cursor == matchEnd) ? cursor + 1 : matchEnd;
    }

    re.destroy();
    return lastStart;
}

unittest
{
    debug(regexp) printf("regexp.rfind.unittest\n");

    assert(rfind("abcdefcdef", "c") == 6);
    assert(rfind("abcdefcdef", "cd") == 6);
    assert(rfind("abcdefcdef", "x") == -1);
    assert(rfind("abcdefcdef", "xy") == -1);
    assert(rfind("abcdefcdef", "") == 10);
}


/********************************************
 * Break s[] apart wherever the regular expression
 * $(D_PARAM pattern) matches, treating each match as a separator.
 * Simply forwards to $(D_PARAM pattern.split(s)).
 * Params:
 *  s = String to search.
 *  pattern = Regular expression pattern.
 * Returns:
 *  array of slices into s[]
 * Example:
 * ---
 * foreach (s; split("abcabcabab", RegExp("C.", "i")))
 * {
 *     writefln("s = '%s'", s);
 * }
 * // Prints:
 * // s = 'ab'
 * // s = 'b'
 * // s = 'bab'
 * ---
 */

string[] split(string s, RegExp pattern)
{
    return pattern.split(s);
}

unittest
{
    debug(regexp) printf("regexp.split.unittest()\n");

    auto pieces = split("ab", RegExp("a*"));
    assert(pieces.length == 2);
    assert(pieces[0] == "");
    assert(pieces[1] == "b");

    immutable string[] expected = ["ab", "b", "bab"];
    foreach (idx, piece; split("abcabcabab", RegExp("C.", "i")))
    {
        assert(idx < expected.length);
        assert(piece == expected[idx]);
    }
}

/********************************************
  Returns:
  Same as $(D_PARAM split(s, RegExp(pattern, attributes))).
WARNING:

This function is scheduled for deprecation due to unnecessary
ambiguity with the homonym function in std.string. Instead of
$(D_PARAM undead.regexp.split(s, p, a)), you may want to use $(D_PARAM
split(s, RegExp(p, a))).
*/

string[] split(string s, string pattern, string attributes = null)
{
    // Compile a throw-away RegExp, let it do the splitting, then release it.
    auto re = new RegExp(pattern, attributes);
    string[] pieces = re.split(s);
    re.destroy();
    return pieces;
}

unittest
{
    debug(regexp) printf("regexp.split.unittest()\n");

    auto pieces = split("ab", "a*");
    assert(pieces.length == 2);
    assert(pieces[0] == "");
    assert(pieces[1] == "b");

    immutable string[] expected = ["ab", "b", "bab"];
    foreach (idx, piece; split("abcabcabab", "C.", "i"))
    {
        assert(idx < expected.length);
        assert(piece == expected[idx]);
    }
}

/****************************************************
 * Search s[] for first match with pattern[] with attributes[].
 * Params:
 *  s = String to search.
 *  pattern = Regular expression pattern.
 *  attributes = Regular expression attributes.
 * Returns:
 *  corresponding RegExp if found, null if not.
* Example:
 * ---
 * import std.stdio;
 * import undead.regexp;
 *
 * void main()
 * {
 *     if (auto m = undead.regexp.search("abcdef", "c"))
 *     {
 *         writefln("%s[%s]%s", m.pre, m[0], m.post);
 *     }
 * }
 * // Prints:
 * // ab[c]def
 * ---
 */

RegExp search(string s, string pattern, string attributes = null)
{
    auto re = new RegExp(pattern, attributes);

    // On a hit the caller receives the RegExp still holding the match state.
    if (re.test(s))
        return re;

    // No match: release the object and report failure with null.
    re.destroy();
    return null;
}

unittest
{
    debug(regexp) printf("regexp.string.unittest()\n");

    auto m = undead.regexp.search("abcdef", "c()");
    assert(m !is null);
    auto result = std.string.format("%s[%s]%s", m.pre, m[0], m.post);
    assert(result == "ab[c]def");
    assert(m[1] == null);
    assert(m[2] == null);

    assert(undead.regexp.search("abcdef", "g") is null);
}

/* ********************************* RegExp ******************************** */

/*****************************
 * RegExp is a class to handle regular expressions.
 *
 * It is the core foundation for adding powerful string pattern matching
 * capabilities to programs like grep, text editors, awk, sed, etc.
 */
class RegExp
{
    /*****
     * Construct a RegExp object. Compile pattern
     * with attributes into
     * an internal form for fast execution.
     * Params:
     *  pattern = regular expression
     *  attributes = _attributes
     * Throws: RegExpException if there are any compilation errors.
     * Example:
     *  Declare two variables and assign to them a RegExp object:
     * ---
     * auto r = new RegExp("pattern");
     * auto s = new RegExp(r"p[1-5]\s*");
     * ---
     */
    public this(string pattern, string attributes = null)
    {
        // Until compile() needs more slots, pmatch is a one-element view
        // onto the gmatch member.
        pmatch = (&gmatch)[0 .. 1];
        compile(pattern, attributes);
    }

    /*****
     * Generate instance of RegExp.
     * Params:
     *  pattern = regular expression
     *  attributes = _attributes
     * Throws: RegExpException if there are any compilation errors.
* Example:
     *  Declare two variables and assign to them a RegExp object:
     * ---
     * auto r = RegExp("pattern");
     * auto s = RegExp(r"p[1-5]\s*");
     * ---
     */
    public static RegExp opCall(string pattern, string attributes = null)
    {
        return new RegExp(pattern, attributes);
    }

    unittest
    {
        debug(regexp) printf("regexp.opCall.unittest()\n");
        auto r1 = RegExp("hello", "m");

        // An unknown attribute letter must be rejected with a RegExpException.
        string msg;
        try
        {
            auto r2 = RegExp("hello", "q");
            assert(0);
        }
        catch (RegExpException ree)
        {
            msg = ree.toString();
        }
        assert(std.algorithm.countUntil(msg, "unrecognized attribute") >= 0);
    }

    /************************************
     * Set up for start of foreach loop.
     *
     * Stores the string to scan and rewinds the end-of-match position to 0,
     * so that the following opApply iteration starts from the beginning.
     * Returns:
     *  search() returns instance of RegExp set up to _search string[].
     * Example:
     * ---
     * import std.stdio;
     * import undead.regexp;
     *
     * void main()
     * {
     *     foreach(m; RegExp("ab").search("abcabcabab"))
     *     {
     *         writefln("%s[%s]%s", m.pre, m[0], m.post);
     *     }
     * }
     * // Prints:
     * // [ab]cabcabab
     * // abc[ab]cabab
     * // abcabc[ab]ab
     * // abcabcab[ab]
     * ---
     */

    public RegExp search(string string)
    {
        input = string;
        pmatch[0].rm_eo = 0;
        return this;
    }

    /** ditto */
    public int opApply(scope int delegate(ref RegExp) dg)
    {
        RegExp self = this;

        // Each test() call resumes after the previous match.
        for (; test(); )
        {
            immutable rc = dg(self);
            if (rc != 0)
                return rc;      // loop body asked to stop (break/return/goto)
        }
        return 0;
    }

    unittest
    {
        debug(regexp) printf("regexp.search.unittest()\n");

        immutable string[] expected =
        [
            "[ab]cabcabab",
            "abc[ab]cabab",
            "abcabc[ab]ab",
            "abcabcab[ab]",
        ];

        size_t seen;
        foreach (m; RegExp("ab").search("abcabcabab"))
        {
            auto s = std.string.format("%s[%s]%s", m.pre, m[0], m.post);
            assert(seen < expected.length);
            assert(s == expected[seen]);
            ++seen;
        }
    }

    /******************
     * Retrieve match n.
     *
     * n==0 means the matched substring, n>0 means the
     * n'th parenthesized subexpression.
     * if n is larger than the number of parenthesized subexpressions,
     * null is returned.
*/
    public string opIndex(size_t n)
    {
        // Out-of-range capture numbers yield null rather than an error.
        if (n >= pmatch.length)
            return null;

        const m = pmatch[n];
        // Equal offsets mean an empty (or unset) capture; that is reported as null too.
        return (m.rm_so == m.rm_eo) ? null : input[m.rm_so .. m.rm_eo];
    }

    /**
       Same as $(D_PARAM opIndex(n)).

       WARNING:

       Scheduled for deprecation due to confusion with overloaded
       $(D_PARAM match(string)). Instead of $(D_PARAM regex.match(n))
       you may want to use $(D_PARAM regex[n]).
    */
    public string match(size_t n)
    {
        return this[n];
    }

    /*******************
     * The part of the input that lies before the matched substring.
     */
    public @property string pre()
    {
        return input[0 .. pmatch[0].rm_so];
    }

    /*******************
     * The part of the input that lies after the matched substring.
     */
    public @property string post()
    {
        return input[pmatch[0].rm_eo .. $];
    }

    uint re_nsub;        // number of parenthesized subexpression matches
    regmatch_t[] pmatch; // array [re_nsub + 1]

    string input;        // the string to search

    // per instance:

    string pattern;      // source text of the regular expression

    string flags;        // source text of the attributes parameter

    int errors;

    uint attributes;

    enum REA
    {
        global          = 1, // has the g attribute
        ignoreCase      = 2, // has the i attribute
        multiline       = 4, // if treat as multiple lines separated
                             // by newlines, or as a single line
        dotmatchlf      = 8, // if . matches \n
    }


private:
    size_t src;                 // current source index in input[]
    size_t src_start;           // starting index for match in input[]
    size_t p;                   // position of parser in pattern[]
    regmatch_t gmatch;          // match for the entire regular expression
                                // (serves as storage for pmatch[0])

    const(ubyte)[] program;     // pattern[] compiled into regular expression program
    OutBuffer buf;




/******************************************/

// Opcodes

    enum : ubyte
    {
        REend,                  // end of program
        REchar,                 // single character
        REichar,                // single character, case insensitive
        REdchar,                // single UCS character
        REidchar,               // single wide character, case insensitive
        REanychar,              // any character
        REanystar,              // ".*"
        REstring,               // string of characters
        REistring,              // string of characters, case insensitive
        REtestbit,              // any in bitmap, non-consuming
        REbit,                  // any in the bit map
        REnotbit,               // any not in the bit map
        RErange,                // any in the string
        REnotrange,             // any not in the string
        REor,                   // a | b
        REplus,                 // 1 or more
        REstar,                 // 0 or more
        REquest,                // 0 or 1
        REnm,                   // n..m
        REnmq,                  // n..m, non-greedy version
        REbol,                  // beginning of line
        REeol,                  // end of line
        REparen,                // parenthesized subexpression
        REgoto,                 // goto offset

        REwordboundary,
        REnotwordboundary,
        REdigit,
        REnotdigit,
        REspace,
        REnotspace,
        REword,
        REnotword,
        REbackref,
    };

    // BUG: should this include '$'?
private int isword(dchar c) { return isAlphaNum(c) || c == '_'; }

    private uint inf = ~0u;

    /* ********************************
     * Compile pattern[] with attributes[] into program[], replacing whatever
     * this object held before. Recognized attribute letters are 'g'
     * (global), 'i' (ignore case) and 'm' (multiline); each may be given
     * at most once.
     * Throws RegExpException on error
     */

    public void compile(string pattern, string attributes)
    {
        //printf("RegExp.compile('%.*s', '%.*s')\n", pattern.length, pattern.ptr, attributes.length, attributes.ptr);

        this.attributes = 0;
        foreach (rchar c; attributes)
        {   REA att;

            switch (c)
            {
                case 'g': att = REA.global;         break;
                case 'i': att = REA.ignoreCase;     break;
                case 'm': att = REA.multiline;      break;
                default:
                    error("unrecognized attribute");
                    return;
            }
            if (this.attributes & att)
            {   error("redundant attribute");
                return;
            }
            this.attributes |= att;
        }

        input = null;

        this.pattern = pattern;
        this.flags = attributes;

        uint oldre_nsub = re_nsub;
        re_nsub = 0;
        errors = 0;

        buf = new OutBuffer();
        buf.reserve(pattern.length * 8);
        p = 0;
        parseRegexp();
        if (p < pattern.length)
        {   error("unmatched ')'");
        }
        // @@@ SKIPPING OPTIMIZATION SOLVES BUG 941 @@@
        //optimize();
        program = buf.data;
        buf.data = null;
        buf.destroy();

        // Grow the capture array only when this pattern has more groups than
        // the previous one; never resize in place while it still aliases gmatch.
        if (re_nsub > oldre_nsub)
        {
            if (pmatch.ptr is &gmatch)
                pmatch = null;
            pmatch.length = re_nsub + 1;
        }
        pmatch[0].rm_so = 0;
        pmatch[0].rm_eo = 0;
    }

    /********************************************
     * Split s[] into an array of strings, using the regular
     * expression as the separator.
     * The text captured by each parenthesized subexpression of a separator
     * is inserted into the result as well (empty string for an empty capture).
     * Returns:
     *  array of slices into s[]
     */

    public string[] split(string s)
    {
        debug(regexp) printf("regexp.split()\n");

        string[] result;

        if (s.length)
        {
            // p = start of the current piece, q = where the next search begins
            sizediff_t p, q;
            for (q = p; q != s.length;)
            {
                if (test(s, q))
                {
                    q = pmatch[0].rm_so;
                    auto e = pmatch[0].rm_eo;
                    if (e != p)
                    {
                        result ~= s[p .. q];
                        for (size_t i = 1; i < pmatch.length; i++)
                        {
                            auto so = pmatch[i].rm_so;
                            auto eo = pmatch[i].rm_eo;
                            if (so == eo)
                            {   so = 0;     // -1 gives array bounds error
                                eo = 0;
                            }
                            result ~= s[so .. eo];
                        }
                        q = p = e;
                        continue;
                    }
                }
                q++;
            }
            result ~= s[p .. s.length];
        }
        else if (!test(s))
            result ~= s;
        return result;
    }

    unittest
    {
        debug(regexp) printf("regexp.split.unittest()\n");

        auto r = new RegExp("a*?", null);
        string[] result;
        string j;
        int i;

        result = r.split("ab");

        assert(result.length == 2);
        i = (result[0] == "a");
        assert(i == 1);
        i = (result[1] == "b");
        assert(i == 1);

        r = new RegExp("a*", null);
        result = r.split("ab");
        assert(result.length == 2);
        i = (result[0] == "");
        assert(i == 1);
        i = (result[1] == "b");
        assert(i == 1);

        r = new RegExp("<(\\/)?([^<>]+)>", null);
        // Input must contain the tags the pattern splits on; the joined
        // result below lists each piece followed by the two captures.
        result = r.split("a<b>font</b>bar<TAG>hello</TAG>");

        debug(regexp)
        {
            for (i = 0; i < result.length; i++)
                printf("result[%d] = '%.*s'\n", i, result[i].length, result[i].ptr);
        }

        j = join(result, ",");
        //printf("j = '%.*s'\n", j.length, j.ptr);
        i = (j == "a,,b,font,/,b,bar,,TAG,hello,/,TAG,");
        assert(i == 1);

        r = new RegExp("a[bc]", null);
        result = r.match("123ab");
        j = join(result, ",");
        i = (j == "ab");
        assert(i == 1);

        result = r.match("ac");
        j = join(result, ",");
        i = (j == "ac");
        assert(i == 1);
    }

    /*************************************************
     * Search string[] for match with regular expression.
     * Returns:
     *  index of match if successful, -1 if not found
     */

    public sizediff_t find(string string)
    {
        if (test(string))
            return pmatch[0].rm_so;
        else
            return -1; // no match
    }

    //deprecated alias find search;

    unittest
    {
        debug(regexp) printf("regexp.find.unittest()\n");

        RegExp r = new RegExp("abc", null);
        auto i = r.find("xabcy");
        assert(i == 1);
        i = r.find("cba");
        assert(i == -1);
    }

    /*************************************************
     * Search s[] for match.
     * Returns:
     *  If global attribute, return array of all matches.
     *  If not global attribute, return same value as exec(s).
     */

    public string[] match(string s)
    {
        string[] result;

        if (attributes & REA.global)
        {
            sizediff_t lastindex = 0;

            while (test(s, lastindex))
            {
                auto eo = pmatch[0].rm_eo;

                result ~= input[pmatch[0].rm_so .. eo];
                if (lastindex == eo)
                    lastindex++;        // always consume some source
                else
                    lastindex = eo;
            }
        }
        else
        {
            result = exec(s);
        }
        return result;
    }

    unittest
    {
        debug(regexp) printf("regexp.match.unittest()\n");

        int i;
        string[] result;
        string j;
        RegExp r;

        r = new RegExp("a[bc]", null);
        result = r.match("1ab2ac3");
        j = join(result, ",");
        i = (j == "ab");
        assert(i == 1);

        r = new RegExp("a[bc]", "g");
        result = r.match("1ab2ac3");
        j = join(result, ",");
        i = (j == "ab,ac");
        assert(i == 1);
    }

    /*************************************************
     * Find regular expression matches in s[]. Replace those matches
     * with a new string composed of format[] merged with the result of the
     * matches.
     * If global, replace all matches. Otherwise, replace first match.
     *
     * A bulk literal replace is used as a fast path only when the pattern
     * consists purely of ASCII letters and digits (so it cannot contain
     * regex syntax) and the format contains no $ substitutions.
     * Returns: the new string
     */

    public string replace(string s, string format)
    {
        debug(regexp) printf("string = %.*s, format = %.*s\n", s.length, s.ptr, format.length, format.ptr);

        string result = s;
        sizediff_t lastindex = 0;   // where the next search starts, in s
        size_t offset = 0;          // cumulative length shift between s and result

        // "First match equals the pattern text" does not prove the pattern is
        // literal ("a.c" matches the text "a.c" but also "abc"), and an empty
        // pattern must not be handed to a literal replace either. Decide up
        // front, conservatively, whether the pattern is plain text.
        bool literalPattern = pattern.length != 0;
        foreach (rchar c; pattern)
        {
            if (!isAlphaNum(c))
            {
                literalPattern = false;
                break;
            }
        }

        for (;;)
        {
            if (!test(s, lastindex))
                break;

            auto so = pmatch[0].rm_so;
            auto eo = pmatch[0].rm_eo;

            string replacement = replace(format);

            // Optimize by using replace if possible - Dave Fladebo
            string slice = result[offset + so .. offset + eo];
            if (attributes & REA.global &&              // global, so replace all
                !(attributes & REA.ignoreCase) &&       // not ignoring case
                !(attributes & REA.multiline) &&        // not multiline
                literalPattern &&                       // no regex syntax in the pattern
                pattern == slice &&                     // simple pattern (exact match, no special characters)
                format == replacement)                  // simple format, not $ formats
            {
                debug(regexp)
                {
                    auto sss = result[offset + so .. offset + eo];
                    printf("pattern: %.*s, slice: %.*s, format: %.*s, replacement: %.*s\n",
                        pattern.length, pattern.ptr, sss.length, sss.ptr, format.length, format.ptr, replacement.length, replacement.ptr);
                }
                result = std.array.replace(result,slice,replacement);
                break;
            }

            // so/eo index into s; offset translates them into result.
            result = replaceSlice(result, result[offset + so .. offset + eo], replacement);

            if (attributes & REA.global)
            {
                offset += replacement.length - (eo - so);

                if (lastindex == eo)
                    lastindex++;        // always consume some source
                else
                    lastindex = eo;
            }
            else
                break;
        }

        return result;
    }

    unittest
    {
        debug(regexp) printf("regexp.replace.unittest()\n");

        int i;
        string result;
        RegExp r;

        r = new RegExp("a[bc]", "g");
        result = r.replace("1ab2ac3", "x$&y");
        i = (result == "1xaby2xacy3");
        assert(i == 1);

        r = new RegExp("ab", "g");
        result = r.replace("1ab2ac3", "xy");
        i = (result == "1xy2ac3");
        assert(i == 1);

        // A first match that equals the pattern text must not turn the
        // operation into a literal replace.
        r = new RegExp("a.c", "g");
        result = r.replace("a.c abc", "X");
        assert(result == "X X");
    }


    /*************************************************
     * Search string[] for match.
     * Returns:
     *  array of slices into string[] representing matches
     */

    public string[] exec(string s)
    {
        debug(regexp) printf("regexp.exec(string = '%.*s')\n", s.length, s.ptr);
        input = s;
        pmatch[0].rm_so = 0;
        pmatch[0].rm_eo = 0;
        return exec();
    }

    /*************************************************
     * Pick up where last exec(string) or exec() left off,
     * searching string[] for next match.
     * Returns:
     *  array of slices into string[] representing matches
     */

    public string[] exec()
    {
        if (!test())
            return null;

        // Element 0 is the whole match, the rest are the captures;
        // empty captures are reported as null.
        auto result = new string[pmatch.length];
        for (int i = 0; i < pmatch.length; i++)
        {
            if (pmatch[i].rm_so == pmatch[i].rm_eo)
                result[i] = null;
            else
                result[i] = input[pmatch[i].rm_so .. pmatch[i].rm_eo];
        }

        return result;
    }

    /************************************************
     * Search s[] for match.
     * Returns: false for no match, true for match
     * Example:
---
import std.stdio;
import undead.regexp;
import std.string;

int grep(int delegate(char[]) pred, char[][] list)
{
  int count;
  foreach (s; list)
  {  if (pred(s))
       ++count;
  }
  return count;
}

void main()
{
  auto x = grep(&RegExp("[Ff]oo").test,
                std.string.split("mary had a foo lamb"));
  writefln(x);
}
---
     * which prints: 1
     */
    //@@@
    public bool test(string s)
    {
        return test(s, 0 /*pmatch[0].rm_eo*/) != 0;
    }

    /************************************************
     * Pick up where last test(string) or test() left off, and search again.
* Returns: 0 for no match, !=0 for match
     */

    public int test()
    {
        // Resume from the end offset recorded for the previous match.
        return test(input, pmatch[0].rm_eo);
    }

    /************************************************
     * Test s[] starting at startindex against regular expression.
     *
     * Stores s as the current input, then tries the compiled program at
     * successive positions from startindex onward. On success pmatch[0]
     * holds the start and end offsets of the match; every pmatch entry is
     * reset to -1 before each attempt.
     * Params:
     *  s = string to search; becomes the current input
     *  startindex = index in s at which to begin; a value beyond
     *               s.length fails immediately
     * Returns: 0 for no match, !=0 for match
     */

    public int test(string s, size_t startindex)
    {
        char firstc;

        input = s;
        debug (regexp) printf("RegExp.test(input[] = '%.*s', startindex = %zd)\n", input.length, input.ptr, startindex);
        pmatch[0].rm_so = 0;
        pmatch[0].rm_eo = 0;
        // NOTE: startindex is an unsigned size_t, so only the upper-bound
        // half of this check can ever be true.
        if (startindex < 0 || startindex > input.length)
        {
            return 0;                   // fail
        }
        //debug(regexp) printProgram(program);

        // First character optimization
        // When the program begins with a literal character, candidate start
        // positions can be found by scanning for that character. Disabled
        // for letters under ignoreCase, where a plain compare is not enough.
        firstc = 0;
        if (program[0] == REchar)
        {
            firstc = program[1];
            if (attributes & REA.ignoreCase && isAlpha(firstc))
                firstc = 0;
        }

        for (auto si = startindex; ; si++)
        {
            if (firstc)
            {
                if (si == input.length)
                    break;              // no match
                if (input[si] != firstc)
                {
                    // chr() advances si to the next occurrence of firstc
                    si++;
                    if (!chr(si, firstc))       // if first character not found
                        break;          // no match
                }
            }
            // Mark the whole match and all captures as unset for this attempt
            for (size_t i = 0; i < re_nsub + 1; i++)
            {
                pmatch[i].rm_so = -1;
                pmatch[i].rm_eo = -1;
            }
            src_start = src = si;
            if (trymatch(0, program.length))
            {
                // src now sits just past the matched text
                pmatch[0].rm_so = si;
                pmatch[0].rm_eo = src;
                //debug(regexp) printf("start = %d, end = %d\n", gmatch.rm_so, gmatch.rm_eo);
                return 1;
            }
            // If possible match must start at beginning, we are done
            if (program[0] == REbol || program[0] == REanystar)
            {
                if (attributes & REA.multiline)
                {
                    // Scan for the next \n
                    // (the loop's si++ then steps past it)
                    if (!chr(si, '\n'))
                        break;          // no match if '\n' not found
                }
                else
                    break;
            }
            // Position input.length was tried above (an empty match is
            // possible there); nothing further to try.
            if (si == input.length)
                break;
            debug(regexp)
            {
                auto sss = input[si + 1 .. input.length];
                printf("Starting new try: '%.*s'\n", sss.length, sss.ptr);
            }
        }
        return 0;               // no match
    }

    /**
       Returns whether string $(D_PARAM s) matches $(D_PARAM this).
*/
    alias test opEquals;
//     bool opEquals(string s)
//     {
//         return test(s);
//     }

    unittest
    {
        assert("abc" == RegExp(".b."));
        assert("abc" != RegExp(".b.."));
    }

    /**
     * Advance si to the next occurrence of c in input[], starting at si.
     * Params:
     *  si = index to start from; left at the found position on success,
     *       or at input.length (or unchanged if already beyond it) on failure
     *  c = character to look for
     * Returns: 1 if found, 0 if not
     */
    int chr(ref size_t si, rchar c)
    {
        for (; si < input.length; si++)
        {
            if (input[si] == c)
                return 1;
        }
        return 0;
    }


    /**
     * Debugging aid: print a disassembly of a compiled program to stdout,
     * one opcode per line, stopping at REend or at the end of prog.
     * Opcodes without a case below (REplus, REstar, REquest) hit assert(0).
     * Params:
     *  prog = compiled regular expression program
     */
    void printProgram(const(ubyte)[] prog)
    {
        //debug(regexp)
        {
            size_t len;
            uint n;
            uint m;
            ushort *pu;
            uint *puint;
            char[] str;

            printf("printProgram()\n");
            for (size_t pc = 0; pc < prog.length; )
            {
                printf("%3d: ", pc);

                //printf("prog[pc] = %d, REchar = %d, REnmq = %d\n", prog[pc], REchar, REnmq);
                switch (prog[pc])
                {
                    case REchar:
                        printf("\tREchar '%c'\n", prog[pc + 1]);
                        pc += 1 + char.sizeof;
                        break;

                    case REichar:
                        printf("\tREichar '%c'\n", prog[pc + 1]);
                        pc += 1 + char.sizeof;
                        break;

                    case REdchar:
                        printf("\tREdchar '%c'\n", *cast(dchar *)&prog[pc + 1]);
                        pc += 1 + dchar.sizeof;
                        break;

                    case REidchar:
                        printf("\tREidchar '%c'\n", *cast(dchar *)&prog[pc + 1]);
                        pc += 1 + dchar.sizeof;
                        break;

                    case REanychar:
                        printf("\tREanychar\n");
                        pc++;
                        break;

                    case REstring:
                        // operand: size_t length followed by the characters
                        len = *cast(size_t *)&prog[pc + 1];
                        str = (cast(char*)&prog[pc + 1 + size_t.sizeof])[0 .. len];
                        printf("\tREstring x%x, '%.*s'\n", len, str.length, str.ptr);
                        pc += 1 + size_t.sizeof + len * rchar.sizeof;
                        break;

                    case REistring:
                        len = *cast(size_t *)&prog[pc + 1];
                        str = (cast(char*)&prog[pc + 1 + size_t.sizeof])[0 .. len];
                        printf("\tREistring x%x, '%.*s'\n", len, str.length, str.ptr);
                        pc += 1 + size_t.sizeof + len * rchar.sizeof;
                        break;

                    case REtestbit:
                        // operand: two ushorts (second is the bitmap length), then the bitmap
                        pu = cast(ushort *)&prog[pc + 1];
                        printf("\tREtestbit %d, %d\n", pu[0], pu[1]);
                        len = pu[1];
                        pc += 1 + 2 * ushort.sizeof + len;
                        break;

                    case REbit:
                        pu = cast(ushort *)&prog[pc + 1];
                        len = pu[1];
                        printf("\tREbit cmax=%02x, len=%d:", pu[0], len);
                        for (n = 0; n < len; n++)
                            printf(" %02x", prog[pc + 1 + 2 * ushort.sizeof + n]);
                        printf("\n");
                        pc += 1 + 2 * ushort.sizeof + len;
                        break;

                    case REnotbit:
                        pu = cast(ushort *)&prog[pc + 1];
                        printf("\tREnotbit %d, %d\n", pu[0], pu[1]);
                        len = pu[1];
                        pc += 1 + 2 * ushort.sizeof + len;
                        break;

                    case RErange:
                        len = *cast(uint *)&prog[pc + 1];
                        printf("\tRErange %d\n", len);
                        // BUG: REAignoreCase?
                        pc += 1 + uint.sizeof + len;
                        break;

                    case REnotrange:
                        len = *cast(uint *)&prog[pc + 1];
                        printf("\tREnotrange %d\n", len);
                        // BUG: REAignoreCase?
                        pc += 1 + uint.sizeof + len;
                        break;

                    case REbol:
                        printf("\tREbol\n");
                        pc++;
                        break;

                    case REeol:
                        printf("\tREeol\n");
                        pc++;
                        break;

                    case REor:
                        len = *cast(uint *)&prog[pc + 1];
                        printf("\tREor %d, pc=>%d\n", len, pc + 1 + uint.sizeof + len);
                        pc += 1 + uint.sizeof;
                        break;

                    case REgoto:
                        len = *cast(uint *)&prog[pc + 1];
                        printf("\tREgoto %d, pc=>%d\n", len, pc + 1 + uint.sizeof + len);
                        pc += 1 + uint.sizeof;
                        break;

                    case REanystar:
                        printf("\tREanystar\n");
                        pc++;
                        break;

                    case REnm:
                    case REnmq:
                        // len, n, m, ()
                        puint = cast(uint *)&prog[pc + 1];
                        len = puint[0];
                        n = puint[1];
                        m = puint[2];
                        printf("\tREnm%s len=%d, n=%u, m=%u, pc=>%d\n",
                            (prog[pc] == REnmq) ? "q".ptr : " ".ptr,
                            len, n, m, pc + 1 + uint.sizeof * 3 + len);
                        pc += 1 + uint.sizeof * 3;
                        break;

                    case REparen:
                        // len, n, ()
                        puint = cast(uint *)&prog[pc + 1];
                        len = puint[0];
                        n = puint[1];
                        printf("\tREparen len=%d n=%d, pc=>%d\n", len, n, pc + 1 + uint.sizeof * 2 + len);
                        pc += 1 + uint.sizeof * 2;
                        break;

                    case REend:
                        printf("\tREend\n");
                        return;

                    case REwordboundary:
                        printf("\tREwordboundary\n");
                        pc++;
                        break;

                    case REnotwordboundary:
                        printf("\tREnotwordboundary\n");
                        pc++;
                        break;

                    case REdigit:
                        printf("\tREdigit\n");
                        pc++;
                        break;

                    case REnotdigit:
                        printf("\tREnotdigit\n");
                        pc++;
                        break;

                    case REspace:
                        printf("\tREspace\n");
                        pc++;
                        break;

                    case REnotspace:
                        printf("\tREnotspace\n");
                        pc++;
                        break;

                    case REword:
                        printf("\tREword\n");
                        pc++;
                        break;

                    case REnotword:
                        printf("\tREnotword\n");
                        pc++;
                        break;

                    case REbackref:
                        // The operand is the byte after this opcode, not
                        // the byte at fixed program offset 1.
                        printf("\tREbackref %d\n", prog[pc + 1]);
                        pc += 2;
                        break;

                    default:
                        assert(0);
                }
            }
        }
    }


    /**************************************************
     * Match input against a section of the program[].
     * Returns:
     *  1 if successful match
     *  0 no match
     */

    int trymatch(size_t pc, size_t pcend)
    {
        size_t len;
        size_t n;
        size_t m;
        size_t count;
        size_t pop;
        size_t ss;
        regmatch_t *psave;
        size_t c1;
        size_t c2;
        ushort* pu;
        uint* puint;

        debug(regexp)
        {
            auto sss = input[src ..
input.length]; printf("RegExp.trymatch(pc = %zd, src = '%.*s', pcend = %zd)\n", pc, sss.length, sss.ptr, pcend); } auto srcsave = src; psave = null; for (;;) { if (pc == pcend) // if done matching { debug(regex) printf("\tprogend\n"); return 1; } //printf("\top = %d\n", program[pc]); switch (program[pc]) { case REchar: if (src == input.length) goto Lnomatch; debug(regexp) printf("\tREchar '%c', src = '%c'\n", program[pc + 1], input[src]); if (program[pc + 1] != input[src]) goto Lnomatch; src++; pc += 1 + char.sizeof; break; case REichar: if (src == input.length) goto Lnomatch; debug(regexp) printf("\tREichar '%c', src = '%c'\n", program[pc + 1], input[src]); c1 = program[pc + 1]; c2 = input[src]; if (c1 != c2) { if (isLower(cast(rchar)c2)) c2 = std.ascii.toUpper(cast(rchar)c2); else goto Lnomatch; if (c1 != c2) goto Lnomatch; } src++; pc += 1 + char.sizeof; break; case REdchar: debug(regexp) printf("\tREdchar '%c', src = '%c'\n", *(cast(dchar *)&program[pc + 1]), input[src]); if (src == input.length) goto Lnomatch; if (*(cast(dchar *)&program[pc + 1]) != input[src]) goto Lnomatch; src++; pc += 1 + dchar.sizeof; break; case REidchar: debug(regexp) printf("\tREidchar '%c', src = '%c'\n", *(cast(dchar *)&program[pc + 1]), input[src]); if (src == input.length) goto Lnomatch; c1 = *(cast(dchar *)&program[pc + 1]); c2 = input[src]; if (c1 != c2) { if (isLower(cast(rchar)c2)) c2 = std.ascii.toUpper(cast(rchar)c2); else goto Lnomatch; if (c1 != c2) goto Lnomatch; } src++; pc += 1 + dchar.sizeof; break; case REanychar: debug(regexp) printf("\tREanychar\n"); if (src == input.length) goto Lnomatch; if (!(attributes & REA.dotmatchlf) && input[src] == cast(rchar)'\n') goto Lnomatch; src += std.utf.stride(input, src); //src++; pc++; break; case REstring: len = *cast(size_t *)&program[pc + 1]; debug(regexp) { auto sss2 = (&program[pc + 1 + size_t.sizeof])[0 .. 
len]; printf("\tREstring x%x, '%.*s'\n", len, sss2.length, sss2.ptr); } if (src + len > input.length) goto Lnomatch; if (memcmp(&program[pc + 1 + size_t.sizeof], &input[src], len * rchar.sizeof)) goto Lnomatch; src += len; pc += 1 + size_t.sizeof + len * rchar.sizeof; break; case REistring: len = *cast(size_t *)&program[pc + 1]; debug(regexp) { auto sss2 = (&program[pc + 1 + size_t.sizeof])[0 .. len]; printf("\tREistring x%x, '%.*s'\n", len, sss2.length, sss2.ptr); } if (src + len > input.length) goto Lnomatch; if (icmp((cast(char*)&program[pc + 1 + size_t.sizeof])[0..len], input[src .. src + len])) goto Lnomatch; src += len; pc += 1 + size_t.sizeof + len * rchar.sizeof; break; case REtestbit: pu = (cast(ushort *)&program[pc + 1]); if (src == input.length) goto Lnomatch; debug(regexp) printf("\tREtestbit %d, %d, '%c', x%02x\n", pu[0], pu[1], input[src], input[src]); len = pu[1]; c1 = input[src]; //printf("[x%02x]=x%02x, x%02x\n", c1 >> 3, ((&program[pc + 1 + 4])[c1 >> 3] ), (1 << (c1 & 7))); if (c1 <= pu[0] && !((&(program[pc + 1 + 4]))[c1 >> 3] & (1 << (c1 & 7)))) goto Lnomatch; pc += 1 + 2 * ushort.sizeof + len; break; case REbit: pu = (cast(ushort *)&program[pc + 1]); if (src == input.length) goto Lnomatch; debug(regexp) printf("\tREbit %d, %d, '%c'\n", pu[0], pu[1], input[src]); len = pu[1]; c1 = input[src]; if (c1 > pu[0]) goto Lnomatch; if (!((&program[pc + 1 + 4])[c1 >> 3] & (1 << (c1 & 7)))) goto Lnomatch; src++; pc += 1 + 2 * ushort.sizeof + len; break; case REnotbit: pu = (cast(ushort *)&program[pc + 1]); if (src == input.length) goto Lnomatch; debug(regexp) printf("\tREnotbit %d, %d, '%c'\n", pu[0], pu[1], input[src]); len = pu[1]; c1 = input[src]; if (c1 <= pu[0] && ((&program[pc + 1 + 4])[c1 >> 3] & (1 << (c1 & 7)))) goto Lnomatch; src++; pc += 1 + 2 * ushort.sizeof + len; break; case RErange: len = *cast(uint *)&program[pc + 1]; debug(regexp) printf("\tRErange %d\n", len); if (src == input.length) goto Lnomatch; // BUG: REA.ignoreCase? 
if (memchr(cast(char*)&program[pc + 1 + uint.sizeof], input[src], len) == null) goto Lnomatch; src++; pc += 1 + uint.sizeof + len; break; case REnotrange: len = *cast(uint *)&program[pc + 1]; debug(regexp) printf("\tREnotrange %d\n", len); if (src == input.length) goto Lnomatch; // BUG: REA.ignoreCase? if (memchr(cast(char*)&program[pc + 1 + uint.sizeof], input[src], len) != null) goto Lnomatch; src++; pc += 1 + uint.sizeof + len; break; case REbol: debug(regexp) printf("\tREbol\n"); if (src == 0) { } else if (attributes & REA.multiline) { if (input[src - 1] != '\n') goto Lnomatch; } else goto Lnomatch; pc++; break; case REeol: debug(regexp) printf("\tREeol\n"); if (src == input.length) { } else if (attributes & REA.multiline && input[src] == '\n') src++; else goto Lnomatch; pc++; break; case REor: len = (cast(uint *)&program[pc + 1])[0]; debug(regexp) printf("\tREor %d\n", len); pop = pc + 1 + uint.sizeof; ss = src; if (trymatch(pop, pcend)) { if (pcend != program.length) { auto s = src; if (trymatch(pcend, program.length)) { debug(regexp) printf("\tfirst operand matched\n"); src = s; return 1; } else { // If second branch doesn't match to end, take first anyway src = ss; if (!trymatch(pop + len, program.length)) { debug(regexp) printf("\tfirst operand matched\n"); src = s; return 1; } } src = ss; } else { debug(regexp) printf("\tfirst operand matched\n"); return 1; } } pc = pop + len; // proceed with 2nd branch break; case REgoto: debug(regexp) printf("\tREgoto\n"); len = (cast(uint *)&program[pc + 1])[0]; pc += 1 + uint.sizeof + len; break; case REanystar: debug(regexp) printf("\tREanystar\n"); pc++; for (;;) { auto s1 = src; if (src == input.length) break; if (!(attributes & REA.dotmatchlf) && input[src] == '\n') break; src++; auto s2 = src; // If no match after consumption, but it // did match before, then no match if (!trymatch(pc, program.length)) { src = s1; // BUG: should we save/restore pmatch[]? 
if (trymatch(pc, program.length)) { src = s1; // no match break; } } src = s2; } break; case REnm: case REnmq: // len, n, m, () puint = cast(uint *)&program[pc + 1]; len = puint[0]; n = puint[1]; m = puint[2]; debug(regexp) printf("\tREnm%s len=%d, n=%u, m=%u\n", (program[pc] == REnmq) ? "q".ptr : "".ptr, len, n, m); pop = pc + 1 + uint.sizeof * 3; for (count = 0; count < n; count++) { if (!trymatch(pop, pop + len)) goto Lnomatch; } if (!psave && count < m) { //version (Win32) psave = cast(regmatch_t *)alloca((re_nsub + 1) * regmatch_t.sizeof); //else //psave = new regmatch_t[re_nsub + 1]; } if (program[pc] == REnmq) // if minimal munch { for (; count < m; count++) { memcpy(psave, pmatch.ptr, (re_nsub + 1) * regmatch_t.sizeof); auto s1 = src; if (trymatch(pop + len, program.length)) { src = s1; memcpy(pmatch.ptr, psave, (re_nsub + 1) * regmatch_t.sizeof); break; } if (!trymatch(pop, pop + len)) { debug(regexp) printf("\tdoesn't match subexpression\n"); break; } // If source is not consumed, don't // infinite loop on the match if (s1 == src) { debug(regexp) printf("\tsource is not consumed\n"); break; } } } else // maximal munch { for (; count < m; count++) { memcpy(psave, pmatch.ptr, (re_nsub + 1) * regmatch_t.sizeof); auto s1 = src; if (!trymatch(pop, pop + len)) { debug(regexp) printf("\tdoesn't match subexpression\n"); break; } auto s2 = src; // If source is not consumed, don't // infinite loop on the match if (s1 == s2) { debug(regexp) printf("\tsource is not consumed\n"); break; } // If no match after consumption, but it // did match before, then no match if (!trymatch(pop + len, program.length)) { src = s1; if (trymatch(pop + len, program.length)) { src = s1; // no match memcpy(pmatch.ptr, psave, (re_nsub + 1) * regmatch_t.sizeof); break; } } src = s2; } } debug(regexp) printf("\tREnm len=%d, n=%u, m=%u, DONE count=%d\n", len, n, m, count); pc = pop + len; break; case REparen: // len, () debug(regexp) printf("\tREparen\n"); puint = cast(uint *)&program[pc + 
1]; len = puint[0]; n = puint[1]; pop = pc + 1 + uint.sizeof * 2; ss = src; if (!trymatch(pop, pop + len)) goto Lnomatch; pmatch[n + 1].rm_so = ss; pmatch[n + 1].rm_eo = src; pc = pop + len; break; case REend: debug(regexp) printf("\tREend\n"); return 1; // successful match case REwordboundary: debug(regexp) printf("\tREwordboundary\n"); if (src > 0 && src < input.length) { c1 = input[src - 1]; c2 = input[src]; if (!( (isword(cast(rchar)c1) && !isword(cast(rchar)c2)) || (!isword(cast(rchar)c1) && isword(cast(rchar)c2)) ) ) goto Lnomatch; } pc++; break; case REnotwordboundary: debug(regexp) printf("\tREnotwordboundary\n"); if (src == 0 || src == input.length) goto Lnomatch; c1 = input[src - 1]; c2 = input[src]; if ( (isword(cast(rchar)c1) && !isword(cast(rchar)c2)) || (!isword(cast(rchar)c1) && isword(cast(rchar)c2)) ) goto Lnomatch; pc++; break; case REdigit: debug(regexp) printf("\tREdigit\n"); if (src == input.length) goto Lnomatch; if (!isDigit(input[src])) goto Lnomatch; src++; pc++; break; case REnotdigit: debug(regexp) printf("\tREnotdigit\n"); if (src == input.length) goto Lnomatch; if (isDigit(input[src])) goto Lnomatch; src++; pc++; break; case REspace: debug(regexp) printf("\tREspace\n"); if (src == input.length) goto Lnomatch; if (!isWhite(input[src])) goto Lnomatch; src++; pc++; break; case REnotspace: debug(regexp) printf("\tREnotspace\n"); if (src == input.length) goto Lnomatch; if (isWhite(input[src])) goto Lnomatch; src++; pc++; break; case REword: debug(regexp) printf("\tREword\n"); if (src == input.length) goto Lnomatch; if (!isword(input[src])) goto Lnomatch; src++; pc++; break; case REnotword: debug(regexp) printf("\tREnotword\n"); if (src == input.length) goto Lnomatch; if (isword(input[src])) goto Lnomatch; src++; pc++; break; case REbackref: { n = program[pc + 1]; debug(regexp) printf("\tREbackref %d\n", n); auto so = pmatch[n + 1].rm_so; auto eo = pmatch[n + 1].rm_eo; len = eo - so; if (src + len > input.length) goto Lnomatch; else if 
(attributes & REA.ignoreCase) { if (icmp(input[src .. src + len], input[so .. eo])) goto Lnomatch; } else if (memcmp(&input[src], &input[so], len * rchar.sizeof)) goto Lnomatch; src += len; pc += 2; break; } default: assert(0); } } Lnomatch: debug(regexp) printf("\tnomatch pc=%d\n", pc); src = srcsave; return 0; } /* =================== Compiler ================== */ int parseRegexp() { size_t gotooffset; uint len1; uint len2; debug(regexp) { auto sss = pattern[p .. pattern.length]; printf("parseRegexp() '%.*s'\n", sss.length, sss.ptr); } auto offset = buf.offset; for (;;) { assert(p <= pattern.length); if (p == pattern.length) { buf.write(REend); return 1; } switch (pattern[p]) { case ')': return 1; case '|': p++; gotooffset = buf.offset; buf.write(REgoto); buf.write(cast(uint)0); len1 = cast(uint)(buf.offset - offset); buf.spread(offset, 1 + uint.sizeof); gotooffset += 1 + uint.sizeof; parseRegexp(); len2 = cast(uint)(buf.offset - (gotooffset + 1 + uint.sizeof)); buf.data[offset] = REor; (cast(uint *)&buf.data[offset + 1])[0] = len1; (cast(uint *)&buf.data[gotooffset + 1])[0] = len2; break; default: parsePiece(); break; } } } int parsePiece() { uint len; uint n; uint m; ubyte op; auto plength = pattern.length; debug(regexp) { auto sss = pattern[p .. 
pattern.length];
            printf("parsePiece() '%.*s'\n", sss.length, sss.ptr);
        }
        auto offset = buf.offset;
        parseAtom();
        if (p == plength)
            return 1;
        // A quantifier following the atom wraps the atom's code in REnm/REnmq.
        switch (pattern[p])
        {
            case '*':
                // Special optimization: replace .* with REanystar
                if (buf.offset - offset == 1 &&
                    buf.data[offset] == REanychar &&
                    p + 1 < plength &&
                    pattern[p + 1] != '?')
                {
                    buf.data[offset] = REanystar;
                    p++;
                    break;
                }

                n = 0;
                m = inf;
                goto Lnm;

            case '+':
                n = 1;
                m = inf;
                goto Lnm;

            case '?':
                n = 0;
                m = 1;
                goto Lnm;

            case '{':       // {n} {n,} {n,m}
                p++;
                if (p == plength || !isDigit(pattern[p]))
                    goto Lerr;
                n = 0;
                do
                {
                    // Reject a count that does not fit in a uint instead of
                    // letting it wrap around silently.
                    if (n > (uint.max - (pattern[p] - '0')) / 10)
                        goto Lerr;
                    n = n * 10 + pattern[p] - '0';
                    p++;
                    if (p == plength)
                        goto Lerr;
                } while (isDigit(pattern[p]));
                if (pattern[p] == '}')          // {n}
                {
                    m = n;
                    goto Lnm;
                }
                if (pattern[p] != ',')
                    goto Lerr;
                p++;
                if (p == plength)
                    goto Lerr;
                if (pattern[p] == /*{*/ '}')    // {n,}
                {
                    m = inf;
                    goto Lnm;
                }
                if (!isDigit(pattern[p]))
                    goto Lerr;
                m = 0;                          // {n,m}
                do
                {
                    // Same overflow guard as for n above.
                    if (m > (uint.max - (pattern[p] - '0')) / 10)
                        goto Lerr;
                    m = m * 10 + pattern[p] - '0';
                    p++;
                    if (p == plength)
                        goto Lerr;
                } while (isDigit(pattern[p]));
                if (pattern[p] != /*{*/ '}')
                    goto Lerr;
                goto Lnm;

            Lnm:
                p++;
                op = REnm;
                if (p < plength && pattern[p] == '?')
                {
                    op = REnmq;     // minimal munch version
                    p++;
                }
                // Open a gap in front of the atom's code and store the
                // REnm/REnmq header there: opcode, len, n, m.
                len = cast(uint)(buf.offset - offset);
                buf.spread(offset, 1 + uint.sizeof * 3);
                buf.data[offset] = op;
                uint* puint = cast(uint *)&buf.data[offset + 1];
                puint[0] = len;
                puint[1] = n;
                puint[2] = m;
                break;

            default:
                break;
        }
        return 1;

    Lerr:
        error("badly formed {n,m}");
        assert(0);
    }

    /* Compile one atom starting at pattern[p] into buf: a group, a character
     * class, '.', '^', '$', a backslash escape or literal text.
     * Returns 1 on success. error() throws, so the 'return 0' statements that
     * follow error() calls are not reached.
     */
    int parseAtom()
    {
        ubyte op;
        size_t offset;
        rchar c;

        debug(regexp)
        {
            auto sss = pattern[p .. pattern.length];
            printf("parseAtom() '%.*s'\n", sss.length, sss.ptr);
        }
        if (p < pattern.length)
        {
            c = pattern[p];
            switch (c)
            {
                case '*':
                case '+':
                case '?':
                    error("*+? not allowed in atom");
                    p++;
                    return 0;

                case '(':
                    p++;
                    buf.write(REparen);
                    offset = buf.offset;
                    buf.write(cast(uint)0);         // reserve space for length
                    buf.write(re_nsub);
                    re_nsub++;
                    parseRegexp();
                    // Back-patch the length of the group's code.
                    *cast(uint *)&buf.data[offset] =
                        cast(uint)(buf.offset - (offset + uint.sizeof * 2));
                    if (p == pattern.length || pattern[p] != ')')
                    {
                        error("')' expected");
                        return 0;
                    }
                    p++;
                    break;

                case '[':
                    if (!parseRange())
                        return 0;
                    break;

                case '.':
                    p++;
                    buf.write(REanychar);
                    break;

                case '^':
                    p++;
                    buf.write(REbol);
                    break;

                case '$':
                    p++;
                    buf.write(REeol);
                    break;

                case '\\':
                    p++;
                    if (p == pattern.length)
                    {
                        error("no character past '\\'");
                        return 0;
                    }
                    c = pattern[p];
                    switch (c)
                    {
                        case 'b':    op = REwordboundary;    goto Lop;
                        case 'B':    op = REnotwordboundary; goto Lop;
                        case 'd':    op = REdigit;           goto Lop;
                        case 'D':    op = REnotdigit;        goto Lop;
                        case 's':    op = REspace;           goto Lop;
                        case 'S':    op = REnotspace;        goto Lop;
                        case 'w':    op = REword;            goto Lop;
                        case 'W':    op = REnotword;         goto Lop;

                        Lop:
                            buf.write(op);
                            p++;
                            break;

                        case 'f':
                        case 'n':
                        case 'r':
                        case 't':
                        case 'v':
                        case 'c':
                        case 'x':
                        case 'u':
                        case '0':
                            c = cast(char)escape();
                            goto Lbyte;

                        case '1': case '2': case '3':
                        case '4': case '5': case '6':
                        case '7': case '8': case '9':
                            // \1..\9 become 0-based group indices.
                            c -= '1';
                            if (c < re_nsub)
                            {
                                buf.write(REbackref);
                                buf.write(cast(ubyte)c);
                            }
                            else
                            {
                                error("no matching back reference");
                                return 0;
                            }
                            p++;
                            break;

                        default:
                            p++;
                            goto Lbyte;
                    }
                    break;

                default:
                    p++;
                Lbyte:
                    op = REchar;
                    if (attributes & REA.ignoreCase)
                    {
                        if (isAlpha(c))
                        {
                            op = REichar;
                            c = cast(char)std.ascii.toUpper(c);
                        }
                    }
                    if (op == REchar && c <= 0xFF)
                    {
                        // Look ahead and see if we can make this into
                        // an REstring
                        auto q = p;
                        for (; q < pattern.length; ++q)
                        {
                            rchar qc = pattern[q];
                            switch (qc)
                            {
                                case '{':
                                case '*':
                                case '+':
                                case '?':
                                    // The quantifier binds to the preceding
                                    // char only, so leave that char out.
                                    if (q == p)
                                        goto Lchar;
                                    q--;
                                    break;

                                case '(': case ')':
                                case '|':
                                case '[': case ']':
                                case '.': case '^':
                                case '$': case '\\':
                                case '}':
                                    break;

                                default:
                                    continue;
                            }
                            break;
                        }
                        auto len = q - p;
                        if (len > 0)
                        {
                            debug(regexp) printf("writing string len %d, c = '%c', pattern[p] = '%c'\n", len+1, c, pattern[p]);
                            buf.reserve(5 + (1 + len) * rchar.sizeof);
                            buf.write((attributes & REA.ignoreCase) ? REistring : REstring);
                            buf.write(len + 1);
                            buf.write(c);
                            buf.write(pattern[p .. p + len]);
                            p = q;
                            break;
                        }
                    }
                    if (c >= 0x80)
                    {
                        // Convert to dchar opcode
                        op = (op == REchar) ? REdchar : REidchar;
                        buf.write(op);
                        // The matcher reads a full dchar operand and advances
                        // by 1 + dchar.sizeof, so the operand must be written
                        // as a dchar, not as a one-byte rchar.
                        buf.write(cast(dchar)c);
                    }
                    else
                    {
                     Lchar:
                        debug(regexp) printf("It's an REchar '%c'\n", c);
                        buf.write(op);
                        buf.write(cast(char)c);
                    }
                    break;
            }
        }
        return 1;
    }

private:
    /* Growable bit set stored in an OutBuffer. maxc is the largest value
     * passed to setbitmax so far, maxb the number of bytes filled in buf.
     */
    class Range
    {
        size_t maxc;
        size_t maxb;
        OutBuffer buf;
        ubyte* base;
        BitArray bits;

        this(OutBuffer buf)
        {
            this.buf = buf;
            if (buf.data.length)
                this.base = &buf.data[buf.offset];
        }

        // Grow the bit set so that bit index u is addressable.
        void setbitmax(size_t u)
        {
            //printf("setbitmax(x%x), maxc = x%x\n", u, maxc);
            if (u > maxc)
            {
                maxc = u;
                auto b = u / 8;
                if (b >= maxb)
                {
                    // Remember base as an offset and recompute the pointer
                    // from buf.data after fill0().
                    auto u2 = base ? base - &buf.data[0] : 0;
                    buf.fill0(b - maxb + 1);
                    base = &buf.data[u2];
                    maxb = b + 1;
                    //bits = (cast(bit*)this.base)[0 .. maxc + 1];
                    bits = BitArray(maxc + 1, cast(size_t*)this.base);
                }
                bits.length = maxc + 1;
            }
        }

        // Set bit u, growing the set first if necessary.
        void setbit2(size_t u)
        {
            setbitmax(u + 1);
            //printf("setbit2 [x%02x] |= x%02x\n", u >> 3, 1 << (u & 7));
            bits[u] = 1;
        }

    };

    /* Compile a character class; pattern[p] is the opening '['.
     * Emits REbit or REnotbit followed by a header and the bit set.
     */
    int parseRange()
    {
        int c;
        int c2;
        uint i;
        uint cmax;

        cmax = 0x7F;
        p++;
        ubyte op = REbit;
        if (p == pattern.length)
        {
            error("invalid range");
            return 0;
        }
        if (pattern[p] == '^')
        {
            p++;
            op = REnotbit;
            if (p == pattern.length)
            {
                error("invalid range");
                return 0;
            }
        }
        buf.write(op);
        auto offset = buf.offset;
        buf.write(cast(uint)0);         // reserve space for length
        buf.reserve(128 / 8);
        auto r = new Range(buf);
        if (op == REnotbit)
            r.setbit2(0);
        // A leading ']' or '-' is taken literally.
        switch (pattern[p])
        {
            case ']':
            case '-':
                c = pattern[p];
                p++;
                r.setbit2(c);
                break;

            default:
                break;
        }

        enum RS { start, rliteral, dash }
        RS rs;

        rs = RS.start;
        for (;;)
        {
            if (p == pattern.length)
                goto Lerr;
            switch (pattern[p])
            {
                case ']':
                    switch (rs)
                    {
                        case RS.dash:
                            r.setbit2('-');
                            goto case;
                        case RS.rliteral:
                            r.setbit2(c);
                            break;
                        case RS.start:
                            break;
                        default:
                            assert(0);
                    }
                    p++;
                    break;

                case
'\\': p++; r.setbitmax(cmax); if (p == pattern.length) goto Lerr; switch (pattern[p]) { case 'd': for (i = '0'; i <= '9'; i++) r.bits[i] = 1; goto Lrs; case 'D': for (i = 1; i < '0'; i++) r.bits[i] = 1; for (i = '9' + 1; i <= cmax; i++) r.bits[i] = 1; goto Lrs; case 's': for (i = 0; i <= cmax; i++) if (isWhite(i)) r.bits[i] = 1; goto Lrs; case 'S': for (i = 1; i <= cmax; i++) if (!isWhite(i)) r.bits[i] = 1; goto Lrs; case 'w': for (i = 0; i <= cmax; i++) if (isword(cast(rchar)i)) r.bits[i] = 1; goto Lrs; case 'W': for (i = 1; i <= cmax; i++) if (!isword(cast(rchar)i)) r.bits[i] = 1; goto Lrs; Lrs: switch (rs) { case RS.dash: r.setbit2('-'); goto case; case RS.rliteral: r.setbit2(c); break; default: break; } rs = RS.start; continue; default: break; } c2 = escape(); goto Lrange; case '-': p++; if (rs == RS.start) goto Lrange; else if (rs == RS.rliteral) rs = RS.dash; else if (rs == RS.dash) { r.setbit2(c); r.setbit2('-'); rs = RS.start; } continue; default: c2 = pattern[p]; p++; Lrange: switch (rs) { case RS.rliteral: r.setbit2(c); goto case; case RS.start: c = c2; rs = RS.rliteral; break; case RS.dash: if (c > c2) { error("inverted range in character class"); return 0; } r.setbitmax(c2); //printf("c = %x, c2 = %x\n",c,c2); for (; c <= c2; c++) r.bits[c] = 1; rs = RS.start; break; default: assert(0); } continue; } break; } if (attributes & REA.ignoreCase) { // BUG: what about dchar? 
r.setbitmax(0x7F);
            // Mirror each letter's bit onto its other-case counterpart.
            for (c = 'a'; c <= 'z'; c++)
            {
                if (r.bits[c])
                    r.bits[c + 'A' - 'a'] = 1;
                else if (r.bits[c + 'A' - 'a'])
                    r.bits[c] = 1;
            }
        }
        //printf("maxc = %d, maxb = %d\n",r.maxc,r.maxb);
        // Back-patch the header reserved after the opcode: maxc, then maxb.
        (cast(ushort *)&buf.data[offset])[0] = cast(ushort)r.maxc;
        (cast(ushort *)&buf.data[offset])[1] = cast(ushort)r.maxb;
        return 1;

    Lerr:
        error("invalid range");
        return 0;
    }

    // Count the error and report it by throwing RegExpException(msg).
    void error(string msg)
    {
        errors++;
        debug(regexp) printf("error: %.*s\n", msg.length, msg.ptr);
        //assert(0);
        //*(char*)0=0;
        throw new RegExpException(msg);
    }

    // p is following the \ char
    //
    // Decodes one escape sequence and returns its character value. On the
    // normal path p ends up just past the sequence; the early returns leave p
    // on the first character that was not consumed.
    int escape()
    in
    {
        assert(p < pattern.length);
    }
    body
    {
        int c;
        int i;
        rchar tc;

        c = pattern[p];         // none of the cases are multibyte
        switch (c)
        {
            case 'b':    c = '\b';    break;
            case 'f':    c = '\f';    break;
            case 'n':    c = '\n';    break;
            case 'r':    c = '\r';    break;
            case 't':    c = '\t';    break;
            case 'v':    c = '\v';    break;

            // BUG: Perl does \a and \e too, should we?

            case 'c':
                ++p;
                if (p == pattern.length)
                    goto Lretc;
                c = pattern[p];
                // Note: we are deliberately not allowing dchar letters
                if (!(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')))
                {
                 Lcerr:
                    error("letter expected following \\c");
                    return 0;
                }
                c &= 0x1F;
                break;

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                c -= '0';
                for (i = 0; i < 2; i++)
                {
                    p++;
                    if (p == pattern.length)
                        goto Lretc;
                    tc = pattern[p];
                    if ('0' <= tc && tc <= '7')
                    {
                        c = c * 8 + (tc - '0');
                        // Treat overflow as if last
                        // digit was not an octal digit.
                        // 0xFF itself (\377) still fits in a byte, so only
                        // values above it count as overflow.
                        if (c > 0xFF)
                        {
                            c >>= 3;
                            return c;
                        }
                    }
                    else
                        return c;
                }
                break;

            case 'x':
                c = 0;
                for (i = 0; i < 2; i++)
                {
                    p++;
                    if (p == pattern.length)
                        goto Lretc;
                    tc = pattern[p];
                    if ('0' <= tc && tc <= '9')
                        c = c * 16 + (tc - '0');
                    else if ('a' <= tc && tc <= 'f')
                        c = c * 16 + (tc - 'a' + 10);
                    else if ('A' <= tc && tc <= 'F')
                        c = c * 16 + (tc - 'A' + 10);
                    else if (i == 0)    // if no hex digits after \x
                    {
                        // Not a valid \xXX sequence
                        return 'x';
                    }
                    else
                        return c;
                }
                break;

            case 'u':
                c = 0;
                for (i = 0; i < 4; i++)
                {
                    p++;
                    if (p == pattern.length)
                        goto Lretc;
                    tc = pattern[p];
                    if ('0' <= tc && tc <= '9')
                        c = c * 16 + (tc - '0');
                    else if ('a' <= tc && tc <= 'f')
                        c = c * 16 + (tc - 'a' + 10);
                    else if ('A' <= tc && tc <= 'F')
                        c = c * 16 + (tc - 'A' + 10);
                    else
                    {
                        // Not a valid \uXXXX sequence
                        // Rewind to just past the 'u' so the digits already
                        // read are parsed again as ordinary characters.
                        p -= i;
                        return 'u';
                    }
                }
                break;

            default:
                break;
        }
        p++;
    Lretc:
        return c;
    }

    /* ==================== optimizer ======================= */

    /* If the program starts (after any REbol) with REor, REnm, REnmq, REparen
     * or REgoto, try to build a bit set of possible leading characters with
     * starrchars() and insert it in front as an REtestbit instruction.
     * Programs starting with any other opcode are left unchanged.
     */
    void optimize()
    {
        ubyte[] prog;

        debug(regexp) printf("RegExp.optimize()\n");
        prog = buf.toBytes();
        for (size_t i = 0; 1;)
        {
            //printf("\tprog[%d] = %d, %d\n", i, prog[i], REstring);
            switch (prog[i])
            {
                case REend:
                case REanychar:
                case REanystar:
                case REbackref:
                case REeol:
                case REchar:
                case REichar:
                case REdchar:
                case REidchar:
                case REstring:
                case REistring:
                case REtestbit:
                case REbit:
                case REnotbit:
                case RErange:
                case REnotrange:
                case REwordboundary:
                case REnotwordboundary:
                case REdigit:
                case REnotdigit:
                case REspace:
                case REnotspace:
                case REword:
                case REnotword:
                    return;

                case REbol:
                    i++;
                    continue;

                case REor:
                case REnm:
                case REnmq:
                case REparen:
                case REgoto:
                {
                    auto bitbuf = new OutBuffer;
                    auto r = new Range(bitbuf);
                    auto offset = i;
                    if (starrchars(r, prog[i .. prog.length]))
                    {
                        debug(regexp) printf("\tfilter built\n");
                        // Layout: opcode, ushort maxc, ushort maxb, bit bytes.
                        buf.spread(offset, 1 + 4 + r.maxb);
                        buf.data[offset] = REtestbit;
                        (cast(ushort *)&buf.data[offset + 1])[0] = cast(ushort)r.maxc;
                        (cast(ushort *)&buf.data[offset + 1])[1] = cast(ushort)r.maxb;
                        i = offset + 1 + 4;
                        buf.data[i .. i + r.maxb] = r.base[0 .. r.maxb];
                    }
                    return;
                }
                default:
                    assert(0);
            }
        }
    }

    /////////////////////////////////////////
    // OR the leading character bits into r.
    // Limit the character range from 0..7F,
    // trymatch() will allow through anything over maxc.
    // Return 1 if success, 0 if we can't build a filter or
    // if there is no point to one.
int starrchars(Range r, const(ubyte)[] prog)
    {
        rchar c;
        uint maxc;
        size_t maxb;
        size_t len;
        uint b;
        uint n;
        uint m;
        const(ubyte)* pop;

        //printf("RegExp.starrchars(prog = %p, progend = %p)\n", prog, progend);
        for (size_t i = 0; i < prog.length;)
        {
            switch (prog[i])
            {
                case REchar:
                    c = prog[i + 1];
                    if (c <= 0x7F)
                        r.setbit2(c);
                    return 1;

                case REichar:
                    c = prog[i + 1];
                    if (c <= 0x7F)
                    {
                        r.setbit2(c);
                        r.setbit2(std.ascii.toLower(cast(rchar)c));
                    }
                    return 1;

                case REdchar:
                case REidchar:
                    return 1;

                case REanychar:
                    return 0;           // no point

                case REstring:
                    // Only the first character of the string matters here.
                    len = *cast(size_t *)&prog[i + 1];
                    assert(len);
                    c = *cast(rchar *)&prog[i + 1 + size_t.sizeof];
                    debug(regexp) printf("\tREstring %d, '%c'\n", len, c);
                    if (c <= 0x7F)
                        r.setbit2(c);
                    return 1;

                case REistring:
                    len = *cast(size_t *)&prog[i + 1];
                    assert(len);
                    c = *cast(rchar *)&prog[i + 1 + size_t.sizeof];
                    debug(regexp) printf("\tREistring %d, '%c'\n", len, c);
                    if (c <= 0x7F)
                    {
                        r.setbit2(std.ascii.toUpper(cast(rchar)c));
                        r.setbit2(std.ascii.toLower(cast(rchar)c));
                    }
                    return 1;

                case REtestbit:
                case REbit:
                    maxc = (cast(ushort *)&prog[i + 1])[0];
                    maxb = (cast(ushort *)&prog[i + 1])[1];
                    if (maxc <= 0x7F)
                        r.setbitmax(maxc);
                    else
                        maxb = r.maxb;  // merge only the bytes r already has
                    for (b = 0; b < maxb; b++)
                        r.base[b] |= prog[i + 1 + 4 + b];
                    return 1;

                case REnotbit:
                    maxc = (cast(ushort *)&prog[i + 1])[0];
                    maxb = (cast(ushort *)&prog[i + 1])[1];
                    if (maxc <= 0x7F)
                        r.setbitmax(maxc);
                    else
                        maxb = r.maxb;
                    for (b = 0; b < maxb; b++)
                        r.base[b] |= ~cast(int)prog[i + 1 + 4 + b];
                    return 1;

                case REbol:
                case REeol:
                    return 0;

                case REor:
                    // Both alternatives must contribute a usable filter.
                    len = (cast(uint *)&prog[i + 1])[0];
                    return starrchars(r, prog[i + 1 + uint.sizeof .. prog.length]) &&
                           starrchars(r, prog[i + 1 + uint.sizeof + len .. prog.length]);

                case REgoto:
                    len = (cast(uint *)&prog[i + 1])[0];
                    i += 1 + uint.sizeof + len;
                    break;

                case REanystar:
                    return 0;

                case REnm:
                case REnmq:
                    // len, n, m, ()
                    len = (cast(uint *)&prog[i + 1])[0];
                    n   = (cast(uint *)&prog[i + 1])[1];
                    m   = (cast(uint *)&prog[i + 1])[2];
                    pop = &prog[i + 1 + uint.sizeof * 3];
                    if (!starrchars(r, pop[0 .. len]))
                        return 0;
                    // With n == 0 the sub-expression may be skipped, so the
                    // code after it contributes leading characters too.
                    if (n)
                        return 1;
                    i += 1 + uint.sizeof * 3 + len;
                    break;

                case REparen:
                    // len, ()
                    len = (cast(uint *)&prog[i + 1])[0];
                    n   = (cast(uint *)&prog[i + 1])[1];
                    pop = &prog[0] + i + 1 + uint.sizeof * 2;
                    return starrchars(r, pop[0 .. len]);

                case REend:
                    return 0;

                case REwordboundary:
                case REnotwordboundary:
                    return 0;

                case REdigit:
                    r.setbitmax('9');
                    for (c = '0'; c <= '9'; c++)
                        r.bits[c] = 1;
                    return 1;

                case REnotdigit:
                    r.setbitmax(0x7F);
                    // Everything below '0' and above '9'. The lower loop must
                    // stop before '0', which is itself a digit.
                    for (c = 0; c < '0'; c++)
                        r.bits[c] = 1;
                    for (c = '9' + 1; c <= r.maxc; c++)
                        r.bits[c] = 1;
                    return 1;

                case REspace:
                    r.setbitmax(0x7F);
                    for (c = 0; c <= r.maxc; c++)
                        if (isWhite(c))
                            r.bits[c] = 1;
                    return 1;

                case REnotspace:
                    r.setbitmax(0x7F);
                    for (c = 0; c <= r.maxc; c++)
                        if (!isWhite(c))
                            r.bits[c] = 1;
                    return 1;

                case REword:
                    r.setbitmax(0x7F);
                    for (c = 0; c <= r.maxc; c++)
                        if (isword(cast(rchar)c))
                            r.bits[c] = 1;
                    return 1;

                case REnotword:
                    r.setbitmax(0x7F);
                    for (c = 0; c <= r.maxc; c++)
                        if (!isword(cast(rchar)c))
                            r.bits[c] = 1;
                    return 1;

                case REbackref:
                    return 0;

                default:
                    assert(0);
            }
        }
        return 1;
    }

    /* ==================== replace ======================= */

    /***********************
     * After a match is found with test(), this function
     * will take the match results and, using the format
     * string, generate and return a new string.
     */

    public string replace(string format)
    {
        return replace3(format, input, pmatch[0 ..
re_nsub + 1]);
    }

    // Static version that doesn't require a RegExp object to be created
    //
    // Expands the '$' sequences in format using the match offsets in pmatch
    // (pmatch[0] is the whole match, pmatch[n] the n-th group):
    //   $&        the matched text
    //   $`        the input before the match
    //   $'        the input after the match
    //   $n, $nn   the n-th / nn-th parenthesized match; nothing is emitted
    //             if that index is not present in pmatch
    //   $0, $00   copied literally, as is any other character after '$'
    // A '$' at the very end of format is copied literally.
    public static string replace3(string format, string input, regmatch_t[] pmatch)
    {
        string result;
        size_t c2;
        sizediff_t rm_so, rm_eo, i;

        // printf("replace3(format = '%.*s', input = '%.*s')\n", format.length, format.ptr, input.length, input.ptr);
        result.length = format.length;
        result.length = 0;
        for (size_t f = 0; f < format.length; f++)
        {
            char c = format[f];
            if (c != '$')
            {
                result ~= c;
                continue;
            }
            ++f;
            if (f == format.length)
            {
                result ~= '$';
                break;
            }
            c = format[f];
            switch (c)
            {
                case '&':
                    rm_so = pmatch[0].rm_so;
                    rm_eo = pmatch[0].rm_eo;
                    goto Lstring;

                case '`':
                    rm_so = 0;
                    rm_eo = pmatch[0].rm_so;
                    goto Lstring;

                case '\'':
                    rm_so = pmatch[0].rm_eo;
                    rm_eo = input.length;
                    goto Lstring;

                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    i = c - '0';
                    if (f + 1 < format.length)
                    {
                        c2 = format[f + 1];
                        if (c2 >= '0' && c2 <= '9')
                        {
                            // Two-digit group number: consume the second digit.
                            i = (c - '0') * 10 + (c2 - '0');
                            f++;
                            if (i == 0)
                            {
                                // "$00" names no group; copy it literally.
                                result ~= "$00";
                                continue;
                            }
                        }
                    }
                    if (i == 0)
                    {
                        // "$0" names no group; copy it literally. The next
                        // character has not been consumed, so the loop handles
                        // it normally (once) on the next iteration.
                        result ~= "$0";
                        continue;
                    }

                    if (i < pmatch.length)
                    {
                        rm_so = pmatch[i].rm_so;
                        rm_eo = pmatch[i].rm_eo;
                        goto Lstring;
                    }
                    break;

                Lstring:
                    if (rm_so != rm_eo)
                        result ~= input[rm_so .. rm_eo];
                    break;

                default:
                    result ~= '$';
                    result ~= c;
                    break;
            }
        }
        return result;
    }

    /************************************
     * Like replace(char[] format), but uses old style formatting:
Format Description
& replace with the match
\n replace with the nth parenthesized match, n is 1..9
\c replace with char c.
*/
    // '&' inserts the whole match, '\' followed by 1..9 inserts that group
    // (nothing if the group is empty or its number exceeds re_nsub), '\'
    // followed by any other character inserts that character, and a trailing
    // '\' is copied as is.
    public string replaceOld(string format)
    {
        string output;

        //printf("replace: this = %p so = %d, eo = %d\n", this, pmatch[0].rm_so, pmatch[0].rm_eo);
        //printf("3input = '%.*s'\n", input.length, input.ptr);
        output.length = format.length;
        output.length = 0;

        for (size_t pos = 0; pos < format.length; pos++)
        {
            char ch = format[pos];

            if (ch == '&')
            {
                // Whole match.
                output ~= input[pmatch[0].rm_so .. pmatch[0].rm_eo];
            }
            else if (ch == '\\' && pos + 1 < format.length)
            {
                // Escape: look at the character after the backslash.
                ch = format[++pos];
                if (ch >= '1' && ch <= '9')
                {
                    uint group = ch - '0';
                    if (group <= re_nsub &&
                        pmatch[group].rm_so != pmatch[group].rm_eo)
                    {
                        output ~= input[pmatch[group].rm_so .. pmatch[group].rm_eo];
                    }
                }
                else
                    output ~= ch;
            }
            else
            {
                // Ordinary character, or a backslash that ends the format.
                output ~= ch;
            }
        }
        return output;
    }
}

unittest
{
    // Created and placed in public domain by Don Clugston

    auto m = search("aBC r s", `bc\x20r[\40]s`, "i");
    assert(m.pre=="a");
    assert(m[0]=="BC r s");
    auto m2 = search("7xxyxxx", `^\d([a-z]{2})\D\1`);
    assert(m2[0]=="7xxyxx");
    // Just check the parsing.
auto m3 = search("dcbxx", `ca|b[\d\]\D\s\S\w-\W]`); auto m4 = search("xy", `[^\ca-\xFa\r\n\b\f\t\v\0123]{2,485}$`); auto m5 = search("xxx", `^^\r\n\b{13,}\f{4}\t\v\u02aF3a\w\W`); auto m6 = search("xxy", `.*y`); assert(m6[0]=="xxy"); auto m7 = search("QWDEfGH", "(ca|b|defg)+", "i"); assert(m7[0]=="DEfG"); auto m8 = search("dcbxx", `a?\B\s\S`); auto m9 = search("dcbxx", `[-w]`); auto m10 = search("dcbsfd", `aB[c-fW]dB|\d|\D|\u012356|\w|\W|\s|\S`, "i"); auto m11 = search("dcbsfd", `[]a-]`); m.replaceOld(`a&b\1c`); m.replace(`a$&b$'$1c`); } // Andrei //------------------------------------------------------------------------------ struct Pattern(Char) { immutable(Char)[] pattern; this(immutable(Char)[] pattern) { this.pattern = pattern; } } Pattern!(Char) pattern(Char)(immutable(Char)[] pat) { return typeof(return)(pat); } struct Splitter(Range) { Range _input; size_t _chunkLength; RegExp _rx; private Range search() { //rx = undead.regexp.search(_input, "(" ~ _separator.pattern ~ ")"); auto i = undead.regexp.find(cast(string) _input, _rx); return _input[i >= 0 ? i : _input.length .. _input.length]; } private void advance() { //writeln("(" ~ _separator.pattern ~ ")"); //writeln(_input); //assert(_rx[0].length > 0); _chunkLength += _rx[0].length; } this(Range input, Pattern!(char) separator) { _input = input; _rx = RegExp(separator.pattern); _chunkLength = _input.length - search().length; } ref auto opSlice() { return this; } @property Range front() { return _input[0 .. _chunkLength]; } @property bool empty() { return _input.empty; } void popFront() { if (_chunkLength == _input.length) { _input = _input[_chunkLength .. _input.length]; return; } advance(); _input = _input[_chunkLength .. 
_input.length]; _chunkLength = _input.length - search().length; } } Splitter!(Range) splitter(Range)(Range r, Pattern!(char) pat) { static assert(is(Unqual!(typeof(Range.init[0])) == char), Unqual!(typeof(Range.init[0])).stringof); return typeof(return)(cast(string) r, pat); } unittest { auto s1 = ", abc, de, fg, hi, "; auto sp2 = splitter(s1, pattern(", *")); //foreach (e; sp2) writeln("[", e, "]"); assert(equal(sp2, ["", "abc", "de", "fg", "hi"][])); } unittest { auto str= "foo"; string[] re_strs= [ r"^(h|a|)fo[oas]$", r"^(a|b|)fo[oas]$", r"^(a|)foo$", r"(a|)foo", r"^(h|)foo$", r"(h|)foo", r"(h|a|)fo[oas]", r"^(a|b|)fo[o]$", r"[abf][ops](o|oo|)(h|a|)", r"(h|)[abf][ops](o|oo|)", r"(c|)[abf][ops](o|oo|)" ]; foreach (re_str; re_strs) { auto re= new RegExp(re_str); auto matches= cast(bool)re.test(str); assert(matches); //writefln("'%s' matches '%s' ? %s", str, re_str, matches); } for (char c='a'; c<='z'; ++c) { auto re_str= "("~c~"|)foo"; auto re= new RegExp(re_str); auto matches= cast(bool)re.test(str); assert(matches); //writefln("'%s' matches '%s' ? %s", str, re_str, matches); } } undeaD-1.0.10/src/undead/socketstream.d000066400000000000000000000077641346374113600177170ustar00rootroot00000000000000// Written in the D programming language /* Copyright (C) 2004 Christopher E. Miller This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. 
Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/

/**************
 * $(RED Deprecated: This module is considered out-dated and not up to Phobos'
 *       current standards.)
 *
 * $(D SocketStream) is a stream for a blocking,
 * connected $(D Socket).
 *
 * Example:
 *      See $(SAMPLESRC htmlget.d)
 * Authors: Christopher E. Miller
 * References:
 *      $(LINK2 std_stream.html, std.stream)
 * Source:    $(PHOBOSSRC std/_socketstream.d)
 * Macros: WIKI=Phobos/StdSocketstream
 */

module undead.socketstream;

private import undead.stream;
private import std.socket;

/**************
 * $(D SocketStream) is a stream for a blocking,
 * connected $(D Socket).
 */
class SocketStream: Stream
{
private:
    Socket sock;

public:

    /**
     * Constructs a SocketStream with the specified Socket and FileMode flags.
     */
    this(Socket sock, FileMode mode)
    {
        if (mode & FileMode.In)
            readable = true;
        if (mode & FileMode.Out)
            writeable = true;

        this.sock = sock;
    }

    /**
     * Uses mode $(D FileMode.In | FileMode.Out).
     */
    this(Socket sock)
    {
        writeable = readable = true;
        this.sock = sock;
    }

    /**
     * Property to get the $(D Socket) that is being streamed.
     */
    Socket socket()
    {
        return sock;
    }

    /**
     * Issues one $(D receive) call for up to size bytes and returns the
     * number of bytes received.
     *
     * A result of 0 from $(D receive) sets $(D readEOF). An error result
     * ($(D sock.ERROR)) is reported to the caller as 0 bytes without
     * setting $(D readEOF).
     */
    override size_t readBlock(void* _buffer, size_t size)
    {
        ubyte* buffer = cast(ubyte*)_buffer;
        assertReadable();

        if (size == 0)
            return size;

        auto len = sock.receive(buffer[0 .. size]);
        readEOF = cast(bool)(len == 0);
        if (len == sock.ERROR)
            len = 0;
        return len;
    }

    /**
     * Issues one $(D send) call for up to size bytes and returns the
     * number of bytes sent; an error result ($(D sock.ERROR)) is reported
     * to the caller as 0 bytes.
     */
    override size_t writeBlock(const void* _buffer, size_t size)
    {
        // Keep the buffer const: send() only reads from it.
        auto buffer = cast(const(ubyte)*)_buffer;
        assertWriteable();

        if (size == 0)
            return size;

        auto len = sock.send(buffer[0 .. size]);
        // As before, a send of 0 bytes marks the stream as at EOF, but a
        // successful write no longer clears an EOF that readBlock recorded.
        if (len == 0)
            readEOF = true;
        if (len == sock.ERROR)
            len = 0;
        return len;
    }

    /**
     * Socket streams do not support seeking.
This disabled method throws * a $(D SeekException). */ override ulong seek(long offset, SeekPos whence) { throw new SeekException("Cannot seek a socket."); } /** * Does not return the entire stream because that would * require the remote connection to be closed. */ override string toString() { return sock.toString(); } /** * Close the $(D Socket). */ override void close() { sock.close(); super.close(); } } undeaD-1.0.10/src/undead/stream.d000066400000000000000000002614561346374113600165060ustar00rootroot00000000000000// Written in the D programming language /** * $(RED Deprecated: This module is considered out-dated and not up to Phobos' * current standards.) * * Source: $(PHOBOSSRC std/_stream.d) * Macros: * WIKI = Phobos/StdStream */ /* * Copyright (c) 2001-2005 * Pavel "EvilOne" Minayev * with buffering and endian support added by Ben Hinkle * with buffered readLine performance improvements by Dave Fladebo * with opApply inspired by (and mostly copied from) Regan Heath * with bug fixes and MemoryStream/SliceStream enhancements by Derick Eddington * * Permission to use, copy, modify, distribute and sell this software * and its documentation for any purpose is hereby granted without fee, * provided that the above copyright notice appear in all copies and * that both that copyright notice and this permission notice appear * in supporting documentation. Author makes no representations about * the suitability of this software for any purpose. It is provided * "as is" without express or implied warranty. 
*/
module undead.stream;

import std.internal.cstring;

/* Class structure:
 *   InputStream       interface for reading
 *   OutputStream      interface for writing
 *   Stream            abstract base of stream implementations
 *     File            an OS file stream
 *     FilterStream    a base-class for wrappers around another stream
 *       BufferedStream  a buffered stream wrapping another stream
 *         BufferedFile  a buffered File
 *       EndianStream    a wrapper stream for swapping byte order and BOMs
 *       SliceStream     a portion of another stream
 *     MemoryStream    a stream entirely stored in main memory
 *     TArrayStream    a stream wrapping an array-like buffer
 */

/// A base class for stream exceptions.
class StreamException: Exception {
  /// Construct a StreamException with given error message.
  this(string msg) { super(msg); }
}

/// Thrown when unable to read data from Stream.
class ReadException: StreamException {
  /// Construct a ReadException with given error message.
  this(string msg) { super(msg); }
}

/// Thrown when unable to write data to Stream.
class WriteException: StreamException {
  /// Construct a WriteException with given error message.
  this(string msg) { super(msg); }
}

/// Thrown when unable to move Stream pointer.
class SeekException: StreamException {
  /// Construct a SeekException with given error message.
  this(string msg) { super(msg); }
}

// Origin selector for seek(): start of stream, current position, or end.
enum SeekPos {
  Set,
  Current,
  End
}

private {
  import std.conv;
  import std.algorithm;
  import std.ascii;
  //import std.format;
  import std.system;    // for Endian enumeration
  import std.utf;
  import undead.utf;
  import core.bitop; // for bswap
  import core.vararg;
  static import std.file;
  import undead.internal.file;
  import undead.doformat;
}

/// InputStream is the interface for readable streams.
interface InputStream {

  /***
   * Read exactly size bytes into the buffer.
   *
   * Throws a ReadException if size bytes could not be read.
   */
  void readExact(void* buffer, size_t size);

  /***
   * Read a block of data big enough to fill the given array buffer.
   *
   * Returns: the actual number of bytes read. Unfilled bytes are not modified.
   */
  size_t read(ubyte[] buffer);

  /***
   * Read a basic type or counted string.
   *
   * Throw a ReadException if it could not be read.
   * Outside of byte, ubyte, and char, the format is
   * implementation-specific and should not be used except as opposite actions
   * to write.
   */
  void read(out byte x);
  void read(out ubyte x);       /// ditto
  void read(out short x);       /// ditto
  void read(out ushort x);      /// ditto
  void read(out int x);         /// ditto
  void read(out uint x);        /// ditto
  void read(out long x);        /// ditto
  void read(out ulong x);       /// ditto
  void read(out float x);       /// ditto
  void read(out double x);      /// ditto
  void read(out real x);        /// ditto
  void read(out ifloat x);      /// ditto
  void read(out idouble x);     /// ditto
  void read(out ireal x);       /// ditto
  void read(out cfloat x);      /// ditto
  void read(out cdouble x);     /// ditto
  void read(out creal x);       /// ditto
  void read(out char x);        /// ditto
  void read(out wchar x);       /// ditto
  void read(out dchar x);       /// ditto

  // reads a string, written earlier by write()
  void read(out char[] s);      /// ditto

  // reads a Unicode string, written earlier by write()
  void read(out wchar[] s);     /// ditto

  /***
   * Read a line that is terminated with some combination of carriage return and
   * line feed or end-of-file.
   *
   * The terminators are not included. The wchar version
   * is identical. The optional buffer parameter is filled (reallocating
   * it if necessary) and a slice of the result is returned.
   */
  char[] readLine();
  char[] readLine(char[] result);       /// ditto
  wchar[] readLineW();                  /// ditto
  wchar[] readLineW(wchar[] result);    /// ditto

  /***
   * Overload foreach statements to read the stream line by line and call the
   * supplied delegate with each line or with each line with line number.
   *
   * The string passed in line may be reused between calls to the delegate.
   * Line numbering starts at 1.
   * Breaking out of the foreach will leave the stream
   * position at the beginning of the next line to be read.
   * For example, to echo a file line-by-line with line numbers run:
   * ------------------------------------
   * Stream file = new BufferedFile("sample.txt");
   * foreach(ulong n, char[] line; file)
   * {
   *     writefln("line %d: %s", n, line);
   * }
   * file.close();
   * ------------------------------------
   */

  // iterate through the stream line-by-line
  int opApply(scope int delegate(ref char[] line) dg);
  int opApply(scope int delegate(ref ulong n, ref char[] line) dg);  /// ditto
  int opApply(scope int delegate(ref wchar[] line) dg);            /// ditto
  int opApply(scope int delegate(ref ulong n, ref wchar[] line) dg); /// ditto

  /// Read a string of the given length,
  /// throwing ReadException if there was a problem.
  char[] readString(size_t length);

  /***
   * Read a string of the given length, throwing ReadException if there was a
   * problem.
   *
   * The file format is implementation-specific and should not be used
   * except as opposite actions to write.
   */
  wchar[] readStringW(size_t length);

  /***
   * Read and return the next character in the stream.
   *
   * This is the only method that will handle ungetc properly.
   * getcw's format is implementation-specific.
   * If EOF is reached then getc returns char.init and getcw returns wchar.init.
   */
  char getc();
  wchar getcw(); /// ditto

  /***
   * Push a character back onto the stream.
   *
   * They will be returned in first-in last-out order from getc/getcw.
   * Only has effect on further calls to getc() and getcw().
   */
  char ungetc(char c);
  wchar ungetcw(wchar c); /// ditto

  /***
   * Scan a string from the input using a similar form to C's scanf
   * and std.format.
   *
   * An argument of type string is interpreted as a format string.
   * All other arguments must be pointer types.
   * If a format string is not present a default will be supplied computed from
   * the base type of the pointer type. An argument of type string* is filled
   * (possibly with appending characters) and a slice of the result is assigned
   * back into the argument. For example the following readf statements
   * are equivalent:
   * --------------------------
   * int x;
   * double y;
   * string s;
   * file.readf(&x, " hello ", &y, &s);
   * file.readf("%d hello %f %s", &x, &y, &s);
   * file.readf("%d hello %f", &x, &y, "%s", &s);
   * --------------------------
   */
  int vreadf(TypeInfo[] arguments, va_list args);
  int readf(...); /// ditto

  /// Retrieve the number of bytes available for immediate reading.
  @property size_t available();

  /***
   * Return whether the current file position is the same as the end of the
   * file.
   *
   * This does not require actually reading past the end, as with stdio. For
   * non-seekable streams this might only return true after attempting to read
   * past the end.
   */
  @property bool eof();

  @property bool isOpen();      /// Return true if the stream is currently open.
}

/// Interface for writable streams.
interface OutputStream {

  /***
   * Write exactly size bytes from buffer, or throw a WriteException if that
   * could not be done.
   */
  void writeExact(const void* buffer, size_t size);

  /***
   * Write as much of the buffer as possible,
   * returning the number of bytes written.
   */
  size_t write(const(ubyte)[] buffer);

  /***
   * Write a basic type.
   *
   * Outside of byte, ubyte, and char, the format is implementation-specific
   * and should only be used in conjunction with read.
   * Throw WriteException on error.
*/
  void write(byte x);
  void write(ubyte x);          /// ditto
  void write(short x);          /// ditto
  void write(ushort x);         /// ditto
  void write(int x);            /// ditto
  void write(uint x);           /// ditto
  void write(long x);           /// ditto
  void write(ulong x);          /// ditto
  void write(float x);          /// ditto
  void write(double x);         /// ditto
  void write(real x);           /// ditto
  void write(ifloat x);         /// ditto
  void write(idouble x);        /// ditto
  void write(ireal x);          /// ditto
  void write(cfloat x);         /// ditto
  void write(cdouble x);        /// ditto
  void write(creal x);          /// ditto
  void write(char x);           /// ditto
  void write(wchar x);          /// ditto
  void write(dchar x);          /// ditto

  /***
   * Writes a string, together with its length.
   *
   * The format is implementation-specific
   * and should only be used in conjunction with read.
   * Throw WriteException on error.
   */
  void write(const(char)[] s);
  void write(const(wchar)[] s); /// ditto

  /***
   * Write a line of text,
   * appending the line with an operating-system-specific line ending.
   *
   * Throws WriteException on error.
   */
  void writeLine(const(char)[] s);

  /***
   * Write a line of text,
   * appending the line with an operating-system-specific line ending.
   *
   * The format is implementation-specific.
   * Throws WriteException on error.
   */
  void writeLineW(const(wchar)[] s);

  /***
   * Write a string of text.
   *
   * Throws WriteException if it could not be fully written.
   */
  void writeString(const(char)[] s);

  /***
   * Write a string of text.
   *
   * The format is implementation-specific.
   * Throws WriteException if it could not be fully written.
   */
  void writeStringW(const(wchar)[] s);

  /***
   * Print a formatted string into the stream using printf-style syntax,
   * returning the number of bytes written.
   */
  size_t vprintf(const(char)[] format, va_list args);
  size_t printf(const(char)[] format, ...);     /// ditto

  /***
   * Print a formatted string into the stream using writef-style syntax.
   * References: std.format.
   * Returns: self to chain with other stream commands like flush.
   */
  OutputStream writef(...);
  OutputStream writefln(...); /// ditto
  OutputStream writefx(TypeInfo[] arguments, va_list argptr, int newline = false);  /// ditto

  void flush(); /// Flush pending output if appropriate.
  void close(); /// Close the stream, flushing output if appropriate.
  @property bool isOpen(); /// Return true if the stream is currently open.
}

/***
 * Stream is the base abstract class from which the other stream classes derive.
 *
 * Stream's byte order is the format native to the computer.
 *
 * Reading:
 * These methods require that the readable flag be set.
 * Problems with reading result in a ReadException being thrown.
 * Stream implements the InputStream interface in addition to the
 * readBlock method.
 *
 * Writing:
 * These methods require that the writeable flag be set. Problems with writing
 * result in a WriteException being thrown. Stream implements the OutputStream
 * interface in addition to the following methods:
 * writeBlock
 * copyFrom
 * copyFrom
 *
 * Seeking:
 * These methods require that the seekable flag be set.
 * Problems with seeking result in a SeekException being thrown.
 * seek, seekSet, seekCur, seekEnd, position, size, toString, toHash
 */

// not really abstract, but its instances will do nothing useful
class Stream : InputStream, OutputStream {
  private import std.string, std.digest.crc, core.stdc.stdlib, core.stdc.stdio;

  // stream abilities
  // All three capability flags default to false; subclasses switch on what
  // they support.
  bool readable = false;        /// Indicates whether this stream can be read from.
  bool writeable = false;       /// Indicates whether this stream can be written to.
  bool seekable = false;        /// Indicates whether this stream can be sought within.
  protected bool isopen = true; /// Indicates whether this stream is open.

  protected bool readEOF = false; /** Indicates whether this stream is at eof
                                   *   after the last read attempt.
                                   */

  protected bool prevCr = false; /** For a non-seekable stream indicates that
                                  * the last readLine or readLineW ended on a
                                  * '\r' character.
*/

  this() {}

  /***
   * Read up to size bytes into the buffer and return the number of bytes
   * actually read. A return value of 0 indicates end-of-file.
   */
  abstract size_t readBlock(void* buffer, size_t size);

  // Keeps calling readBlock until size bytes have been stored in buffer;
  // throws ReadException if readBlock reports end-of-file (0) first.
  void readExact(void* buffer, size_t size) {
    while (size != 0) {
      immutable got = readBlock(buffer, size);
      if (got == 0)
        throw new ReadException("not enough data in stream");
      buffer += got;
      size -= got;
    }
  }

  // Single readBlock call sized to the array; returns the number of bytes
  // actually read.
  size_t read(ubyte[] buffer) {
    return readBlock(buffer.ptr, buffer.length);
  }

  // Fixed-size value readers: each reads exactly the in-memory size of its
  // type via readExact, which throws ReadException on a short read.
  void read(out byte x) { readExact(&x, byte.sizeof); }
  void read(out ubyte x) { readExact(&x, ubyte.sizeof); }
  void read(out short x) { readExact(&x, short.sizeof); }
  void read(out ushort x) { readExact(&x, ushort.sizeof); }
  void read(out int x) { readExact(&x, int.sizeof); }
  void read(out uint x) { readExact(&x, uint.sizeof); }
  void read(out long x) { readExact(&x, long.sizeof); }
  void read(out ulong x) { readExact(&x, ulong.sizeof); }
  void read(out float x) { readExact(&x, float.sizeof); }
  void read(out double x) { readExact(&x, double.sizeof); }
  void read(out real x) { readExact(&x, real.sizeof); }
  void read(out ifloat x) { readExact(&x, ifloat.sizeof); }
  void read(out idouble x) { readExact(&x, idouble.sizeof); }
  void read(out ireal x) { readExact(&x, ireal.sizeof); }
  void read(out cfloat x) { readExact(&x, cfloat.sizeof); }
  void read(out cdouble x) { readExact(&x, cdouble.sizeof); }
  void read(out creal x) { readExact(&x, creal.sizeof); }
  void read(out char x) { readExact(&x, char.sizeof); }
  void read(out wchar x) { readExact(&x, wchar.sizeof); }
  void read(out dchar x) { readExact(&x, dchar.sizeof); }

  // Counted string: a size_t length followed by that many chars.
  void read(out char[] s) {
    size_t count;
    read(count);
    s = readString(count);
  }

  // Counted Unicode string: a size_t length followed by that many wchars.
  void read(out wchar[] s) {
    size_t count;
    read(count);
    s = readStringW(count);
  }

  // Line reader without a caller-supplied buffer.
  char[] readLine() {
    return readLine(null);
  }

  // Reads one line ending at CR, LF, CR/LF or EOF (char.init from getc).
  // Characters are stored into result while they fit and appended after
  // that; the returned slice has exactly the line's length.
  char[] readLine(char[] result) {
    size_t used = 0;
    for (char ch = getc(); readable; ch = getc()) {
      if (ch == '\r') {
        if (seekable) {
          // look one character ahead and swallow the LF of a CR/LF pair
          immutable next = getc();
          if (next != '\n')
            ungetc(next);
        } else {
          // no look-ahead here; the next getc() deals with a following LF
          prevCr = true;
        }
        break;
      }
      if (ch == '\n' || ch == char.init)
        break;
      if (used < result.length)
        result[used] = ch;
      else
        result ~= ch;
      used++;
    }
    result.length = used;
    return result;
  }

  // wchar line reader without a caller-supplied buffer.
  wchar[] readLineW() {
    return readLineW(null);
  }

  // reads a Unicode line, terminated by either CR, LF, CR/LF,
  // or EOF;
  // fills supplied buffer if line fits and otherwise allocates a new string.
wchar[] readLineW(wchar[] result) {
    size_t strlen = 0;
    wchar c = getcw();
    while (readable) {
      switch (c) {
      case '\r':
        if (seekable) {
          // peek at the next character so a CR/LF pair counts as one ending
          c = getcw();
          if (c != '\n')
            ungetcw(c);
        } else {
          // defer the LF check to the next getc()/getcw() call
          prevCr = true;
        }
        goto case;
      case '\n':
      case wchar.init:
        result.length = strlen;
        return result;

      default:
        if (strlen < result.length) {
          result[strlen] = c;
        } else {
          result ~= c;
        }
        strlen++;
      }
      c = getcw();
    }
    result.length = strlen;
    return result;
  }

  // iterate through the stream line-by-line - due to Regan Heath
  // The delegate's non-zero return value stops the loop and is returned.
  int opApply(scope int delegate(ref char[] line) dg) {
    int res = 0;
    char[128] buf;
    while (!eof) {
      char[] line = readLine(buf);
      res = dg(line);
      if (res) break;
    }
    return res;
  }

  // iterate through the stream line-by-line with line count and string
  // (the count starts at 1)
  int opApply(scope int delegate(ref ulong n, ref char[] line) dg) {
    int res = 0;
    ulong n = 1;
    char[128] buf;
    while (!eof) {
      auto line = readLine(buf);
      res = dg(n,line);
      if (res) break;
      n++;
    }
    return res;
  }

  // iterate through the stream line-by-line with wchar[]
  int opApply(scope int delegate(ref wchar[] line) dg) {
    int res = 0;
    wchar[128] buf;
    while (!eof) {
      auto line = readLineW(buf);
      res = dg(line);
      if (res) break;
    }
    return res;
  }

  // iterate through the stream line-by-line with line count and wchar[]
  int opApply(scope int delegate(ref ulong n, ref wchar[] line) dg) {
    int res = 0;
    ulong n = 1;
    wchar[128] buf;
    while (!eof) {
      auto line = readLineW(buf);
      res = dg(n,line);
      if (res) break;
      n++;
    }
    return res;
  }

  // reads a string of given length, throws
  // ReadException on error
  char[] readString(size_t length) {
    char[] result = new char[length];
    readExact(result.ptr, length);
    return result;
  }

  // reads a Unicode string of given length, throws
  // ReadException on error
  // (length counts wchars; the byte count passed on is length * wchar.sizeof)
  wchar[] readStringW(size_t length) {
    auto result = new wchar[length];
    readExact(result.ptr, result.length * wchar.sizeof);
    return result;
  }

  // unget buffer
  // Element 0 is a dummy (see ungetc), so pushed-back characters are present
  // only when the length exceeds 1.
  private wchar[] unget;
  final bool ungetAvailable() { return unget.length > 1; }

  // reads and returns next character from the stream,
  // handles characters pushed back by ungetc()
  // returns char.init on eof.
  char getc() {
    char c;
    if (prevCr) {
      // the previous line ended on CR: drop one directly following LF
      prevCr = false;
      c = getc();
      if (c != '\n')
        return c;
    }
    if (unget.length > 1) {
      // pop the most recently pushed-back character
      c = cast(char)unget[unget.length - 1];
      unget.length = unget.length - 1;
    } else {
      // the result is ignored: if nothing is read, c keeps char.init
      readBlock(&c,1);
    }
    return c;
  }

  // reads and returns next Unicode character from the
  // stream, handles characters pushed back by ungetc()
  // returns wchar.init on eof.
  wchar getcw() {
    wchar c;
    if (prevCr) {
      prevCr = false;
      c = getcw();
      if (c != '\n')
        return c;
    }
    if (unget.length > 1) {
      c = unget[unget.length - 1];
      unget.length = unget.length - 1;
    } else {
      void* buf = &c;
      size_t n = readBlock(buf,2);
      // only half of the wchar arrived: fetch the second byte or fail
      if (n == 1 && readBlock(buf+1,1) == 0)
          throw new ReadException("not enough data in stream");
    }
    return c;
  }

  // pushes back character c into the stream; only has
  // effect on further calls to getc() and getcw()
  // (pushing back char.init is a no-op)
  char ungetc(char c) {
    if (c == c.init) return c;
    // first byte is a dummy so that we never set length to 0
    if (unget.length == 0)
      unget.length = 1;
    unget ~= c;
    return c;
  }

  // pushes back Unicode character c into the stream; only
  // has effect on further calls to getc() and getcw()
  // (pushing back wchar.init is a no-op)
  wchar ungetcw(wchar c) {
    if (c == c.init) return c;
    // first byte is a dummy so that we never set length to 0
    if (unget.length == 0)
      unget.length = 1;
    unget ~= c;
    return c;
  }

  // scanf-style reader behind readf().
  //
  // Walks the variadic arguments: a string argument becomes the current
  // format; a pointer argument met while no format text is pending gets a
  // default format chosen from its pointee type. c always holds one
  // look-ahead character, which is pushed back with ungetc() before
  // returning.
  //
  // Returns count, which is incremented once per character consumed inside
  // a conversion. Characters consumed while matching literal or whitespace
  // format text are not counted.
  //
  // NOTE(review): format and argument indexing is not bounds-checked here;
  // a format ending in '%' or naming more conversions than there are
  // arguments looks like it would index out of range - confirm.
  int vreadf(TypeInfo[] arguments, va_list args) {
    string fmt;
    int j = 0;              // index of the next argument
    int count = 0, i = 0;   // i: index into fmt
    char c;
    bool firstCharacter = true;
    while ((j < arguments.length || i < fmt.length) && !eof) {
      if(firstCharacter) {
        // prime the look-ahead character on the first iteration
        c = getc();
        firstCharacter = false;
      }
      if (fmt.length == 0 || i == fmt.length) {
        // format exhausted: take a new one from the arguments, or derive a
        // default from the type of the next pointer argument
        i = 0;
        if (arguments[j] is typeid(string) || arguments[j] is typeid(char[])
            || arguments[j] is typeid(const(char)[])) {
          fmt = va_arg!(string)(args);
          j++;
          continue;
        } else if (arguments[j] is typeid(int*) ||
                   arguments[j] is typeid(byte*) ||
                   arguments[j] is typeid(short*) ||
                   arguments[j] is typeid(long*)) {
          fmt = "%d";
        } else if (arguments[j] is typeid(uint*) ||
                   arguments[j] is typeid(ubyte*) ||
                   arguments[j] is typeid(ushort*) ||
                   arguments[j] is typeid(ulong*)) {
          fmt = "%d";
        } else if (arguments[j] is typeid(float*) ||
                   arguments[j] is typeid(double*) ||
                   arguments[j] is typeid(real*)) {
          fmt = "%f";
        } else if (arguments[j] is typeid(char[]*) ||
                   arguments[j] is typeid(wchar[]*) ||
                   arguments[j] is typeid(dchar[]*)) {
          fmt = "%s";
        } else if (arguments[j] is typeid(char*)) {
          fmt = "%c";
        }
      }
      if (fmt[i] == '%') {      // a field
        i++;
        // NOTE(review): suppress is set here but never read in this
        // function, so '*' does not appear to suppress anything - confirm.
        bool suppress = false;
        if (fmt[i] == '*') {    // suppress assignment
          suppress = true;
          i++;
        }
        // read field width
        int width = 0;
        while (isDigit(fmt[i])) {
          width = width * 10 + (fmt[i] - '0');
          i++;
        }
        // -1 stands for "no limit": the width-- loops below never reach 0
        if (width == 0)
          width = -1;
        // skip any modifier if present
        if (fmt[i] == 'h' || fmt[i] == 'l' || fmt[i] == 'L')
          i++;
        // check the typechar and act accordingly
        switch (fmt[i]) {
        case 'd':       // decimal/hexadecimal/octal integer
        case 'D':
        case 'u':
        case 'U':
        case 'o':
        case 'O':
        case 'x':
        case 'X':
        case 'i':
        case 'I':
          {
            while (isWhite(c)) {
              c = getc();
              count++;
            }
            bool neg = false;
            if (c == '-') {
              neg = true;
              c = getc();
              count++;
            } else if (c == '+') {
              c = getc();
              count++;
            }
            // fold the conversion letter to lower case
            char ifmt = cast(char)(fmt[i] | 0x20);
            if (ifmt == 'i')    { // undetermined base
              if (c == '0')     { // octal or hex
                c = getc();
                count++;
                if (c == 'x' || c == 'X')       { // hex
                  ifmt = 'x';
                  c = getc();
                  count++;
                } else {        // octal
                  ifmt = 'o';
                }
              }
              else      // decimal
                ifmt = 'd';
            }
            long n = 0;
            switch (ifmt)
            {
              case 'd': // decimal
              case 'u': {
                while (isDigit(c) && width) {
                  n = n * 10 + (c - '0');
                  width--;
                  c = getc();
                  count++;
                }
              } break;

              case 'o': {       // octal
                while (isOctalDigit(c) && width) {
                  n = n * 8 + (c - '0');
                  width--;
                  c = getc();
                  count++;
                }
              } break;

              case 'x': {       // hexadecimal
                while (isHexDigit(c) && width) {
                  n *= 0x10;
                  if (isDigit(c))
                    n += c - '0';
                  else
                    n += 0xA + (c | 0x20) - 'a';
                  width--;
                  c = getc();
                  count++;
                }
              } break;

              default:
                assert(0);
            }
            if (neg)
              n = -n;
            // store through the pointer, narrowing to the pointee type
            if (arguments[j] is typeid(int*)) {
              int* p = va_arg!(int*)(args);
              *p = cast(int)n;
            } else if (arguments[j] is typeid(short*)) {
              short* p = va_arg!(short*)(args);
              *p = cast(short)n;
            } else if (arguments[j] is typeid(byte*)) {
              byte* p = va_arg!(byte*)(args);
              *p = cast(byte)n;
            } else if (arguments[j] is typeid(long*)) {
              long* p = va_arg!(long*)(args);
              *p = n;
            } else if (arguments[j] is typeid(uint*)) {
              uint* p = va_arg!(uint*)(args);
              *p = cast(uint)n;
            } else if (arguments[j] is typeid(ushort*)) {
              ushort* p = va_arg!(ushort*)(args);
              *p = cast(ushort)n;
            } else if (arguments[j] is typeid(ubyte*)) {
              ubyte* p = va_arg!(ubyte*)(args);
              *p = cast(ubyte)n;
            } else if (arguments[j] is typeid(ulong*)) {
              ulong* p = va_arg!(ulong*)(args);
              *p = cast(ulong)n;
            }
            j++;
            i++;
          } break;

        case 'f':       // float
        case 'F':
        case 'e':
        case 'E':
        case 'g':
        case 'G':
          {
            while (isWhite(c)) {
              c = getc();
              count++;
            }
            bool neg = false;
            if (c == '-') {
              neg = true;
              c = getc();
              count++;
            } else if (c == '+') {
              c = getc();
              count++;
            }
            real r = 0;
            // integer part
            while (isDigit(c) && width) {
              r = r * 10 + (c - '0');
              width--;
              c = getc();
              count++;
            }
            // fraction: keep accumulating digits, then scale down once
            if (width && c == '.') {
              width--;
              c = getc();
              count++;
              double frac = 1;
              while (isDigit(c) && width) {
                r = r * 10 + (c - '0');
                frac *= 10;
                width--;
                c = getc();
                count++;
              }
              r /= frac;
            }
            // optional exponent, applied by repeated multiply/divide by 10
            if (width && (c == 'e' || c == 'E')) {
              width--;
              c = getc();
              count++;
              if (width) {
                bool expneg = false;
                if (c == '-') {
                  expneg = true;
                  width--;
                  c = getc();
                  count++;
                } else if (c == '+') {
                  width--;
                  c = getc();
                  count++;
                }
                real exp = 0;
                while (isDigit(c) && width) {
                  exp = exp * 10 + (c - '0');
                  width--;
                  c = getc();
                  count++;
                }
                if (expneg) {
                  while (exp--)
                    r /= 10;
                } else {
                  while (exp--)
                    r *= 10;
                }
              }
            }
            // case-insensitive "nan"
            if(width && (c == 'n' || c == 'N')) {
              width--;
              c = getc();
              count++;
              if(width && (c == 'a' || c == 'A')) {
                width--;
                c = getc();
                count++;
                if(width && (c == 'n' || c == 'N')) {
                  width--;
                  c = getc();
                  count++;
                  r = real.nan;
                }
              }
            }
            // case-insensitive "inf"
            if(width && (c == 'i' || c == 'I')) {
              width--;
              c = getc();
              count++;
              if(width && (c == 'n' || c == 'N')) {
                width--;
                c = getc();
                count++;
                if(width && (c == 'f' || c == 'F')) {
                  width--;
                  c = getc();
                  count++;
                  r = real.infinity;
                }
              }
            }
            if (neg)
              r = -r;
            if (arguments[j] is typeid(float*)) {
              float* p = va_arg!(float*)(args);
              *p = r;
            } else if (arguments[j] is typeid(double*)) {
              double* p = va_arg!(double*)(args);
              *p = r;
            } else if (arguments[j] is typeid(real*)) {
              real* p = va_arg!(real*)(args);
              *p = r;
            }
            j++;
            i++;
          } break;

        case 's': {     // string
          while (isWhite(c)) {
            c = getc();
            count++;
          }
          char[] s;
          char[]* p;
          size_t strlen;
          if (arguments[j] is typeid(char[]*)) {
            // reuse the caller's existing buffer where it is large enough
            p = va_arg!(char[]*)(args);
            s = *p;
          }
          // collect characters up to the next whitespace or char.init
          while (!isWhite(c) && c != char.init) {
            if (strlen < s.length) {
              s[strlen] = c;
            } else {
              s ~= c;
            }
            strlen++;
            c = getc();
            count++;
          }
          s = s[0 .. strlen];
          if (arguments[j] is typeid(char[]*)) {
            *p = s;
          } else if (arguments[j] is typeid(char*)) {
            // raw char* target: copy with a terminating 0; the destination
            // size is not known here
            s ~= 0;
            auto q = va_arg!(char*)(args);
            q[0 .. s.length] = s[];
          } else if (arguments[j] is typeid(wchar[]*)) {
            auto q = va_arg!(const(wchar)[]*)(args);
            *q = toUTF16(s);
          } else if (arguments[j] is typeid(dchar[]*)) {
            auto q = va_arg!(const(dchar)[]*)(args);
            *q = toUTF32(s);
          }
          j++;
          i++;
        } break;

        case 'c': {     // character(s)
          char* s = va_arg!(char*)(args);
          // no width given: read exactly one character without skipping
          // whitespace; with a width, leading whitespace is skipped first
          if (width < 0)
            width = 1;
          else
            while (isWhite(c)) {
            c = getc();
            count++;
          }
          while (width-- && !eof) {
            *(s++) = c;
            c = getc();
            count++;
          }
          j++;
          i++;
        } break;

        case 'n': {     // number of chars read so far
          int* p = va_arg!(int*)(args);
          *p = count;
          j++;
          i++;
        } break;

        default:        // read character as is
          goto nws;
        }
      } else if (isWhite(fmt[i])) {     // skip whitespace
        while (isWhite(c))
          c = getc();
        i++;
      } else {  // read character as is
      nws:
        // a literal format character must match the input, else stop
        if (fmt[i] != c)
          break;
        c = getc();
        i++;
      }
    }
    // hand the unconsumed look-ahead character back to the stream
    ungetc(c);
    return count;
  }

  // Variadic front end: forwards the hidden argument list to vreadf().
  int readf(...) {
    return vreadf(_arguments, _argptr);
  }

  // returns estimated number of bytes available for immediate reading
  // (this base implementation always reports 0)
  @property size_t available() { return 0; }

  /***
   * Write up to size bytes from buffer in the stream, returning the actual
   * number of bytes that were written.
*/
  abstract size_t writeBlock(const void* buffer, size_t size);

  // writes block of data of specified size,
  // throws WriteException on error
  // (an error here means writeBlock returned 0 before everything was written)
  void writeExact(const void* buffer, size_t size) {
    const(void)* p = buffer;
    for(;;) {
      if (!size) return;
      size_t writesize = writeBlock(p, size);
      if (writesize == 0) break;
      p += writesize;
      size -= writesize;
    }
    if (size != 0)
      throw new WriteException("unable to write to stream");
  }

  // writes the given array of bytes, returns
  // actual number of bytes written
  size_t write(const(ubyte)[] buffer) {
    return writeBlock(buffer.ptr, buffer.length);
  }

  // write a single value of desired type,
  // throw WriteException on error
  // (the value's in-memory bytes are written as they are)
  void write(byte x) { writeExact(&x, x.sizeof); }
  void write(ubyte x) { writeExact(&x, x.sizeof); }
  void write(short x) { writeExact(&x, x.sizeof); }
  void write(ushort x) { writeExact(&x, x.sizeof); }
  void write(int x) { writeExact(&x, x.sizeof); }
  void write(uint x) { writeExact(&x, x.sizeof); }
  void write(long x) { writeExact(&x, x.sizeof); }
  void write(ulong x) { writeExact(&x, x.sizeof); }
  void write(float x) { writeExact(&x, x.sizeof); }
  void write(double x) { writeExact(&x, x.sizeof); }
  void write(real x) { writeExact(&x, x.sizeof); }
  void write(ifloat x) { writeExact(&x, x.sizeof); }
  void write(idouble x) { writeExact(&x, x.sizeof); }
  void write(ireal x) { writeExact(&x, x.sizeof); }
  void write(cfloat x) { writeExact(&x, x.sizeof); }
  void write(cdouble x) { writeExact(&x, x.sizeof); }
  void write(creal x) { writeExact(&x, x.sizeof); }
  void write(char x) { writeExact(&x, x.sizeof); }
  void write(wchar x) { writeExact(&x, x.sizeof); }
  void write(dchar x) { writeExact(&x, x.sizeof); }

  // writes a string, together with its length
  // (the length goes first, as a size_t)
  void write(const(char)[] s) {
    write(s.length);
    writeString(s);
  }

  // writes a Unicode string, together with its length
  // (the length is the number of wchars)
  void write(const(wchar)[] s) {
    write(s.length);
    writeStringW(s);
  }

  // writes a line, throws WriteException on error
  void writeLine(const(char)[] s) {
    writeString(s);
    version (Windows)
      writeString("\r\n");
    else version (Mac)
      writeString("\r");
    else
      writeString("\n");
  }

  // writes a Unicode line, throws WriteException on error
  void writeLineW(const(wchar)[] s) {
    writeStringW(s);
    version (Windows)
      writeStringW("\r\n");
    else version (Mac)
      writeStringW("\r");
    else
      writeStringW("\n");
  }

  // writes a string, throws WriteException on error
  void writeString(const(char)[] s) {
    writeExact(s.ptr, s.length);
  }

  // writes a Unicode string, throws WriteException on error
  void writeStringW(const(wchar)[] s) {
    writeExact(s.ptr, s.length * wchar.sizeof);
  }

  // writes data to stream using vprintf() syntax,
  // returns number of bytes written
  //
  // The text is formatted into a 1024-byte stack buffer first; when that is
  // too small, a larger alloca() buffer is used and formatting is retried.
  // Throws WriteException (from writeString) if the formatted text cannot be
  // written completely.
  size_t vprintf(const(char)[] format, va_list args) {
    // shamelessly stolen from OutBuffer,
    // by Walter's permission
    char[1024] buffer;
    char* p = buffer.ptr;
    // Can't use `tempCString()` here as it will result in compilation error:
    // "cannot mix core.std.stdlib.alloca() and exception handling".
    auto f = toStringz(format);
    size_t psize = buffer.length;
    size_t count;
    while (true) {
      // vsnprintf leaves the va_list it was given in an indeterminate
      // state, so each attempt formats from a fresh copy of the caller's
      // list instead of passing args again on a retry.
      va_list attempt;
      va_copy(attempt, args);
      version (Windows) {
        count = vsnprintf(p, psize, f, attempt);
        va_end(attempt);
        // -1 reports truncation here: double the buffer and retry
        if (count != -1)
          break;
        psize *= 2;
        p = cast(char*) alloca(psize);
      } else version (Posix) {
        count = vsnprintf(p, psize, f, attempt);
        va_end(attempt);
        if (count == -1)
          psize *= 2;
        else if (count >= psize)
          // the result is the needed length without the terminator
          psize = count + 1;
        else
          break;
        p = cast(char*) alloca(psize);
      } else
          throw new Exception("unsupported platform");
    }
    writeString(p[0 .. count]);
    return count;
  }

  // writes data to stream using printf() syntax,
  // returns number of bytes written
  size_t printf(const(char)[] format, ...) {
    va_list ap;
    va_start(ap, format);
    auto result = vprintf(format, ap);
    va_end(ap);
    return result;
  }

  // doFormat sink: writes one formatted character to the stream as UTF-8.
  private void doFormatCallback(dchar c) {
    char[4] buf;
    auto b = undead.utf.toUTF8(buf, c);
    writeString(b);
  }

  // writes data to stream using writef() syntax,
  OutputStream writef(...) {
    return writefx(_arguments,_argptr,0);
  }

  // writes data with trailing newline
  OutputStream writefln(...)
{ return writefx(_arguments,_argptr,1); } // writes data with optional trailing newline OutputStream writefx(TypeInfo[] arguments, va_list argptr, int newline=false) { doFormat(&doFormatCallback,arguments,argptr); if (newline) writeLine(""); return this; } /*** * Copies all data from s into this stream. * This may throw ReadException or WriteException on failure. * This restores the file position of s so that it is unchanged. */ void copyFrom(Stream s) { if (seekable) { ulong pos = s.position; s.position = 0; copyFrom(s, s.size); s.position = pos; } else { ubyte[128] buf; while (!s.eof) { size_t m = s.readBlock(buf.ptr, buf.length); writeExact(buf.ptr, m); } } } /*** * Copy a specified number of bytes from the given stream into this one. * This may throw ReadException or WriteException on failure. * Unlike the previous form, this doesn't restore the file position of s. */ void copyFrom(Stream s, ulong count) { ubyte[128] buf; while (count > 0) { size_t n = cast(size_t)(count 1) unget.length = 1; // keep at least 1 so that data ptr stays } // close the stream somehow; the default just flushes the buffer void close() { if (isopen) flush(); readEOF = prevCr = isopen = readable = writeable = seekable = false; } /*** * Read the entire stream and return it as a string. * If the stream is not seekable the contents from the current position to eof * is read and returned. 
*/
  override string toString() {
    if (!readable)
      return super.toString();
    try
    {
        size_t pos;
        size_t rdlen;
        size_t blockSize;
        char[] result;
        if (seekable) {
          // Read the whole stream from the start; the caller's position is
          // restored on the way out.
          ulong orig_pos = position;
          scope(exit) position = orig_pos;
          position = 0;
          blockSize = cast(size_t)size;
          result = new char[blockSize];
          while (blockSize > 0) {
            rdlen = readBlock(&result[pos], blockSize);
            // readBlock returns 0 at end-of-file. Stop there rather than
            // spinning forever when fewer than `size` bytes can be read;
            // the bytes obtained so far are returned.
            if (rdlen == 0)
              break;
            pos += rdlen;
            blockSize -= rdlen;
          }
        } else {
          // Total size unknown: read from the current position until
          // readBlock reports end-of-file, growing the buffer as needed.
          blockSize = 4096;
          result = new char[blockSize];
          while ((rdlen = readBlock(&result[pos], blockSize)) > 0) {
            pos += rdlen;
            blockSize += rdlen;
            result.length = result.length + blockSize;
          }
        }
        return cast(string) result[0 .. pos];
    }
    catch (Throwable)
    {
        // best effort: any failure falls back to the default representation
        return super.toString();
    }
  }

  /***
   * Get a hash of the stream by reading each byte and using it in a CRC-32
   * checksum.
   *
   * Streams that are not both readable and seekable, and any failure while
   * reading, fall back to the default object hash. The stream position is
   * restored afterwards.
   */
  override size_t toHash() @trusted {
    if (!readable || !seekable)
      return super.toHash();
    try
    {
        ulong pos = position;
        scope(exit) position = pos;
        CRC32 crc;
        crc.start();
        position = 0;
        ulong len = size;
        for (ulong i = 0; i < len; i++)
        {
          ubyte c;
          read(c);
          crc.put(c);
        }

        // reinterpret the 4 CRC bytes as (part of) a size_t
        union resUnion
        {
          size_t hash;
          ubyte[4] crcVal;
        }
        resUnion res;
        res.crcVal = crc.finish();
        return res.hash;
    }
    catch (Throwable)
    {
        return super.toHash();
    }
  }

  // helper for checking that the stream is readable
  final protected void assertReadable() {
    if (!readable)
      throw new ReadException("Stream is not readable");
  }
  // helper for checking that the stream is writeable
  final protected void assertWriteable() {
    if (!writeable)
      throw new WriteException("Stream is not writeable");
  }
  // helper for checking that the stream is seekable
  final protected void assertSeekable() {
    if (!seekable)
      throw new SeekException("Stream is not seekable");
  }

  unittest { // unit test for Issue 3363
    import std.stdio;
    immutable fileName = undead.internal.file.deleteme ~ "-issue3363.txt";
    auto w = std.stdio.File(fileName, "w");
    scope (exit) std.file.remove(fileName);
    w.write("one two three");
    w.close();
    auto r = std.stdio.File(fileName, "r");
    const(char)[] constChar;
    string str;
    char[] chars;
    r.readf("%s %s %s", &constChar, &str, &chars);
    assert (constChar == "one", constChar);
    assert (str == "two", str);
    assert (chars == "three", chars);
  }

  unittest { //unit tests for Issue 1668
    void tryFloatRoundtrip(float x, string fmt = "", string pad = "") {
      auto s = new MemoryStream();
      s.writef(fmt, x, pad);
      s.position = 0;

      float f;
      assert(s.readf(&f));
      assert(x == f || (x != x && f != f)); //either equal or both NaN
    }

    tryFloatRoundtrip(1.0);
    tryFloatRoundtrip(1.0, "%f");
    tryFloatRoundtrip(1.0, "", " ");
    tryFloatRoundtrip(1.0, "%f", " ");

    tryFloatRoundtrip(3.14);
    tryFloatRoundtrip(3.14, "%f");
    tryFloatRoundtrip(3.14, "", " ");
    tryFloatRoundtrip(3.14, "%f", " ");

    float nan = float.nan;
    tryFloatRoundtrip(nan);
    tryFloatRoundtrip(nan, "%f");
    tryFloatRoundtrip(nan, "", " ");
    tryFloatRoundtrip(nan, "%f", " ");

    float inf = 1.0/0.0;
    tryFloatRoundtrip(inf);
    tryFloatRoundtrip(inf, "%f");
    tryFloatRoundtrip(inf, "", " ");
    tryFloatRoundtrip(inf, "%f", " ");

    tryFloatRoundtrip(-inf);
    tryFloatRoundtrip(-inf,"%f");
    tryFloatRoundtrip(-inf, "", " ");
    tryFloatRoundtrip(-inf, "%f", " ");
  }
}

/***
 * A base class for streams that wrap a source stream with additional
 * functionality.
 *
 * The method implementations forward read/write/seek calls to the
 * source stream. A FilterStream can change the position of the source stream
 * arbitrarily and may not keep the source stream state in sync with the
 * FilterStream, even upon flushing and closing the FilterStream. It is
 * recommended to not make any assumptions about the state of the source position
 * and read/write state after a FilterStream has acted upon it. Specific subclasses
 * of FilterStream should document how they modify the source stream and if any
 * invariants hold true between the source and filter.
 */
class FilterStream : Stream {
  private Stream s;              // source stream

  /// Property indicating when this stream closes to close the source stream as
  /// well.
  /// Defaults to true.
bool nestClose = true; /// Construct a FilterStream for the given source. this(Stream source) { s = source; resetSource(); } // source getter/setter /*** * Get the current source stream. */ final Stream source(){return s;} /*** * Set the current source stream. * * Setting the source stream closes this stream before attaching the new * source. Attaching an open stream reopens this stream and resets the stream * state. */ void source(Stream s) { close(); this.s = s; resetSource(); } /*** * Indicates the source stream changed state and that this stream should reset * any readable, writeable, seekable, isopen and buffering flags. */ void resetSource() { if (s !is null) { readable = s.readable; writeable = s.writeable; seekable = s.seekable; isopen = s.isOpen; } else { readable = writeable = seekable = false; isopen = false; } readEOF = prevCr = false; } // read from source override size_t readBlock(void* buffer, size_t size) { size_t res = s.readBlock(buffer,size); readEOF = res == 0; return res; } // write to source override size_t writeBlock(const void* buffer, size_t size) { return s.writeBlock(buffer,size); } // close stream override void close() { if (isopen) { super.close(); if (nestClose) s.close(); } } // seek on source override ulong seek(long offset, SeekPos whence) { readEOF = false; return s.seek(offset,whence); } override @property size_t available() { return s.available; } override void flush() { super.flush(); s.flush(); } } /*** * This subclass is for buffering a source stream. * * A buffered stream must be * closed explicitly to ensure the final buffer content is written to the source * stream. The source stream position is changed according to the block size so * reading or writing to the BufferedStream may not change the source stream * position by the same amount. 
*/
class BufferedStream : FilterStream {
    ubyte[] buffer;         // buffer, if any
    size_t bufferCurPos;    // current position in buffer
    size_t bufferLen;       // amount of data in buffer
    bool bufferDirty = false;
    size_t bufferSourcePos; // position in buffer of source stream position
    ulong streamPos;        // absolute position in source stream

    /* Example of relationship between fields:
     *
     *  s             ...01234567890123456789012EOF
     *  buffer                |--                     --|
     *  bufferCurPos                       |
     *  bufferLen             |--            --|
     *  bufferSourcePos                        |
     *
     */

    invariant() {
        assert(bufferSourcePos <= bufferLen);
        assert(bufferCurPos <= bufferLen);
        assert(bufferLen <= buffer.length);
    }

    enum size_t DefaultBufferSize = 8192;

    /***
     * Create a buffered stream for the stream source with the buffer size
     * bufferSize.
     *
     * A bufferSize of 0 allocates no buffer.
     */
    this(Stream source, size_t bufferSize = DefaultBufferSize) {
        super(source);
        if (bufferSize)
            buffer = new ubyte[bufferSize];
    }

    // Re-reads the source flags and discards all buffer bookkeeping.
    override protected void resetSource() {
        super.resetSource();
        streamPos = 0;
        bufferLen = bufferSourcePos = bufferCurPos = 0;
        bufferDirty = false;
    }

    // reads block of data of specified size using any buffered data
    // returns actual number of bytes read
    override size_t readBlock(void* result, size_t len) {
        if (len == 0) return 0;

        assertReadable();

        ubyte* outbuf = cast(ubyte*)result;
        size_t readsize = 0;

        if (bufferCurPos + len < bufferLen) {
            // buffer has all the data so copy it
            outbuf[0 .. len] = buffer[bufferCurPos .. bufferCurPos+len];
            bufferCurPos += len;
            readsize = len;
            goto ExitRead;
        }

        readsize = bufferLen - bufferCurPos;
        if (readsize > 0) {
            // buffer has some data so copy what is left
            outbuf[0 .. readsize] = buffer[bufferCurPos .. bufferLen];
            outbuf += readsize;
            bufferCurPos += readsize;
            len -= readsize;
        }

        flush();

        if (len >= buffer.length) {
            // buffer can't hold the data so fill output buffer directly
            size_t siz = super.readBlock(outbuf, len);
            readsize += siz;
            streamPos += siz;
        } else {
            // read a new block into buffer
            bufferLen = super.readBlock(buffer.ptr, buffer.length);
            if (bufferLen < len) len = bufferLen;
            outbuf[0 .. len] = buffer[0 .. len];
            bufferSourcePos = bufferLen;
            streamPos += bufferLen;
            bufferCurPos = len;
            readsize += len;
        }

      ExitRead:
        return readsize;
    }

    // write block of data of specified size
    // returns actual number of bytes written
    override size_t writeBlock(const void* result, size_t len) {
        assertWriteable();

        ubyte* buf = cast(ubyte*)result;
        size_t writesize = 0;

        if (bufferLen == 0) {
            // buffer is empty so fill it if possible
            if ((len < buffer.length) && (readable)) {
                // read in data if the buffer is currently empty
                bufferLen = s.readBlock(buffer.ptr, buffer.length);
                bufferSourcePos = bufferLen;
                streamPos += bufferLen;

            } else if (len >= buffer.length) {
                // buffer can't hold the data so write it directly and exit
                writesize = s.writeBlock(buf,len);
                streamPos += writesize;
                goto ExitWrite;
            }
        }

        if (bufferCurPos + len <= buffer.length) {
            // buffer has space for all the data so copy it and exit
            buffer[bufferCurPos .. bufferCurPos+len] = buf[0 .. len];
            bufferCurPos += len;
            bufferLen = bufferCurPos > bufferLen ? bufferCurPos : bufferLen;
            writesize = len;
            bufferDirty = true;
            goto ExitWrite;
        }

        writesize = buffer.length - bufferCurPos;
        if (writesize > 0) {
            // buffer can take some data
            buffer[bufferCurPos .. buffer.length] = buf[0 .. writesize];
            bufferCurPos = bufferLen = buffer.length;
            buf += writesize;
            len -= writesize;
            bufferDirty = true;
        }

        assert(bufferCurPos == buffer.length);
        assert(bufferLen == buffer.length);

        flush();

        // the buffer is empty again; write the remainder
        writesize += writeBlock(buf,len);

      ExitWrite:
        return writesize;
    }

    // Seeks inside the buffer when a relative seek stays within the buffered
    // data; otherwise flushes and seeks on the source.
    override ulong seek(long offset, SeekPos whence) {
        assertSeekable();

        if ((whence != SeekPos.Current) ||
            (offset + bufferCurPos < 0) ||
            (offset + bufferCurPos >= bufferLen)) {
            flush();
            streamPos = s.seek(offset,whence);
        } else {
            bufferCurPos += offset;
        }
        readEOF = false;
        return streamPos-bufferSourcePos+bufferCurPos;
    }

    // Buffered readLine - Dave Fladebo
    // reads a line, terminated by either CR, LF, CR/LF, or EOF
    // reusing the memory in buffer if result will fit, otherwise
    // will reallocate (using concatenation)
    template TreadLine(T) {
        T[] readLine(T[] inBuffer)
        {
            size_t lineSize = 0;
            bool   haveCR = false;
            T      c = '\0';
            size_t idx = 0;
            ubyte* pc = cast(ubyte*)&c;   // c is assembled byte by byte

          L0:
            for(;;) {
                size_t start = bufferCurPos;
              L1:
                foreach(ubyte b; buffer[start .. bufferLen]) {
                    bufferCurPos++;
                    pc[idx] = b;
                    if(idx < T.sizeof - 1) {
                        // character not complete yet
                        idx++;
                        continue L1;
                    } else {
                        idx = 0;
                    }
                    if(c == '\n' || haveCR) {
                        // A lone CR ended the line, so c belongs to the next
                        // line: push back the whole character, not just its
                        // last byte. If part of it came from the previous
                        // buffer fill, only the bytes still in this buffer
                        // can be pushed back.
                        if(haveCR && c != '\n')
                            bufferCurPos -= (bufferCurPos < T.sizeof) ? bufferCurPos : T.sizeof;
                        break L0;
                    } else {
                        if(c == '\r') {
                            haveCR = true;
                        } else {
                            if(lineSize < inBuffer.length) {
                                inBuffer[lineSize] = c;
                            } else {
                                inBuffer ~= c;
                            }
                            lineSize++;
                        }
                    }
                }
                flush();
                size_t res = super.readBlock(buffer.ptr, buffer.length);
                if(!res) break L0; // EOF
                bufferSourcePos = bufferLen = res;
                streamPos += res;
            }
            return inBuffer[0 .. lineSize];
        }
    } // template TreadLine(T)

    // Uses the buffered reader unless characters are waiting in the unget
    // buffer, in which case the inherited implementation is used.
    override char[] readLine(char[] inBuffer) {
        if (ungetAvailable())
            return super.readLine(inBuffer);
        else
            return TreadLine!(char).readLine(inBuffer);
    }
    alias readLine = Stream.readLine;

    // wchar counterpart of readLine
    override wchar[] readLineW(wchar[] inBuffer) {
        if (ungetAvailable())
            return super.readLineW(inBuffer);
        else
            return TreadLine!(wchar).readLine(inBuffer);
    }
    alias readLineW = Stream.readLineW;

    // Writes a dirty buffer back to the source, moves the source position to
    // the logical position of this stream and empties the buffer.
    // Throws WriteException if the source does not accept the whole buffer.
    override void flush()
    out {
        assert(bufferCurPos == 0);
        assert(bufferSourcePos == 0);
        assert(bufferLen == 0);
    }
    body {
        if (writeable && bufferDirty) {
            if (bufferSourcePos != 0 && seekable) {
                // move actual file pointer to front of buffer
                // (convert to long before negating: negating the unsigned
                // size_t directly yields a large positive offset wherever
                // size_t is narrower than long)
                streamPos = s.seek(-cast(long)bufferSourcePos, SeekPos.Current);
            }
            // write buffer out
            bufferSourcePos = s.writeBlock(buffer.ptr, bufferLen);
            if (bufferSourcePos != bufferLen) {
                throw new WriteException("Unable to write to stream");
            }
        }
        super.flush();
        long diff = cast(long)bufferCurPos-bufferSourcePos;
        if (diff != 0 && seekable) {
            // move actual file pointer to current position
            streamPos = s.seek(diff, SeekPos.Current);
        }
        // reset buffer data to be empty
        bufferSourcePos = bufferCurPos = bufferLen = 0;
        bufferDirty = false;
    }

    // returns true if end of stream is reached, false otherwise
    override @property bool eof() {
        if ((buffer.length == 0) || !readable) {
            return super.eof;
        }
        // some simple tests to avoid flushing
        if (ungetAvailable() || bufferCurPos != bufferLen)
            return false;
        if (bufferLen == buffer.length)
            flush();
        // try to read more data behind what is buffered
        size_t res = super.readBlock(&buffer[bufferLen],buffer.length-bufferLen);
        bufferSourcePos +=  res;
        bufferLen += res;
        streamPos += res;
        return readEOF;
    }

    // returns size of stream
    override @property ulong size() {
        if (bufferDirty) flush();
        return s.size;
    }

    // returns estimated number of bytes available for immediate reading
    override @property size_t available() {
        return bufferLen - bufferCurPos;
    }
}

/// An exception for File errors.
class StreamFileException: StreamException {
    /// Construct a StreamFileException with given error message.
    this(string msg) { super(msg); }
}

/// An exception for errors during File.open.
class OpenException: StreamFileException {
    /// Construct an OpenException with given error message.
    this(string msg) { super(msg); }
}

/// Specifies the $(LREF File) access mode used when opening the file.
enum FileMode {
    In = 1,     /// Opens the file for reading.
    Out = 2,    /// Opens the file for writing.
    OutNew = 6, /// Opens the file for writing and resets its length to zero, creating it if it doesn't exist.
    Append = 10 /// Opens the file for writing, appending new data to the end of the file.
}

version (Windows) {
    private import core.sys.windows.windows;
    extern (Windows) {
        void FlushFileBuffers(HANDLE hFile);
        DWORD GetFileType(HANDLE hFile);
    }
}
version (Posix) {
    private import core.sys.posix.fcntl;
    private import core.sys.posix.unistd;
    alias HANDLE = int;
}

/// This subclass is for unbuffered file system streams.
class File: Stream {

    version (Windows) {
        private HANDLE hFile;
    }
    version (Posix) {
        private HANDLE hFile = -1;
    }

    /// Create the stream with no open file.
    this() {
        super();
        version (Windows) {
            hFile = null;
        }
        version (Posix) {
            hFile = -1;
        }
        isopen = false;
    }

    // opens existing handle; use with care!
    this(HANDLE hFile, FileMode mode) {
        super();
        this.hFile = hFile;
        readable = cast(bool)(mode & FileMode.In);
        writeable = cast(bool)(mode & FileMode.Out);
        version(Windows) {
            seekable = GetFileType(hFile) == 1; // FILE_TYPE_DISK
        } else {
            // The handle is seekable when lseek succeeds on it.
            // NOTE(review): lseek is called with offset 0 and whence 0; if
            // whence 0 is SEEK_SET this also rewinds the handle - confirm
            // that is intended.
            auto result = lseek(hFile, 0, 0);
            seekable = (result != ~0);
        }
    }

    /***
     * Create the stream with no open file, an open file in read mode, or an open
     * file with explicit file mode.
     * mode, if given, is a combination of FileMode.In
     * (indicating a file that can be read) and FileMode.Out (indicating a file
     * that can be written).
     * Opening a file for reading that doesn't exist will error.
     * Opening a file for writing that doesn't exist will create the file.
     * The FileMode.OutNew mode will open the file for writing and reset the
     * length to zero.
     * The FileMode.Append mode will open the file for writing and move the
     * file position to the end of the file.
     */
    this(string filename, FileMode mode = FileMode.In) {
        this();
        open(filename, mode);
    }

    /***
     * Open a file for the stream, in an identical manner to the constructors.
     * If an error occurs an OpenException is thrown.
     */
    void open(string filename, FileMode mode = FileMode.In) {
        close();
        int access, share, createMode;
        parseMode(mode, access, share, createMode);
        seekable = true;
        readable = cast(bool)(mode & FileMode.In);
        writeable = cast(bool)(mode & FileMode.Out);
        version (Windows) {
            hFile = CreateFileW(filename.tempCString!wchar(), access, share,
                                null, createMode, 0, null);
            isopen = hFile != INVALID_HANDLE_VALUE;
        }
        version (Posix) {
            hFile = core.sys.posix.fcntl.open(filename.tempCString(), access | createMode, share);
            isopen = hFile != -1;
        }
        if (!isopen)
            throw new OpenException(cast(string) ("Cannot open or create file '"
                                                  ~ filename ~ "'"));
        else if ((mode & FileMode.Append) == FileMode.Append)
            seekEnd(0);
    }

    // Translates a FileMode combination into the platform's open arguments:
    // access (read/write selection), share (sharing flags on Windows,
    // permission bits on Posix) and createMode (creation behaviour).
    private void parseMode(int mode,
                           out int access,
                           out int share,
                           out int createMode) {
        version (Windows) {
            share |= FILE_SHARE_READ | FILE_SHARE_WRITE;
            if (mode & FileMode.In) {
                access |= GENERIC_READ;
                createMode = OPEN_EXISTING;
            }
            if (mode & FileMode.Out) {
                access |= GENERIC_WRITE;
                createMode = OPEN_ALWAYS; // will create if not present
            }
            if ((mode & FileMode.OutNew) == FileMode.OutNew) {
                createMode = CREATE_ALWAYS; // resets file
            }
        }
        version (Posix) {
            share = octal!666;
            // Pick the access mode from the requested flags themselves.
            // O_RDONLY, O_WRONLY and O_RDWR are distinct values rather than
            // combinable bits, so OR-ing O_RDONLY with O_WRONLY cannot be
            // used to detect a read/write request.
            const bool wantRead = (mode & FileMode.In) != 0;
            const bool wantWrite = (mode & FileMode.Out) != 0;
            if (wantRead && wantWrite) {
                access = O_RDWR;
            } else if (wantWrite) {
                access = O_WRONLY;
            } else if (wantRead) {
                access = O_RDONLY;
            }
            if (wantWrite) {
                createMode = O_CREAT; // will create if not present
            }
            if ((mode & FileMode.OutNew) == FileMode.OutNew) {
                access |= O_TRUNC; // resets file
            }
        }
    }

    /// Create a file for writing.
void create(string filename) {
        create(filename, FileMode.OutNew);
    }

    /// ditto
    void create(string filename, FileMode mode) {
        close();
        // OutNew is always added to whatever mode the caller asked for
        open(filename, mode | FileMode.OutNew);
    }

    /// Close the current file if it is open; otherwise it does nothing.
    override void close() {
        if (!isopen)
            return;
        super.close();
        if (!hFile)
            return;
        // release the OS handle and mark it as unset
        version (Windows) {
            CloseHandle(hFile);
            hFile = null;
        } else version (Posix) {
            core.sys.posix.unistd.close(hFile);
            hFile = -1;
        }
    }

    // destructor, closes file if still opened
    ~this() { close(); }

    version (Windows) {
        // returns size of stream
        override @property ulong size() {
            assertSeekable();
            uint upperHalf;
            uint lowerHalf = GetFileSize(hFile, &upperHalf);
            // combine the two 32-bit halves into one 64-bit size
            return (cast(ulong)upperHalf << 32) + lowerHalf;
        }
    }

    // Reads up to size bytes from the file into buffer and returns the number
    // of bytes read; a result of 0 sets readEOF.
    override size_t readBlock(void* buffer, size_t size) {
        assertReadable();
        size_t count = size;
        version (Windows) {
            auto dwSize = to!DWORD(size);
            ReadFile(hFile, buffer, dwSize, &dwSize, null);
            count = dwSize;
        } else version (Posix) {
            count = core.sys.posix.unistd.read(hFile, buffer, size);
            // a failed read is reported as 0 bytes
            if (count == -1)
                count = 0;
        }
        readEOF = (count == 0);
        return count;
    }

    // Writes up to size bytes from buffer to the file and returns the number
    // of bytes written.
    override size_t writeBlock(const void* buffer, size_t size) {
        assertWriteable();
        size_t count = size;
        version (Windows) {
            auto dwSize = to!DWORD(size);
            WriteFile(hFile, buffer, dwSize, &dwSize, null);
            count = dwSize;
        } else version (Posix) {
            count = core.sys.posix.unistd.write(hFile, buffer, size);
            // a failed write is reported as 0 bytes
            if (count == -1)
                count = 0;
        }
        return count;
    }

    // Moves the file pointer and returns the new absolute position.
    // Throws SeekException if the pointer cannot be moved.
    override ulong seek(long offset, SeekPos rel) {
        assertSeekable();
        version (Windows) {
            int upper = cast(int)(offset>>32);
            uint lower = SetFilePointer(hFile, cast(int)offset, &upper, rel);
            if ((lower == INVALID_SET_FILE_POINTER) && (GetLastError() != 0))
                throw new SeekException("unable to move file pointer");
            ulong newPos = (cast(ulong)upper << 32) + lower;
        } else version (Posix) {
            auto newPos = lseek(hFile, cast(off_t)offset, rel);
            if (newPos == cast(typeof(newPos))-1)
                throw new SeekException("unable to move file pointer");
        }
        readEOF = false;
        return cast(ulong)newPos;
    }

    /***
     * For a seekable file returns the difference of the size and position
and otherwise returns 0.
     */
    override @property size_t available() {
        if (!seekable)
            return 0;
        ulong remaining = size - position;
        // clamp to what a size_t can represent
        if (remaining > size_t.max)
            remaining = size_t.max;
        return cast(size_t)remaining;
    }

    // OS-specific property, just in case somebody wants
    // to mess with underlying API
    HANDLE handle() { return hFile; }

    // run a few tests
    unittest {
        File file = new File;
        int i = 666;
        auto stream_file = undead.internal.file.deleteme ~ "-stream.$$$";
        file.create(stream_file);
        // should be ok to write
        assert(file.writeable);
        file.writeLine("Testing stream.d:");
        file.writeString("Hello, world!");
        file.write(i);
        // string#1 + string#2 + int should give exactly that
        version (Windows)
            assert(file.position == 19 + 13 + 4);
        version (Posix)
            assert(file.position == 18 + 13 + 4);
        // we must be at the end of file
        assert(file.eof);
        file.close();
        // no operations are allowed when file is closed
        assert(!file.readable && !file.writeable && !file.seekable);
        file.open(stream_file);
        // should be ok to read
        assert(file.readable);
        assert(file.available == file.size);
        char[] line = file.readLine();
        char[] exp = "Testing stream.d:".dup;
        assert(line[0] == 'T');
        assert(line.length == exp.length);
        assert(!std.algorithm.cmp(line, "Testing stream.d:"));
        // jump over "Hello, "
        file.seek(7, SeekPos.Current);
        version (Windows)
            assert(file.position == 19 + 7);
        version (Posix)
            assert(file.position == 18 + 7);
        assert(!std.algorithm.cmp(file.readString(6), "world!"));
        i = 0; file.read(i);
        assert(i == 666);
        // string#1 + string#2 + int should give exactly that
        version (Windows)
            assert(file.position == 19 + 13 + 4);
        version (Posix)
            assert(file.position == 18 + 13 + 4);
        // we must be at the end of file
        assert(file.eof);
        file.close();
        file.open(stream_file,FileMode.OutNew | FileMode.In);
        file.writeLine("Testing stream.d:");
        file.writeLine("Another line");
        file.writeLine("");
        file.writeLine("That was blank");
        file.position = 0;
        char[][] lines;
        foreach(char[] line; file) {
            lines ~= line.dup;
        }
        assert( lines.length == 4 );
        assert( lines[0] == "Testing stream.d:");
        assert( lines[1] == "Another line");
        assert( lines[2] == "");
        assert( lines[3] == "That was blank");
        file.position = 0;
        lines = new char[][4];
        foreach(ulong n, char[] line; file) {
            lines[cast(size_t)(n-1)] = line.dup;
        }
        assert( lines[0] == "Testing stream.d:");
        assert( lines[1] == "Another line");
        assert( lines[2] == "");
        assert( lines[3] == "That was blank");
        file.close();
        std.file.remove(stream_file);
    }
}

/***
 * This subclass is for buffered file system streams.
 *
 * It is a convenience class for wrapping a File in a BufferedStream.
 * A buffered stream must be closed explicitly to ensure the final buffer
 * content is written to the file.
 */
class BufferedFile: BufferedStream {

    /// opens file for reading
    this() {
        super(new File());
    }

    /// opens file in requested mode and buffer size
    this(string filename, FileMode mode = FileMode.In,
         size_t bufferSize = DefaultBufferSize) {
        super(new File(filename,mode),bufferSize);
    }

    /// opens file for reading with requested buffer size
    this(File file, size_t bufferSize = DefaultBufferSize) {
        super(file,bufferSize);
    }

    /// opens existing handle; use with care!
this(HANDLE hFile, FileMode mode, size_t buffersize = DefaultBufferSize) {
        super(new File(hFile,mode),buffersize);
    }

    /// opens file in requested mode
    void open(string filename, FileMode mode = FileMode.In) {
        // the source is treated as a File; reopen it, then resync our state
        (cast(File)s).open(filename,mode);
        resetSource();
    }

    /// creates file in requested mode
    void create(string filename, FileMode mode = FileMode.OutNew) {
        // the source is treated as a File; recreate it, then resync our state
        (cast(File)s).create(filename,mode);
        resetSource();
    }

    // run a few tests same as File
    unittest {
        BufferedFile file = new BufferedFile;
        int i = 666;
        auto stream_file = undead.internal.file.deleteme ~ "-stream.$$$";
        file.create(stream_file);
        // should be ok to write
        assert(file.writeable);
        file.writeLine("Testing stream.d:");
        file.writeString("Hello, world!");
        file.write(i);
        // string#1 + string#2 + int should give exactly that
        version (Windows)
            assert(file.position == 19 + 13 + 4);
        version (Posix)
            assert(file.position == 18 + 13 + 4);
        // we must be at the end of file
        assert(file.eof);
        long oldsize = cast(long)file.size;
        file.close();
        // no operations are allowed when file is closed
        assert(!file.readable && !file.writeable && !file.seekable);
        file.open(stream_file);
        // should be ok to read
        assert(file.readable);
        // test getc/ungetc and size
        char c1 = file.getc();
        file.ungetc(c1);
        assert( file.size == oldsize );
        assert(!std.algorithm.cmp(file.readLine(), "Testing stream.d:"));
        // jump over "Hello, "
        file.seek(7, SeekPos.Current);
        version (Windows)
            assert(file.position == 19 + 7);
        version (Posix)
            assert(file.position == 18 + 7);
        assert(!std.algorithm.cmp(file.readString(6), "world!"));
        i = 0; file.read(i);
        assert(i == 666);
        // string#1 + string#2 + int should give exactly that
        version (Windows)
            assert(file.position == 19 + 13 + 4);
        version (Posix)
            assert(file.position == 18 + 13 + 4);
        // we must be at the end of file
        assert(file.eof);
        file.close();
        std.file.remove(stream_file);
    }

}

/// UTF byte-order-mark signatures
enum BOM {
    UTF8,    /// UTF-8
    UTF16LE, /// UTF-16 Little Endian
    UTF16BE, /// UTF-16 Big Endian
    UTF32LE, /// UTF-32 Little Endian
    UTF32BE, /// UTF-32 Big Endian
}

private enum int NBOMS = 5;

// Endianness selected by each BOM, indexed by BOM value.
immutable Endian[NBOMS] BOMEndian =
    [ std.system.endian,
      Endian.littleEndian, Endian.bigEndian,
      Endian.littleEndian, Endian.bigEndian
    ];

// Byte signature of each BOM, indexed by BOM value.
immutable ubyte[][NBOMS] ByteOrderMarks =
    [ [0xEF, 0xBB, 0xBF],
      [0xFF, 0xFE],
      [0xFE, 0xFF],
      [0xFF, 0xFE, 0x00, 0x00],
      [0x00, 0x00, 0xFE, 0xFF]
    ];


/***
 * This subclass wraps a stream with big-endian or little-endian byte order
 * swapping.
 *
 * UTF Byte-Order-Mark (BOM) signatures can be read and deduced or
 * written.
 * Note that an EndianStream should not be used as the source of another
 * FilterStream since a FilterStream calls the source with byte-oriented
 * read/write requests and the EndianStream will not perform any byte swapping.
 * The EndianStream reads and writes binary data (non-getc functions) in a
 * one-to-one
 * manner with the source stream so the source stream's position and state will be
 * kept in sync with the EndianStream if only non-getc functions are called.
 */
class EndianStream : FilterStream {

    Endian endian;        /// Endianness property of the source stream.

    /***
     * Create the endian stream for the source stream source with endianness end.
     * The default endianness is the native byte order.
     * The Endian type is defined
     * in the std.system module.
     */
    this(Stream source, Endian end = std.system.endian) {
        super(source);
        endian = end;
    }

    /***
     * Return -1 if no BOM and otherwise read the BOM and return it.
     *
     * If there is no BOM or if bytes beyond the BOM are read then the bytes read
     * are pushed back onto the ungetc buffer or ungetcw buffer.
     * Pass ungetCharSize == 2 to use
     * ungetcw instead of ungetc when no BOM is present.
*/
    int readBOM(int ungetCharSize = 1) {
        ubyte[4] BOM_buffer;
        int n = 0;       // the number of read bytes
        int result = -1; // the last match or -1
        // Try every signature in turn, reading further bytes only when a
        // candidate needs more than has been read so far.
        for (int i=0; i < NBOMS; ++i) {
            int j;
            immutable ubyte[] bom = ByteOrderMarks[i];
            for (j=0; j < bom.length; ++j) {
                if (n <= j) { // have to read more
                    if (eof)
                        break;
                    readExact(&BOM_buffer[n++],1);
                }
                if (BOM_buffer[j] != bom[j])
                    break;
            }
            if (j == bom.length) // found a match
                result = i;
        }
        ptrdiff_t m = 0;  // number of bytes that belong to the matched BOM
        if (result != -1) {
            endian = BOMEndian[result]; // set stream endianness
            m = ByteOrderMarks[result].length;
        }
        if ((ungetCharSize == 1 && result == -1) || (result == BOM.UTF8)) {
            // push back the surplus bytes one at a time
            while (n-- > m)
                ungetc(BOM_buffer[n]);
        } else { // should eventually support unget for dchar as well
            if (n & 1) // make sure we have an even number of bytes
                readExact(&BOM_buffer[n++],1);
            // push back the surplus as wchars in the stream's byte order
            while (n > m) {
                n -= 2;
                wchar cw = *(cast(wchar*)&BOM_buffer[n]);
                fixBO(&cw,2);
                ungetcw(cw);
            }
        }
        return result;
    }

    /***
     * Correct the byte order of buffer to match native endianness.
     *
     * When the stream endianness differs from the native one, the size bytes
     * starting at buffer are reversed in place; otherwise nothing is changed.
     * Every size is handled, including sizes that are not a multiple of four.
     */
    final void fixBO(const(void)* buffer, size_t size) {
        if (endian == std.system.endian || size < 2)
            return;
        // Swap bytes from both ends towards the middle. This gives a plain
        // reversal with no size-dependent special cases and no unsigned
        // length arithmetic that could wrap around.
        ubyte* front = cast(ubyte*)buffer;
        ubyte* back = front + (size - 1);
        while (front < back) {
            ubyte tmp = *front;
            *front = *back;
            *back = tmp;
            ++front;
            --back;
        }
    }

    /***
     * Correct the byte order of the given buffer in blocks of the given size and
     * repeated the given number of times.
     * size must be even.
*/
    final void fixBlockBO(void* buffer, uint size, size_t repeat) {
        // fix one block, then step to the next one
        for (; repeat--; buffer += size)
            fixBO(buffer,size);
    }

    // Typed reads: fetch exactly x.sizeof bytes, then correct the byte order
    // of every multi-byte value. Complex values are corrected as two
    // separate components.
    override void read(out byte x) {
        readExact(&x, x.sizeof);
    }
    override void read(out ubyte x) {
        readExact(&x, x.sizeof);
    }
    override void read(out short x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out ushort x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out int x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out uint x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out long x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out ulong x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out float x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out double x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out real x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out ifloat x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out idouble x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out ireal x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out cfloat x) {
        readExact(&x, x.sizeof);
        fixBlockBO(&x,float.sizeof,2);
    }
    override void read(out cdouble x) {
        readExact(&x, x.sizeof);
        fixBlockBO(&x,double.sizeof,2);
    }
    override void read(out creal x) {
        readExact(&x, x.sizeof);
        fixBlockBO(&x,real.sizeof,2);
    }
    override void read(out char x) {
        readExact(&x, x.sizeof);
    }
    override void read(out wchar x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }
    override void read(out dchar x) {
        readExact(&x, x.sizeof);
        fixBO(&x,x.sizeof);
    }

    // Returns the next wchar, taking it from the unget buffer when one is
    // pending and otherwise reading two bytes and correcting their order.
    override wchar getcw() {
        wchar c;
        if (prevCr) {
            prevCr = false;
            c = getcw();
            if (c != '\n')
                return c;
        }
        if (unget.length > 1) {
            // take the most recently pushed-back character
            c = unget[$ - 1];
            unget.length = unget.length - 1;
        } else {
            void* buf = &c;
            size_t n = readBlock(buf,2);
            // only half a character arrived: try once more for the rest
            if (n == 1 && readBlock(buf+1,1) == 0)
                throw new ReadException("not enough data in stream");
            fixBO(&c,c.sizeof);
        }
        return c;
    }

    // Reads length wchars and corrects the byte order of each one.
    override wchar[] readStringW(size_t length) {
        wchar[] result = new wchar[length];
        readExact(result.ptr, length * wchar.sizeof);
        fixBlockBO(result.ptr, wchar.sizeof, length);
        return result;
    }

    /// Write the specified BOM b to the source stream.
    void writeBOM(BOM b) {
        immutable ubyte[] bom = ByteOrderMarks[b];
        writeBlock(bom.ptr, bom.length);
    }

    // Typed writes: correct the byte order of every multi-byte value, then
    // write exactly x.sizeof bytes. Complex values are corrected as two
    // separate components.
    override void write(byte x) {
        writeExact(&x, x.sizeof);
    }
    override void write(ubyte x) {
        writeExact(&x, x.sizeof);
    }
    override void write(short x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(ushort x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(int x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(uint x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(long x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(ulong x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(float x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(double x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(real x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(ifloat x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(idouble x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(ireal x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(cfloat x) {
        fixBlockBO(&x,float.sizeof,2);
        writeExact(&x, x.sizeof);
    }
    override void write(cdouble x) {
        fixBlockBO(&x,double.sizeof,2);
        writeExact(&x, x.sizeof);
    }
    override void write(creal x) {
        fixBlockBO(&x,real.sizeof,2);
        writeExact(&x, x.sizeof);
    }
    override void write(char x) {
        writeExact(&x, x.sizeof);
    }
    override void write(wchar x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }
    override void write(dchar x) {
        fixBO(&x,x.sizeof);
        writeExact(&x, x.sizeof);
    }

    // Writes every wchar of str to the source in the stream's byte order.
    override void writeStringW(const(wchar)[] str) {
        foreach(wchar cw;str) {
            fixBO(&cw,2);
            s.writeExact(&cw, 2);
        }
    }

    // At end of stream only when the source is and nothing is pushed back.
    override @property bool eof() {
        return s.eof && !ungetAvailable();
    }

    override @property ulong size() {
        return s.size;
    }

    unittest {
        MemoryStream m;
        m = new MemoryStream ();
        EndianStream em = new EndianStream(m,Endian.bigEndian);
        uint x = 0x11223344;
        em.write(x);
        assert( m.data[0] == 0x11 );
        assert( m.data[1] == 0x22 );
        assert( m.data[2] == 0x33 );
        assert( m.data[3] == 0x44 );
        em.position = 0;
        ushort x2 = 0x5566;
        em.write(x2);
        assert( m.data[0] == 0x55 );
        assert( m.data[1] == 0x66 );
        em.position = 0;
        static ubyte[12] x3 = [1,2,3,4,5,6,7,8,9,10,11,12];
        em.fixBO(x3.ptr,12);
        if (std.system.endian == Endian.littleEndian) {
            assert( x3[0] == 12 );
            assert( x3[1] == 11 );
            assert( x3[2] == 10 );
            assert( x3[4] == 8 );
            assert( x3[5] == 7 );
            assert( x3[6] == 6 );
            assert( x3[8] == 4 );
            assert( x3[9] == 3 );
            assert( x3[10] == 2 );
            assert( x3[11] == 1 );
        }
        em.endian = Endian.littleEndian;
        em.write(x);
        assert( m.data[0] == 0x44 );
        assert( m.data[1] == 0x33 );
        assert( m.data[2] == 0x22 );
        assert( m.data[3] == 0x11 );
        em.position = 0;
        em.write(x2);
        assert( m.data[0] == 0x66 );
        assert( m.data[1] == 0x55 );
        em.position = 0;
        em.fixBO(x3.ptr,12);
        if (std.system.endian == Endian.bigEndian) {
            assert( x3[0] == 12 );
            assert( x3[1] == 11 );
            assert( x3[2] == 10 );
            assert( x3[4] == 8 );
            assert( x3[5] == 7 );
            assert( x3[6] == 6 );
            assert( x3[8] == 4 );
            assert( x3[9] == 3 );
            assert( x3[10] == 2 );
            assert( x3[11] == 1 );
        }
        em.writeBOM(BOM.UTF8);
        assert( m.position == 3 );
        assert( m.data[0] == 0xEF );
        assert( m.data[1] == 0xBB );
        assert( m.data[2] == 0xBF );
        em.writeString ("Hello, world");
        em.position = 0;
        assert( m.position == 0 );
        assert( em.readBOM() == BOM.UTF8 );
        assert( m.position == 3 );
        assert( em.getc() == 'H' );
        em.position = 0;
        em.writeBOM(BOM.UTF16BE);
        assert( m.data[0] == 0xFE );
        assert( m.data[1] == 0xFF );
        em.position = 0;
        em.writeBOM(BOM.UTF16LE);
        assert( m.data[0] == 0xFF );
assert( m.data[1] == 0xFE ); em.position = 0; em.writeString ("Hello, world"); em.position = 0; assert( em.readBOM() == -1 ); assert( em.getc() == 'H' ); assert( em.getc() == 'e' ); assert( em.getc() == 'l' ); assert( em.getc() == 'l' ); em.position = 0; } } /*** * Parameterized subclass that wraps an array-like buffer with a stream * interface. * * The type Buffer must support the length property, opIndex and opSlice. * Compile in release mode when directly instantiating a TArrayStream to avoid * link errors. */ class TArrayStream(Buffer): Stream { Buffer buf; // current data ulong len; // current data length ulong cur; // current file position /// Create the stream for the the buffer buf. Non-copying. this(Buffer buf) { super (); this.buf = buf; this.len = buf.length; readable = writeable = seekable = true; } // ensure subclasses don't violate this invariant() { assert(len <= buf.length); assert(cur <= len); } override size_t readBlock(void* buffer, size_t size) { assertReadable(); ubyte* cbuf = cast(ubyte*) buffer; if (len - cur < size) size = cast(size_t)(len - cur); ubyte[] ubuf = cast(ubyte[])buf[cast(size_t)cur .. cast(size_t)(cur + size)]; cbuf[0 .. size] = ubuf[]; cur += size; return size; } override size_t writeBlock(const void* buffer, size_t size) { assertWriteable(); ubyte* cbuf = cast(ubyte*) buffer; ulong blen = buf.length; if (cur + size > blen) size = cast(size_t)(blen - cur); ubyte[] ubuf = cast(ubyte[])buf[cast(size_t)cur .. cast(size_t)(cur + size)]; ubuf[] = cbuf[0 .. 
size]; cur += size; if (cur > len) len = cur; return size; } override ulong seek(long offset, SeekPos rel) { assertSeekable(); long scur; // signed to saturate to 0 properly switch (rel) { case SeekPos.Set: scur = offset; break; case SeekPos.Current: scur = cast(long)(cur + offset); break; case SeekPos.End: scur = cast(long)(len + offset); break; default: assert(0); } if (scur < 0) cur = 0; else if (scur > len) cur = len; else cur = cast(ulong)scur; return cur; } override @property size_t available () { return cast(size_t)(len - cur); } /// Get the current memory data in total. @property ubyte[] data() { if (len > size_t.max) throw new StreamException("Stream too big"); const(void)[] res = buf[0 .. cast(size_t)len]; return cast(ubyte[])res; } override string toString() { // assume data is UTF8 return to!(string)(cast(char[])data); } } /* Test the TArrayStream */ unittest { char[100] buf; TArrayStream!(char[]) m; m = new TArrayStream!(char[]) (buf); assert (m.isOpen); m.writeString ("Hello, world"); assert (m.position == 12); assert (m.available == 88); assert (m.seekSet (0) == 0); assert (m.available == 100); assert (m.seekCur (4) == 4); assert (m.available == 96); assert (m.seekEnd (-8) == 92); assert (m.available == 8); assert (m.size == 100); assert (m.seekSet (4) == 4); assert (m.readString (4) == "o, w"); m.writeString ("ie"); assert (buf[0..12] == "Hello, wield"); assert (m.position == 10); assert (m.available == 90); assert (m.size == 100); m.seekSet (0); assert (m.printf ("Answer is %d", 42) == 12); assert (buf[0..12] == "Answer is 42"); } /// This subclass reads and constructs an array of bytes in memory. class MemoryStream: TArrayStream!(ubyte[]) { /// Create the output buffer and setup for reading, writing, and seeking. // clear to an empty buffer. this() { this(cast(ubyte[]) null); } /*** * Create the output buffer and setup for reading, writing, and seeking. * Load it with specific input data. 
*/
  this(ubyte[] buf) { super (buf); }
  this(byte[] buf) { this(cast(ubyte[]) buf); } /// ditto
  this(char[] buf) { this(cast(ubyte[]) buf); } /// ditto

  /**
   * Ensure the stream can write count extra bytes from cursor position
   * without an allocation.
   *
   * When cur + count exceeds the current buffer length, the buffer is grown
   * to twice that sum so that a run of small writes does not reallocate each
   * time. Nothing happens when the buffer is already large enough.
   *
   * Params:
   *   count = number of bytes about to be written at the current position
   */
  void reserve(size_t count) {
    // The new length is computed in ulong (cur is ulong) and narrowed to
    // size_t, the array length type. The previous cast(uint) truncated the
    // value on 64-bit targets as soon as it exceeded uint.max, leaving the
    // buffer too small and causing the following write to be silently clipped.
    if (cur + count > buf.length)
      buf.length = cast(size_t)((cur + count) * 2);
  }

  /// Grow the buffer as needed, then write through TArrayStream.writeBlock.
  override size_t writeBlock(const void* buffer, size_t size) {
    reserve(size);
    return super.writeBlock(buffer,size);
  }

  unittest {
    MemoryStream m;

    m = new MemoryStream ();
    assert (m.isOpen);
    m.writeString ("Hello, world");
    assert (m.position == 12);
    assert (m.seekSet (0) == 0);
    assert (m.available == 12);
    assert (m.seekCur (4) == 4);
    assert (m.available == 8);
    assert (m.seekEnd (-8) == 4);
    assert (m.available == 8);
    assert (m.size == 12);
    assert (m.readString (4) == "o, w");
    m.writeString ("ie");
    assert (cast(char[]) m.data == "Hello, wield");
    m.seekEnd (0);
    m.writeString ("Foo");
    assert (m.position == 15);
    assert (m.available == 0);
    m.writeString ("Foo foo foo foo foo foo foo");
    assert (m.position == 42);
    m.position = 0;
    assert (m.available == 42);
    m.writef("%d %d %s",100,345,"hello");
    auto str = m.toString();
    assert (str[0..13] == "100 345 hello", str[0 .. 13]);
    assert (m.available == 29);
    assert (m.position == 13);

    MemoryStream m2;
    m.position = 3;
    m2 = new MemoryStream ();
    m2.writeString("before");
    m2.copyFrom(m,10);
    str = m2.toString();
    assert (str[0..16] == "before 345 hello");
    m2.position = 3;
    m2.copyFrom(m);
    auto str2 = m.toString();
    str = m2.toString();
    assert (str == ("bef" ~ str2));
  }
}

import std.mmfile;

/***
 * This subclass wraps a memory-mapped file with the stream API.
 * See std.mmfile module.
 */
class MmFileStream : TArrayStream!(MmFile) {

  /// Create stream wrapper for file.
this(MmFile file) {
    super (file);
    // Any mapping mode above plain read makes the stream writeable.
    writeable = file.mode() > MmFile.Mode.read;
  }

  /// Flush the stream and then the mapped file; a no-op once closed.
  override void flush() {
    if (!isopen)
      return;
    super.flush();
    buf.flush();
  }

  /// Close the stream, then destroy and drop the mapped file; a no-op once closed.
  override void close() {
    if (!isopen)
      return;
    super.close();
    buf.destroy();
    buf = null;
  }
}

/* Test MmFileStream against a 100-byte scratch file, then reopen it read-only. */
unittest {
  auto test_file = undead.internal.file.deleteme ~ "-testing.txt";
  auto mf = new MmFile(test_file,MmFile.Mode.readWriteNew,100,null);
  auto m = new MmFileStream (mf);

  m.writeString ("Hello, world");
  assert (m.position == 12);
  assert (m.seekSet (0) == 0);
  assert (m.seekCur (4) == 4);
  assert (m.seekEnd (-8) == 92);
  assert (m.size == 100);
  assert (m.seekSet (4));
  assert (m.readString (4) == "o, w");
  m.writeString ("ie");
  assert ((cast(char[]) m.data)[0 .. 12] == "Hello, wield");

  m.position = 12;
  m.writeString ("Foo");
  assert (m.position == 15);
  m.writeString ("Foo foo foo foo foo foo foo");
  assert (m.position == 42);
  m.close();

  // Reopening with the default mode must yield a read-only stream.
  mf = new MmFile(test_file);
  m = new MmFileStream (mf);
  assert (!m.writeable);
  assert (m.readString(12) == "Hello, wield");
  m.close();
  std.file.remove(test_file);
}

/***
 * This subclass slices off a portion of another stream, making seeking relative
 * to the boundaries of the slice.
 *
 * It could be used to section a large file into a
 * set of smaller files, such as with tar archives. Reading and writing a
 * SliceStream does not modify the position of the source stream if it is
 * seekable.
 */
class SliceStream : FilterStream {
  private ulong pos;    // our position relative to low
  private ulong low;    // low stream offset.
  private ulong high;   // high stream offset.
  private bool bounded; // upper-bounded by high.

  /***
   * Indicate both the source stream to use for reading from and the low part of
   * the slice.
   *
   * The high part of the slice is dependent upon the end of the source
   * stream, so that if you write beyond the end it resizes the stream normally.
*/
  this (Stream s, ulong low)
  in {
    assert (low <= s.size);
  }
  body {
    super(s);
    this.low = low;
    this.high = 0;
    this.bounded = false;
  }

  /***
   * Indicate the high index as well.
   *
   * Attempting to read or write past the high
   * index results in the end being clipped off.
   */
  this (Stream s, ulong low, ulong high)
  in {
    assert (low <= high);
    assert (high <= s.size);
  }
  body {
    super(s);
    this.low = low;
    this.high = high;
    this.bounded = true;
  }

  // pos never exceeds the slice length: high - low when bounded, otherwise
  // the source size minus low.
  invariant() {
    if (bounded)
      assert (pos <= high - low);
    else
      // size() does not appear to be const, though it should be
      assert (pos <= (cast()s).size - low);
  }

  // Read up to `size` bytes from the slice.
  // For a bounded slice the request is clipped to the high - low - pos bytes
  // left in the slice. When the stream is seekable, the source is moved to
  // low + pos for the read, pos is recomputed from where the source ended up,
  // and the source position is restored afterwards. When it is not seekable,
  // pos is left unchanged.
  override size_t readBlock (void *buffer, size_t size) {
    assertReadable();

    if (bounded && size > high - low - pos)
        size = cast(size_t)(high - low - pos);
    ulong bp = s.position; // source position to restore after the read
    if (seekable)
      s.position = low + pos;
    size_t ret = super.readBlock(buffer, size);
    if (seekable) {
      pos = s.position - low;
      s.position = bp;
    }
    return ret;
  }

  // Write up to `size` bytes into the slice, with the same clipping and
  // save/restore of the source position as readBlock. Unlike readBlock, this
  // writes through s.writeBlock directly rather than through super.
  override size_t writeBlock (const void *buffer, size_t size) {
    assertWriteable();

    if (bounded && size > high - low - pos)
        size = cast(size_t)(high - low - pos);
    ulong bp = s.position; // source position to restore after the write
    if (seekable)
      s.position = low + pos;
    size_t ret = s.writeBlock(buffer, size);
    if (seekable) {
      pos = s.position - low;
      s.position = bp;
    }
    return ret;
  }

  // Move the slice-relative position. The target is computed as a signed
  // value and clamped to [0, slice length], where the slice length is
  // high - low when bounded and s.size - low otherwise. Clears readEOF and
  // returns the new position. The source stream is not touched.
  override ulong seek(long offset, SeekPos rel) {
    assertSeekable();
    long spos;

    switch (rel) {
      case SeekPos.Set:
        spos = offset;
        break;
      case SeekPos.Current:
        spos = cast(long)(pos + offset);
        break;
      case SeekPos.End:
        if (bounded)
          spos = cast(long)(high - low + offset);
        else
          spos = cast(long)(s.size - low + offset);
        break;
      default:
        assert(0);
    }

    if (spos < 0)
      pos = 0;
    else if (bounded && spos > high - low)
      pos = high - low;
    else if (!bounded && spos > s.size - low)
      pos = s.size - low;
    else
      pos = cast(ulong)spos;

    readEOF = false;
    return pos;
  }

  // Bytes readable from the slice position, derived from the source's own
  // `available` window [bp, bp + res]. Returns 0 when the slice position
  // (pos + low) lies outside that window; otherwise the distance to the end
  // of the window, capped at `high` for a bounded slice.
  override @property size_t available() {
    size_t res = s.available;
    ulong bp = s.position;
    if (bp <= pos+low && pos+low <= bp+res) {
      if (!bounded || bp+res <= high)
        return cast(size_t)(bp + res - pos - low);
      else if (high <= bp+res)
        return cast(size_t)(high - pos - low);
    }
    return 0;
  }

  unittest {
    MemoryStream m;
    SliceStream s;

    m = new MemoryStream ((cast(char[])"Hello, world").dup);
    s = new SliceStream (m, 4, 8);
    assert (s.size == 4);
    assert (m.position == 0);
    assert (s.position == 0);
    assert (m.available == 12);
    assert (s.available == 4);

    // Writing 5 bytes into a 4-byte bounded slice is clipped to 4.
    assert (s.writeBlock (cast(char *) "Vroom", 5) == 4);
    assert (m.position == 0);
    assert (s.position == 4);
    assert (m.available == 12);
    assert (s.available == 0);
    assert (s.seekEnd (-2) == 2);
    assert (s.available == 2);
    assert (s.seekEnd (2) == 4);
    assert (s.available == 0);
    assert (m.position == 0);
    assert (m.available == 12);

    m.seekEnd(0);
    m.writeString("\nBlaho");
    assert (m.position == 18);
    assert (m.available == 0);
    assert (s.position == 4);
    assert (s.available == 0);

    // An unbounded slice follows the end of the source stream.
    s = new SliceStream (m, 4);
    assert (s.size == 14);
    assert (s.toString () == "Vrooorld\nBlaho");
    s.seekEnd (0);
    assert (s.available == 0);

    s.writeString (", etcetera.");
    assert (s.position == 25);
    assert (s.seekSet (0) == 0);
    assert (s.size == 25);
    assert (m.position == 18);
    assert (m.size == 29);
    assert (m.toString() == "HellVrooorld\nBlaho, etcetera.");
  }
}
undeaD-1.0.10/src/undead/string.d000066400000000000000000000157651346374113600165210ustar00rootroot00000000000000
/**
 * Contains the obsolete pattern matching functions from Phobos'
 * `std.string`.
 */
module undead.string;

import std.traits;

/***********************************************
 * See if character c is in the pattern.
 * Patterns:
 *
 *  A $(I pattern) is an array of characters much like a $(I character
 *  class) in regular expressions. A sequence of characters
 *  can be given, such as "abcde". The '-' can represent a range
 *  of characters, as "a-e" represents the same pattern as "abcde".
 *  "a-fA-F0-9" represents all the hex characters.
 *  If the first character of a pattern is '^', then the pattern
 *  is negated, i.e. "^0-9" means any character except a digit.
*  The functions inPattern, $(B countchars), $(B removechars),
 *  and $(B squeeze) use patterns.
 *
 * Note: In the future, the pattern syntax may be improved
 *  to be more like regular expression character classes.
 *
 * Params:
 *   c = the character to look for
 *   pattern = the pattern to test c against
 *
 * Returns: true if c matches the pattern, taking a leading '^' into account.
 */
bool inPattern(S)(dchar c, in S pattern) @safe pure @nogc
if (isSomeString!S)
{
    bool result = false;    // becomes true when the pattern is negated by '^'
    int range = 0;          // 1 while p is the upper end of an "a-b" range
    dchar lastc;            // lower end of the range currently being parsed

    foreach (size_t i, dchar p; pattern)
    {
        if (p == '^' && i == 0)
        {
            result = true;
            // A pattern consisting of just "^" matches the '^' character itself.
            if (i + 1 == pattern.length)
                return (c == p);    // or should this be an error?
        }
        else if (range)
        {
            range = 0;
            // `c == p` keeps the upper end matching even for a reversed
            // range such as "z-a".
            if (lastc <= c && c <= p || c == p)
                return !result;
        }
        // '-' only denotes a range when it is neither the first pattern
        // character (after an optional '^') nor the last one; `result`
        // converts to 0 or 1 in the index comparison.
        else if (p == '-' && i > result && i + 1 < pattern.length)
        {
            range = 1;
            continue;       // keep lastc: it is the lower end of the range
        }
        else if (c == p)
            return !result;
        lastc = p;
    }
    return result;
}


@safe pure @nogc unittest
{
    assertCTFEable!(
    {
    assert(inPattern('x', "x") == 1);
    assert(inPattern('x', "y") == 0);
    assert(inPattern('x', string.init) == 0);
    assert(inPattern('x', "^y") == 1);
    assert(inPattern('x', "yxxy") == 1);
    assert(inPattern('x', "^yxxy") == 0);
    assert(inPattern('x', "^abcd") == 1);
    assert(inPattern('^', "^^") == 0);
    assert(inPattern('^', "^") == 1);
    assert(inPattern('^', "a^") == 1);
    assert(inPattern('x', "a-z") == 1);
    assert(inPattern('x', "A-Z") == 0);
    assert(inPattern('x', "^a-z") == 0);
    assert(inPattern('x', "^A-Z") == 1);
    assert(inPattern('-', "a-") == 1);
    assert(inPattern('-', "^A-") == 0);
    assert(inPattern('a', "z-a") == 1);
    assert(inPattern('z', "z-a") == 1);
    assert(inPattern('x', "z-a") == 0);
    });
}


/**
 * See if character c is in the intersection of the patterns.
 *
 * Params:
 *   c = the character to look for
 *   patterns = the patterns that c must match; any string type is accepted
 *
 * Returns: true if c matches every pattern (and therefore also when
 * patterns is empty), false as soon as one pattern does not match.
 */
bool inPattern(S)(dchar c, S[] patterns) @safe pure @nogc
if (isSomeString!S)
{
    // The element type is inferred: spelling it as `string` made this
    // overload fail to compile for wstring, dstring and mutable char[]
    // patterns even though the template constraint admits them.
    foreach (pattern; patterns)
    {
        if (!inPattern(c, pattern))
        {
            return false;
        }
    }
    return true;
}

@safe pure unittest
{
    assert(inPattern('x', ["a-z", "^0-9"]));
    assert(!inPattern('5', ["a-z0-9", "^0-9"]));
    assert(inPattern('x', ["a-z"w, "^0-9"w]));
    assert(inPattern('x', ["a-z"d]));
    string[] none;
    assert(inPattern('x', none));
}


/**
 * Count characters in s that match pattern.
*/
size_t countchars(S, S1)(S s, in S1 pattern) @safe pure @nogc
if (isSomeString!S && isSomeString!S1)
{
    size_t matches;
    foreach (dchar ch; s)
    {
        if (inPattern(ch, pattern))
            ++matches;
    }
    return matches;
}

@safe pure @nogc unittest
{
    assertCTFEable!(
    {
    assert(countchars("abc", "a-c") == 3);
    assert(countchars("hello world", "or") == 3);
    });
}


/**
 * Return string that is s with all characters removed that match pattern.
 *
 * s itself is returned when nothing matches; a copy is only made once the
 * first matching character is found.
 */
S removechars(S)(S s, in S pattern) @safe pure
if (isSomeString!S)
{
    import std.utf : encode;

    Unqual!(typeof(s[0]))[] kept;
    bool copying = false;

    foreach (size_t idx, dchar ch; s)
    {
        if (!inPattern(ch, pattern))
        {
            // Survivors only need re-encoding after the first removal.
            if (copying)
                encode(kept, ch);
        }
        else if (!copying)
        {
            // First removal: start the copy from the untouched prefix.
            copying = true;
            kept = s[0 .. idx].dup;
        }
    }
    if (copying)
        return kept;
    return s;
}

@safe pure unittest
{
    assertCTFEable!(
    {
    assert(removechars("abc", "a-c").length == 0);
    assert(removechars("hello world", "or") == "hell wld");
    assert(removechars("hello world", "d") == "hello worl");
    assert(removechars("hah", "h") == "a");
    });
}

@safe pure unittest
{
    assert(removechars("abc", "x") == "abc");
}

/***************************************************
 * Return string where sequences of a character in s[] from pattern[]
 * are replaced with a single instance of that character.
 * If pattern is null, it defaults to all characters.
 */
S squeeze(S)(S s, in S pattern = null)
{
    import std.utf : encode, stride;

    Unqual!(typeof(s[0]))[] r;  // output copy, allocated lazily
    dchar prev;                 // last character that matched the pattern
    size_t keepEnd;             // end of the prefix of s that is kept verbatim
    bool inRun;                 // previous character matched the pattern
    bool changed;               // at least one character has been dropped

    foreach (size_t idx, dchar ch; s)
    {
        if (inRun && prev == ch)
        {
            // Repeat of the character that started the run: drop it.
            changed = true;
            continue;
        }

        immutable matched = pattern is null || inPattern(ch, pattern);
        inRun = matched;

        if (changed)
        {
            if (r is null)
                r = s[0 .. keepEnd].dup;
            encode(r, ch);
        }
        else if (matched)
            keepEnd = idx + stride(s, idx);

        if (matched)
            prev = ch;
    }

    if (!changed)
        return s;
    // Only trailing repeats were dropped: a slice of s is enough.
    return (r is null) ? s[0 .. keepEnd] : cast(S) r;
}

@system pure unittest
{
    assertCTFEable!(
    {
    string s;

    assert(squeeze("hello") == "helo");

    s = "abcd";
    assert(squeeze(s) is s);
    s = "xyzz";
    assert(squeeze(s).ptr == s.ptr); // should just be a slice

    assert(squeeze("hello goodbyee", "oe") == "hello godbye");
    });
}

/***************************************************************
 Finds the position $(D_PARAM pos) of the first character in $(D_PARAM
 s) that does not match $(D_PARAM pattern) (in the terminology used by
 $(REF inPattern, std,string)). Updates $(D_PARAM s =
 s[pos..$]). Returns the slice from the beginning of the original
 (before update) string up to, and excluding, $(D_PARAM pos).

The $(D_PARAM munch) function is mostly convenient for skipping
certain category of characters (e.g. whitespace) when parsing
strings. (In such cases, the return value is not used.)
 */
S1 munch(S1, S2)(ref S1 s, S2 pattern) @safe pure @nogc
{
    // Default to consuming everything when every character matches.
    size_t cut = s.length;
    foreach (idx, dchar ch; s)
    {
        if (!inPattern(ch, pattern))
        {
            cut = idx;
            break;
        }
    }
    auto head = s[0 .. cut];
    s = s[cut .. $];
    return head;
}

///
@safe pure @nogc unittest
{
    string s = "123abc";
    string t = munch(s, "0123456789");
    assert(t == "123" && s == "abc");
    t = munch(s, "0123456789");
    assert(t == "" && s == "abc");
}

@safe pure @nogc unittest
{
    string s = "123€abc";
    string t = munch(s, "0123456789");
    assert(t == "123" && s == "€abc");
    t = munch(s, "0123456789");
    assert(t == "" && s == "€abc");
    t = munch(s, "£$€¥");
    assert(t == "€" && s == "abc");
}

// helper function for unit tests: runs dg both at compile time and at run time
private @property void assertCTFEable(alias dg)()
{
    static assert({ cast(void) dg(); return true; }());
    cast(void) dg();
}
undeaD-1.0.10/src/undead/utf.d000066400000000000000000000011171346374113600157730ustar00rootroot00000000000000
/**
 * Contains the obsolete functions from Phobos' `std.utf`.
 */
module undead.utf;

import std.utf;
import std.typecons;

//deprecated("Removed October 2017.
//Please use std.utf.encode instead.")
/**
 * Encode c into buf with std.utf.encode, using Yes.useReplacementDchar.
 *
 * Returns: the slice of buf holding the code units that encode wrote.
 */
char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe pure
{
    const sz = encode!(Yes.useReplacementDchar)(buf, c);
    return buf[0 .. sz];
}

//deprecated("Removed October 2017. Please use std.utf.encode instead.")
/**
 * Encode c into buf with std.utf.encode, using Yes.useReplacementDchar.
 *
 * Returns: the slice of buf holding the code units that encode wrote.
 */
wchar[] toUTF16(return ref wchar[2] buf, dchar c) nothrow @nogc @safe pure
{
    const sz = encode!(Yes.useReplacementDchar)(buf, c);
    return buf[0 .. sz];
}
undeaD-1.0.10/win32.mak000066400000000000000000000016371346374113600144440ustar00rootroot00000000000000
#_ win32.mak
# Build win32 version of undead
# Needs Digital Mars D compiler to build, available free from:
# http://www.digitalmars.com/d/

DMD=dmd
DEL=del
S=src\undead
O=obj
B=bin

TARGET=undead

DFLAGS=-g -Isrc/
LFLAGS=-L/map/co
#DFLAGS=
#LFLAGS=

.d.obj :
	$(DMD) -c $(DFLAGS) $*

# Every module that goes into the library. Paths use backslashes throughout
# because the list is also handed to the tolf, detab and zip32 tools.
SRC=	$S\bitarray.d $S\regexp.d $S\datebase.d $S\date.d $S\dateparse.d \
	$S\cstream.d $S\stream.d $S\socketstream.d $S\doformat.d $S\string.d \
	$S\utf.d $S\internal\file.d

SOURCE= $(SRC) win32.mak win64.mak posix.mak LICENSE README.md dub.json

all: $B\$(TARGET).lib

#################################################

$B\$(TARGET).lib : $(SRC)
	$(DMD) -lib -of$B\$(TARGET).lib $(SRC) $(DFLAGS)

# Build the unit-test executable with coverage enabled, then run it.
unittest :
	$(DMD) -unittest -main -cov -of$O\unittest.exe $(SRC) $(DFLAGS)
	$O\unittest.exe

clean:
	$(DEL) $O\unittest.exe *.lst


tolf:
	tolf $(SOURCE)


detab:
	detab $(SRC)


zip: detab tolf $(SOURCE)
	$(DEL) undead.zip
	zip32 undead $(SOURCE)
undeaD-1.0.10/win64.mak000066400000000000000000000016441346374113600144470ustar00rootroot00000000000000
#_ win64.mak
# Build win64 version of undead
# Needs Digital Mars D compiler to build, available free from:
# http://www.digitalmars.com/d/

DMD=dmd
DEL=del
S=src\undead
O=obj
B=bin

TARGET=undead

# -m64 selects the 64-bit target; it is the only difference from win32.mak.
DFLAGS=-m64 -g -Isrc/
LFLAGS=-L/map/co
#DFLAGS=
#LFLAGS=

.d.obj :
	$(DMD) -c $(DFLAGS) $*

# Every module that goes into the library. Paths use backslashes throughout
# because the list is also handed to the tolf, detab and zip32 tools.
SRC=	$S\bitarray.d $S\regexp.d $S\datebase.d $S\date.d $S\dateparse.d \
	$S\cstream.d $S\stream.d $S\socketstream.d $S\doformat.d $S\string.d \
	$S\utf.d $S\internal\file.d

SOURCE= $(SRC) win32.mak win64.mak posix.mak LICENSE README.md dub.json

all: $B\$(TARGET).lib

#################################################

$B\$(TARGET).lib : $(SRC)
	$(DMD) -lib -of$B\$(TARGET).lib $(SRC) $(DFLAGS)

# Build the unit-test executable with coverage enabled, then run it.
unittest :
	$(DMD) -unittest -main -cov -of$O\unittest.exe $(SRC) $(DFLAGS)
	$O\unittest.exe

clean:
	$(DEL) $O\unittest.exe *.lst


tolf:
	tolf $(SOURCE)


detab:
	detab $(SRC)


zip: detab tolf $(SOURCE)
	$(DEL) undead.zip
	zip32 undead $(SOURCE)