pax_global_header 0000666 0000000 0000000 00000000064 13463741136 0014522 g ustar 00root root 0000000 0000000 52 comment=8d2028804cbb3be56cbf2bab47eb55012fd325a7
undeaD-1.0.10/ 0000775 0000000 0000000 00000000000 13463741136 0013001 5 ustar 00root root 0000000 0000000 undeaD-1.0.10/.editorconfig 0000664 0000000 0000000 00000000241 13463741136 0015453 0 ustar 00root root 0000000 0000000 root = true
[*.{c,h,d,di,dd}]
end_of_line = lf
insert_final_newline = true
indent_style = space
indent_size = 4
trim_trailing_whitespace = true
charset = utf-8
undeaD-1.0.10/.gitignore 0000664 0000000 0000000 00000000446 13463741136 0014775 0 ustar 00root root 0000000 0000000 .B*
*.bak
*.lst
# Object files
*.o
*.ko
*.obj
*.elf
# Precompiled Headers
*.gch
*.pch
# Libraries
*.lib
*.a
*.la
*.lo
# Shared objects (inc. Windows DLLs)
*.dll
*.so
*.so.*
*.dylib
# Executables
*.exe
*.out
*.app
*.i*86
*.x86_64
*.hex
# Extra dirs/build files
bin
obj
.dub
.vs
undead.sln
undeaD-1.0.10/.travis.yml 0000664 0000000 0000000 00000000156 13463741136 0015114 0 ustar 00root root 0000000 0000000 language: d
os:
- linux
- osx
d:
- dmd
- dmd-beta
- dmd-nightly
- ldc
- ldc-beta
sudo: false
undeaD-1.0.10/LICENSE 0000664 0000000 0000000 00000002472 13463741136 0014013 0 ustar 00root root 0000000 0000000 Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
undeaD-1.0.10/LICENSE.txt 0000664 0000000 0000000 00000002472 13463741136 0014631 0 ustar 00root root 0000000 0000000 Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:
The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
undeaD-1.0.10/README.md 0000664 0000000 0000000 00000001120 13463741136 0014252 0 ustar 00root root 0000000 0000000 [](https://travis-ci.org/dlang/undeaD)
undeaD
======
Need an obsolete Phobos module? Here they are, back from the dead and upgraded to work with the latest D.
Current modules included:
* std.bitarray
* std.date
* std.datebase
* std.dateparse
* std.regexp
* std.stream and friends
Some individual functions have been moved here rather than full Phobos modules. They are:
* undead.doformat: Contains the `doFormat` function from std.format
* undead.string: Contains regex style pattern matching functions from std.string
undeaD-1.0.10/dub.json 0000664 0000000 0000000 00000000373 13463741136 0014451 0 ustar 00root root 0000000 0000000 {
"name": "undead",
"description": "Obsolete Phobos modules, back from the dead",
"authors": ["various"],
"homepage": "https://github.com/dlang/undeaD",
"license": "BSL-1.0",
"targetType": "library",
"targetPath": "bin"
}
undeaD-1.0.10/posix.mak 0000664 0000000 0000000 00000001670 13463741136 0014641 0 ustar 00root root 0000000 0000000 #_ posix.mak
# Build posix version of undead
# Needs Digital Mars D compiler to build, available free from:
# http://www.digitalmars.com/d/
# D compiler and file-removal command used by the rules below.
DMD=dmd
DEL=rm
# Directory layout: module sources (S), output directory of the unit-test binary (O),
# and output directory of the library (B).
S=src/undead
O=obj
B=bin
# Base name of the static library produced by `all`.
TARGET=undead
# Flags passed to every $(DMD) invocation.
DFLAGS=-g -Isrc/
# NOTE(review): LFLAGS is not referenced by any rule visible in this file - confirm before relying on it.
LFLAGS=-L/map/co
#DFLAGS=
#LFLAGS=
# Old-style suffix rule: compile one D source into an object file.
# NOTE(review): no rule visible in this file asks for a .obj target, and GNU make
# only honours suffix rules whose suffixes are listed in .SUFFIXES - confirm this is still needed.
.d.obj :
	$(DMD) -c $(DFLAGS) $*
# Every module that goes into the library, relative to the repository root.
SRC := $(addprefix $S/, \
    bitarray.d regexp.d datebase.d date.d dateparse.d \
    cstream.d stream.d socketstream.d doformat.d string.d \
    internal/file.d)

# Everything that is shipped in the source zip.
SOURCE := $(SRC) win32.mak posix.mak LICENSE README.md dub.json
# Default goal: build the static library. Declared phony so that a stray
# file called `all` can never make the goal look up to date.
.PHONY: all
all: $B/$(TARGET).a

#################################################

# Static library containing every module listed in $(SRC).
$B/$(TARGET).a : $(SRC)
	$(DMD) -lib -of$@ $(SRC) $(DFLAGS)
# Compile all modules with unit tests and coverage instrumentation, then run
# the resulting binary. Phony: this is a command, not a file to be produced.
.PHONY: unittest
unittest :
	$(DMD) -unittest -main -cov -of$O/unittest $(SRC) $(DFLAGS)
	$O/unittest
# Remove everything the rules in this file create. `-f` keeps the target from
# failing when some of the files do not exist (e.g. on a fresh checkout).
.PHONY: clean
clean:
	$(DEL) -f $O/unittest $O/unittest.o *.lst $B/$(TARGET).a
# Source-normalisation helpers; both run an external tool of the same name
# over the listed files. Phony because neither produces a file named after itself.
.PHONY: tolf detab
tolf:
	tolf $(SOURCE)

detab:
	detab $(SRC)
# Package the distributable sources into undead.zip after normalising them.
# `-f` lets the first run succeed when no previous archive exists.
.PHONY: zip
zip: detab tolf $(SOURCE)
	$(DEL) -f undead.zip
	zip undead $(SOURCE)
# Archive the committed tree (HEAD) into undead.zip using git.
.PHONY: gitzip
gitzip:
	git archive --format=zip HEAD > undead.zip
undeaD-1.0.10/src/ 0000775 0000000 0000000 00000000000 13463741136 0013570 5 ustar 00root root 0000000 0000000 undeaD-1.0.10/src/undead/ 0000775 0000000 0000000 00000000000 13463741136 0015030 5 ustar 00root root 0000000 0000000 undeaD-1.0.10/src/undead/bitarray.d 0000664 0000000 0000000 00000047605 13463741136 0017026 0 ustar 00root root 0000000 0000000 /***********************
* Source: $(PHOBOSSRC std/_bitarray.d)
* Macros:
* WIKI = StdBitarray
*/
module undead.bitarray;
//debug = bitarray; // uncomment to turn on debugging printf's
private import core.bitop;
/**
* An array of bits.
*/
struct BitArray
{
size_t len;
size_t* ptr;
/**
 * Returns: `len` divided by 32, rounded up. The other members use this
 * value as the number of `size_t` elements addressed through `ptr`.
 */
size_t dim()
{
    enum size_t groupBits = 32;
    return (len + (groupBits - 1)) / groupBits;
}
/// Returns: the number of bits currently held by the array.
size_t length() const pure nothrow
{
    return this.len;
}
/**
 * Sets the number of bits in the array, reallocating the backing storage
 * when the element count reported by dim() changes.
 *
 * Params:
 *      newlen = the new length in bits
 */
void length(size_t newlen)
{
    if (newlen == len)
        return;

    size_t olddim = dim();
    size_t newdim = (newlen + 31) / 32;

    if (newdim != olddim)
    {
        // Create a fake array so we can use D's realloc machinery
        auto b = ptr[0 .. olddim];
        b.length = newdim;              // realloc
        ptr = b.ptr;

        // Set any pad bits of the last element to 0. The number of bits to
        // keep comes from the new *bit length*; deriving it from the element
        // count (as was done before) wiped valid bits, e.g. shrinking a
        // 96-bit array to 20 bits kept only bit 0.
        immutable tailBits = newlen & 31;
        if (tailBits)
            ptr[newdim - 1] &= (cast(size_t)1 << tailBits) - 1;
    }
    len = newlen;
}
/**********************************************
 * Support for [$(I index)] operation for BitArray.
 *
 * Params:
 *      i = zero-based bit index; the in-contract requires i < length
 * Returns: true if bit i is set, false otherwise.
 */
bool opIndex(size_t i)
in
{
    assert(i < len);
}
body
{
    immutable bit = bt(ptr, i);
    return bit != 0;
}
/**
 * Support for the [$(I index)] = value operation for BitArray.
 *
 * Params:
 *      b = the value to store
 *      i = zero-based bit index; the in-contract requires i < length
 * Returns: b
 */
bool opIndexAssign(bool b, size_t i)
in
{
    assert(i < len);
}
body
{
    if (!b)
    {
        btr(ptr, i);    // clear the bit
        return false;
    }
    bts(ptr, i);        // set the bit
    return true;
}
/**********************************************
 * Support for array.dup property for BitArray.
 *
 * Returns: a BitArray of the same length whose storage is a fresh copy
 * of the first dim elements behind ptr.
 */
BitArray dup()
{
    BitArray copy;

    copy.ptr = ptr[0 .. dim].dup.ptr;
    copy.len = len;
    return copy;
}

unittest
{
    BitArray a;
    BitArray b;

    debug(bitarray) printf("BitArray.dup.unittest\n");

    a.length = 3;
    a[0] = 1; a[1] = 0; a[2] = 1;
    b = a.dup;
    assert(b.length == 3);

    // even positions were set, odd positions cleared
    foreach (int i; 0 .. 3)
    {
        debug(bitarray) printf("b[%d] = %d\n", i, b[i]);
        assert(b[i] == ((i & 1) == 0));
    }
}
/**********************************************
 * Support for foreach loops for BitArray.
 *
 * Each bit is handed to dg by reference through a temporary and written
 * back afterwards, so the loop body may modify the bits.
 * Iteration stops as soon as dg returns non-zero.
 */
int opApply(int delegate(ref bool) dg)
{
    int rc;
    size_t pos = 0;

    while (pos < len)
    {
        bool bit = opIndex(pos);
        rc = dg(bit);
        (this)[pos] = bit;      // store back whatever the body left in bit
        if (rc)
            break;
        ++pos;
    }
    return rc;
}
/**
 * Support for foreach loops with an index for BitArray.
 *
 * Both the index and the bit are passed to dg by reference; the bit is
 * written back at whatever index dg leaves behind.
 * Iteration stops as soon as dg returns non-zero.
 */
int opApply(int delegate(ref size_t, ref bool) dg)
{
    int rc;
    size_t pos = 0;

    while (pos < len)
    {
        bool bit = opIndex(pos);
        rc = dg(pos, bit);
        (this)[pos] = bit;      // uses pos as (possibly) updated by dg
        if (rc)
            break;
        ++pos;
    }
    return rc;
}
unittest
{
    debug(bitarray) printf("BitArray.opApply unittest\n");

    static bool[] ba = [1,0,1];
    static bool[] expected = [true, false, true];

    BitArray a; a.init(ba);

    // value-only iteration
    size_t seen;
    foreach (b;a)
    {
        assert(seen < expected.length);
        assert(b == expected[seen]);
        seen++;
    }

    // index + value iteration
    foreach (j,b;a)
    {
        assert(j < expected.length);
        assert(b == expected[j]);
    }
}
/**********************************************
 * Support for array.reverse property for BitArray.
 *
 * Reverses the order of the bits in place.
 * Returns: this array (by value; it shares the same storage).
 */
BitArray reverse()
out (result)
{
    assert(result == this);
}
body
{
    if (len < 2)
        return this;

    size_t front = 0;
    size_t back = len - 1;

    // swap the outermost pair and walk inwards
    while (front < back)
    {
        immutable saved = (this)[front];
        (this)[front] = (this)[back];
        (this)[back] = saved;
        ++front;
        --back;
    }
    return this;
}

unittest
{
    debug(bitarray) printf("BitArray.reverse.unittest\n");

    static bool[5] data = [1,0,1,1,0];

    BitArray b;
    b.init(data);
    b.reverse;

    foreach (i; 0 .. data.length)
        assert(b[i] == data[data.length - 1 - i]);
}
/**********************************************
 * Support for array.sort property for BitArray.
 *
 * Sorts in place so that all clear bits precede all set bits.
 * Returns: this array (by value; it shares the same storage).
 */
BitArray sort()
out (result)
{
    assert(result == this);
}
body
{
    if (len < 2)
        return this;

    size_t lo = 0;
    size_t hi = len - 1;

    while (true)
    {
        // advance lo to the first set bit
        while (lo < hi && (this)[lo] != true)
            lo++;
        if (lo >= hi)
            break;

        // retreat hi to the last clear bit
        while (lo < hi && (this)[hi] != false)
            hi--;
        if (lo >= hi)
            break;

        // a set bit sits before a clear bit: exchange them
        (this)[lo] = false;
        (this)[hi] = true;
        lo++;
        hi--;
    }
    return this;
}

unittest
{
    debug(bitarray) printf("BitArray.sort.unittest\n");

    __gshared size_t x = 0b1100011000;
    __gshared BitArray ba = { 10, &x };

    ba.sort;

    // four bits were set, so the last four positions must hold them
    foreach (size_t i; 0 .. 10)
        assert(ba[i] == (i >= 6));
}
/***************************************
 * Support for operators == and != for bit arrays.
 *
 * Two arrays are equal when they have the same length and the same bit
 * values; storage beyond the last bit is not compared.
 */
bool opEquals(const ref BitArray a2) const pure nothrow
{
    if (this.length != a2.length)
        return false;           // not equal

    auto lhs = cast(const(ubyte)*)this.ptr;
    auto rhs = cast(const(ubyte)*)a2.ptr;

    // compare the bytes that are fully covered by the length
    immutable fullBytes = this.length / 8;
    foreach (k; 0 .. fullBytes)
    {
        if (lhs[k] != rhs[k])
            return false;       // not equal
    }

    // compare the remaining 1..7 bits, if any
    immutable tailBits = this.length & 7;
    if (tailBits == 0)
        return true;
    immutable mask = cast(ubyte)((1 << tailBits) - 1);
    return (lhs[fullBytes] & mask) == (rhs[fullBytes] & mask);
}

unittest
{
    debug(bitarray) printf("BitArray.opEquals unittest\n");

    static BitArray make(bool[] bits)
    {
        BitArray r;
        r.init(bits);
        return r;
    }

    auto a = make([true, false, true, false, true]);
    auto b = make([true, false, true]);
    auto c = make([true, false, true, false, true, false, true]);
    auto d = make([true, false, true, true, true]);
    auto e = make([true, false, true, false, true]);

    assert(a != b);
    assert(a != c);
    assert(a != d);
    assert(a == e);
}
/***************************************
 * Implement comparison operators.
 *
 * The bits of the common prefix are compared in index order; the first
 * position where the arrays differ decides the result (a set bit orders
 * after a clear bit). If the common prefix is identical, the lengths are
 * compared.
 *
 * Returns: a negative value, zero, or a positive value when this array
 * orders before, equal to, or after a2.
 */
int opCmp(const ref BitArray a2) const pure nothrow
{
    size_t i;

    // number of bits both arrays have in common
    auto len = this.length;
    if (a2.length < len)
        len = a2.length;
    auto p1 = cast(ubyte*)this.ptr;
    auto p2 = cast(ubyte*)a2.ptr;

    // skip the leading bytes that are identical
    auto n = len / 8;
    for (i = 0; i < n; i++)
    {
        if (p1[i] != p2[i])
            break;              // not equal
    }

    // Compare bit by bit, starting at the first byte that was not matched.
    // The mask must be built from the bit position *inside* its byte and the
    // byte must follow j; the previous `1 << j` truncated to ubyte was 0 for
    // every j >= 8, so differences past the first byte were never detected.
    for (auto j = i * 8; j < len; j++)
    {
        auto mask = cast(ubyte)(1 << (j & 7));
        auto c = cast(int)(p1[j / 8] & mask) - cast(int)(p2[j / 8] & mask);
        if (c)
            return c;
    }

    // common prefix is identical: the shorter array orders first
    version (D_LP64)
    {
        long c = this.len - a2.length;
        if (c < 0)
            return -1;
        else
            return c != 0;
    }
    else
        return cast(int)this.len - cast(int)a2.length;
}

unittest
{
    debug(bitarray) printf("BitArray.opCmp unittest\n");

    static bool[] ba = [1,0,1,0,1];
    static bool[] bb = [1,0,1];
    static bool[] bc = [1,0,1,0,1,0,1];
    static bool[] bd = [1,0,1,1,1];
    static bool[] be = [1,0,1,0,1];

    BitArray a; a.init(ba);
    BitArray b; b.init(bb);
    BitArray c; c.init(bc);
    BitArray d; d.init(bd);
    BitArray e; e.init(be);

    assert(a >  b);
    assert(a >= b);
    assert(a <  c);
    assert(a <= c);
    assert(a <  d);
    assert(a <= d);
    assert(a == e);
    assert(a <= e);
    assert(a >= e);

    // regression: a difference located beyond the first byte must be seen
    static bool[] bf = [0,0,0,0,0,0,0,0,0,1];
    static bool[] bg = [0,0,0,0,0,0,0,0,0,0];
    BitArray f; f.init(bf);
    BitArray g; g.init(bg);
    assert(f >  g);
    assert(g <  f);
}
/***************************************
 * Set BitArray to contents of ba[]
 *
 * The length becomes ba.length and every bit is assigned from the
 * corresponding element.
 */
void init(bool[] ba)
{
    length = ba.length;
    for (size_t pos = 0; pos < ba.length; ++pos)
        (this)[pos] = ba[pos];
}
/***************************************
 * Map BitArray onto v[], with numbits being the number of bits
 * in the array. Does not copy the data.
 *
 * This is the inverse of opCast.
 *
 * Params:
 *      v       = the storage to map; the in-contract requires its byte
 *                length to be a multiple of 4 and large enough for numbits
 *      numbits = the number of bits the array should report
 */
void init(void[] v, size_t numbits)
in
{
    assert(numbits <= v.length * 8);
    assert((v.length & 3) == 0);
}
body
{
    len = numbits;
    ptr = cast(typeof(ptr))v.ptr;
}

unittest
{
    debug(bitarray) printf("BitArray.init unittest\n");

    static bool[] ba = [1,0,1,0,1];

    BitArray a; a.init(ba);
    BitArray b;

    // b is mapped onto the storage of a
    void[] v = cast(void[])a;
    b.init(v, a.length);

    foreach (i, bit; ba)
        assert(b[i] == bit);

    // a write through a is visible through b
    a[0] = 0;
    assert(b[0] == 0);

    assert(a == b);
}
/***************************************
 * Convert to void[].
 *
 * Returns: a slice over the first dim elements behind ptr; no data is copied.
 */
void[] opCast()
{
    auto words = ptr[0 .. dim];
    return cast(void[])words;
}

unittest
{
    debug(bitarray) printf("BitArray.opCast unittest\n");

    static bool[] ba = [1,0,1,0,1];

    BitArray a; a.init(ba);
    void[] v = cast(void[])a;

    immutable expectedBytes = a.dim * size_t.sizeof;
    assert(v.length == expectedBytes);
}
/***************************************
 * Support for unary operator ~ for bit arrays.
 *
 * Returns: a new array of the same length with every bit inverted.
 */
BitArray opCom()
{
    immutable words = this.dim();

    BitArray result;
    result.length = len;

    foreach (w; 0 .. words)
        result.ptr[w] = ~this.ptr[w];

    // clear the bits of the last element that lie beyond (len mod 32)
    immutable tail = len & 31;
    if (tail)
        result.ptr[words - 1] &= (cast(size_t)1 << tail) - 1;
    return result;
}

unittest
{
    debug(bitarray) printf("BitArray.opCom unittest\n");

    static bool[] ba = [1,0,1,0,1];
    static bool[] expected = [0,1,0,1,0];

    BitArray a; a.init(ba);
    BitArray b = ~a;

    foreach (i, bit; expected)
        assert(b[i] == bit);
}
/***************************************
 * Support for binary operator & for bit arrays.
 *
 * Both operands must have the same length (checked by the in-contract).
 * Returns: a new array holding the element-wise AND.
 */
BitArray opAnd(BitArray e2)
in
{
    assert(len == e2.length);
}
body
{
    immutable words = this.dim();

    BitArray result;
    result.length = len;

    foreach (w; 0 .. words)
        result.ptr[w] = this.ptr[w] & e2.ptr[w];
    return result;
}

unittest
{
    debug(bitarray) printf("BitArray.opAnd unittest\n");

    static bool[] ba = [1,0,1,0,1];
    static bool[] bb = [1,0,1,1,0];
    static bool[] expected = [1,0,1,0,0];

    BitArray a; a.init(ba);
    BitArray b; b.init(bb);

    BitArray c = a & b;

    foreach (i, bit; expected)
        assert(c[i] == bit);
}
/***************************************
 * Support for binary operator | for bit arrays.
 *
 * Both operands must have the same length (checked by the in-contract).
 * Returns: a new array holding the element-wise OR.
 */
BitArray opOr(BitArray e2)
in
{
    assert(len == e2.length);
}
body
{
    immutable words = this.dim();

    BitArray result;
    result.length = len;

    foreach (w; 0 .. words)
        result.ptr[w] = this.ptr[w] | e2.ptr[w];
    return result;
}

unittest
{
    debug(bitarray) printf("BitArray.opOr unittest\n");

    static bool[] ba = [1,0,1,0,1];
    static bool[] bb = [1,0,1,1,0];
    static bool[] expected = [1,0,1,1,1];

    BitArray a; a.init(ba);
    BitArray b; b.init(bb);

    BitArray c = a | b;

    foreach (i, bit; expected)
        assert(c[i] == bit);
}
/***************************************
 * Support for binary operator ^ for bit arrays.
 *
 * Both operands must have the same length (checked by the in-contract).
 * Returns: a new array holding the element-wise XOR.
 */
BitArray opXor(BitArray e2)
in
{
    assert(len == e2.length);
}
body
{
    immutable words = this.dim();

    BitArray result;
    result.length = len;

    foreach (w; 0 .. words)
        result.ptr[w] = this.ptr[w] ^ e2.ptr[w];
    return result;
}

unittest
{
    debug(bitarray) printf("BitArray.opXor unittest\n");

    static bool[] ba = [1,0,1,0,1];
    static bool[] bb = [1,0,1,1,0];
    static bool[] expected = [0,0,0,1,1];

    BitArray a; a.init(ba);
    BitArray b; b.init(bb);

    BitArray c = a ^ b;

    foreach (i, bit; expected)
        assert(c[i] == bit);
}
/***************************************
 * Support for binary operator - for bit arrays.
 *
 * $(I a - b) for BitArrays means the same thing as $(I a & ~b).
 *
 * Both operands must have the same length (checked by the in-contract).
 * Returns: a new array holding the result.
 */
BitArray opSub(BitArray e2)
in
{
    assert(len == e2.length);
}
body
{
    immutable words = this.dim();

    BitArray result;
    result.length = len;

    foreach (w; 0 .. words)
        result.ptr[w] = this.ptr[w] & ~e2.ptr[w];
    return result;
}

unittest
{
    debug(bitarray) printf("BitArray.opSub unittest\n");

    static bool[] ba = [1,0,1,0,1];
    static bool[] bb = [1,0,1,1,0];
    static bool[] expected = [0,0,0,0,1];

    BitArray a; a.init(ba);
    BitArray b; b.init(bb);

    BitArray c = a - b;

    foreach (i, bit; expected)
        assert(c[i] == bit);
}
/***************************************
 * Support for operator &= bit arrays.
 *
 * Both operands must have the same length (checked by the in-contract).
 * The storage of this array is updated in place.
 * Returns: this array.
 */
BitArray opAndAssign(BitArray e2)
in
{
    assert(len == e2.length);
}
body
{
    immutable words = this.dim();

    foreach (w; 0 .. words)
        ptr[w] &= e2.ptr[w];
    return this;
}

unittest
{
    debug(bitarray) printf("BitArray.opAndAssign unittest\n");

    static bool[] ba = [1,0,1,0,1];
    static bool[] bb = [1,0,1,1,0];
    static bool[] expected = [1,0,1,0,0];

    BitArray a; a.init(ba);
    BitArray b; b.init(bb);

    a &= b;

    foreach (i, bit; expected)
        assert(a[i] == bit);
}
/***************************************
 * Support for operator |= for bit arrays.
 *
 * Both operands must have the same length (checked by the in-contract).
 * The storage of this array is updated in place.
 * Returns: this array.
 */
BitArray opOrAssign(BitArray e2)
in
{
    assert(len == e2.length);
}
body
{
    immutable words = this.dim();

    foreach (w; 0 .. words)
        ptr[w] |= e2.ptr[w];
    return this;
}

unittest
{
    debug(bitarray) printf("BitArray.opOrAssign unittest\n");

    static bool[] ba = [1,0,1,0,1];
    static bool[] bb = [1,0,1,1,0];
    static bool[] expected = [1,0,1,1,1];

    BitArray a; a.init(ba);
    BitArray b; b.init(bb);

    a |= b;

    foreach (i, bit; expected)
        assert(a[i] == bit);
}
/***************************************
 * Support for operator ^= for bit arrays.
 *
 * Both operands must have the same length (checked by the in-contract).
 * The storage of this array is updated in place.
 * Returns: this array.
 */
BitArray opXorAssign(BitArray e2)
in
{
    assert(len == e2.length);
}
body
{
    immutable words = this.dim();

    foreach (w; 0 .. words)
        ptr[w] ^= e2.ptr[w];
    return this;
}

unittest
{
    debug(bitarray) printf("BitArray.opXorAssign unittest\n");

    static bool[] ba = [1,0,1,0,1];
    static bool[] bb = [1,0,1,1,0];
    static bool[] expected = [0,0,0,1,1];

    BitArray a; a.init(ba);
    BitArray b; b.init(bb);

    a ^= b;

    foreach (i, bit; expected)
        assert(a[i] == bit);
}
/***************************************
 * Support for operator -= for bit arrays.
 *
 * $(I a -= b) for BitArrays means the same thing as $(I a &= ~b).
 *
 * Both operands must have the same length (checked by the in-contract).
 * The storage of this array is updated in place.
 * Returns: this array.
 */
BitArray opSubAssign(BitArray e2)
in
{
    assert(len == e2.length);
}
body
{
    immutable words = this.dim();

    foreach (w; 0 .. words)
        ptr[w] &= ~e2.ptr[w];
    return this;
}

unittest
{
    debug(bitarray) printf("BitArray.opSubAssign unittest\n");

    static bool[] ba = [1,0,1,0,1];
    static bool[] bb = [1,0,1,1,0];
    static bool[] expected = [0,0,0,0,1];

    BitArray a; a.init(ba);
    BitArray b; b.init(bb);

    a -= b;

    foreach (i, bit; expected)
        assert(a[i] == bit);
}
/***************************************
 * Support for operator ~= for bit arrays.
 *
 * Grows the array by one bit and stores b in the new last position.
 * Returns: this array.
 */
BitArray opCatAssign(bool b)
{
    immutable last = len;

    length = last + 1;
    (this)[last] = b;
    return this;
}

unittest
{
    debug(bitarray) printf("BitArray.opCatAssign unittest\n");

    static bool[] ba = [1,0,1,0,1];
    static bool[] expected = [1,0,1,0,1,1];

    BitArray a; a.init(ba);
    BitArray b;

    b = (a ~= true);

    foreach (i, bit; expected)
        assert(a[i] == bit);

    assert(b == a);
}
/***************************************
 * Support for operator ~= with a BitArray on the right-hand side.
 *
 * Grows the array by b.length bits and copies the bits of b behind the
 * existing ones.
 * Returns: this array.
 */
BitArray opCatAssign(BitArray b)
{
    immutable istart = len;
    immutable extra = b.length;

    length = istart + extra;
    foreach (k; 0 .. extra)
        (this)[istart + k] = b[k];
    return this;
}

unittest
{
    debug(bitarray) printf("BitArray.opCatAssign unittest\n");

    static bool[] ba = [1,0];
    static bool[] bb = [0,1,0];
    static bool[] expected = [1,0,0,1,0];

    BitArray a; a.init(ba);
    BitArray b; b.init(bb);
    BitArray c;

    c = (a ~= b);
    assert(a.length == 5);

    foreach (i, bit; expected)
        assert(a[i] == bit);

    assert(c == a);
}
/***************************************
 * Support for binary operator ~ for bit arrays.
 *
 * Returns: a copy of this array with b appended as the last bit.
 */
BitArray opCat(bool b)
{
    immutable oldLen = len;
    auto r = this.dup;

    r.length = oldLen + 1;
    r[oldLen] = b;
    return r;
}
/**
 * Support for binary operator ~ with a bool on the left-hand side.
 *
 * Returns: a new array that starts with b, followed by the bits of this array.
 */
BitArray opCat_r(bool b)
{
    BitArray r;

    r.length = len + 1;
    r[0] = b;
    foreach (k; 0 .. len)
        r[k + 1] = (this)[k];
    return r;
}
/**
 * Support for binary operator ~ with a BitArray on the right-hand side.
 *
 * Returns: a new array holding the bits of this array followed by those of b.
 */
BitArray opCat(BitArray b)
{
    auto r = this.dup();

    r ~= b;
    return r;
}
unittest
{
    debug(bitarray) printf("BitArray.opCat unittest\n");

    static bool[] ba = [1,0];
    static bool[] bb = [0,1,0];

    static bool[] arrayArray = [1,0,0,1,0];     // a ~ b
    static bool[] arrayBool  = [1,0,1];         // a ~ true
    static bool[] boolArray  = [0,1,0];         // false ~ a

    BitArray a; a.init(ba);
    BitArray b; b.init(bb);
    BitArray c;

    c = (a ~ b);
    assert(c.length == 5);
    foreach (i, bit; arrayArray)
        assert(c[i] == bit);

    c = (a ~ true);
    assert(c.length == 3);
    foreach (i, bit; arrayBool)
        assert(c[i] == bit);

    c = (false ~ a);
    assert(c.length == 3);
    foreach (i, bit; boolArray)
        assert(c[i] == bit);
}
}
undeaD-1.0.10/src/undead/cstream.d 0000664 0000000 0000000 00000014257 13463741136 0016644 0 ustar 00root root 0000000 0000000 // Written in the D programming language.
/**
* $(RED Deprecated: This module is considered out-dated and not up to Phobos'
* current standards.)
*
* The std.cstream module bridges core.stdc.stdio (or std.stdio) and std.stream.
* Both core.stdc.stdio and std.stream are publicly imported by std.cstream.
*
* Macros:
* WIKI=Phobos/StdCstream
*
* Copyright: Copyright Ben Hinkle 2007 - 2009.
* License: $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
* Authors: Ben Hinkle
* Source: $(PHOBOSSRC std/_cstream.d)
*/
/* Copyright Ben Hinkle 2007 - 2009.
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
module undead.cstream;
public import core.stdc.stdio;
public import undead.stream;
version(unittest) import std.stdio;
import std.algorithm;
/**
* A Stream wrapper for a C file of type FILE*.
*/
class CFile : Stream {
protected FILE* cfile;
/**
 * Create the stream wrapper for the given C file.
 * Params:
 *   cfile = a valid C $(B FILE) pointer to wrap.
 *   mode = a bitwise combination of $(B FileMode.In) for a readable file
 *          and $(B FileMode.Out) for a writeable file.
 *   seekable = indicates if the stream should be _seekable.
 */
this(FILE* cfile, FileMode mode, bool seekable = false) {
    super();

    // goes through the property setter, which also marks the stream open
    this.file = cfile;

    immutable canRead = (mode & FileMode.In) != 0;
    immutable canWrite = (mode & FileMode.Out) != 0;
    readable = canRead;
    writeable = canWrite;
    this.seekable = seekable;
}
/**
 * Closes the stream when the wrapper is destroyed.
 */
~this()
{
    close();
}
/**
 * Property to get the underlying file for this stream.
 */
@property FILE* file()
{
    return this.cfile;
}

/**
 * Property to set the underlying file for this stream.
 * Setting the file marks the stream as open.
 */
@property void file(FILE* cfile)
{
    isopen = true;
    this.cfile = cfile;
}
/**
 * Override of the $(B Stream) method: flushes the underlying $(B FILE*)
 * with the C function fflush.
 */
override void flush()
{
    fflush(cfile);
}
/**
 * Override of the $(B Stream) method: closes the underlying $(B FILE*)
 * with fclose if the stream is open, then clears all state flags.
 */
override void close() {
    if (isopen)
        fclose(cfile);

    seekable = false;
    writeable = false;
    readable = false;
    isopen = false;
}
/**
 * Override of the $(B Stream) method.
 * Returns: true if a previous read hit the end of the file or the C
 * end-of-file indicator of the underlying $(B FILE*) is set.
 */
override bool eof() {
    if (readEOF)
        return true;
    return feof(cfile) != 0;
}
/**
 * Override of the $(B Stream) method: reads one character with fgetc.
 * Returns: the result of fgetc converted to char.
 */
override char getc() {
    immutable int raw = fgetc(cfile);
    return cast(char)raw;
}
/**
 * Override of the $(B Stream) method: pushes c back with the C function
 * ungetc.
 * Returns: the result of the C call converted to char.
 */
override char ungetc(char c) {
    immutable int pushed = core.stdc.stdio.ungetc(c,cfile);
    return cast(char)pushed;
}
/**
 * Override of the $(B Stream) method: reads up to size bytes into buffer
 * with fread. A read that returns no bytes marks the stream as being at
 * end of file.
 * Returns: the number of bytes read.
 */
override size_t readBlock(void* buffer, size_t size) {
    immutable got = fread(buffer,1,size,cfile);
    readEOF = (got == 0);
    return got;
}
/**
 * Override of the $(B Stream) method: writes size bytes from buffer with
 * fwrite.
 * Returns: the number of bytes written.
 */
override size_t writeBlock(const void* buffer, size_t size) {
    immutable written = fwrite(buffer,1,size,cfile);
    return written;
}
/**
 * Override of the $(B Stream) method: moves the file position with fseek
 * and reports the new position with ftell.
 *
 * Params:
 *   offset = the distance to move, interpreted relative to rel
 *   rel = the reference point of the move
 * Returns: the new absolute position in the file.
 * Throws: SeekException if offset does not fit into a C long, if the
 * position cannot be moved, or if the new position cannot be determined.
 */
override ulong seek(long offset, SeekPos rel) {
    import core.stdc.config : c_long;

    readEOF = false;

    // fseek takes a C long. Casting to int (as was done before) silently
    // truncated large offsets even where a C long is 64 bits wide; where it
    // is narrower, an offset that does not fit is reported instead.
    immutable cOffset = cast(c_long)offset;
    if (cOffset != offset || fseek(cfile,cOffset,rel) != 0)
        throw new SeekException("unable to move file pointer");

    // ftell signals failure with a negative value, which must not be
    // returned as a huge unsigned position
    immutable pos = ftell(cfile);
    if (pos < 0)
        throw new SeekException("unable to determine file position");
    return pos;
}
/**
 * Override of the $(B Stream) method: writes s followed by a single
 * "\n" character.
 */
override void writeLine(const(char)[] s) {
    enum string lineEnd = "\n";

    writeString(s);
    writeString(lineEnd);
}
/**
 * Override of the $(B Stream) method: writes the wide string s followed
 * by a single "\n" character.
 */
override void writeLineW(const(wchar)[] s) {
    enum wstring lineEnd = "\n";

    writeStringW(s);
    writeStringW(lineEnd);
}
// Round-trip test: writes a temporary file through CFile, reads it back,
// checks positions and seeking, then exercises line iteration with foreach.
unittest {
    import undead.internal.file;
    import std.internal.cstring : tempCString;
    // path of the scratch file; it is removed again at the end of the test
    auto stream_file = (undead.internal.file.deleteme ~ "-stream.txt").tempCString();
    FILE* f = fopen(stream_file,"w");
    assert(f !is null);
    CFile file = new CFile(f,FileMode.Out);
    int i = 666;
    // should be ok to write
    assert(file.writeable);
    file.writeLine("Testing stream.d:");
    file.writeString("Hello, world!");
    file.write(i);
    // string#1 + string#2 + int should give exactly that
    version (Windows)
        assert(file.position == 19 + 13 + 4);
    version (Posix)
        assert(file.position == 18 + 13 + 4);
    file.close();
    // no operations are allowed when file is closed
    assert(!file.readable && !file.writeable && !file.seekable);
    // reopen the same file for reading, this time as a seekable stream
    f = fopen(stream_file,"r");
    file = new CFile(f,FileMode.In,true);
    // should be ok to read
    assert(file.readable);
    auto line = file.readLine();
    auto exp = "Testing stream.d:";
    assert(line[0] == 'T');
    assert(line.length == exp.length);
    assert(!std.algorithm.cmp(line, "Testing stream.d:"));
    // jump over "Hello, "
    file.seek(7, SeekPos.Current);
    version (Windows)
        assert(file.position == 19 + 7);
    version (Posix)
        assert(file.position == 18 + 7);
    assert(!std.algorithm.cmp(file.readString(6), "world!"));
    i = 0; file.read(i);
    assert(i == 666);
    // string#1 + string#2 + int should give exactly that
    version (Windows)
        assert(file.position == 19 + 13 + 4);
    version (Posix)
        assert(file.position == 18 + 13 + 4);
    // we must be at the end of file
    file.close();
    // recreate the file for combined reading and writing
    f = fopen(stream_file,"w+");
    file = new CFile(f,FileMode.In|FileMode.Out,true);
    file.writeLine("Testing stream.d:");
    file.writeLine("Another line");
    file.writeLine("");
    file.writeLine("That was blank");
    file.position = 0;
    // iterate line by line; each line is copied before it is stored
    char[][] lines;
    foreach(char[] line; file) {
        lines ~= line.dup;
    }
    assert( lines.length == 5 );
    assert( lines[0] == "Testing stream.d:");
    assert( lines[1] == "Another line");
    assert( lines[2] == "");
    assert( lines[3] == "That was blank");
    file.position = 0;
    // same again with the line counter; n - 1 is used as the array index
    lines = new char[][5];
    foreach(ulong n, char[] line; file) {
        lines[cast(size_t)(n-1)] = line.dup;
    }
    assert( lines[0] == "Testing stream.d:");
    assert( lines[1] == "Another line");
    assert( lines[2] == "");
    assert( lines[3] == "That was blank");
    file.close();
    remove(stream_file);
}
}
/**
* CFile wrapper of core.stdc.stdio.stdin (not seekable).
*/
__gshared CFile din;
/**
* CFile wrapper of core.stdc.stdio.stdout (not seekable).
*/
__gshared CFile dout;
/**
* CFile wrapper of core.stdc.stdio.stderr (not seekable).
*/
__gshared CFile derr;
// Module constructor: wraps the three standard C streams in CFile objects
// and stores them in din, dout and derr.
shared static this() {
    // the renamed import keeps the C streams apart from any other
    // stdin/stdout/stderr symbols that may be in scope
    import cstdio = core.stdc.stdio;

    // open standard I/O devices
    din  = new CFile(cstdio.stdin, FileMode.In);
    dout = new CFile(cstdio.stdout, FileMode.Out);
    derr = new CFile(cstdio.stderr, FileMode.Out);
}
undeaD-1.0.10/src/undead/date.d 0000664 0000000 0000000 00000076046 13463741136 0016127 0 ustar 00root root 0000000 0000000 // Written in the D programming language.
/**
* $(RED Deprecated. It will be removed in February 2012.
* Please use std.datetime instead.)
*
* Dates are represented in several formats. The date implementation
* revolves around a central type, $(D d_time), from which other
* formats are converted to and from. Dates are calculated using the
* Gregorian calendar.
*
* References: $(WEB wikipedia.org/wiki/Gregorian_calendar, Gregorian
* calendar (Wikipedia))
*
* Macros: WIKI = Phobos/StdDate
*
* Copyright: Copyright Digital Mars 2000 - 2009.
* License: Boost License 1.0.
* Authors: $(WEB digitalmars.com, Walter Bright)
* Source: $(PHOBOSSRC std/_date.d)
*/
/* Copyright Digital Mars 2000 - 2009.
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
module undead.date;
import std.conv, std.exception, std.stdio;
import core.stdc.stdlib;
import undead.datebase;
import undead.dateparse;
/+
pragma(msg, "Notice: As of Phobos 2.055, std.date and std.dateparse have been " ~
"deprecated. They will be removed in February 2012. " ~
"Please use std.datetime instead.");
deprecated:
+/
/**
 * $(D d_time) is a signed arithmetic type giving the time elapsed
 * since January 1, 1970. Negative values are for dates preceding
 * 1970. The time unit used is Ticks. Ticks are milliseconds or
 * smaller intervals.
 *
 * The usual arithmetic operations can be performed on d_time, such as adding,
 * subtracting, etc. Elapsed time in Ticks can be computed by subtracting a
 * starting d_time from an ending d_time.
 */
alias d_time = long;

/**
 * A value for d_time that does not represent a valid time.
 */
enum d_time d_time_nan = long.min;
/**
 * Time broken down into its components.
 *
 * A default-initialized Date has year and tzcorrection set to int.min and
 * every other field set to 0.
 */
struct Date
{
    int year = int.min; /// use int.min as "nan" year value
    int month; /// 1..12
    int day; /// 1..31
    int hour; /// 0..23
    int minute; /// 0..59
    int second; /// 0..59
    int ms; /// 0..999
    int weekday; /// 0: not specified, 1..7: Sunday..Saturday
    int tzcorrection = int.min; /// -1200..1200 correction in hours
    /// Parse date out of string s[] and store it in this Date instance.
    /// The work is delegated to a local DateParse instance.
    void parse(string s)
    {
        DateParse dp;
        dp.parse(s, this);
    }
}
// Unit conversion constants for d_time arithmetic.
enum
{
    hoursPerDay    = 24,
    minutesPerHour = 60,
    msPerMinute    = 60_000,
    msPerHour      = 3_600_000,
    msPerDay       = 86_400_000,
    ticksPerMs     = 1,
    ticksPerSecond = 1000,                  /// Will be at least 1000
    ticksPerMinute = 60 * ticksPerSecond,
    ticksPerHour   = 60 * ticksPerMinute,
    ticksPerDay    = 24 * ticksPerHour,
}
// Deprecated capitalized spellings of the tick constants.
deprecated alias TicksPerSecond = ticksPerSecond;
deprecated alias TicksPerMs     = ticksPerMs;
deprecated alias TicksPerMinute = ticksPerMinute;
deprecated alias TicksPerHour   = ticksPerHour;
deprecated alias TicksPerDay    = ticksPerDay;

// the old spelling must resolve to the same value as the new one
deprecated
unittest
{
    assert(TicksPerSecond == ticksPerSecond);
}
// NOTE(review): presumably the local time zone adjustment added to UTC times - confirm against the users of localTZA.
__gshared d_time localTZA = 0;
// Three-letter English day names, Sunday first, packed into one string.
private immutable char[] daystr = "SunMonTueWedThuFriSat";
// Three-letter English month names, January first, packed into one string.
private immutable char[] monstr = "JanFebMarAprMayJunJulAugSepOctNovDec";
// Number of days that precede the first day of each month in a non-leap year.
private immutable int[12] mdays =
    [ 0,31,59,90,120,151,181,212,243,273,304,334 ];
/********************************
 * Compute year and week [1..53] from t. The ISO 8601 week 1 is the first week
 * of the year that includes January 4. Monday is the first day of the week.
 *
 * Params:
 *      t    = the time to convert
 *      year = receives the ISO 8601 year, which differs from the calendar
 *             year for days that belong to the last week of the previous
 *             year or the first week of the next year
 *      week = receives the ISO 8601 week number
 * References:
 *      $(LINK2 http://en.wikipedia.org/wiki/ISO_8601, ISO 8601 (Wikipedia))
 */
void toISO8601YearWeek(d_time t, out int year, out int week)
{
    // Day of the year (0 = January 1, may be negative) of the Monday that
    // starts ISO week 1 of yr.
    static int isoWeek1Start(int yr)
    {
        // Day of week January 4 falls on, with Monday == 0.
        auto w = (dayFromYear(yr) + 3/*Jan4*/ + 3) % 7;
        if (w < 0)
            w += 7;
        return 3/*Jan4*/ - w;
    }

    year = yearFromTime(t);

    immutable today = day(t);                   // days since 1 January 1970
    immutable yday = today - dayFromYear(year);
    immutable ydaybeg = isoWeek1Start(year);

    /* Check if yday is actually in the last week of the previous year.
     * That week is number 52 or 53 depending on the year, so it has to be
     * computed from the start of the previous ISO year; always reporting 53
     * was wrong for e.g. 1 January 2011, which lies in week 52 of 2010.
     */
    if (yday < ydaybeg)
    {
        year -= 1;
        immutable prevStart = dayFromYear(year) + isoWeek1Start(year);
        week = (today - prevStart) / 7 + 1;
        return;
    }

    /* Check if yday is actually the first week of the next year
     */
    if (yday >= 362)                            // possible
    {
        immutable nextStart = dayFromYear(year + 1) + isoWeek1Start(year + 1);
        if (today >= nextStart)
        {
            year += 1;
            week = 1;
            return;
        }
    }

    week = (yday - ydaybeg) / 7 + 1;
}
/* ***********************************
 * Divide time by divisor. Always round down, even if d is negative.
 *
 * Params:
 *      d       = the dividend
 *      divisor = the divisor; callers in this module pass positive values
 * Returns: d / divisor rounded towards negative infinity.
 */
pure d_time floor(d_time d, int divisor)
{
    // Integer division truncates towards zero, so a negative dividend is
    // first lowered by (divisor - 1). The parentheses matter: the previous
    // `d - divisor - 1` subtracted divisor + 1 and turned floor(-4, 4)
    // into -2 instead of -1.
    return (d < 0 ? d - (divisor - 1) : d) / divisor;
}
/* ***********************************
 * Remainder of n divided by d; a negative remainder is moved up by d.
 * The result is asserted to fit into an int.
 */
int dmod(d_time n, d_time d)
{
    immutable d_time rem = n % d;
    immutable d_time result = (rem < 0) ? rem + d : rem;

    assert(cast(int)result == result);
    return cast(int)result;
}
/********************************
 * Calculates the hour from time.
 *
 * Params:
 *      time = The time to compute the hour from.
 * Returns:
 *      The calculated hour, 0..23.
 */
int hourFromTime(d_time time)
{
    immutable wholeHours = floor(time, msPerHour);
    return dmod(wholeHours, hoursPerDay);
}
/********************************
 * Calculates the minute from time.
 *
 * Params:
 *      time = The time to compute the minute from.
 * Returns:
 *      The calculated minute, 0..59.
 */
int minFromTime(d_time time)
{
    immutable wholeMinutes = floor(time, msPerMinute);
    return dmod(wholeMinutes, minutesPerHour);
}
/********************************
 * Calculates the second from time.
 *
 * Params:
 *      time = The time to compute the second from.
 * Returns:
 *      The calculated second, 0..59.
 */
int secFromTime(d_time time)
{
    immutable wholeSeconds = floor(time, ticksPerSecond);
    return dmod(wholeSeconds, 60);
}
/********************************
 * Calculates the millisecond from time.
 *
 * Params:
 *      time = The time to compute the millisecond from.
 * Returns:
 *      The calculated millisecond, 0..999.
 */
int msFromTime(d_time time)
{
    // number of ticks that make up one millisecond
    immutable ticksPerMilli = ticksPerSecond / 1000;
    return dmod(time / ticksPerMilli, 1000);
}
/********************************
 * Returns the part of t that lies within its day: t modulo msPerDay,
 * computed with dmod so a negative remainder is shifted up by msPerDay.
 */
int timeWithinDay(d_time t)
{
return dmod(t, msPerDay);
}
/********************************
 * Returns n unchanged.
 */
d_time toInteger(d_time n)
{
return n;
}
/********************************
 * Calculates the day number that t falls in: t divided by msPerDay,
 * rounded down and truncated to int.
 */
int day(d_time t)
{
    const d_time dayNumber = floor(t, msPerDay);
    return cast(int)dayNumber;
}
/********************************
 * Determines whether y is a leap year in the (proleptic) Gregorian calendar:
 * divisible by 4, except century years that are not divisible by 400.
 *
 * Params:
 *      y = The year. Callers in this module pass signed int years, which
 *          reach this function as wrapped-around uint values when negative.
 * Returns:
 *      true if y is a leap year.
 */
pure bool leapYear(uint y)
{
    // Reinterpret the bits as signed before taking remainders. 2^32 is a
    // multiple of 4 but not of 100 or 400, so doing the century tests on the
    // wrapped unsigned value misclassifies negative years (e.g. -100).
    const int year = cast(int) y;
    if (year % 4 != 0)
        return false;
    if (year % 100 != 0)
        return true;
    return year % 400 == 0;
}
// leapYear: an ordinary year, a year divisible by 4, a century divisible by
// 400, and a century that is not.
unittest {
assert(!leapYear(1970));
assert(leapYear(1984));
assert(leapYear(2000));
assert(!leapYear(2100));
}
/********************************
 * Calculates the number of days in a year.
 *
 * Leap years have 366 days, all other years have 365.
 *
 * Params:
 *      year = The year to compute the number of days from.
 * Returns:
 *      The number of days in the year, 365 or 366.
 */
pure uint daysInYear(uint year)
{
    if (leapYear(year))
        return 366;
    return 365;
}
/********************************
 * Calculates the number of days elapsed from 1 January 1970
 * to 1 January of the given year.
 *
 * Params:
 *      year = The year to compute the number of days from.
 * Returns:
 *      The number of days elapsed.
 *
 * Example:
 * ----------
 * writeln(dayFromYear(1970)); // writes '0'
 * writeln(dayFromYear(1971)); // writes '365'
 * writeln(dayFromYear(1972)); // writes '730'
 * ----------
 */
pure int dayFromYear(int year)
{
    // 365 days for every year, plus one day per multiple of 4, minus one per
    // multiple of 100, plus one per multiple of 400 (counted relative to the
    // offsets 1969, 1901 and 1601).
    const d_time plainDays      = 365 * (year - 1970);
    const d_time fourYearDays   = floor((year - 1969), 4);
    const d_time centuryDays    = floor((year - 1901), 100);
    const d_time fourCenturyDay = floor((year - 1601), 400);
    return cast(int) (plainDays + fourYearDays - centuryDays + fourCenturyDay);
}
/********************************
 * Calculates the d_time of the start of year y: dayFromYear(y) scaled by
 * msPerDay, computed in d_time width.
 */
pure d_time timeFromYear(int y)
{
    const d_time days = dayFromYear(y);
    return days * msPerDay;
}
/*****************************
 * Calculates the year from the d_time t.
 *
 * Params:
 *      t = The time to compute the year from.
 * Returns:
 *      The year containing t, or 0 if t is d_time_nan.
 */
pure int yearFromTime(d_time t)
{
    if (t == d_time_nan)
        return 0;

    // Hazard a guess using integer-only math: an average year is 365.2425
    // days, expressed here as 3652425 * (msPerDay / 10000).
    // The L suffix forces the product to be computed in 64 bits. If msPerDay
    // is a 32-bit constant (its declaration is outside this block) the plain
    // int product would wrap around, making the guess wildly wrong and the
    // correction loops below very long.
    int y = 1970 + cast(int) (t / (3652425L * (msPerDay / 10000)));

    // Correct the guess until timeFromYear(y) <= t < timeFromYear(y + 1).
    if (timeFromYear(y) <= t)
    {
        while (timeFromYear(y + 1) <= t)
            y++;
    }
    else
    {
        do
        {
            y--;
        }
        while (timeFromYear(y) > t);
    }
    return y;
}
/*******************************
 * Determines if d_time t falls in a leap year.
 *
 * A leap year is every 4 years except years ending in 00 that are not
 * divisible by 400.
 *
 * Returns: true if t is in a leap year.
 *
 * References:
 *      $(LINK2 http://en.wikipedia.org/wiki/Leap_year, Wikipedia)
 */
pure bool inLeapYear(d_time t)
{
    const year = yearFromTime(t);
    return leapYear(year);
}
/*****************************
 * Calculates the month from the d_time t.
 *
 * Returns: Integer in the range 0..11, where
 *      0 represents January and 11 represents December.
 */
int monthFromTime(d_time t)
{
    const year = yearFromTime(t);
    // Zero-based day within the year.
    int yday = day(t) - dayFromYear(year);

    // January and the first 28 days of February do not depend on leap years.
    if (yday < 31)
    {
        assert(yday >= 0);
        return 0;
    }
    if (yday < 59)
        return 1;

    // Drop the leap day so the limits below are those of a common year.
    yday -= leapYear(year);
    if (yday < 59)
        return 1;       // 29 February

    // First day-of-year that no longer belongs to March .. December.
    static immutable int[10] monthLimit =
        [90, 120, 151, 181, 212, 243, 273, 304, 334, 365];
    foreach (i, limit; monthLimit)
    {
        if (yday < limit)
            return cast(int) i + 2;
    }
    assert(0);
}
/*******************************
 * Compute which day in a month a d_time t is.
 * Returns:
 *      Integer in the range 1..31
 */
int dateFromTime(d_time t)
{
    const year = yearFromTime(t);
    // Zero-based day within the year.
    const int yday = day(t) - dayFromYear(year);
    const int leap = leapYear(year);
    const int month = monthFromTime(t);

    if (month < 0 || month > 11)
        assert(0);

    // Zero-based day-of-year on which each month starts in a common year.
    static immutable int[12] monthStart =
        [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];

    int date = yday - monthStart[month] + 1;
    // From March onwards a leap year pushes every month one day later.
    if (month >= 2)
        date -= leap;
    return date;
}
/*******************************
 * Compute which day of the week a d_time t is.
 * Returns:
 *      Integer in the range 0..6, where 0 represents Sunday
 *      and 6 represents Saturday.
 */
int weekDay(d_time t)
{
    // Day number 0 maps to weekday 4, hence the offset.
    const int rem = (cast(int)day(t) + 4) % 7;
    // % keeps the sign of the dividend; shift negative results into 0..6.
    return rem < 0 ? rem + 7 : rem;
}
/***********************************
 * Convert from UTC to local time by adding localTZA and the daylight
 * saving adjustment for t. d_time_nan is returned unchanged.
 */
d_time UTCtoLocalTime(d_time t)
{
    if (t == d_time_nan)
        return d_time_nan;
    return t + localTZA + daylightSavingTA(t);
}
/***********************************
 * Convert from local time to UTC by removing localTZA and the daylight
 * saving adjustment. d_time_nan is returned unchanged.
 */
d_time localTimetoUTC(d_time t)
{
    if (t == d_time_nan)
        return d_time_nan;

    /* BUGZILLA 1752 says the daylight saving lookup should be done on t,
     * i.e. the result should be:
     *      t - localTZA - daylightSavingTA(t)
     * The lookup is deliberately left on (t - localTZA) here.
     */
    const d_time withoutZone = t - localTZA;
    return withoutZone - daylightSavingTA(withoutZone);
}
/***********************************
 * Combines a time of day into a single tick count.
 *
 * Params:
 *      hour = hours
 *      min  = minutes
 *      sec  = seconds
 *      ms   = milliseconds
 * Returns:
 *      The sum of the components, each scaled by its ticksPer* constant.
 */
d_time makeTime(d_time hour, d_time min, d_time sec, d_time ms)
{
    d_time ticks = ms * ticksPerMs;
    ticks += sec * ticksPerSecond;
    ticks += min * ticksPerMinute;
    ticks += hour * ticksPerHour;
    return ticks;
}
/* *****************************
 * Computes the day number of a calendar date.
 *
 * Params:
 *      year = year
 *      month = 0..11; values outside that range are carried into the year
 *      date = day of month, 1..31
 * Returns:
 *      number of days since start of epoch, or d_time_nan if the first of
 *      the requested month cannot be represented consistently
 */
d_time makeDay(d_time year, d_time month, d_time date)
{
    // Normalise the month into 0..11, carrying whole years.
    const y = cast(int)(year + floor(month, 12));
    const m = dmod(month, 12);
    const leap = leapYear(y);

    // Time of the first day of month m in year y.
    auto t = timeFromYear(y) + cast(d_time) mdays[m] * msPerDay;
    // The leap day shifts March onwards. This must test the normalised
    // month m: testing the raw month added (or omitted) the extra day for
    // out-of-range months such as 13 or -1, which then failed the sanity
    // check below in leap years.
    if (leap && m >= 2)
        t += msPerDay;

    // Sanity check: t must be the first day of month m of year y.
    if (yearFromTime(t) != y ||
        monthFromTime(t) != m ||
        dateFromTime(t) != 1)
    {
        return d_time_nan;
    }
    return day(t) + date - 1;
}
/***********************************
 * Combines a day number and a time within the day into a d_time.
 *
 * Params:
 *      day  = day number, as returned by makeDay
 *      time = time within the day, as returned by makeTime
 * Returns:
 *      day * ticksPerDay + time, or d_time_nan if either input is
 *      d_time_nan.
 */
d_time makeDate(d_time day, d_time time)
{
    if (day != d_time_nan && time != d_time_nan)
        return day * ticksPerDay + time;
    return d_time_nan;
}
/***********************************
 * Returns toInteger(time); with the toInteger defined in this module that
 * is time unchanged.
 */
d_time timeClip(d_time time)
{
//printf("TimeClip(%g) = %g\n", time, toInteger(time));
return toInteger(time);
}
/***************************************
 * Determine the date in the month, 1..31, of the nth
 * weekday.
 * Params:
 *      year = year
 *      month = month, 1..12
 *      weekday = day of week 0..6 representing Sunday..Saturday
 *      n = nth occurrence of that weekday in the month, 1..5, where
 *          5 also means "the last occurrence in the month"
 * Returns:
 *      the date in the month, 1..31, of the nth weekday
 */
int dateFromNthWeekdayOfMonth(int year, int month, int weekday, int n)
in
{
    assert(1 <= month && month <= 12);
    assert(0 <= weekday && weekday <= 6);
    assert(1 <= n && n <= 5);
}
body
{
    // Day number of the first of the month, and the weekday 0..6 it falls on.
    const firstOfMonth = makeDay(year, month - 1, 1);
    const firstWeekday = weekDay(makeDate(firstOfMonth, 0));

    // Date of the first occurrence of the requested weekday.
    int result = weekday - firstWeekday + 1;
    if (result < 1)
        result += 7;

    // Advance to the nth occurrence.
    result += (n - 1) * 7;

    // A fifth occurrence that does not exist means "the last one":
    // step back one week.
    if (result > 28 && result > daysInMonth(year, month))
    {
        assert(n == 5);
        result -= 7;
    }
    return result;
}
// dateFromNthWeekdayOfMonth: last Sunday (weekday 0, n == 5) of March and
// October in 2003 and 2004.
unittest
{
assert(dateFromNthWeekdayOfMonth(2003, 3, 0, 5) == 30);
assert(dateFromNthWeekdayOfMonth(2003, 10, 0, 5) == 26);
assert(dateFromNthWeekdayOfMonth(2004, 3, 0, 5) == 28);
assert(dateFromNthWeekdayOfMonth(2004, 10, 0, 5) == 31);
}
/**************************************
 * Determine the number of days in a month, 28..31.
 * Params:
 *      year = year, used to decide the length of February
 *      month = 1..12
 * Returns:
 *      The number of days in that month. Any other month value is rejected
 *      through enforce.
 */
int daysInMonth(int year, int month)
{
    switch (month)
    {
        case 2:
            return 28 + leapYear(year);

        case 4:
        case 6:
        case 9:
        case 11:
            return 30;

        case 1:
        case 3:
        case 5:
        case 7:
        case 8:
        case 10:
        case 12:
            return 31;

        default:
            return enforce(false, "Invalid month passed to daysInMonth");
    }
}
// daysInMonth: February in a common year and in a leap year.
unittest
{
assert(daysInMonth(2003, 2) == 28);
assert(daysInMonth(2004, 2) == 29);
}
/*************************************
 * Converts UTC time into a text string of the form:
 * "Www Mmm dd hh:mm:ss GMT+-TZ yyyy".
 * For example, "Tue Apr 02 02:04:57 GMT-0800 1996".
 * If time is invalid, i.e. is d_time_nan,
 * the string "Invalid Date" is returned.
 *
 * The fields are printed in local time (localTZA plus the daylight saving
 * adjustment for time), and that total offset is what is printed after
 * "GMT".
 *
 * Example:
 * ------------------------------------
  d_time lNow;
  string lNowString;

  // Grab the date and time relative to UTC
  lNow = undead.date.getUTCtime();
  // Convert this into the local date and time for display.
  lNowString = undead.date.UTCtoString(lNow);
 * ------------------------------------
 */
string UTCtoString(d_time time)
{
    // Bail out before allocating anything for an invalid time.
    if (time == d_time_nan)
        return "Invalid Date";

    // Years are supposed to be -285616 .. 285616, or 7 digits
    // "Tue Apr 02 02:04:57 GMT-0800 1996"
    auto buffer = new char[29 + 7 + 1];

    auto dst = daylightSavingTA(time);
    auto offset = localTZA + dst;
    auto t = time + offset;

    // Print the offset as a sign followed by its magnitude.
    auto sign = '+';
    if (offset < 0)
    {
        sign = '-';
        offset = -offset;
    }

    auto mn = cast(int)(offset / msPerMinute);
    auto hr = mn / 60;
    mn %= 60;

    // The year is passed as a plain int to match the %d conversion; it was
    // previously cast to long, which does not match %d.
    auto len = sprintf(buffer.ptr,
            "%.3s %.3s %02d %02d:%02d:%02d GMT%c%02d%02d %d",
            &daystr[weekDay(t) * 3],
            &monstr[monthFromTime(t) * 3],
            dateFromTime(t),
            hourFromTime(t), minFromTime(t), secFromTime(t),
            sign, hr, mn,
            yearFromTime(t));

    // Ensure no buggy buffer overflows
    assert(len < buffer.length);
    buffer = buffer[0 .. len];
    return assumeUnique(buffer);
}
/// Deprecated alias for UTCtoString; call UTCtoString directly instead.
deprecated alias UTCtoString toString;
/***********************************
 * Converts t into a text string of the form: "Www, dd Mmm yyyy hh:mm:ss UTC".
 * If t is invalid, i.e. is d_time_nan, "Invalid Date" is returned.
 * No time zone or daylight saving adjustment is applied.
 */
string toUTCString(d_time t)
{
    // Years are supposed to be -285616 .. 285616, or 7 digits
    // "Tue, 02 Apr 1996 02:04:57 GMT"
    auto buf = new char[25 + 7 + 1];

    if (t == d_time_nan)
        return "Invalid Date";

    // Three-letter names are stored back to back in daystr / monstr.
    auto dayName   = &daystr[weekDay(t) * 3];
    auto monthName = &monstr[monthFromTime(t) * 3];

    auto written = sprintf(buf.ptr, "%.3s, %02d %.3s %d %02d:%02d:%02d UTC",
            dayName, dateFromTime(t),
            monthName,
            yearFromTime(t),
            hourFromTime(t), minFromTime(t), secFromTime(t));

    // Ensure no buggy buffer overflows
    assert(written < buf.length);
    return cast(string) buf[0 .. written];
}
/************************************
 * Converts the date portion of time into a text string of the form: "Www Mmm dd
 * yyyy", for example, "Tue Apr 02 1996".
 * If time is invalid, i.e. is d_time_nan, "Invalid Date" is returned.
 * The date is printed in local time (localTZA plus the daylight saving
 * adjustment for time).
 */
string toDateString(d_time time)
{
    // Bail out before allocating anything for an invalid time.
    if (time == d_time_nan)
        return "Invalid Date";

    // Years are supposed to be -285616 .. 285616, or 7 digits
    // "Tue Apr 02 1996"
    // (the buffer is larger than this format needs; size kept as it was)
    auto buffer = new char[29 + 7 + 1];

    auto dst = daylightSavingTA(time);
    auto offset = localTZA + dst;
    auto t = time + offset;

    // The year is passed as a plain int to match the %d conversion; it was
    // previously cast to long, which does not match %d.
    auto len = sprintf(buffer.ptr, "%.3s %.3s %02d %d",
            &daystr[weekDay(t) * 3],
            &monstr[monthFromTime(t) * 3],
            dateFromTime(t),
            yearFromTime(t));

    // Ensure no buggy buffer overflows
    assert(len < buffer.length);
    return cast(string) buffer[0 .. len];
}
/******************************************
 * Converts the time portion of t into a text string of the form: "hh:mm:ss
 * GMT+-TZ", for example, "02:04:57 GMT-0800".
 * If t is invalid, i.e. is d_time_nan, "Invalid Date" is returned.
 * The input must be in UTC, and the output is in local time.
 */
string toTimeString(d_time time)
{
    // "02:04:57 GMT-0800"
    auto buf = new char[17 + 1];

    if (time == d_time_nan)
        return "Invalid Date";

    auto dstAdjust = daylightSavingTA(time);
    auto zoneOffset = localTZA + dstAdjust;
    auto local = time + zoneOffset;

    // Print the offset as a sign followed by its magnitude.
    auto sign = '+';
    if (zoneOffset < 0)
    {
        sign = '-';
        zoneOffset = -(localTZA + dstAdjust);
    }

    auto offsetMinutes = cast(int)(zoneOffset / msPerMinute);
    auto offsetHours = offsetMinutes / 60;
    offsetMinutes %= 60;

    auto written = sprintf(buf.ptr, "%02d:%02d:%02d GMT%c%02d%02d",
            hourFromTime(local), minFromTime(local), secFromTime(local),
            sign, offsetHours, offsetMinutes);

    // Ensure no buggy buffer overflows
    assert(written < buf.length);

    // Lop off terminating 0
    return cast(string) buf[0 .. written];
}
/******************************************
 * Parses s as a textual date string, and returns it as a d_time. If
 * the string is not a valid date, $(D d_time_nan) is returned.
 *
 * Params:
 *      s = The date string to parse.
 * Returns:
 *      The parsed time. A time zone correction found in the string is
 *      added to the time; without one the string is taken as UTC.
 */
d_time parse(string s)
{
    try
    {
        Date dp;
        dp.parse(s);
        auto time = makeTime(dp.hour, dp.minute, dp.second, dp.ms);
        // Assume UTC if no tzcorrection is set (runnable/testdate).
        if (dp.tzcorrection != int.min)
        {
            // tzcorrection is a decimal hhmm value: hundreds are hours,
            // the remainder is minutes.
            time += cast(d_time)(dp.tzcorrection / 100) * msPerHour +
                    cast(d_time)(dp.tzcorrection % 100) * msPerMinute;
        }
        auto day = makeDay(dp.year, dp.month - 1, dp.day);
        auto result = makeDate(day,time);
        return timeClip(result);
    }
    catch (DateParseError e)
    {
        // DateParseError derives from Error, not Exception, so the handler
        // below never sees it; without this clause an invalid string would
        // propagate the error instead of yielding d_time_nan.
        return d_time_nan;      // erroneous date string
    }
    catch (Exception e)
    {
        return d_time_nan;      // erroneous date string
    }
}
/******************************************
 * Module initialisation hook with C linkage: caches the result of
 * getLocalTZA() in localTZA.
 */
extern(C) void std_date_static_this()
{
localTZA = getLocalTZA();
}
version (Windows)
{
private import core.sys.windows.windows;
//import c.time;
/******
 * Get current UTC time.
 *
 * Reads the system clock with GetSystemTime and converts the resulting
 * SYSTEMTIME with SYSTEMTIME2d_time.
 */
d_time getUTCtime()
{
    SYSTEMTIME now;
    GetSystemTime(&now);        // get time in UTC
    const d_time utc = SYSTEMTIME2d_time(&now, 0);
    return utc;
}
/******
 * Converts a Windows FILETIME to d_time.
 *
 * Returns: the converted time, or d_time_nan if FileTimeToSystemTime
 * rejects the value.
 */
static d_time FILETIME2d_time(const FILETIME *ft)
{
    SYSTEMTIME converted = void;    // filled in by FileTimeToSystemTime
    if (FileTimeToSystemTime(ft, &converted))
        return SYSTEMTIME2d_time(&converted, 0);
    return d_time_nan;
}
/******
 * Converts a d_time to a Windows FILETIME.
 *
 * The tick count is rebased from 1970 to 1601, scaled from ticks to units
 * of 1/10_000_000 second, and split into the two 32-bit FILETIME halves.
 */
FILETIME d_time2FILETIME(d_time dt)
{
    // The scale factor below must be a whole number of at least 1.
    static assert(10_000_000 >= ticksPerSecond);
    static assert(10_000_000 % ticksPerSecond == 0);

    enum ulong ticksFrom1601To1970 = 11_644_473_600UL * ticksPerSecond;
    enum fileTimeUnitsPerTick = 10_000_000 / ticksPerSecond;

    const ulong fileTime = (dt + ticksFrom1601To1970) * fileTimeUnitsPerTick;

    FILETIME ft = void;             // both fields are assigned below
    ft.dwLowDateTime = cast(uint) (fileTime & uint.max);
    ft.dwHighDateTime = cast(uint) (fileTime >> 32);
    return ft;
}
// Round trip: the current time converted to FILETIME and back must be
// unchanged.
unittest
{
auto dt = getUTCtime();
auto ft = d_time2FILETIME(dt);
auto dt1 = FILETIME2d_time(&ft);
assert(dt == dt1, text(dt, " != ", dt1));
}
/******
 * Converts a Windows SYSTEMTIME to d_time.
 *
 * Params:
 *      st = The time to convert. A zero wYear marks a relative
 *           "nth weekday of the month" date, as used in
 *           TIME_ZONE_INFORMATION.
 *      t  = For relative dates only: a time whose year is used to resolve
 *           the date. Ignored when st.wYear is non-zero.
 * Returns:
 *      The corresponding d_time.
 */
static d_time SYSTEMTIME2d_time(const SYSTEMTIME *st, d_time t)
{
    /* More info: http://delphicikk.atw.hu/listaz.php?id=2667&oldal=52
     */
    d_time day = void;
    d_time time = void;

    if (st.wYear)
    {
        time = makeTime(st.wHour, st.wMinute, st.wSecond, st.wMilliseconds);
        day = makeDay(st.wYear, st.wMonth - 1, st.wDay);
    }
    else
    {   /* wYear being 0 is a flag to indicate relative time:
         * wMonth is the month 1..12
         * wDayOfWeek is weekday 0..6 corresponding to Sunday..Saturday
         * wDay is the nth time, 1..5, that wDayOfWeek occurs
         */
        auto year = yearFromTime(t);
        // dateFromNthWeekdayOfMonth takes (year, month, weekday, n), so
        // wDayOfWeek must come before wDay. They used to be passed the
        // other way round.
        auto mday = dateFromNthWeekdayOfMonth(year,
                st.wMonth, st.wDayOfWeek, st.wDay);
        day = makeDay(year, st.wMonth - 1, mday);
        time = makeTime(st.wHour, st.wMinute, 0, 0);
    }
    auto n = makeDate(day,time);
    return timeClip(n);
}
/******
 * Get the local time zone adjustment: the negated bias reported by
 * GetTimeZoneInformation, converted from minutes to ticks.
 *
 * Returns: the adjustment, or 0 if GetTimeZoneInformation returns a value
 * other than the three TIME_ZONE_ID_* codes handled here.
 */
d_time getLocalTZA()
{
    TIME_ZONE_INFORMATION tzi = void;

    /* http://msdn.microsoft.com/library/en-us/sysinfo/base/gettimezoneinformation.asp
     * http://msdn2.microsoft.com/en-us/library/ms725481.aspx
     */
    const status = GetTimeZoneInformation(&tzi);

    // The bias fields are in minutes.
    const d_time ticksPerBiasMinute = cast(d_time)(60 * ticksPerSecond);

    if (status == TIME_ZONE_ID_STANDARD)
        return -(tzi.Bias + tzi.StandardBias) * ticksPerBiasMinute;

    // While daylight saving is active only the base bias is used here, the
    // same as for zones without daylight saving.
    if (status == TIME_ZONE_ID_DAYLIGHT || status == TIME_ZONE_ID_UNKNOWN)
        return -(tzi.Bias) * ticksPerBiasMinute;

    return 0;
}
/*
 * Get daylight savings time adjust for time dt.
 *
 * Params:
 *      dt = The time to look up; its year selects the transition dates.
 * Returns:
 *      The negated DaylightBias converted to ticks while daylight saving
 *      is in effect at dt, otherwise 0. 0 is also returned when the zone
 *      has no daylight saving rules or the time zone query fails.
 */
int daylightSavingTA(d_time dt)
{
    TIME_ZONE_INFORMATION tzi = void;

    /* http://msdn.microsoft.com/library/en-us/sysinfo/base/gettimezoneinformation.asp
     */
    const r = GetTimeZoneInformation(&tzi);

    // TIME_ZONE_ID_UNKNOWN: daylight saving is not used in this zone.
    // Anything else that is not STANDARD/DAYLIGHT means the call failed and
    // tzi is unusable; report "no adjustment", like getLocalTZA, instead of
    // aborting the program.
    if (r != TIME_ZONE_ID_STANDARD && r != TIME_ZONE_ID_DAYLIGHT)
        return 0;

    // A zero month means no transition date is defined.
    if (tzi.StandardDate.wMonth == 0 ||
        tzi.DaylightDate.wMonth == 0)
        return 0;

    const d_time ts = SYSTEMTIME2d_time(&tzi.StandardDate, dt);   // DST ends
    const d_time td = SYSTEMTIME2d_time(&tzi.DaylightDate, dt);   // DST begins
    if (ts == d_time_nan || td == d_time_nan)
        return 0;

    bool inDst;
    if (td <= ts)
    {
        // DST starts and ends within the same calendar year.
        inDst = td <= dt && dt < ts;
    }
    else
    {
        // DST spans the turn of the year (it ends early in the year and
        // starts again late in the year), so it is active outside ts .. td.
        inDst = !(ts <= dt && dt < td);
    }

    return inDst ? -tzi.DaylightBias * (60 * ticksPerSecond) : 0;
}
}
version (Posix)
{
private import core.sys.posix.time;
private import core.sys.posix.sys.time;
/******
 * Get current UTC time.
 *
 * Uses gettimeofday for sub-second resolution and falls back to time()
 * if that call reports an error.
 */
d_time getUTCtime()
{
    timeval tv;

    if (gettimeofday(&tv, null))
    {   // Some error happened - try time() instead.
        // Widen before multiplying: where time_t is 32 bits wide the
        // product would otherwise overflow.
        return cast(d_time) time(null) * ticksPerSecond;
    }

    return tv.tv_sec * cast(d_time)ticksPerSecond +
            (tv.tv_usec / (1000000 / cast(d_time)ticksPerSecond));
}
/******
 * Get the local time zone adjustment in ticks.
 *
 * On OSX and FreeBSD the offset is read from tm_gmtoff of the current
 * local time; elsewhere it is the negated C timezone variable.
 */
d_time getLocalTZA()
{
    time_t now;
    time(&now);

    version (OSX)
    {
        tm local;
        localtime_r(&now, &local);
        return local.tm_gmtoff * ticksPerSecond;
    }
    else version (FreeBSD)
    {
        tm local;
        localtime_r(&now, &local);
        return local.tm_gmtoff * ticksPerSecond;
    }
    else
    {
        // Called only for its side effect: this will set timezone.
        localtime(&now);
        return -(timezone * ticksPerSecond);
    }
}
/*
 * Get daylight savings time adjust for time dt.
 *
 * Params:
 *      dt = The time to look up.
 * Returns:
 *      ticksPerHour if daylight saving is in effect at dt, otherwise 0.
 *      0 is also returned for d_time_nan and when the system cannot
 *      convert the time.
 */
int daylightSavingTA(d_time dt)
{
    if (dt == d_time_nan)
        return 0;

    const d_time seconds = dt / ticksPerSecond;
    time_t t = cast(time_t) seconds;

    if (t == seconds)   // if in range
    {
        // localtime_r writes into our own buffer instead of the static one
        // shared by all callers of localtime, and its result is checked
        // before use instead of being dereferenced blindly.
        tm local;
        if (localtime_r(&t, &local) is null)
            return 0;
        // BUG: Assume daylight savings time is plus one hour.
        return local.tm_isdst > 0 ? ticksPerHour : 0;
    }

    // Out of range for system time, use our own calculation.
    /* BUG: this works for the US, but not other timezones.
     */
    // NOTE(review): UTCtoLocalTime adds localTZA to get local time, yet it
    // is subtracted here - confirm the intended sign.
    dt -= localTZA;

    int year = yearFromTime(dt);

    /* Compute time given year, month 1..12,
     * week in month, weekday, hour
     */
    d_time dstt(int year, int month, int week, int weekday, int hour)
    {
        auto mday = dateFromNthWeekdayOfMonth(year,  month, weekday, week);
        return timeClip(makeDate(
                makeDay(year, month - 1, mday),
                makeTime(hour, 0, 0, 0)));
    }

    d_time start;
    d_time end;
    if (year < 2007)
    {   // Daylight savings time goes from 2 AM the first Sunday
        // in April through 2 AM the last Sunday in October
        start = dstt(year,  4, 1, 0, 2);
        end   = dstt(year, 10, 5, 0, 2);
    }
    else
    {
        // the second Sunday of March to
        // the first Sunday in November
        start = dstt(year,  3, 2, 0, 2);
        end   = dstt(year, 11, 1, 0, 2);
    }

    return (start <= dt && dt < end) ? ticksPerHour : 0;
}
}
/+ DOS File Time +/
/***
 * Type representing the DOS file date/time format.
 *
 * As unpacked by toDtime: bits 25..31 hold the year minus 1980, bits 21..24
 * the month, bits 16..20 the day of month, bits 11..15 the hour, bits 5..10
 * the minute and bits 0..4 the second divided by 2.
 */
alias uint DosFileTime;
/************************************
 * Convert from DOS file date/time to d_time.
 *
 * The DOS stamp is taken to be local time; the result has localTZA and the
 * daylight saving adjustment removed.
 *
 * Params:
 *      time = The packed DOS date/time.
 * Returns:
 *      The corresponding d_time, or d_time_nan if time is 0 or does not
 *      encode a valid calendar date and time of day.
 */
d_time toDtime(DosFileTime time)
{
    uint dt = cast(uint)time;

    if (dt == 0)
        return d_time_nan;

    int year = ((dt >> 25) & 0x7F) + 1980;
    int month = ((dt >> 21) & 0x0F) - 1;        // 0..11 when valid
    int dayofmonth = ((dt >> 16) & 0x1F);       // 1..31 when valid
    int hour = (dt >> 11) & 0x1F;               // 0..23 when valid
    int minute = (dt >> 5) & 0x3F;              // 0..59 when valid
    int second = (dt << 1) & 0x3E;              // 0..58 (in 2 second increments)

    d_time t;

    t = undead.date.makeDate(undead.date.makeDay(year, month, dayofmonth),
            undead.date.makeTime(hour, minute, second, 0));

    // The bit fields can hold out-of-range values (month 0, day 0, hour 25,
    // 31 April, ...). Those used to trip asserts in debug builds and to
    // produce a meaningless time in release builds; report them as invalid
    // instead. The fields must survive the round trip unchanged.
    if (t == d_time_nan ||
        yearFromTime(t) != year ||
        monthFromTime(t) != month ||
        dateFromTime(t) != dayofmonth ||
        hourFromTime(t) != hour ||
        minFromTime(t) != minute ||
        secFromTime(t) != second)
    {
        return d_time_nan;
    }

    t -= localTZA + daylightSavingTA(t);

    return t;
}
/****************************************
 * Convert from d_time to DOS file date/time.
 *
 * localTZA and the daylight saving adjustment are added to t before the
 * fields are extracted and packed.
 *
 * Returns: the packed value, or 0 if t is d_time_nan.
 */
DosFileTime toDosFileTime(d_time t)
{
    if (t == d_time_nan)
        return cast(DosFileTime)0;

    t += localTZA + daylightSavingTA(t);

    const uint year = yearFromTime(t);
    const uint month = monthFromTime(t);
    const uint dayofmonth = dateFromTime(t);
    const uint hour = hourFromTime(t);
    const uint minute = minFromTime(t);
    const uint second = secFromTime(t);

    // Pack the fields from the most significant bits downwards.
    uint packed = (year - 1980) << 25;
    packed |= ((month + 1) & 0x0F) << 21;   // stored month is 1-based
    packed |= (dayofmonth & 0x1F) << 16;
    packed |= (hour & 0x1F) << 11;
    packed |= (minute & 0x3F) << 5;
    packed |= (second >> 1) & 0x1F;         // 2 second resolution

    return cast(DosFileTime)packed;
}
/**
Benchmarks code for speed assessment and comparison.

Params:

fun = aliases of callable objects (e.g. function names). Each should
take no arguments.

times = The number of times each function is to be executed.

result = The optional store for the return value. If $(D null) is
passed in, new store is allocated appropriately. A store with room for at
least one element per function is filled in place.

Returns:

An array of $(D n) $(D ulong)s. Element at slot $(D i) contains the
number of milliseconds spent in calling the $(D i)th function $(D
times) times.

Example:
----
int a;
void f0() { }
void f1() { auto b = a; }
void f2() { auto b = to!(string)(a); }
auto r = benchmark!(f0, f1, f2)(10_000_000);
----
 */
ulong[] benchmark(fun...)(uint times, ulong[] result = null)
{
    // One slot per function. Writing by index (instead of truncating the
    // array to length 0 and appending) is what lets a caller-supplied store
    // actually be used.
    result.length = fun.length;
    foreach (i, Unused; fun)
    {
        immutable start = getUTCtime();
        foreach (j; 0 .. times)
        {
            fun[i]();
        }
        // Keep the full width of the elapsed tick count; it used to be
        // truncated to uint before being stored.
        immutable ulong delta = getUTCtime() - start;
        // Convert ticks to milliseconds.
        result[i] = delta * 1000 / ticksPerSecond;
    }
    return result;
}
// Smoke test: benchmark must instantiate and run with two trivial
// functions; the timings themselves are not checked.
unittest
{
int a;
void f0() { }
//void f1() { auto b = to!(string)(a); }
void f2() { auto b = (a); }
auto r = benchmark!(f0, f2)(100);
//writeln(r);
}
undeaD-1.0.10/src/undead/datebase.d 0000664 0000000 0000000 00000001356 13463741136 0016752 0 ustar 00root root 0000000 0000000 // Written in the D programming language.
/**
* The only purpose of this module is to do the static construction for
* std.date, to eliminate cyclic construction errors.
*
* Copyright: Copyright Digital Mars 2000 - 2009.
* License: Boost License 1.0.
* Authors: $(WEB digitalmars.com, Walter Bright)
* Source: $(PHOBOSSRC std/_datebase.d)
*/
/*
* Copyright Digital Mars 2000 - 2009.
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
module undead.datebase;
// Declared by C linkage name rather than imported, so this module carries
// no import of the date module (see the module comment about cyclic
// construction errors).
extern(C) void std_date_static_this();
// Runs the date module's initialisation hook from this module's shared
// static constructor.
shared static this()
{
std_date_static_this();
}
undeaD-1.0.10/src/undead/dateparse.d 0000664 0000000 0000000 00000057617 13463741136 0017165 0 ustar 00root root 0000000 0000000 // Written in the D programming language.
/**
* $(RED Deprecated. It will be removed in February 2012.
* Please use std.datetime instead.)
*
* dateparse module.
*
* Copyright: Copyright Digital Mars 2000 - 2009.
* License: Boost License 1.0.
* Authors: $(WEB digitalmars.com, Walter Bright)
* Source: $(PHOBOSSRC std/_dateparse.d)
*/
/*
* Copyright Digital Mars 2000 - 2009.
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
module undead.dateparse;
private
{
import std.algorithm, std.string;
import core.stdc.stdlib;
import undead.date;
}
//deprecated:
//debug=dateparse;
/**
 * Thrown by DateParse.parse when a string cannot be parsed as a date.
 * The message is "Invalid date string: " followed by the offending string.
 *
 * Note that this derives from Error, not Exception, so a
 * catch (Exception) clause does not catch it.
 */
class DateParseError : Error
{
this(string s)
{
super("Invalid date string: " ~ s);
}
}
struct DateParse
{
/**
 * Parses the date string s and stores the resulting fields in date.
 *
 * Params:
 *      s = The text to parse.
 *      date = Receives year, month, day, hour, minute, second, ms, weekday
 *             and tzcorrection.
 * Throws:
 *      DateParseError if s cannot be tokenised, if no year was found, or if
 *      a field is outside the ranges checked below.
 */
void parse(string s, out Date date)
{
// Start from a clean parser state on every call.
this = DateParse.init;
//version (Win32)
// Scratch space for the lower-cased word being classified. It comes from
// alloca, so it lives on this function's stack frame.
buffer = (cast(char *)alloca(s.length))[0 .. s.length];
//else
//buffer = new char[s.length];
debug(dateparse) printf("DateParse.parse('%.*s')\n", s);
if (!parseString(s))
{
goto Lerror;
}
/+
if (year == year.init)
year = 0;
else
+/
debug(dateparse)
printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
year, month, day,
hours, minutes, seconds, ms,
weekday, tzcorrection);
// Range checks. year == year.init means no year was found; a
// tzcorrection, if present, must lie in -2300..2300 and be a multiple of 10.
if (
year == year.init ||
(month < 1 || month > 12) ||
(day < 1 || day > 31) ||
(hours < 0 || hours > 23) ||
(minutes < 0 || minutes > 59) ||
(seconds < 0 || seconds > 59) ||
(tzcorrection != int.min &&
((tzcorrection < -2300 || tzcorrection > 2300) ||
(tzcorrection % 10)))
)
{
// Shared error exit; also reached by the gotos above and below.
Lerror:
throw new DateParseError(s);
}
// Convert a 12-hour clock reading to 24-hour form.
if (ampm)
{ if (hours > 12)
goto Lerror;
if (hours < 12)
{
if (ampm == 2) // if P.M.
hours += 12;
}
else if (ampm == 1) // if 12am
{
hours = 0; // which is midnight
}
}
// if (tzcorrection != tzcorrection.init)
// tzcorrection /= 100;
// Two-digit years are taken to be in the 1900s.
if (year >= 0 && year <= 99)
year += 1900;
date.year = year;
date.month = month;
date.day = day;
date.hour = hours;
date.minute = minutes;
date.second = seconds;
date.ms = ms;
date.weekday = weekday;
date.tzcorrection = tzcorrection;
}
private:
// Fields collected while parsing; parse() copies them into the Date.
int year = int.min; // our "nan" Date value
int month; // 1..12
int day; // 1..31
int hours; // 0..23
int minutes; // 0..59
int seconds; // 0..59
int ms; // 0..999; not assigned by any of the parsing methods in this struct
int weekday; // 1..7 for Sunday..Saturday (see classify); 0: not specified
int ampm; // 0: not specified
// 1: AM
// 2: PM
int tzcorrection = int.min; // int.min: not specified; otherwise an offset
// written as hours * 100 (e.g. 800), accepted
// by parse() in the range -2300..2300
// Tokeniser state.
string s; // the text being parsed
int si; // index into s of the next unread character
int number; // value of the last number token, or of the last classified word
char[] buffer; // scratch space for the lower-cased word being classified
// Token kinds returned by nextToken() and classify().
enum DP : byte
{
err,
weekday,
month,
number,
end,
colon,
minus,
slash,
ampm,
plus,
tz,
dst,
dsttz,
}
/**
 * Reads the next token from s starting at si and advances si past it.
 *
 * Blanks, newlines, tabs, commas and parenthesised comments are skipped;
 * '.' is skipped too unless version DATE_DOT_DELIM makes it a slash token.
 * A run of digits yields DP.number with its value in number. A run of
 * letters is lower-cased into buffer (embedded '.'s are dropped) and
 * looked up with classify(), which sets number as well.
 *
 * Returns: the kind of token read; DP.end at the end of the string; DP.err
 * for an unterminated comment or an unrecognised word or character.
 */
DP nextToken()
{ int nest;
uint c;
int bi;
DP result = DP.err;
//printf("DateParse::nextToken()\n");
for (;;)
{
assert(si <= s.length);
if (si == s.length)
{ result = DP.end;
goto Lret;
}
//printf("\ts[%d] = '%c'\n", si, s[si]);
switch (s[si])
{
// Single-character tokens: consume the character and return.
case ':': result = DP.colon; goto ret_inc;
case '+': result = DP.plus; goto ret_inc;
case '-': result = DP.minus; goto ret_inc;
case '/': result = DP.slash; goto ret_inc;
case '.':
version(DATE_DOT_DELIM)
{
result = DP.slash;
goto ret_inc;
}
else
{
si++;
break;
}
ret_inc:
si++;
goto Lret;
// Separators are skipped.
case ' ':
case '\n':
case '\r':
case '\t':
case ',':
si++;
break;
case '(': // comment
// Comments may nest; skip to the matching ')'.
nest = 1;
for (;;)
{
si++;
if (si == s.length)
goto Lret; // error
switch (s[si])
{
case '(':
nest++;
break;
case ')':
if (--nest == 0)
goto Lendofcomment;
break;
default:
break;
}
}
Lendofcomment:
si++;
break;
default:
// First try to read a decimal number.
number = 0;
for (;;)
{
if (si == s.length)
// c cannot be undefined here
break;
c = s[si];
if (!(c >= '0' && c <= '9'))
break;
result = DP.number;
number = number * 10 + (c - '0');
si++;
}
if (result == DP.number)
goto Lret;
// Not a number: collect a word. c still holds the first non-digit
// character read above.
bi = 0;
bufloop:
while (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z')
{
if (c < 'a') // if upper case
c += cast(uint)'a' - cast(uint)'A'; // to lower case
buffer[bi] = cast(char)c;
bi++;
do
{
si++;
if (si == s.length)
break bufloop;
c = s[si];
} while (c == '.'); // ignore embedded '.'s
}
// An empty word (c was neither digit nor letter) classifies as DP.err.
result = classify(buffer[0 .. bi].idup);
goto Lret;
}
}
Lret:
//printf("-DateParse::nextToken()\n");
return result;
}
/**
 * Looks up a lower-cased word in the table of known date words.
 *
 * Params:
 *      buf = The word to look up.
 * Returns:
 *      The token kind of the word, with its associated value stored in
 *      number; DP.err (and number untouched) if the word is not known.
 */
DP classify(string buf)
{
    // One table row: the word, its token kind, and the value for number.
    struct DateID
    {
        string name;
        DP tok;
        short value;
    }

    static immutable DateID[] dateidtab =
    [
        // Month names, 1..12
        {   "january",  DP.month,       1},
        {   "february", DP.month,       2},
        {   "march",    DP.month,       3},
        {   "april",    DP.month,       4},
        {   "may",      DP.month,       5},
        {   "june",     DP.month,       6},
        {   "july",     DP.month,       7},
        {   "august",   DP.month,       8},
        {   "september",        DP.month,       9},
        {   "october",  DP.month,       10},
        {   "november", DP.month,       11},
        {   "december", DP.month,       12},
        {   "jan",      DP.month,       1},
        {   "feb",      DP.month,       2},
        {   "mar",      DP.month,       3},
        {   "apr",      DP.month,       4},
        {   "jun",      DP.month,       6},
        {   "jul",      DP.month,       7},
        {   "aug",      DP.month,       8},
        {   "sep",      DP.month,       9},
        {   "sept",     DP.month,       9},
        {   "oct",      DP.month,       10},
        {   "nov",      DP.month,       11},
        {   "dec",      DP.month,       12},

        // Weekday names, 1..7 for Sunday..Saturday
        {   "sunday",   DP.weekday,     1},
        {   "monday",   DP.weekday,     2},
        {   "tuesday",  DP.weekday,     3},
        {   "tues",     DP.weekday,     3},
        {   "wednesday",        DP.weekday,     4},
        {   "wednes",   DP.weekday,     4},
        {   "thursday", DP.weekday,     5},
        {   "thur",     DP.weekday,     5},
        {   "thurs",    DP.weekday,     5},
        {   "friday",   DP.weekday,     6},
        {   "saturday", DP.weekday,     7},
        {   "sun",      DP.weekday,     1},
        {   "mon",      DP.weekday,     2},
        {   "tue",      DP.weekday,     3},
        {   "wed",      DP.weekday,     4},
        {   "thu",      DP.weekday,     5},
        {   "fri",      DP.weekday,     6},
        {   "sat",      DP.weekday,     7},

        // 12-hour clock markers
        {   "am",       DP.ampm,                1},
        {   "pm",       DP.ampm,                2},

        // Time zone names with their corrections
        {   "gmt",      DP.tz,          +000},
        {   "ut",       DP.tz,          +000},
        {   "utc",      DP.tz,          +000},
        {   "wet",      DP.tz,          +000},
        {   "z",        DP.tz,          +000},
        {   "wat",      DP.tz,          +100},
        {   "a",        DP.tz,          +100},
        {   "at",       DP.tz,          +200},
        {   "b",        DP.tz,          +200},
        {   "c",        DP.tz,          +300},
        {   "ast",      DP.tz,          +400},
        {   "d",        DP.tz,          +400},
        {   "est",      DP.tz,          +500},
        {   "e",        DP.tz,          +500},
        {   "cst",      DP.tz,          +600},
        {   "f",        DP.tz,          +600},
        {   "mst",      DP.tz,          +700},
        {   "g",        DP.tz,          +700},
        {   "pst",      DP.tz,          +800},
        {   "h",        DP.tz,          +800},
        {   "yst",      DP.tz,          +900},
        {   "i",        DP.tz,          +900},
        {   "ahst",     DP.tz,          +1000},
        {   "cat",      DP.tz,          +1000},
        {   "hst",      DP.tz,          +1000},
        {   "k",        DP.tz,          +1000},
        {   "nt",       DP.tz,          +1100},
        {   "l",        DP.tz,          +1100},
        {   "idlw",     DP.tz,          +1200},
        {   "m",        DP.tz,          +1200},
        {   "cet",      DP.tz,          -100},
        {   "fwt",      DP.tz,          -100},
        {   "met",      DP.tz,          -100},
        {   "mewt",     DP.tz,          -100},
        {   "swt",      DP.tz,          -100},
        {   "n",        DP.tz,          -100},
        {   "eet",      DP.tz,          -200},
        {   "o",        DP.tz,          -200},
        {   "bt",       DP.tz,          -300},
        {   "p",        DP.tz,          -300},
        {   "zp4",      DP.tz,          -400},
        {   "q",        DP.tz,          -400},
        {   "zp5",      DP.tz,          -500},
        {   "r",        DP.tz,          -500},
        {   "zp6",      DP.tz,          -600},
        {   "s",        DP.tz,          -600},
        {   "wast",     DP.tz,          -700},
        {   "t",        DP.tz,          -700},
        {   "cct",      DP.tz,          -800},
        {   "u",        DP.tz,          -800},
        {   "jst",      DP.tz,          -900},
        {   "v",        DP.tz,          -900},
        {   "east",     DP.tz,          -1000},
        {   "gst",      DP.tz,          -1000},
        {   "w",        DP.tz,          -1000},
        {   "x",        DP.tz,          -1100},
        {   "idle",     DP.tz,          -1200},
        {   "nzst",     DP.tz,          -1200},
        {   "nzt",      DP.tz,          -1200},
        {   "y",        DP.tz,          -1200},

        // Daylight saving time zone names
        {   "bst",      DP.dsttz,       000},
        {   "adt",      DP.dsttz,       +400},
        {   "edt",      DP.dsttz,       +500},
        {   "cdt",      DP.dsttz,       +600},
        {   "mdt",      DP.dsttz,       +700},
        {   "pdt",      DP.dsttz,       +800},
        {   "ydt",      DP.dsttz,       +900},
        {   "hdt",      DP.dsttz,       +1000},
        {   "mest",     DP.dsttz,       -100},
        {   "mesz",     DP.dsttz,       -100},
        {   "sst",      DP.dsttz,       -100},
        {   "fst",      DP.dsttz,       -100},
        {   "wadt",     DP.dsttz,       -700},
        {   "eadt",     DP.dsttz,       -1000},
        {   "nzdt",     DP.dsttz,       -1200},

        // Generic daylight saving marker following a zone name
        {   "dst",      DP.dst,         0},
    ];

    // Linear search; the first matching row wins.
    foreach (ref entry; dateidtab)
    {
        if (cmp(entry.name, buf) == 0)
        {
            number = entry.value;
            return entry.tok;
        }
    }
    return DP.err;
}
/**
 * Top-level parsing loop: tokenises s and fills in the date fields of this
 * struct, delegating to parseCalendarDate and parseTimeOfDay.
 *
 * Returns: 1 if the whole string was consumed without an error token,
 * 0 otherwise. Range checking of the fields is left to parse().
 */
int parseString(string s)
{
int n1;
int dp;
int sisave;
int result;
//message(DTEXT("DateParse::parseString('%ls')\n"), s);
this.s = s;
si = 0;
dp = nextToken();
// Each iteration handles the token in dp. Cases ending in 'continue' have
// already fetched the following token; cases ending in 'break' fall to the
// nextToken() call at the bottom of the loop.
for (;;)
{
//message(DTEXT("\tdp = %d\n"), dp);
switch (dp)
{
case DP.end:
result = 1;
Lret:
return result;
case DP.err:
case_error:
//message(DTEXT("\terror\n"));
default:
result = 0;
goto Lret;
case DP.minus:
break; // ignore spurious '-'
case DP.weekday:
weekday = number;
break;
case DP.month: // month day, [year]
month = number;
dp = nextToken();
if (dp == DP.number)
{
day = number;
// Remember where the next number starts in case it has to be re-read.
sisave = si;
dp = nextToken();
if (dp == DP.number)
{
n1 = number;
dp = nextToken();
if (dp == DP.colon)
{ // back up, not a year
si = sisave;
}
else
{ year = n1;
continue;
}
break;
}
}
continue;
case DP.number:
// The meaning of a number depends on the token that follows it.
n1 = number;
dp = nextToken();
switch (dp)
{
case DP.end:
year = n1;
break;
case DP.minus:
case DP.slash: // n1/ ? ? ?
dp = parseCalendarDate(n1);
if (dp == DP.err)
goto case_error;
break;
case DP.colon: // hh:mm [:ss] [am | pm]
dp = parseTimeOfDay(n1);
if (dp == DP.err)
goto case_error;
break;
case DP.ampm:
hours = n1;
minutes = 0;
seconds = 0;
ampm = number;
break;
case DP.month:
day = n1;
month = number;
dp = nextToken();
if (dp == DP.number)
{ // day month year
year = number;
dp = nextToken();
}
break;
default:
year = n1;
break;
}
continue;
}
dp = nextToken();
}
// @@@ bug in the compiler: this is never reachable
assert(0);
}
/**
 * Parses the rest of a calendar date once a number followed by '-' or '/'
 * has been read.
 *
 * Accepted forms: day/monthname, optionally followed by a year with or
 * without a separator; n1/n2 taken as month/day; and n1/n2/n3 taken as
 * year/month/day or month/day/year, whichever (and only one) fits.
 *
 * Params:
 *      n1 = The number read before the separator.
 * Returns:
 *      The token following the date, or DP.err if the date is malformed
 *      or ambiguous.
 */
int parseCalendarDate(int n1)
{
    debug(dateparse) printf("DateParse.parseCalendarDate(%d)\n", n1);

    int tok = nextToken();
    if (tok == DP.month)        // day/month
    {
        day = n1;
        month = number;
        tok = nextToken();
        if (tok == DP.number)
        {   // day/month year
            year = number;
            return nextToken();
        }
        if (tok == DP.minus || tok == DP.slash)
        {   // day/month/year
            if (nextToken() != DP.number)
                return DP.err;
            year = number;
            return nextToken();
        }
        return tok;
    }

    if (tok != DP.number)
        return DP.err;
    const int n2 = number;

    tok = nextToken();
    if (tok != DP.minus && tok != DP.slash)
    {   // must be month/day
        month = n1;
        day = n2;
        return tok;
    }

    if (nextToken() != DP.number)
        return DP.err;
    const int n3 = number;
    tok = nextToken();

    // Decide between the two orders; exactly one of them must fit.
    const bool yearMonthDay = (n1 > 12 ||
                               (n2 >= 1 && n2 <= 12) &&
                               (n3 >= 1 && n3 <= 31));
    const bool monthDayYear = ((n1 >= 1 && n1 <= 12) &&
                               (n2 >= 1 && n2 <= 31) ||
                               n3 > 31);
    if (yearMonthDay == monthDayYear)
        return DP.err;

    if (yearMonthDay)
    {
        year = n1;
        month = n2;
        day = n3;
    }
    else
    {
        month = n1;
        day = n2;
        year = n3;
    }
    return tok;
}
/**
 * Parses the rest of a time of day once a number followed by ':' has been
 * read: minutes, optional ":seconds", then optionally am/pm, a signed
 * numeric offset, or a time zone name.
 *
 * Params:
 *      n1 = The hours, i.e. the number read before the ':'.
 * Returns:
 *      The token following the time, or DP.err if a number is missing
 *      where one is required.
 */
int parseTimeOfDay(int n1)
{
int dp;
int sign;
// 12am is midnight
// 12pm is noon
//message(DTEXT("DateParse::parseTimeOfDay(%d)\n"), n1);
hours = n1;
dp = nextToken();
if (dp != DP.number)
goto case_error;
minutes = number;
dp = nextToken();
if (dp == DP.colon)
{
dp = nextToken();
if (dp != DP.number)
goto case_error;
seconds = number;
dp = nextToken();
}
else
seconds = 0;
if (dp == DP.ampm)
{
ampm = number;
dp = nextToken();
}
else if (dp == DP.plus || dp == DP.minus)
{
// Also entered by the goto below, for an offset that follows a zone name.
Loffset:
sign = (dp == DP.minus) ? -1 : 1;
dp = nextToken();
if (dp != DP.number)
goto case_error;
// The stored correction has the opposite sign of the written offset.
tzcorrection = -sign * number;
dp = nextToken();
}
else if (dp == DP.tz)
{
tzcorrection = number;
dp = nextToken();
// A zone with zero correction may be followed by an explicit offset,
// e.g. "GMT-0800".
if (number == 0 && (dp == DP.plus || dp == DP.minus))
goto Loffset;
// A zone name followed by "dst" adds 100 to the correction.
if (dp == DP.dst)
{ tzcorrection += 100;
dp = nextToken();
}
}
else if (dp == DP.dsttz)
{
tzcorrection = number;
dp = nextToken();
}
return dp;
case_error:
return DP.err;
}
}
unittest
{
    DateParse dp;
    Date d;

    // Parses `text` into `d` and verifies every field of the result.
    void check(string text,
               int year, int month, int day,
               int hour, int minute, int second, int ms,
               int weekday, int tzcorrection)
    {
        dp.parse(text, d);
        assert(d.year == year);
        assert(d.month == month);
        assert(d.day == day);
        assert(d.hour == hour);
        assert(d.minute == minute);
        assert(d.second == second);
        assert(d.ms == ms);
        assert(d.weekday == weekday);
        assert(d.tzcorrection == tzcorrection);
    }

    //    input                                  year  mon day  hh  mm  ss  ms  wd  tzcorrection
    check("March 10, 1959 12:00 -800",           1959,  3, 10, 12,  0,  0,  0,  0, 800);
    check("Tue Apr 02 02:04:57 GMT-0800 1996",   1996,  4,  2,  2,  4, 57,  0,  3, 800);
    check("March 14, -1980 21:14:50",            1980,  3, 14, 21, 14, 50,  0,  0, int.min);
    check("Tue Apr 02 02:04:57 1996",            1996,  4,  2,  2,  4, 57,  0,  3, int.min);
    check("Tue, 02 Apr 1996 02:04:57 G.M.T.",    1996,  4,  2,  2,  4, 57,  0,  3, 0);
    check("December 31, 3000",                   3000, 12, 31,  0,  0,  0,  0,  0, int.min);
    check("Wed, 31 Dec 1969 16:00:00 GMT",       1969, 12, 31, 16,  0,  0,  0,  4, 0);
    check("1/1/1999 12:30 AM",                   1999,  1,  1,  0, 30,  0,  0,  0, int.min);
    check("Tue, 20 May 2003 15:38:58 +0530",     2003,  5, 20, 15, 38, 58,  0,  3, -530);

    // Dump the last parsed date when the dateparse debug switch is on.
    debug(dateparse) printf("year = %d, month = %d, day = %d\n%02d:%02d:%02d.%03d\nweekday = %d, tzcorrection = %d\n",
        d.year, d.month, d.day,
        d.hour, d.minute, d.second, d.ms,
        d.weekday, d.tzcorrection);
}
undeaD-1.0.10/src/undead/doformat.d 0000664 0000000 0000000 00000117304 13463741136 0017016 0 ustar 00root root 0000000 0000000 // Written in the D programming language.
/**
Copyright: Copyright Digital Mars 2000-2013.
License: $(HTTP boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors: $(HTTP walterbright.com, Walter Bright), $(HTTP erdani.com,
Andrei Alexandrescu), and Kenji Hara
Source: $(PHOBOSSRC std/_format.d)
*/
module undead.doformat;
//debug=format; // uncomment to turn on debugging printf's
import core.vararg;
import std.exception;
import std.meta;
import std.range.primitives;
import std.traits;
import std.format;
// Define the local DigitalMarsC version identifier whenever the
// Digital Mars C runtime is the one in use.
version(CRuntime_DigitalMars)
{
version = DigitalMarsC;
}
version (DigitalMarsC)
{
// This is DMC's internal floating point formatting function
// (called through the pointer by putreal() inside doFormat).
extern (C)
{
extern shared char* function(int c, int flags, int precision,
in real* pdval,
char* buf, size_t* psl, int width) __pfloatfmt;
}
}
/**
 * Exception thrown when a format specification and the argument it is
 * applied to do not match.
 */
class FormatException : Exception
{
    /// Creates the exception with the generic message "format error".
    this() @safe pure nothrow
    {
        super("format error");
    }

    /**
     * Creates the exception with an explicit message.
     *
     * Params:
     *   msg  = description of the problem
     *   fn   = source file reported by the exception (defaults to the call site)
     *   ln   = source line reported by the exception (defaults to the call site)
     *   next = next exception in the chain, if any
     */
    this(string msg, string fn = __FILE__, size_t ln = __LINE__,
         Throwable next = null) @safe pure nothrow
    {
        super(msg, fn, ln, next);
    }
}
// Legacy implementation
// Single-character type codes. doFormat() obtains one by reading a
// character of the run-time TypeInfo class name (index 9, i.e. the
// character right after a 9-character prefix such as "TypeInfo_") and
// casting it to Mangle, then dispatches on the result.
enum Mangle : char
{
// basic types
Tvoid = 'v',
Tbool = 'b',
Tbyte = 'g',
Tubyte = 'h',
Tshort = 's',
Tushort = 't',
Tint = 'i',
Tuint = 'k',
Tlong = 'l',
Tulong = 'm',
Tfloat = 'f',
Tdouble = 'd',
Treal = 'e',
// imaginary floating point types
Tifloat = 'o',
Tidouble = 'p',
Tireal = 'j',
// complex floating point types
Tcfloat = 'q',
Tcdouble = 'r',
Tcreal = 'c',
// character types
Tchar = 'a',
Twchar = 'u',
Tdchar = 'w',
// derived and aggregate types
Tarray = 'A',
Tsarray = 'G',
Taarray = 'H',
Tpointer = 'P',
Tfunction = 'F',
Tident = 'I',
Tclass = 'C',
Tstruct = 'S',
Tenum = 'E',
Ttypedef = 'T',
Tdelegate = 'D',
// type qualifiers (skipped over when scanning a name)
Tconst = 'x',
Timmutable = 'y',
}
// Return the TypeInfo for a primitive type and null otherwise. This
// is required since for arrays of ints we only have the mangled char
// to work from. If arrays always subclassed TypeInfo_Array this
// routine could go away.
//
// Params:
//   m = mangle character identifying the type
//
// Returns: the TypeInfo of the matching basic type, or null when `m`
// does not denote one of the basic types listed below.
//
// The lookup is a plain switch rather than a lazily filled __gshared
// associative array: it holds no mutable state, so concurrent callers
// cannot observe a half-built table, and it needs no allocation.
private TypeInfo primitiveTypeInfo(Mangle m)
{
    switch (m)
    {
        case Mangle.Tvoid:    return typeid(void);
        case Mangle.Tbool:    return typeid(bool);
        case Mangle.Tbyte:    return typeid(byte);
        case Mangle.Tubyte:   return typeid(ubyte);
        case Mangle.Tshort:   return typeid(short);
        case Mangle.Tushort:  return typeid(ushort);
        case Mangle.Tint:     return typeid(int);
        case Mangle.Tuint:    return typeid(uint);
        case Mangle.Tlong:    return typeid(long);
        case Mangle.Tulong:   return typeid(ulong);
        case Mangle.Tfloat:   return typeid(float);
        case Mangle.Tdouble:  return typeid(double);
        case Mangle.Treal:    return typeid(real);
        case Mangle.Tifloat:  return typeid(ifloat);
        case Mangle.Tidouble: return typeid(idouble);
        case Mangle.Tireal:   return typeid(ireal);
        case Mangle.Tcfloat:  return typeid(cfloat);
        case Mangle.Tcdouble: return typeid(cdouble);
        case Mangle.Tcreal:   return typeid(creal);
        case Mangle.Tchar:    return typeid(char);
        case Mangle.Twchar:   return typeid(wchar);
        case Mangle.Tdchar:   return typeid(dchar);
        default:              return null;
    }
}
// This stuff has been removed from the docs and is planned for deprecation.
/*
* Interprets variadic argument list pointed to by argptr whose types
* are given by arguments[], formats them according to embedded format
* strings in the variadic argument list, and sends the resulting
* characters to putc.
*
* The variadic arguments are consumed in order. Each is formatted
* into a sequence of chars, using the default format specification
* for its type, and the characters are sequentially passed to putc.
* If a $(D char[]), $(D wchar[]), or $(D dchar[]) argument is
* encountered, it is interpreted as a format string. As many
* arguments as specified in the format string are consumed and
* formatted according to the format specifications in that string and
* passed to putc. If there are too few remaining arguments, a
* $(D FormatException) is thrown. If there are more remaining arguments than
* needed by the format specification, the default processing of
* arguments resumes until they are all consumed.
*
* Params:
* putc = Output is sent to this delegate, character by character.
* arguments = Array of $(D TypeInfo)s, one for each argument to be formatted.
* argptr = Points to variadic argument list.
*
* Throws:
* Mismatched arguments and formats result in a $(D FormatException) being thrown.
*
* Format_String:
* $(I Format strings)
* consist of characters interspersed with
* $(I format specifications). Characters are simply copied
* to the output (such as putc) after any necessary conversion
* to the corresponding UTF-8 sequence.
*
* A $(I format specification) starts with a '%' character,
* and has the following grammar:
$(CONSOLE
$(I FormatSpecification):
$(B '%%')
$(B '%') $(I Flags) $(I Width) $(I Precision) $(I FormatChar)
$(I Flags):
$(I empty)
$(B '-') $(I Flags)
$(B '+') $(I Flags)
$(B '#') $(I Flags)
$(B '0') $(I Flags)
$(B ' ') $(I Flags)
$(I Width):
$(I empty)
$(I Integer)
$(B '*')
$(I Precision):
$(I empty)
$(B '.')
$(B '.') $(I Integer)
$(B '.*')
$(I Integer):
$(I Digit)
$(I Digit) $(I Integer)
$(I Digit):
$(B '0')
$(B '1')
$(B '2')
$(B '3')
$(B '4')
$(B '5')
$(B '6')
$(B '7')
$(B '8')
$(B '9')
$(I FormatChar):
$(B 's')
$(B 'b')
$(B 'd')
$(B 'o')
$(B 'x')
$(B 'X')
$(B 'e')
$(B 'E')
$(B 'f')
$(B 'F')
$(B 'g')
$(B 'G')
$(B 'a')
$(B 'A')
)
$(DL
$(DT $(I Flags))
$(DL
$(DT $(B '-'))
$(DD
Left justify the result in the field.
It overrides any $(B 0) flag.)
$(DT $(B '+'))
$(DD Prefix positive numbers in a signed conversion with a $(B +).
It overrides any $(I space) flag.)
$(DT $(B '#'))
$(DD Use alternative formatting:
$(DL
$(DT For $(B 'o'):)
$(DD Add to precision as necessary so that the first digit
of the octal formatting is a '0', even if both the argument
and the $(I Precision) are zero.)
$(DT For $(B 'x') ($(B 'X')):)
$(DD If non-zero, prefix result with $(B 0x) ($(B 0X)).)
$(DT For floating point formatting:)
$(DD Always insert the decimal point.)
$(DT For $(B 'g') ($(B 'G')):)
$(DD Do not elide trailing zeros.)
))
$(DT $(B '0'))
$(DD For integer and floating point formatting when not nan or
infinity, use leading zeros
to pad rather than spaces.
Ignore if there's a $(I Precision).)
$(DT $(B ' '))
$(DD Prefix positive numbers in a signed conversion with a space.)
)
$(DT $(I Width))
$(DD
Specifies the minimum field width.
If the width is a $(B *), the next argument, which must be
of type $(B int), is taken as the width.
If the width is negative, it is as if the $(B -) was given
as a $(I Flags) character.)
$(DT $(I Precision))
$(DD Gives the precision for numeric conversions.
If the precision is a $(B *), the next argument, which must be
of type $(B int), is taken as the precision. If it is negative,
it is as if there was no $(I Precision).)
$(DT $(I FormatChar))
$(DD
$(DL
$(DT $(B 's'))
$(DD The corresponding argument is formatted in a manner consistent
with its type:
$(DL
$(DT $(B bool))
$(DD The result is 'true' or 'false'.)
$(DT integral types)
$(DD The $(B %d) format is used.)
$(DT floating point types)
$(DD The $(B %g) format is used.)
$(DT string types)
$(DD The result is the string converted to UTF-8.
A $(I Precision) specifies the maximum number of characters
to use in the result.)
$(DT classes derived from $(B Object))
$(DD The result is the string returned from the class instance's
$(B .toString()) method.
A $(I Precision) specifies the maximum number of characters
to use in the result.)
$(DT non-string static and dynamic arrays)
$(DD The result is [s0, s1, ...]
where sk is the kth element
formatted with the default format.)
))
$(DT $(B 'b','d','o','x','X'))
$(DD The corresponding argument must be an integral type
and is formatted as an integer. If the argument is a signed type
and the $(I FormatChar) is $(B d) it is converted to
a signed string of characters, otherwise it is treated as
unsigned. An argument of type $(B bool) is formatted as '1'
or '0'. The base used is binary for $(B b), octal for $(B o),
decimal
for $(B d), and hexadecimal for $(B x) or $(B X).
$(B x) formats using lower case letters, $(B X) uppercase.
If there are fewer resulting digits than the $(I Precision),
leading zeros are used as necessary.
If the $(I Precision) is 0 and the number is 0, no digits
result.)
$(DT $(B 'e','E'))
$(DD A floating point number is formatted as one digit before
the decimal point, $(I Precision) digits after, the $(I FormatChar),
±, followed by at least a two digit exponent: $(I d.dddddd)e$(I ±dd).
If there is no $(I Precision), six
digits are generated after the decimal point.
If the $(I Precision) is 0, no decimal point is generated.)
$(DT $(B 'f','F'))
$(DD A floating point number is formatted in decimal notation.
The $(I Precision) specifies the number of digits generated
after the decimal point. It defaults to six. At least one digit
is generated before the decimal point. If the $(I Precision)
is zero, no decimal point is generated.)
$(DT $(B 'g','G'))
$(DD A floating point number is formatted in either $(B e) or
$(B f) format for $(B g); $(B E) or $(B F) format for
$(B G).
The $(B f) format is used if the exponent for an $(B e) format
is greater than -5 and less than the $(I Precision).
The $(I Precision) specifies the number of significant
digits, and defaults to six.
Trailing zeros are elided after the decimal point, if the fractional
part is zero then no decimal point is generated.)
$(DT $(B 'a','A'))
$(DD A floating point number is formatted in hexadecimal
exponential notation 0x$(I h.hhhhhh)p$(I ±d).
There is one hexadecimal digit before the decimal point, and as
many after as specified by the $(I Precision).
If the $(I Precision) is zero, no decimal point is generated.
If there is no $(I Precision), as many hexadecimal digits as
necessary to exactly represent the mantissa are generated.
The exponent is written in as few digits as possible,
but at least one, is in decimal, and represents a power of 2 as in
$(I h.hhhhhh)*2$(I ±d).
The exponent for zero is zero.
The hexadecimal digits, x and p are in upper case if the
$(I FormatChar) is upper case.)
)
Floating point NaN's are formatted as $(B nan) if the
$(I FormatChar) is lower case, or $(B NAN) if upper.
Floating point infinities are formatted as $(B inf) or
$(B infinity) if the
$(I FormatChar) is lower case, or $(B INF) or $(B INFINITY) if upper.
))
Example:
-------------------------
import core.stdc.stdio;
import undead.doformat;
void myPrint(...)
{
void putc(dchar c)
{
fputc(c, stdout);
}
undead.doformat.doFormat(&putc, _arguments, _argptr);
}
void main()
{
int x = 27;
// prints 'The answer is 27:6'
myPrint("The answer is %s:", x, 6);
}
------------------------
*/
/* Formats the variadic arguments described by `arguments` / `ap` and sends
 * the resulting characters to `putc`.
 *
 * Params:
 *   putc      = sink that receives the output one character at a time
 *   arguments = one TypeInfo per variadic argument
 *   ap        = the variadic argument list itself
 *
 * Throws: FormatException on a format/argument mismatch; an out-of-memory
 * error if the temporary argument buffer cannot be allocated.
 */
void doFormat()(scope void delegate(dchar) putc, TypeInfo[] arguments, va_list ap)
{
    import std.utf : encode, toUCSindex, isValidDchar, UTFException, toUTF8;
    import core.exception : onOutOfMemoryError;
    import core.stdc.string : strlen;
    import core.stdc.stdlib : alloca, malloc, realloc, free;
    import core.stdc.stdio : snprintf;

    // All arguments are first copied out of the va_list into one heap
    // buffer, so that the formatting code can walk (and re-walk) them
    // with plain pointer arithmetic.
    size_t bufLength = 1024;
    void* argBuffer = malloc(bufLength);
    scope(exit) free(argBuffer);   // free(null) is harmless
    if (argBuffer is null)
        onOutOfMemoryError();

    size_t bufUsed = 0;
    foreach (ti; arguments)
    {
        // Ensure the required alignment
        bufUsed += ti.talign - 1;
        bufUsed -= (cast(size_t)argBuffer + bufUsed) & (ti.talign - 1);
        auto pos = bufUsed;

        // Align to next word boundary
        bufUsed += ti.tsize + size_t.sizeof - 1;
        bufUsed -= (cast(size_t)argBuffer + bufUsed) & (size_t.sizeof - 1);

        // Resize buffer if necessary
        if (bufUsed > bufLength)
        {
            do
                bufLength *= 2;
            while (bufUsed > bufLength);

            // Keep the old pointer until realloc has succeeded: on
            // failure the original block is still owned by argBuffer
            // and is released by the scope(exit) above.
            void* grown = realloc(argBuffer, bufLength);
            if (grown is null)
                onOutOfMemoryError();
            argBuffer = grown;
        }

        // Copy argument into buffer
        va_arg(ap, ti, argBuffer + pos);
    }

    // Read cursor into the copied arguments; advanced by skipArg().
    auto argptr = argBuffer;
/* Advances the argument cursor past one argument of type `ti`.
 *
 * Returns: pointer to the (aligned) storage of that argument.
 */
void* skipArg(TypeInfo ti)
{
    immutable size_t alignMask = ti.talign - 1;
    immutable size_t wordMask = size_t.sizeof - 1;

    // Round the cursor up to the alignment the type requires.
    void* slot = argptr + alignMask;
    slot -= cast(size_t) slot & alignMask;

    // Step over the value and round up to the next word boundary.
    void* following = slot + ti.tsize + wordMask;
    following -= cast(size_t) following & wordMask;

    argptr = following;
    return slot;
}
/* Fetches the next argument as a value of type T and advances the
 * argument cursor past it.
 */
auto getArg(T)()
{
    T* slot = cast(T*) skipArg(typeid(T));
    return *slot;
}
// State shared between the main loop and formatArg():
TypeInfo ti; // type of the argument currently being formatted
Mangle m; // mangle character derived from ti's class name
uint flags; // FL* bits parsed from the current format specification
int field_width; // minimum field width (0 when none was given)
int precision; // precision; only meaningful when FLprecision is set
// Bit values stored in `flags`.
enum : uint
{
FLdash = 1, // '-' flag
FLplus = 2, // '+' flag
FLspace = 4, // ' ' flag
FLhash = 8, // '#' flag
FLlngdbl = 0x20, // OR-ed into the flags passed to __pfloatfmt by putreal()
FL0pad = 0x40, // '0' flag
FLprecision = 0x80, // a '.' precision was present
}
/* Strips const/immutable wrappers from a TypeInfo.
 *
 * A TypeInfo whose run-time class name is 18 characters long and ends in
 * "Invariant", or 14 characters long and ends in "Const", is replaced by
 * its `base`; this repeats until neither pattern matches.
 *
 * Returns: the innermost unwrapped TypeInfo.
 */
static TypeInfo skipCI(TypeInfo valti)
{
    for (;;)
    {
        const string cname = typeid(valti).name;

        if (cname.length == 18 && cname[9 .. 18] == "Invariant")
        {
            valti = (cast(TypeInfo_Invariant) valti).base;
            continue;
        }
        if (cname.length == 14 && cname[9 .. 14] == "Const")
        {
            valti = (cast(TypeInfo_Const) valti).base;
            continue;
        }
        return valti;
    }
}
// Formats the single argument at `argptr`, whose type is described by the
// enclosing `ti` / `m`, according to format character `fc` and the current
// `flags`, `field_width` and `precision`; output goes to `putc`.
// Throws FormatException when `fc` is not valid for the argument type.
void formatArg(char fc)
{
// Scratch values, one per category of argument.
bool vbit;
ulong vnumber;
char vchar;
dchar vdchar;
Object vobject;
real vreal;
creal vcreal;
Mangle m2;
int signed = 0; // non-zero when the integer argument has a signed type
uint base = 10; // radix used for integer output
int uc; // non-zero selects upper-case hexadecimal digits
char[ulong.sizeof * 8] tmpbuf; // long enough to print long in binary
const(char)* prefix = ""; // sign or radix prefix, written by putstr()
string s;
/* Writes `prefix` followed by `s`, padded to `field_width`.
 *
 * The padding goes to the right when FLdash is set, otherwise to the
 * left. With FL0pad the left padding is made of '0' characters placed
 * between the prefix and the text; otherwise it is made of spaces placed
 * before the prefix. `prefix` is consumed (left pointing at its
 * terminator) by this call.
 */
void putstr(const char[] s)
{
    //printf("putstr: s = %.*s, flags = x%x\n", s.length, s.ptr, flags);
    ptrdiff_t padding = field_width -
        (strlen(prefix) + toUCSindex(s, s.length));
    if (padding < 0)
        padding = 0;

    immutable bool leftJustify = (flags & FLdash) != 0;
    immutable ptrdiff_t lead = leftJustify ? 0 : padding;
    immutable ptrdiff_t trail = leftJustify ? padding : 0;

    void emitPrefix()
    {
        for (; *prefix; ++prefix)
            putc(*prefix);
    }

    void emitFill(char fill, ptrdiff_t count)
    {
        for (; count > 0; --count)
            putc(fill);
    }

    if (flags & FL0pad)
    {
        emitPrefix();
        emitFill('0', lead);
    }
    else
    {
        emitFill(' ', lead);
        emitPrefix();
    }

    foreach (dchar c; s)
        putc(c);

    emitFill(' ', trail);
}
// Formats floating point value `v` according to the enclosing `fc`,
// `flags`, `field_width` and `precision`, and writes it with putstr().
// 's' is treated as 'g'; any format character outside
// f F e E g G a A throws FormatException.
void putreal(real v)
{
//printf("putreal %Lg\n", vreal);
switch (fc)
{
case 's':
fc = 'g';
break;
case 'f', 'F', 'e', 'E', 'g', 'G', 'a', 'A':
break;
default:
//printf("fc = '%c'\n", fc);
Lerror:
throw new FormatException("incompatible format character for floating point type");
}
version (DigitalMarsC)
{
// Digital Mars C runtime: use its internal formatter directly.
uint sl;
char[] fbuf = tmpbuf;
if (!(flags & FLprecision))
precision = 6;
while (1)
{
sl = fbuf.length;
prefix = (*__pfloatfmt)(fc, flags | FLlngdbl,
precision, &v, cast(char*)fbuf, &sl, field_width);
// sl == -1 is treated as "buffer too small": retry with twice the size
if (sl != -1)
break;
sl = fbuf.length * 2;
fbuf = (cast(char*)alloca(sl * char.sizeof))[0 .. sl];
}
putstr(fbuf[0 .. sl]);
}
else
{
// Other runtimes: build a "%<flags>*.*L<fc>" format string and
// let snprintf do the conversion.
ptrdiff_t sl;
char[] fbuf = tmpbuf;
char[12] format;
format[0] = '%';
int i = 1;
if (flags & FLdash)
format[i++] = '-';
if (flags & FLplus)
format[i++] = '+';
if (flags & FLspace)
format[i++] = ' ';
if (flags & FLhash)
format[i++] = '#';
if (flags & FL0pad)
format[i++] = '0';
format[i + 0] = '*';
format[i + 1] = '.';
format[i + 2] = '*';
format[i + 3] = 'L';
format[i + 4] = fc;
format[i + 5] = 0;
// a negative precision makes snprintf behave as if none was given
if (!(flags & FLprecision))
precision = -1;
while (1)
{
sl = fbuf.length;
int n;
version (CRuntime_Microsoft)
{
import std.math : isNaN, isInfinity;
if (isNaN(v)) // snprintf writes 1.#QNAN
n = snprintf(fbuf.ptr, sl, "nan");
else if (isInfinity(v)) // snprintf writes 1.#INF
n = snprintf(fbuf.ptr, sl, v < 0 ? "-inf" : "inf");
else
n = snprintf(fbuf.ptr, sl, format.ptr, field_width,
precision, cast(double)v);
}
else
n = snprintf(fbuf.ptr, sl, format.ptr, field_width,
precision, v);
//printf("format = '%s', n = %d\n", cast(char*)format, n);
// success: the output (n chars) fitted into the buffer
if (n >= 0 && n < sl)
{ sl = n;
break;
}
// otherwise grow the buffer: double it when the needed size is
// unknown (n < 0), else use exactly the size snprintf asked for
if (n < 0)
sl = sl * 2;
else
sl = n + 1;
fbuf = (cast(char*)alloca(sl * char.sizeof))[0 .. sl];
}
putstr(fbuf[0 .. sl]);
}
return;
}
/* Derives the mangle character for `ti` from its run-time class name:
 * normally the character at index 9 of the name, except that a
 * 20-character name ending in "StaticArray" maps to 'G'.
 */
static Mangle getMan(TypeInfo ti)
{
    const string cname = typeid(ti).name;
    immutable Mangle code = cast(Mangle) cname[9];
    immutable bool isStaticArray =
        cname.length == 20 && cname[9 .. 20] == "StaticArray";
    return isStaticArray ? cast(Mangle) 'G' : code;
}
/* Writes an array as '[' e0 ',' e1 ... ']', formatting every element
 * with the 's' format character.
 *
 * p     = pointer to the first element in the array
 * len   = number of elements in the array
 * valti = type of the elements
 */
void putArray(void* p, size_t len, TypeInfo valti)
{
    //printf("\nputArray(len = %u), tsize = %u\n", len, valti.tsize);
    putc('[');

    valti = skipCI(valti);
    immutable size_t stride = valti.tsize;

    // formatArg() works on the enclosing argptr/ti/m, so remember the
    // caller's values and point that state at the elements instead.
    auto savedArgptr = argptr;
    auto savedTi = ti;
    auto savedM = m;

    ti = valti;
    //printf("\n%.*s\n", typeid(valti).name.length, typeid(valti).name.ptr);
    m = getMan(valti);

    for (size_t remaining = len; remaining != 0; )
    {
        --remaining;
        argptr = p;
        formatArg('s');
        p += stride;
        if (remaining > 0)
            putc(',');
    }

    m = savedM;
    ti = savedTi;
    argptr = savedArgptr;

    putc(']');
}
// Writes an associative array as '[' key ':' value ',' ... ']', formatting
// keys and values with the 's' format character.
//   vaa   = the array, viewed as ubyte[long] regardless of its real type
//   valti = type of the values
//   keyti = type of the keys
// NOTE(review): the key address is computed from the address of the
// iterated value with fixed size/alignment arithmetic, i.e. this relies
// on how the runtime lays out a key next to its value -- confirm against
// the druntime version in use.
void putAArray(ubyte[long] vaa, TypeInfo valti, TypeInfo keyti)
{
putc('[');
bool comma=false;
// formatArg() works on the enclosing argptr/ti/m; saved here, restored below
auto argptrSave = argptr;
auto tiSave = ti;
auto mSave = m;
valti = skipCI(valti);
keyti = skipCI(keyti);
foreach (ref fakevalue; vaa)
{
if (comma) putc(',');
comma = true;
// step back from the (fake) value to where a long key would start
void *pkey = &fakevalue;
version (D_LP64)
pkey -= (long.sizeof + 15) & ~(15);
else
pkey -= (long.sizeof + size_t.sizeof - 1) & ~(size_t.sizeof - 1);
// the key comes before the value
auto keysize = keyti.tsize;
version (D_LP64)
auto keysizet = (keysize + 15) & ~(15);
else
auto keysizet = (keysize + size_t.sizeof - 1) & ~(size_t.sizeof - 1);
// the real value follows the real key, rounded up to the alignment
void* pvalue = pkey + keysizet;
//doFormat(putc, (&keyti)[0..1], pkey);
m = getMan(keyti);
argptr = pkey;
ti = keyti;
formatArg('s');
putc(':');
//doFormat(putc, (&valti)[0..1], pvalue);
m = getMan(valti);
argptr = pvalue;
ti = valti;
formatArg('s');
}
m = mSave;
ti = tiSave;
argptr = argptrSave;
putc(']');
}
//printf("formatArg(fc = '%c', m = '%c')\n", fc, m);
// Dispatch on the mangle character of the argument. Each case fetches
// the value with getArg and then either prints it directly or jumps to
// one of the shared tails: Lnumber (integers), Lreal, Lcomplex, Lputstr.
int mi;
switch (m)
{
case Mangle.Tbool:
vbit = getArg!(bool)();
// any format other than 's' prints a bool as the number 0 or 1
if (fc != 's')
{ vnumber = vbit;
goto Lnumber;
}
putstr(vbit ? "true" : "false");
return;
case Mangle.Tchar:
vchar = getArg!(char)();
if (fc != 's')
{ vnumber = vchar;
goto Lnumber;
}
// L2: print the single char held in vchar (also reached from below)
L2:
putstr((&vchar)[0 .. 1]);
return;
case Mangle.Twchar:
vdchar = getArg!(wchar)();
goto L1;
case Mangle.Tdchar:
vdchar = getArg!(dchar)();
// L1: common handling of wchar/dchar held in vdchar
L1:
if (fc != 's')
{ vnumber = vdchar;
goto Lnumber;
}
if (vdchar <= 0x7F)
{ vchar = cast(char)vdchar;
goto L2;
}
else
{ if (!isValidDchar(vdchar))
throw new UTFException("invalid dchar in format");
// encode the code point as UTF-8 before printing
char[4] vbuf;
putstr(vbuf[0 .. encode(vbuf, vdchar)]);
}
return;
case Mangle.Tbyte:
signed = 1;
vnumber = getArg!(byte)();
goto Lnumber;
case Mangle.Tubyte:
vnumber = getArg!(ubyte)();
goto Lnumber;
case Mangle.Tshort:
signed = 1;
vnumber = getArg!(short)();
goto Lnumber;
case Mangle.Tushort:
vnumber = getArg!(ushort)();
goto Lnumber;
case Mangle.Tint:
signed = 1;
vnumber = getArg!(int)();
goto Lnumber;
case Mangle.Tuint:
// Luint: also the target for float arguments printed with x/X
Luint:
vnumber = getArg!(uint)();
goto Lnumber;
case Mangle.Tlong:
signed = 1;
vnumber = cast(ulong)getArg!(long)();
goto Lnumber;
case Mangle.Tulong:
// Lulong: also the target for double arguments printed with x/X
Lulong:
vnumber = getArg!(ulong)();
goto Lnumber;
case Mangle.Tclass:
vobject = getArg!(Object)();
if (vobject is null)
s = "null";
else
s = vobject.toString();
goto Lputstr;
case Mangle.Tpointer:
// pointers print as zero-padded hexadecimal; the default precision is
// (void*).sizeof digits, and digits are upper case unless fc is 'x'
vnumber = cast(ulong)getArg!(void*)();
if (fc != 'x') uc = 1;
flags |= FL0pad;
if (!(flags & FLprecision))
{ flags |= FLprecision;
precision = (void*).sizeof;
}
base = 16;
goto Lnumber;
case Mangle.Tfloat:
case Mangle.Tifloat:
// x/X reinterpret the stored bits of the float as a uint
if (fc == 'x' || fc == 'X')
goto Luint;
vreal = getArg!(float)();
goto Lreal;
case Mangle.Tdouble:
case Mangle.Tidouble:
// x/X reinterpret the stored bits of the double as a ulong
if (fc == 'x' || fc == 'X')
goto Lulong;
vreal = getArg!(double)();
goto Lreal;
case Mangle.Treal:
case Mangle.Tireal:
vreal = getArg!(real)();
goto Lreal;
case Mangle.Tcfloat:
vcreal = getArg!(cfloat)();
goto Lcomplex;
case Mangle.Tcdouble:
vcreal = getArg!(cdouble)();
goto Lcomplex;
case Mangle.Tcreal:
vcreal = getArg!(creal)();
goto Lcomplex;
case Mangle.Tsarray:
// static array: the elements are stored in place at argptr
putArray(argptr, (cast(TypeInfo_StaticArray)ti).len, (cast(TypeInfo_StaticArray)ti).next);
return;
case Mangle.Tarray:
// index into the TypeInfo class name at which the element's mangle
// character is looked up by the loop below
mi = 10;
if (typeid(ti).name.length == 14 &&
typeid(ti).name[9..14] == "Array")
{ // array of non-primitive types
TypeInfo tn = (cast(TypeInfo_Array)ti).next;
tn = skipCI(tn);
// character arrays are strings; everything else is a generic array
switch (cast(Mangle)typeid(tn).name[9])
{
case Mangle.Tchar: goto LarrayChar;
case Mangle.Twchar: goto LarrayWchar;
case Mangle.Tdchar: goto LarrayDchar;
default:
break;
}
void[] va = getArg!(void[])();
putArray(va.ptr, va.length, tn);
return;
}
if (typeid(ti).name.length == 25 &&
typeid(ti).name[9..25] == "AssociativeArray")
{ // associative array
ubyte[long] vaa = getArg!(ubyte[long])();
putAArray(vaa,
(cast(TypeInfo_AssociativeArray)ti).next,
(cast(TypeInfo_AssociativeArray)ti).key);
return;
}
// Otherwise the element type is encoded in the class name itself,
// starting at name[mi]; qualifier characters are skipped by the loop.
while (1)
{
m2 = cast(Mangle)typeid(ti).name[mi];
switch (m2)
{
case Mangle.Tchar:
LarrayChar:
s = getArg!(string)();
goto Lputstr;
case Mangle.Twchar:
LarrayWchar:
wchar[] sw = getArg!(wchar[])();
s = toUTF8(sw);
goto Lputstr;
case Mangle.Tdchar:
LarrayDchar:
s = toUTF8(getArg!(dstring)());
// Lputstr: shared tail that prints the UTF-8 string held in `s`
// (strings, objects and structs all end up here). Only the 's'
// format character is accepted.
Lputstr:
    if (fc != 's')
        throw new FormatException("string");
    if (flags & FLprecision && precision < s.length)
    {
        // The precision is a maximum number of characters (code points),
        // not of UTF-8 code units: translate it into a byte index so a
        // multi-byte sequence is never cut in the middle, and leave the
        // string alone when it already has no more characters than that.
        import std.utf : toUTFindex;
        if (precision < toUCSindex(s, s.length))
            s = s[0 .. toUTFindex(s, precision)];
    }
    putstr(s);
    break;
case Mangle.Tconst:
case Mangle.Timmutable:
// qualifier character: look at the next character of the name
mi++;
continue;
default:
// array of a primitive type identified only by its mangle character
TypeInfo ti2 = primitiveTypeInfo(m2);
if (!ti2)
goto Lerror;
void[] va = getArg!(void[])();
putArray(va.ptr, va.length, ti2);
}
return;
}
assert(0);
case Mangle.Tenum:
// format an enum as a value of its base type
ti = (cast(TypeInfo_Enum)ti).base;
m = cast(Mangle)typeid(ti).name[9];
formatArg(fc);
return;
case Mangle.Tstruct:
// structs are printed through the toString hook stored in their TypeInfo
{ TypeInfo_Struct tis = cast(TypeInfo_Struct)ti;
if (tis.xtoString is null)
throw new FormatException("Can't convert " ~ tis.toString()
~ " to string: \"string toString()\" not defined");
s = tis.xtoString(skipArg(tis));
goto Lputstr;
}
default:
goto Lerror;
}
// Lnumber: shared tail for integer output. On entry `vnumber` holds the
// value (sign-extended to 64 bits for signed types), `signed` tells
// whether the source type was signed and `base` is 10 unless a case
// above already changed it.
Lnumber:
switch (fc)
{
case 's':
case 'd':
if (signed)
{ if (cast(long)vnumber < 0)
{ prefix = "-";
vnumber = -vnumber;
}
else if (flags & FLplus)
prefix = "+";
else if (flags & FLspace)
prefix = " ";
}
break;
case 'b':
signed = 0;
base = 2;
break;
case 'o':
signed = 0;
base = 8;
break;
case 'X':
uc = 1;
if (flags & FLhash && vnumber)
prefix = "0X";
signed = 0;
base = 16;
break;
case 'x':
if (flags & FLhash && vnumber)
prefix = "0x";
signed = 0;
base = 16;
break;
default:
goto Lerror;
}
// When printing as unsigned, cut the sign extension of the narrower
// signed types back to their own width.
if (!signed)
{
switch (m)
{
case Mangle.Tbyte:
vnumber &= 0xFF;
break;
case Mangle.Tshort:
vnumber &= 0xFFFF;
break;
case Mangle.Tint:
vnumber &= 0xFFFFFFFF;
break;
default:
break;
}
}
// an explicit precision overrides zero padding
if (flags & FLprecision && fc != 'p')
flags &= ~FL0pad;
// Fast path for a value that is a single digit in the chosen base.
if (vnumber < base)
{
// zero with precision 0 prints no digits (except '#' octal)
if (vnumber == 0 && precision == 0 && flags & FLprecision &&
!(fc == 'o' && flags & FLhash))
{
putstr(null);
return;
}
if (precision == 0 || !(flags & FLprecision))
{ vchar = cast(char)('0' + vnumber);
// NOTE(review): the assignment above is overwritten by both branches below
if (vnumber < 10)
vchar = cast(char)('0' + vnumber);
else
vchar = cast(char)((uc ? 'A' - 10 : 'a' - 10) + vnumber);
goto L2;
}
}
{
// General path: generate the digits right-to-left into tmpbuf.
ptrdiff_t n = tmpbuf.length;
char c;
int hexoffset = uc ? ('A' - ('9' + 1)) : ('a' - ('9' + 1));
while (vnumber)
{
c = cast(char)((vnumber % base) + '0');
if (c > '9')
c += hexoffset;
vnumber /= base;
tmpbuf[--n] = c;
}
// left-fill with zeros up to `precision` digits, when that fits in tmpbuf
if (tmpbuf.length - n < precision && precision < tmpbuf.length)
{
ptrdiff_t m = tmpbuf.length - precision;
tmpbuf[m .. n] = '0';
n = m;
}
else if (flags & FLhash && fc == 'o')
prefix = "0";
putstr(tmpbuf[n .. tmpbuf.length]);
return;
}
// Lreal: shared tail for real and imaginary floating point arguments.
Lreal:
putreal(vreal);
return;
// Lcomplex: prints <re><sign><im>i, each part formatted by putreal().
Lcomplex:
putreal(vcreal.re);
// a negative imaginary part brings its own '-' from putreal()
// NOTE(review): with the '+' flag putreal presumably emits a '+' of its
// own as well, which would give "++" here -- confirm.
if (vcreal.im >= 0)
{
putc('+');
}
putreal(vcreal.im);
putc('i');
return;
// Lerror: the format character is not valid for the argument type.
Lerror:
throw new FormatException("formatArg");
}
// Main loop: walk the arguments in order. A char/wchar/dchar array is
// taken as a format string that consumes the arguments it refers to;
// every other argument is printed with the default 's' format.
for (int j = 0; j < arguments.length; )
{
ti = arguments[j++];
//printf("arg[%d]: '%.*s' %d\n", j, typeid(ti).name.length, typeid(ti).name.ptr, typeid(ti).name.length);
//ti.print();
flags = 0;
precision = 0;
field_width = 0;
ti = skipCI(ti);
// Take the mangle character from the TypeInfo class name, skipping
// any const/immutable qualifier characters.
int mi = 9;
do
{
if (typeid(ti).name.length <= mi)
goto Lerror;
m = cast(Mangle)typeid(ti).name[mi++];
} while (m == Mangle.Tconst || m == Mangle.Timmutable);
if (m == Mangle.Tarray)
{
if (typeid(ti).name.length == 14 &&
typeid(ti).name[9..14] == "Array")
{
// generic TypeInfo_Array: for character elements continue with the
// element's TypeInfo so the switch below recognises the string
TypeInfo tn = (cast(TypeInfo_Array)ti).next;
tn = skipCI(tn);
switch (cast(Mangle)typeid(tn).name[9])
{
case Mangle.Tchar:
case Mangle.Twchar:
case Mangle.Tdchar:
ti = tn;
mi = 9;
break;
default:
break;
}
}
// L1: (re)examine the element mangle character at name[mi]
L1:
Mangle m2 = cast(Mangle)typeid(ti).name[mi];
string fmt; // format string
wstring wfmt;
dstring dfmt;
/* For performance reasons, this code takes advantage of the
* fact that most format strings will be ASCII, and that the
* format specifiers are always ASCII. This means we only need
* to deal with UTF in a couple of isolated spots.
*/
switch (m2)
{
case Mangle.Tchar:
fmt = getArg!(string)();
break;
case Mangle.Twchar:
wfmt = getArg!(wstring)();
fmt = toUTF8(wfmt);
break;
case Mangle.Tdchar:
dfmt = getArg!(dstring)();
fmt = toUTF8(dfmt);
break;
case Mangle.Tconst:
case Mangle.Timmutable:
mi++;
goto L1;
default:
// not a character array: print it like any other argument
formatArg('s');
continue;
}
// Scan the format string: copy literal characters to putc and handle
// each %-specification by parsing flags, width and precision and then
// formatting the next argument.
for (size_t i = 0; i < fmt.length; )
{ dchar c = fmt[i++];
// Returns the next code unit of fmt; throws when the string ends
// in the middle of a specification.
dchar getFmtChar()
{ // Valid format specifier characters will never be UTF
if (i == fmt.length)
throw new FormatException("invalid specifier");
return fmt[i++];
}
// Parses a decimal integer that starts with the digit already in `c`;
// leaves the first non-digit in `c`.
int getFmtInt()
{ int n;
while (1)
{
n = n * 10 + (c - '0');
if (n < 0) // overflow
throw new FormatException("int overflow");
c = getFmtChar();
if (c < '0' || c > '9')
break;
}
return n;
}
// Fetches the value for a '*' width or precision from the next
// argument, which must be an int.
int getFmtStar()
{ Mangle m;
TypeInfo ti;
if (j == arguments.length)
throw new FormatException("too few arguments");
ti = arguments[j++];
m = cast(Mangle)typeid(ti).name[9];
if (m != Mangle.Tint)
throw new FormatException("int argument expected");
return getArg!(int)();
}
if (c != '%')
{
if (c > 0x7F) // if UTF sequence
{
i--; // back up and decode UTF sequence
import std.utf : decode;
c = decode(fmt, i);
}
// Lputc: emit one literal character (also used for "%%")
Lputc:
putc(c);
continue;
}
// Get flags {-+ #}
flags = 0;
while (1)
{
c = getFmtChar();
switch (c)
{
case '-': flags |= FLdash; continue;
case '+': flags |= FLplus; continue;
case ' ': flags |= FLspace; continue;
case '#': flags |= FLhash; continue;
case '0': flags |= FL0pad; continue;
// "%%" (with no flags in between) is a literal percent sign
case '%': if (flags == 0)
goto Lputc;
break;
default: break;
}
break;
}
// Get field width
field_width = 0;
if (c == '*')
{
field_width = getFmtStar();
// a negative '*' width means left-justify
if (field_width < 0)
{ flags |= FLdash;
field_width = -field_width;
}
c = getFmtChar();
}
else if (c >= '0' && c <= '9')
field_width = getFmtInt();
// '+' overrides ' ', and '-' overrides '0'
if (flags & FLplus)
flags &= ~FLspace;
if (flags & FLdash)
flags &= ~FL0pad;
// Get precision
precision = 0;
if (c == '.')
{ flags |= FLprecision;
//flags &= ~FL0pad;
c = getFmtChar();
if (c == '*')
{
precision = getFmtStar();
// a negative '*' precision counts as no precision at all
if (precision < 0)
{ precision = 0;
flags &= ~FLprecision;
}
c = getFmtChar();
}
else if (c >= '0' && c <= '9')
precision = getFmtInt();
}
// the specification needs an argument to apply to
if (j == arguments.length)
goto Lerror;
ti = arguments[j++];
ti = skipCI(ti);
// Take the mangle character of the consumed argument from its TypeInfo
// class name, skipping const/immutable qualifier characters.
mi = 9;
do
{
    // Same guard as the scan at the start of the argument loop: a
    // class name that ends before a type character is found is a
    // format error, not an out-of-bounds read.
    if (typeid(ti).name.length <= mi)
        goto Lerror;
    m = cast(Mangle)typeid(ti).name[mi++];
} while (m == Mangle.Tconst || m == Mangle.Timmutable);
if (c > 0x7F) // if UTF sequence
goto Lerror; // format specifiers can't be UTF
// c is now the format character itself
formatArg(cast(char)c);
}
}
else
{
// not an array, so it cannot be a format string: default formatting
formatArg('s');
}
}
return;
// Lerror: malformed specification or missing argument
Lerror:
throw new FormatException();
}
/* Tells whether the byte order has to be swapped for format spec `f`:
 * on a little-endian target when the '+' flag is set, on a big-endian
 * target when the '-' flag is set.
 */
private bool needToSwapEndianess(Char)(ref FormatSpec!Char f)
{
    import std.system : endian, Endian;

    if (endian == Endian.littleEndian)
        return f.flPlus;
    if (endian == Endian.bigEndian)
        return f.flDash;
    return false;
}
// A format string consumes the arguments it refers to; the arguments
// left over afterwards are printed with their default format.
unittest
{
    string output;

    void sink(dchar ch)
    {
        output ~= ch;
    }

    void myPrint(...)
    {
        undead.doformat.doFormat(&sink, _arguments, _argptr);
    }

    myPrint("The answer is %s:", 27, 6);
    assert(output == "The answer is 27:6");
}
undeaD-1.0.10/src/undead/internal/ 0000775 0000000 0000000 00000000000 13463741136 0016644 5 ustar 00root root 0000000 0000000 undeaD-1.0.10/src/undead/internal/file.d 0000664 0000000 0000000 00000001234 13463741136 0017730 0 ustar 00root root 0000000 0000000 // Written in the D programming language
module undead.internal.file;
// Copied from std.file. undead doesn't have access to it, but some modules
// in undead used std.file.deleteme when they were in Phobos, so this gives
// them access to a version of it.
/* Returns the path of a scratch file for unit tests: the file name
 * "deleteme.dmd.unittest.pid" followed by the current process id,
 * placed in the temporary directory. The path is computed on the first
 * call and the stored value is returned afterwards.
 */
public @property string deleteme() @safe
{
    import std.conv : to;
    import std.file : tempDir;
    import std.path : buildPath;
    import std.process : thisProcessID;

    static string cachedPath = "deleteme.dmd.unittest.pid";
    static bool initialised = false;

    if (initialised)
        return cachedPath;

    cachedPath = buildPath(tempDir(), cachedPath) ~ to!string(thisProcessID);
    initialised = true;
    return cachedPath;
}
undeaD-1.0.10/src/undead/metastrings.d 0000664 0000000 0000000 00000011306 13463741136 0017536 0 ustar 00root root 0000000 0000000 // Written in the D programming language.
/**
Templates with which to do compile-time manipulation of strings.
Macros:
WIKI = Phobos/StdMetastrings
Copyright: Copyright Digital Mars 2007 - 2009.
License: Boost License 1.0.
Authors: $(WEB digitalmars.com, Walter Bright),
Don Clugston
Source: $(PHOBOSSRC std/_metastrings.d)
*/
/*
Copyright Digital Mars 2007 - 2009.
Distributed under the Boost Software License, Version 1.0.
(See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
*/
module undead.metastrings;
/**
Builds a string from constants at compile time, in the manner of a
run-time format function.

The arguments are processed from left to right. An argument that is a
string is used as a format string for the arguments that follow it; any
other argument is converted to its string form and appended.

Parameters:
A = tuple of constants, which can be strings, characters, or integral
    values.

Formats:
The formats supported are %s, which inserts the next argument, and %%,
which inserts a % character.

Example:
---
import undead.metastrings;
import std.stdio;

void main()
{
    string s = Format!("Arg %s = %s", "foo", 27);
    writeln(s); // "Arg foo = 27"
}
---
 */
template Format(A...)
{
    static if (A.length != 0)
    {
        static if (is(typeof(A[0]) : const(char)[]))
            enum Format = FormatString!(A[0], A[1 .. $]);
        else
            enum Format = toStringNow!(A[0]) ~ Format!(A[1 .. $]);
    }
    else
        enum Format = "";
}
// Expands format string F against the arguments A: "%s" inserts the
// next argument, "%%" inserts a percent sign, any other '%' sequence is
// rejected at compile time, and everything else is copied. Arguments
// left over when F is exhausted are handed back to Format.
template FormatString(const(char)[] F, A...)
{
    static if (F.length >= 2)
    {
        static if (F[0 .. 2] == "%s")
            enum FormatString
                = toStringNow!(A[0]) ~ FormatString!(F[2 .. $], A[1 .. $]);
        else static if (F[0 .. 2] == "%%")
            enum FormatString = "%" ~ FormatString!(F[2 .. $], A);
        else
        {
            static assert(F[0] != '%', "unrecognized format %" ~ F[1]);
            enum FormatString = F[0] ~ FormatString!(F[1 .. $], A);
        }
    }
    else static if (F.length == 1)
        enum FormatString = F[0] ~ Format!(A);
    else
        enum FormatString = Format!(A);
}
unittest
{
    // One "%s" consumes "world"; the remaining constants are appended in order.
    string formatted = Format!("hel%slo", "world", -138, 'c', true);
    assert(formatted == "helworldlo-138ctrue", "[" ~ formatted ~ "]");
}
/**
* Convert constant argument to a string.
*/
// Unsigned 64-bit overload: the decimal text is built recursively, one digit
// per instantiation.
template toStringNow(ulong v)
{
static if (v < 10)
// Single digit: offset from the character '0'.
enum toStringNow = "" ~ cast(char)(v + '0');
else
// All leading digits, followed by the last digit.
enum toStringNow = toStringNow!(v / 10) ~ toStringNow!(v % 10);
}
unittest
{
static assert(toStringNow!(1uL << 62) == "4611686018427387904");
}
/// ditto
// Signed 64-bit overload: emits a leading '-' for negative values and
// delegates the magnitude to the ulong overload.
template toStringNow(long v)
{
static if (v < 0)
enum toStringNow = "-" ~ toStringNow!(cast(ulong) -v);
else
enum toStringNow = toStringNow!(cast(ulong) v);
}
unittest
{
static assert(toStringNow!(0x100000000) == "4294967296");
static assert(toStringNow!(-138L) == "-138");
}
/// ditto
// 32-bit unsigned: widened to ulong.
template toStringNow(uint U)
{
enum toStringNow = toStringNow!(cast(ulong)U);
}
/// ditto
// 32-bit signed: widened to long so the sign handling above applies.
template toStringNow(int I)
{
enum toStringNow = toStringNow!(cast(long)I);
}
/// ditto
// Booleans are spelled out as "true" / "false".
template toStringNow(bool B)
{
enum toStringNow = B ? "true" : "false";
}
/// ditto
// Strings pass through unchanged.
template toStringNow(string S)
{
enum toStringNow = S;
}
/// ditto
// A single char becomes a one-character string.
template toStringNow(char C)
{
enum toStringNow = "" ~ C;
}
/********
* Parse unsigned integer literal from the start of string s.
* returns:
* .value = the integer literal as a string,
* .rest = the string following the integer literal
* Otherwise:
* .value = null,
* .rest = s
*/
template parseUinteger(const(char)[] s)
{
    // Exposes two members: `value` (the leading run of decimal digits) and
    // `rest` (everything after that run).
    static if (s.length != 0 && s[0] >= '0' && s[0] <= '9')
    {
        // Leading digit: keep it and let the recursion handle the remainder.
        enum value = s[0] ~ parseUinteger!(s[1 .. $]).value;
        enum rest = parseUinteger!(s[1 .. $]).rest;
    }
    else static if (s.length == 0)
    {
        // Input exhausted.
        enum value = "";
        enum rest = "";
    }
    else
    {
        // First character is not a digit: nothing is consumed.
        enum value = "";
        enum rest = s;
    }
}
/********
Parse integer literal optionally preceded by $(D '-') from the start
of string $(D s).
Returns:
.value = the integer literal as a string,
.rest = the string following the integer literal
Otherwise:
.value = null,
.rest = s
*/
template parseInteger(const(char)[] s)
{
    // Exposes `value` (an optional '-' followed by digits) and `rest`
    // (the unconsumed tail of s).
    static if (s.length != 0 && s[0] >= '0' && s[0] <= '9')
    {
        // Unsigned literal: first digit plus the digits that follow it.
        enum value = s[0] ~ parseUinteger!(s[1 .. $]).value;
        enum rest = parseUinteger!(s[1 .. $]).rest;
    }
    else static if (s.length >= 2 &&
                    s[0] == '-' && s[1] >= '0' && s[1] <= '9')
    {
        // A '-' only counts as a sign when a digit follows immediately.
        enum value = s[0 .. 2] ~ parseUinteger!(s[2 .. $]).value;
        enum rest = parseUinteger!(s[2 .. $]).rest;
    }
    else static if (s.length == 0)
    {
        enum value = "";
        enum rest = "";
    }
    else
    {
        // No integer literal at the start: nothing is consumed.
        enum value = "";
        enum rest = s;
    }
}
unittest
{
    // Unsigned: digits are split from the trailing text.
    auto unsignedDigits = parseUinteger!("1234abc").value;
    auto unsignedTail = parseUinteger!("1234abc").rest;
    assert(unsignedDigits == "1234");
    assert(unsignedTail == "abc");

    // Signed: the leading '-' is part of the literal.
    auto signedDigits = parseInteger!("-1234abc").value;
    auto signedTail = parseInteger!("-1234abc").rest;
    assert(signedDigits == "-1234");
    assert(signedTail == "abc");
}
undeaD-1.0.10/src/undead/regexp.d 0000664 0000000 0000000 00000303672 13463741136 0016502 0 ustar 00root root 0000000 0000000 // Written in the D programming language.
// Regular Expressions.
/**
* $(RED Deprecated. It will be removed in February 2012.
* Please use $(LINK2 std_regex.html, std.regex) instead.)
*
* $(LINK2 http://www.digitalmars.com/ctg/regular.html, Regular
* expressions) are a powerful method of string pattern matching. The
* regular expression language used in this library is the same as
* that commonly used, however, some of the very advanced forms may
* behave slightly differently. The standard observed is the $(WEB
* www.ecma-international.org/publications/standards/Ecma-262.htm,
* ECMA standard) for regular expressions.
*
* undead.regexp is designed to work only with valid UTF strings as input.
* To validate untrusted input, use std.utf.validate().
*
* In the following guide, $(I pattern)[] refers to a
* $(LINK2 http://www.digitalmars.com/ctg/regular.html, regular expression).
* The $(I attributes)[] refers to
* a string controlling the interpretation
* of the regular expression.
* It consists of a sequence of one or more
* of the following characters:
*
* <table border=1 cellspacing=0 cellpadding=5>
* <caption>Attribute Characters</caption>
* $(TR $(TH Attribute) $(TH Action))
* <tr>
* $(TD $(B g))
* $(TD global; repeat over the whole input string)
* </tr>
* <tr>
* $(TD $(B i))
* $(TD case insensitive)
* </tr>
* <tr>
* $(TD $(B m))
* $(TD treat as multiple lines separated by newlines)
* </tr>
* </table>
*
* The $(I format)[] string has the formatting characters:
*
* <table border=1 cellspacing=0 cellpadding=5>
* <caption>Formatting Characters</caption>
* $(TR $(TH Format) $(TH Replaced With))
* $(TR
* $(TD $(B $$)) $(TD $)
* )
* $(TR
* $(TD $(B $&)) $(TD The matched substring.)
* )
* $(TR
* $(TD $(B $`)) $(TD The portion of string that precedes the matched substring.)
* )
* $(TR
* $(TD $(B $')) $(TD The portion of string that follows the matched substring.)
* )
* $(TR
* $(TD $(B $(DOLLAR))$(I n)) $(TD The $(I n)th capture, where $(I n)
* is a single digit 1-9
* and $(DOLLAR)$(I n) is not followed by a decimal digit.)
* )
* $(TR
* $(TD $(B $(DOLLAR))$(I nn)) $(TD The $(I nn)th capture, where $(I nn)
* is a two-digit decimal
* number 01-99.
* If $(I nn)th capture is undefined or more than the number
* of parenthesized subexpressions, use the empty
* string instead.)
* )
* </table>
*
* Any other $ are left as is.
*
* References:
* $(LINK2 http://en.wikipedia.org/wiki/Regular_expressions, Wikipedia)
* Macros:
* WIKI = StdRegexp
* DOLLAR = $
*
* Copyright: Copyright Digital Mars 2000 - 2011.
* License: Boost License 1.0.
* Authors: $(WEB digitalmars.com, Walter Bright)
* Source: $(PHOBOSSRC std/_regexp.d)
*/
/* Copyright Digital Mars 2000 - 2011.
* Distributed under the Boost Software License, Version 1.0.
* (See accompanying file LICENSE_1_0.txt or copy at
* http://www.boost.org/LICENSE_1_0.txt)
*/
/*
Escape sequences:
\nnn starts out a 1, 2 or 3 digit octal sequence,
where n is an octal digit. If nnn is larger than
0377, then the 3rd digit is not part of the sequence
and is not consumed.
For maximal portability, use exactly 3 digits.
\xXX starts out a 1 or 2 digit hex sequence. X
is a hex character. If the first character after the \x
is not a hex character, the value of the sequence is 'x'
and the XX are not consumed.
For maximal portability, use exactly 2 digits.
\uUUUU is a unicode sequence. There are exactly
4 hex characters after the \u, if any are not, then
the value of the sequence is 'u', and the UUUU are not
consumed.
Character classes:
[a-b], where a is greater than b, will produce
an error.
References:
http://www.unicode.org/unicode/reports/tr18/
*/
module undead.regexp;
//pragma(msg, "Notice: As of Phobos 2.055, std.regexp has been deprecated. " ~
// "It will be removed in February 2012. Please use std.regex instead.");
//debug = regexp; // uncomment to turn on debugging printf's
private
{
import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;
import std.array;
import std.stdio;
import std.string;
import std.ascii;
import std.outbuffer;
import std.bitmanip;
import std.utf;
import std.algorithm;
import std.array;
import std.traits;
}
//deprecated:
/** Regular expression to extract an _email address.
* References:
* $(LINK2 http://www.regular-expressions.info/email.html, How to Find or Validate an Email Address)$(BR)
* $(LINK2 http://tools.ietf.org/html/rfc2822#section-3.4.1, RFC 2822 Internet Message Format)
*/
// NOTE: declared as a plain module-level `string`, i.e. a mutable variable
// (thread-local in D2), not a manifest constant.
string email =
r"[a-zA-Z]([.]?([[a-zA-Z0-9_]-]+)*)?@([[a-zA-Z0-9_]\-_]+\.)+[a-zA-Z]{2,6}";
/** Regular expression to extract a _url */
// NOTE: like `email`, this is a mutable module-level variable.
string url = r"(([h|H][t|T]|[f|F])[t|T][p|P]([s|S]?)\:\/\/|~/|/)?([\w]+:\w+@)?(([a-zA-Z]{1}([\w\-]+\.)+([\w]{2,5}))(:[\d]{1,5})?)?((/?\w+/)+|/?)(\w+\.[\w]{3,4})?([,]\w+)*((\?\w+=\w+)?(&\w+=\w+)*([,]\w*)*)?";
/************************************
* One of these gets thrown on compilation errors
*/
class RegExpException : Exception
{
    // Forwards the diagnostic text unchanged to the Exception base class.
    this(string msg) { super(msg); }
}
// Half-open [rm_so, rm_eo) index range of one match or capture group within
// the searched string. The fields are signed because RegExp.test() stores -1
// in both of them before each match attempt.
struct regmatch_t
{
sizediff_t rm_so; // index of start of match
sizediff_t rm_eo; // index past end of match
}
// Character type used by the matcher for pattern/attribute code units.
private alias char rchar; // so we can make a wchar version
/******************************************************
* Search string for matches with regular expression
* pattern with attributes.
* Replace each match with string generated from format.
* Params:
* s = String to search.
* pattern = Regular expression pattern.
* format = Replacement string format.
* attributes = Regular expression attributes.
* Returns:
* the resulting string
* Example:
* Replace the letters 'a' with the letters 'ZZ'.
* ---
* s = "Strap a rocket engine on a chicken."
* sub(s, "a", "ZZ") // result: StrZZp a rocket engine on a chicken.
* sub(s, "a", "ZZ", "g") // result: StrZZp ZZ rocket engine on ZZ chicken.
* ---
* The replacement format can reference the matches using
* the $&, $$, $', $`, $0 .. $99 notation:
* ---
* sub(s, "[ar]", "[$&]", "g") // result: St[r][a]p [a] [r]ocket engine on [a] chicken.
* ---
*/
string sub(string s, string pattern, string format, string attributes = null)
{
    // Compile the pattern, let RegExp.replace do the work, then release the
    // matcher eagerly instead of waiting for the GC.
    RegExp matcher = new RegExp(pattern, attributes);
    string replaced = matcher.replace(s, format);
    matcher.destroy();
    return replaced;
}
unittest
{
    debug(regexp) printf("regexp.sub.unittest\n");
    // Without the "g" attribute only the first match is replaced.
    assert(sub("hello", "ll", "ss") == "hesso");
}
/*******************************************************
* Search string for matches with regular expression
* pattern with attributes.
* Pass each match to delegate dg.
* Replace each match with the return value from dg.
* Params:
* s = String to search.
* pattern = Regular expression pattern.
* dg = Delegate
* attributes = Regular expression attributes.
* Returns: the resulting string.
* Example:
* Capitalize the letters 'a' and 'r':
* ---
* s = "Strap a rocket engine on a chicken.";
* sub(s, "[ar]",
* delegate string (RegExp m)
* {
* return toUpper(m[0]);
* },
* "g"); // result: StRAp A Rocket engine on A chicken.
* ---
*/
string sub(string s, string pattern, string delegate(RegExp) dg, string attributes = null)
{
    // Replaces matches of `pattern` in `s` with whatever `dg` returns for
    // each match. With the "g" attribute every match is replaced, otherwise
    // only the first one. `dg` receives the RegExp positioned on the current
    // match, so it can inspect m[0], m.pre, m.post, and so on.
    //
    // The previous "simple pattern" shortcut (one dg call, then a literal
    // search-and-replace of every occurrence) was removed: it reused the
    // first replacement for all later matches, which breaks the documented
    // per-match contract for delegates whose result varies, and it treated
    // "match text equals pattern text" as proof that the pattern is literal.
    auto r = new RegExp(pattern, attributes);
    scope(exit) r.destroy(); // also released if dg throws

    string result = s;
    size_t lastindex = 0;
    // Running difference in length between `result` and `s`; converts match
    // offsets in `s` to offsets in `result`. Unsigned wrap-around is
    // intentional and cancels out when added to the match offsets.
    size_t offset = 0;

    while (r.test(s, lastindex))
    {
        auto so = r.pmatch[0].rm_so;
        auto eo = r.pmatch[0].rm_eo;
        string replacement = dg(r);

        result = replaceSlice(result, result[offset + so .. offset + eo], replacement);

        if (!(r.attributes & RegExp.REA.global))
            break; // non-global: first match only

        offset += replacement.length - (eo - so);
        if (lastindex == eo)
            lastindex++; // always consume some source
        else
            lastindex = eo;
    }
    return result;
}
unittest
{
    debug(regexp) printf("regexp.sub.unittest\n");
    string foo(RegExp r) { return "ss"; }

    // Single (non-global) replacement through a delegate.
    auto single = sub("hello", "ll", delegate string(RegExp r) { return "ss"; });
    assert(single == "hesso");

    // Global replacement where the delegate returns the matched text again.
    single = sub("hello", "l", delegate string(RegExp r) { return "l"; }, "g");
    assert(single == "hello");

    // Global replacement that upper-cases each matched character.
    auto upperCased = sub("Strap a rocket engine on a chicken.",
                          "[ar]",
                          delegate string (RegExp m)
                          {
                              return std.string.toUpper(m[0]);
                          },
                          "g");
    assert(upperCased == "StRAp A Rocket engine on A chicken.");
}
/*************************************************
* Search $(D_PARAM s[]) for first match with $(D_PARAM pattern).
* Params:
* s = String to search.
* pattern = Regular expression pattern.
* Returns:
* index into s[] of match if found, -1 if no match.
* Example:
* ---
* auto s = "abcabcabab";
* find(s, RegExp("b")); // match, returns 1
* find(s, RegExp("f")); // no match, returns -1
* ---
*/
sizediff_t find(string s, RegExp pattern)
{
    // -1 signals "no match"; otherwise report where the first match starts.
    if (!pattern.test(s))
        return -1;
    return pattern.pmatch[0].rm_so;
}
unittest
{
    debug(regexp) printf("regexp.find.unittest\n");
    // Match found at index 1.
    assert(find("xabcy", RegExp("abc")) == 1);
    // No match at all.
    assert(find("cba", RegExp("abc")) == -1);
}
/**
Returns:
Same as $(D_PARAM find(s, RegExp(pattern, attributes))).
WARNING:
This function is scheduled for deprecation due to unnecessary
ambiguity with the homonym function in std.string. Instead of
$(D_PARAM undead.regexp.find(s, p, a)), you may want to use $(D_PARAM
find(s, RegExp(p, a))).
*/
sizediff_t
find(string s, string pattern, string attributes = null)
{
    // Temporary matcher, destroyed on every exit path.
    auto compiled = new RegExp(pattern, attributes);
    scope(exit) compiled.destroy();

    if (compiled.test(s))
        return compiled.pmatch[0].rm_so;
    return -1;
}
unittest
{
    debug(regexp) printf("regexp.find.unittest\n");
    assert(find("xabcy", "abc") == 1);
    assert(find("cba", "abc") == -1);
}
/*************************************************
* Search $(D_PARAM s[]) for last match with $(D_PARAM pattern).
* Params:
* s = String to search.
* pattern = Regular expression pattern.
* Returns:
* index into s[] of match if found, -1 if no match.
* Example:
* ---
* auto s = "abcabcabab";
* rfind(s, RegExp("b")); // match, returns 9
* rfind(s, RegExp("f")); // no match, returns -1
* ---
*/
sizediff_t rfind(string s, RegExp pattern)
{
    // Scan forward through every match and remember where the last one began.
    sizediff_t lastStart = -1;
    sizediff_t resume = 0;

    while (pattern.test(s, resume))
    {
        lastStart = pattern.pmatch[0].rm_so;
        immutable matchEnd = pattern.pmatch[0].rm_eo;
        // A match that ends exactly where the scan resumed must still
        // advance by one, otherwise the loop would never terminate.
        resume = (resume == matchEnd) ? resume + 1 : matchEnd;
    }
    return lastStart;
}
unittest
{
    debug(regexp) printf("regexp.rfind.unittest\n");
    enum subject = "abcdefcdef";

    // Last occurrence of a one- and a two-character pattern.
    assert(rfind(subject, RegExp("c")) == 6);
    assert(rfind(subject, RegExp("cd")) == 6);
    // Patterns that never match.
    assert(rfind(subject, RegExp("x")) == -1);
    assert(rfind(subject, RegExp("xy")) == -1);
    // The empty pattern last matches at the very end of the string.
    assert(rfind(subject, RegExp("")) == 10);
}
/*************************************************
Returns:
Same as $(D_PARAM rfind(s, RegExp(pattern, attributes))).
WARNING:
This function is scheduled for deprecation due to unnecessary
ambiguity with the homonym function in std.string. Instead of
$(D_PARAM undead.regexp.rfind(s, p, a)), you may want to use $(D_PARAM
rfind(s, RegExp(p, a))).
*/
sizediff_t
rfind(string s, string pattern, string attributes = null)
{
    auto compiled = new RegExp(pattern, attributes);
    typeof(return) lastStart = -1;
    typeof(return) resume = 0;

    // Walk all matches left to right; the start of the final one wins.
    for (; compiled.test(s, resume); )
    {
        lastStart = compiled.pmatch[0].rm_so;
        immutable matchEnd = compiled.pmatch[0].rm_eo;
        if (resume != matchEnd)
            resume = matchEnd;
        else
            resume++; // always consume some source
    }
    compiled.destroy();
    return lastStart;
}
unittest
{
    debug(regexp) printf("regexp.rfind.unittest\n");
    enum subject = "abcdefcdef";

    assert(rfind(subject, "c") == 6);
    assert(rfind(subject, "cd") == 6);
    assert(rfind(subject, "x") == -1);
    assert(rfind(subject, "xy") == -1);
    // The empty pattern last matches at the very end of the string.
    assert(rfind(subject, "") == 10);
}
/********************************************
* Split s[] into an array of strings, using the regular
* expression $(D_PARAM pattern) as the separator.
* Params:
* s = String to search.
* pattern = Regular expression pattern.
* Returns:
* array of slices into s[]
* Example:
* ---
* foreach (s; split("abcabcabab", RegExp("C.", "i")))
* {
* writefln("s = '%s'", s);
* }
* // Prints:
* // s = 'ab'
* // s = 'b'
* // s = 'bab'
* ---
*/
string[] split(string s, RegExp pattern)
{
    // Free-function form of RegExp.split.
    string[] parts = pattern.split(s);
    return parts;
}
unittest
{
    debug(regexp) printf("regexp.split.unittest()\n");

    // "a*" as separator: empty first piece, then "b".
    auto parts = split("ab", RegExp("a*"));
    assert(parts.length == 2);
    assert(parts[0] == "");
    assert(parts[1] == "b");

    // Case-insensitive separator "C.".
    static immutable string[] expected = ["ab", "b", "bab"];
    foreach (i, s; split("abcabcabab", RegExp("C.", "i")))
    {
        assert(i < expected.length);
        assert(s == expected[i]);
    }
}
/********************************************
Returns:
Same as $(D_PARAM split(s, RegExp(pattern, attributes))).
WARNING:
This function is scheduled for deprecation due to unnecessary
ambiguity with the homonym function in std.string. Instead of
$(D_PARAM undead.regexp.split(s, p, a)), you may want to use $(D_PARAM
split(s, RegExp(p, a))).
*/
string[] split(string s, string pattern, string attributes = null)
{
    // Build a throw-away matcher, split with it, then release it eagerly.
    RegExp separator = new RegExp(pattern, attributes);
    string[] parts = separator.split(s);
    separator.destroy();
    return parts;
}
unittest
{
    debug(regexp) printf("regexp.split.unittest()\n");

    auto parts = split("ab", "a*");
    assert(parts.length == 2);
    assert(parts[0] == "");
    assert(parts[1] == "b");

    // Case-insensitive separator "C.".
    static immutable string[] expected = ["ab", "b", "bab"];
    foreach (i, s; split("abcabcabab", "C.", "i"))
    {
        assert(i < expected.length);
        assert(s == expected[i]);
    }
}
/****************************************************
* Search s[] for first match with pattern[] with attributes[].
* Params:
* s = String to search.
* pattern = Regular expression pattern.
* attributes = Regular expression attributes.
* Returns:
* corresponding RegExp if found, null if not.
* Example:
* ---
* import std.stdio;
* import undead.regexp;
*
* void main()
* {
* if (auto m = undead.regexp.search("abcdef", "c"))
* {
* writefln("%s[%s]%s", m.pre, m[0], m.post);
* }
* }
* // Prints:
* // ab[c]def
* ---
*/
RegExp search(string s, string pattern, string attributes = null)
{
    auto candidate = new RegExp(pattern, attributes);

    // On success the matcher itself is the result, positioned on the match.
    if (candidate.test(s))
        return candidate;

    // No match: release the matcher and report failure with null.
    candidate.destroy();
    return null;
}
unittest
{
    debug(regexp) printf("regexp.string.unittest()\n");

    // Successful search: pre / match / post and the capture slots.
    auto hit = undead.regexp.search("abcdef", "c()");
    assert(hit !is null);
    auto rendered = std.string.format("%s[%s]%s", hit.pre, hit[0], hit.post);
    assert(rendered == "ab[c]def");
    assert(hit[1] == null);
    assert(hit[2] == null);

    // Unsuccessful search yields null.
    auto miss = undead.regexp.search("abcdef", "g");
    assert(miss is null);
}
/* ********************************* RegExp ******************************** */
/*****************************
* RegExp is a class to handle regular expressions.
*
* It is the core foundation for adding powerful string pattern matching
* capabilities to programs like grep, text editors, awk, sed, etc.
*/
class RegExp
{
/*****
* Construct a RegExp object. Compile pattern
* with attributes into
* an internal form for fast execution.
* Params:
* pattern = regular expression
* attributes = _attributes
* Throws: RegExpException if there are any compilation errors.
* Example:
* Declare two variables and assign to them a RegExp object:
* ---
* auto r = new RegExp("pattern");
* auto s = new RegExp(r"p[1-5]\s*");
* ---
*/
public this(string pattern, string attributes = null)
{
    // Start with pmatch as a one-element view over gmatch; compile() swaps
    // in a larger array when the pattern contains capture groups.
    regmatch_t* wholeMatch = &gmatch;
    pmatch = wholeMatch[0 .. 1];
    compile(pattern, attributes);
}
/*****
* Generate instance of RegExp.
* Params:
* pattern = regular expression
* attributes = _attributes
* Throws: RegExpException if there are any compilation errors.
* Example:
* Declare two variables and assign to them a RegExp object:
* ---
* auto r = RegExp("pattern");
* auto s = RegExp(r"p[1-5]\s*");
* ---
*/
public static RegExp opCall(string pattern, string attributes = null)
{
    // Lets callers write RegExp("...") without `new`.
    auto instance = new RegExp(pattern, attributes);
    return instance;
}
unittest
{
    debug(regexp) printf("regexp.opCall.unittest()\n");

    // A valid attribute compiles without throwing.
    auto valid = RegExp("hello", "m");

    // An unknown attribute must raise RegExpException.
    string failureText;
    try
    {
        auto invalid = RegExp("hello", "q");
        assert(0);
    }
    catch (RegExpException caught)
    {
        failureText = caught.toString();
        //writefln("message: %s", caught);
    }
    assert(std.algorithm.countUntil(failureText, "unrecognized attribute") >= 0);
}
/************************************
* Set up for start of foreach loop.
* Returns:
* search() returns instance of RegExp set up to _search string[].
* Example:
* ---
* import std.stdio;
* import undead.regexp;
*
* void main()
* {
* foreach(m; RegExp("ab").search("abcabcabab"))
* {
* writefln("%s[%s]%s", m.pre, m[0], m.post);
* }
* }
* // Prints:
* // [ab]cabcabab
* // abc[ab]cabab
* // abcabc[ab]ab
* // abcabcab[ab]
* ---
*/
public RegExp search(string string)
{
    // Rewind the end-of-match marker so the next test() call starts at
    // index 0, and remember the text to iterate over.
    pmatch[0].rm_eo = 0;
    input = string;
    return this;
}
/** ditto */
public int opApply(scope int delegate(ref RegExp) dg)
{
    RegExp self = this;

    // Each successful test() advances to the next match; the loop body sees
    // this same RegExp positioned on that match.
    while (test())
    {
        // A non-zero result means the foreach body wants to stop early.
        if (auto stop = dg(self))
            return stop;
    }
    return 0;
}
unittest
{
    debug(regexp) printf("regexp.search.unittest()\n");

    // Expected "pre[match]post" rendering for each successive match.
    static immutable string[] expected = [
        "[ab]cabcabab",
        "abc[ab]cabab",
        "abcabc[ab]ab",
        "abcabcab[ab]",
    ];
    size_t seen;
    foreach(m; RegExp("ab").search("abcabcabab"))
    {
        auto rendered = std.string.format("%s[%s]%s", m.pre, m[0], m.post);
        assert(seen < expected.length);
        assert(rendered == expected[seen]);
        seen++;
    }
}
/******************
* Retrieve match n.
*
* n==0 means the matched substring, n>0 means the
* n'th parenthesized subexpression.
* if n is larger than the number of parenthesized subexpressions,
* null is returned.
*/
public string opIndex(size_t n)
{
    if (n < pmatch.length)
    {
        immutable start = pmatch[n].rm_so;
        immutable stop = pmatch[n].rm_eo;
        // Equal bounds mean an empty (or unset) capture; null is returned.
        if (start != stop)
            return input[start .. stop];
    }
    return null;
}
/**
Same as $(D_PARAM opIndex(n)).
WARNING:
Scheduled for deprecation due to confusion with overloaded
$(D_PARAM match(string)). Instead of $(D_PARAM regex.match(n))
you may want to use $(D_PARAM regex[n]).
*/
public string match(size_t n)
{
    // Named form of the index operator.
    return opIndex(n);
}
/*******************
* Return the slice of the input that precedes the matched substring.
*/
public @property string pre()
{
    // Everything before the start of the overall match.
    immutable matchStart = pmatch[0].rm_so;
    return input[0 .. matchStart];
}
/*******************
* Return the slice of the input that follows the matched substring.
*/
public @property string post()
{
    // Everything from the end of the overall match onwards.
    immutable matchEnd = pmatch[0].rm_eo;
    return input[matchEnd .. $];
}
uint re_nsub; // number of parenthesized subexpression matches
regmatch_t[] pmatch; // array [re_nsub + 1]
string input; // the string to search
// per instance:
string pattern; // source text of the regular expression
string flags; // source text of the attributes parameter
// Reset to 0 by compile(); presumably incremented by error() (defined
// elsewhere in the class) - TODO confirm.
int errors;
// Bitwise OR of REA flags, rebuilt by compile() from the attributes string.
uint attributes;
// Bit flags stored in `attributes`. compile() maps the attribute characters
// 'g', 'i' and 'm' to the first three; it has no character for dotmatchlf.
enum REA
{
global = 1, // has the g attribute
ignoreCase = 2, // has the i attribute
multiline = 4, // if treat as multiple lines separated
// by newlines, or as a single line
dotmatchlf = 8, // if . matches \n
}
private:
size_t src; // current source index in input[]
size_t src_start; // starting index for match in input[]
size_t p; // position of parser in pattern[]
regmatch_t gmatch; // match for the entire regular expression
// (serves as storage for pmatch[0])
const(ubyte)[] program; // pattern[] compiled into regular expression program
// Scratch buffer: created in compile(), handed to `program`, then destroyed.
OutBuffer buf;
/******************************************/
// Opcodes
// Byte codes of the compiled `program`. The values are positional, so the
// order of the members must not change.
enum : ubyte
{
REend, // end of program
REchar, // single character
REichar, // single character, case insensitive
REdchar, // single UCS character
REidchar, // single wide character, case insensitive
REanychar, // any character
REanystar, // ".*"
REstring, // string of characters
REistring, // string of characters, case insensitive
REtestbit, // any in bitmap, non-consuming
REbit, // any in the bit map
REnotbit, // any not in the bit map
RErange, // any in the string
REnotrange, // any not in the string
REor, // a | b
REplus, // 1 or more
REstar, // 0 or more
REquest, // 0 or 1
REnm, // n..m
REnmq, // n..m, non-greedy version
REbol, // beginning of line
REeol, // end of line
REparen, // parenthesized subexpression
REgoto, // goto offset
// NOTE(review): the opcodes below carry no comments in the original; judging
// by their names they presumably implement \b \B \d \D \s \S \w \W and
// back-references - confirm against the parser and matcher.
REwordboundary,
REnotwordboundary,
REdigit,
REnotdigit,
REspace,
REnotspace,
REword,
REnotword,
REbackref,
};
// BUG: should this include '$'?
private int isword(dchar c)
{
    // Word characters are ASCII letters, ASCII digits and the underscore.
    if (c == '_')
        return 1;
    return isAlphaNum(c);
}
// All-ones sentinel. NOTE(review): presumably stands for "no upper bound" in
// repetition counts - its uses are outside this part of the file; confirm.
private uint inf = ~0u;
/* ********************************
* Throws RegExpException on error
*/
public void compile(string pattern, string attributes)
{
    // (Re)compiles `pattern` with `attributes` into `program` and resets the
    // match state. May be called again on an existing instance.
    //
    // Params:
    //   pattern    = regular expression source text
    //   attributes = any combination of 'g', 'i', 'm', each at most once
    //
    // Errors are reported through error() (defined elsewhere in this class);
    // per the comment on this method it raises RegExpException.

    //printf("RegExp.compile('%.*s', '%.*s')\n", pattern.length, pattern.ptr, attributes.length, attributes.ptr);
    this.attributes = 0;
    foreach (rchar c; attributes)
    {
        REA att;
        switch (c)
        {
            case 'g': att = REA.global; break;
            case 'i': att = REA.ignoreCase; break;
            case 'm': att = REA.multiline; break;
            default:
                error("unrecognized attribute");
                return;
        }
        if (this.attributes & att)
        {
            error("redundant attribute");
            return;
        }
        this.attributes |= att;
    }

    input = null;
    this.pattern = pattern;
    this.flags = attributes;
    re_nsub = 0;
    errors = 0;

    // Emit the program into a scratch buffer.
    buf = new OutBuffer();
    buf.reserve(pattern.length * 8);
    p = 0;
    parseRegexp();
    if (p < pattern.length)
    {
        error("unmatched ')'");
    }
    // @@@ SKIPPING OPTIMIZATION SOLVES BUG 941 @@@
    //optimize();
    program = buf.data;
    buf.data = null;
    buf.destroy();

    // Keep pmatch at exactly re_nsub + 1 slots. Previously the array was only
    // ever grown, so recompiling with fewer groups left stale capture slots
    // that exec()/opIndex would still slice against the new input.
    if (pmatch.length != re_nsub + 1)
    {
        // Never resize the view over gmatch in place; start a fresh array.
        if (pmatch.ptr is &gmatch)
            pmatch = null;
        pmatch.length = re_nsub + 1;
    }
    pmatch[0].rm_so = 0;
    pmatch[0].rm_eo = 0;
}
/********************************************
* Split s[] into an array of strings, using the regular
* expression as the separator.
* Returns:
* array of slices into s[]
*/
public string[] split(string s)
{
    debug(regexp) printf("regexp.split()\n");
    string[] pieces;

    // Empty input: the result is [""] unless the expression matches the
    // empty string, in which case it is empty.
    if (s.length == 0)
    {
        if (!test(s))
            pieces ~= s;
        return pieces;
    }

    sizediff_t pieceStart = 0; // start of the piece currently being collected
    sizediff_t cursor = 0;     // where the next separator search begins

    while (cursor != s.length)
    {
        if (test(s, cursor))
        {
            cursor = pmatch[0].rm_so;
            immutable sepEnd = pmatch[0].rm_eo;
            if (sepEnd != pieceStart)
            {
                // Text before the separator, then one entry per capture group.
                pieces ~= s[pieceStart .. cursor];
                foreach (ref group; pmatch[1 .. $])
                {
                    if (group.rm_so == group.rm_eo)
                        pieces ~= s[0 .. 0]; // -1 gives array bounds error
                    else
                        pieces ~= s[group.rm_so .. group.rm_eo];
                }
                pieceStart = cursor = sepEnd;
                continue;
            }
        }
        cursor++;
    }

    // Whatever follows the last separator.
    pieces ~= s[pieceStart .. s.length];
    return pieces;
}
unittest
{
    debug(regexp) printf("regexp.split.unittest()\n");
    auto r = new RegExp("a*?", null);
    string[] result;
    string j;
    int i;

    // Non-greedy "a*?" as separator.
    result = r.split("ab");
    assert(result.length == 2);
    i = (result[0] == "a");
    assert(i == 1);
    i = (result[1] == "b");
    assert(i == 1);

    // Greedy "a*" consumes the leading 'a'.
    r = new RegExp("a*", null);
    result = r.split("ab");
    assert(result.length == 2);
    i = (result[0] == "");
    assert(i == 1);
    i = (result[1] == "b");
    assert(i == 1);

    // Tag-like separators with two capture groups (optional '/' and the tag
    // name); the captures are interleaved with the pieces. The input must
    // actually contain the <...> tags, otherwise nothing is split and the
    // expected join below cannot be produced.
    r = new RegExp("<(\\/)?([^<>]+)>", null);
    result = r.split("a<b>font</b>bar<TAG>hello</TAG>");
    debug(regexp)
    {
        for (i = 0; i < result.length; i++)
            printf("result[%d] = '%.*s'\n", i, result[i].length, result[i].ptr);
    }
    j = join(result, ",");
    //printf("j = '%.*s'\n", j.length, j.ptr);
    i = (j == "a,,b,font,/,b,bar,,TAG,hello,/,TAG,");
    assert(i == 1);

    // match() without the global attribute returns the first match only.
    r = new RegExp("a[bc]", null);
    result = r.match("123ab");
    j = join(result, ",");
    i = (j == "ab");
    assert(i == 1);
    result = r.match("ac");
    j = join(result, ",");
    i = (j == "ac");
    assert(i == 1);
}
/*************************************************
* Search string[] for match with regular expression.
* Returns:
* index of match if successful, -1 if not found
*/
public sizediff_t find(string string)
{
    // Start index of the first match, or -1 when there is none.
    return test(string) ? pmatch[0].rm_so : -1;
}
//deprecated alias find search;
unittest
{
    debug(regexp) printf("regexp.find.unittest()\n");
    RegExp abc = new RegExp("abc", null);
    assert(abc.find("xabcy") == 1);
    assert(abc.find("cba") == -1);
}
/*************************************************
* Search s[] for match.
* Returns:
* If global attribute, return array of all matches.
* If not global attribute, return same value as exec(s).
*/
public string[] match(string s)
{
    // Without the global attribute this is simply exec(s).
    if (!(attributes & REA.global))
        return exec(s);

    // Global: collect the text of every successive match.
    string[] found;
    sizediff_t resume = 0;
    while (test(s, resume))
    {
        immutable matchEnd = pmatch[0].rm_eo;
        found ~= input[pmatch[0].rm_so .. matchEnd];
        // A match ending where the scan resumed must still move forward.
        resume = (resume == matchEnd) ? resume + 1 : matchEnd;
    }
    return found;
}
unittest
{
    debug(regexp) printf("regexp.match.unittest()\n");

    // Non-global: only the first match is reported.
    auto firstOnly = new RegExp("a[bc]", null);
    assert(join(firstOnly.match("1ab2ac3"), ",") == "ab");

    // Global: every match is reported.
    auto every = new RegExp("a[bc]", "g");
    assert(join(every.match("1ab2ac3"), ",") == "ab,ac");
}
/*************************************************
* Find regular expression matches in s[]. Replace those matches
* with a new string composed of format[] merged with the result of the
* matches.
* If global, replace all matches. Otherwise, replace first match.
* Returns: the new string
*/
public string replace(string s, string format)
{
    // Replaces matches in `s` with `format` expanded against each match
    // (expansion is done by the replace(format) overload). With the global
    // attribute all matches are replaced, otherwise only the first.
    // Returns the new string; `s` itself is not modified.
    debug(regexp) printf("string = %.*s, format = %.*s\n", s.length, s.ptr, format.length, format.ptr);

    // True when `pat` contains no regular-expression metacharacter, i.e. the
    // only text it can match is `pat` itself.
    static bool isLiteralPattern(string pat)
    {
        foreach (char c; pat)
        {
            switch (c)
            {
                case '\\', '^', '$', '.', '|', '?', '*', '+',
                     '(', ')', '[', ']', '{', '}':
                    return false;
                default:
                    break;
            }
        }
        return true;
    }

    // The plain search-and-replace shortcut is only sound for a genuinely
    // literal pattern. The old test ("the matched text equals the pattern
    // text") also fired for patterns such as "a.c" that happened to match
    // themselves, and then skipped the other matches.
    immutable bool shortcutAllowed =
        (attributes & REA.global)          // global, so replace all
        && !(attributes & REA.ignoreCase)  // not ignoring case
        && !(attributes & REA.multiline)   // not multiline
        && isLiteralPattern(pattern);      // no special characters

    string result = s;
    sizediff_t lastindex = 0;
    // Running difference in length between `result` and `s`; unsigned
    // wrap-around is intentional and cancels out when added to so/eo.
    size_t offset = 0;

    for (;;)
    {
        if (!test(s, lastindex))
            break;
        auto so = pmatch[0].rm_so;
        auto eo = pmatch[0].rm_eo;
        string replacement = replace(format);

        // Optimize by using replace if possible - Dave Fladebo
        string slice = result[offset + so .. offset + eo];
        if (shortcutAllowed &&
            pattern == slice &&      // simple pattern (exact match)
            format == replacement)   // simple format, not $ formats
        {
            debug(regexp)
            {
                printf("pattern: %.*s, slice: %.*s, format: %.*s, replacement: %.*s\n",
                       pattern.length, pattern.ptr, slice.length, slice.ptr, format.length, format.ptr, replacement.length, replacement.ptr);
            }
            result = std.array.replace(result, slice, replacement);
            break;
        }

        result = replaceSlice(result, result[offset + so .. offset + eo], replacement);
        if (attributes & REA.global)
        {
            offset += replacement.length - (eo - so);
            if (lastindex == eo)
                lastindex++; // always consume some source
            else
                lastindex = eo;
        }
        else
            break;
    }
    return result;
}
unittest
{
    debug(regexp) printf("regexp.replace.unittest()\n");

    // "$&" in the format expands to the matched text.
    auto wrapping = new RegExp("a[bc]", "g");
    assert(wrapping.replace("1ab2ac3", "x$&y") == "1xaby2xacy3");

    // Literal pattern with a literal format.
    auto literal = new RegExp("ab", "g");
    assert(literal.replace("1ab2ac3", "xy") == "1xy2ac3");
}
/*************************************************
* Search string[] for match.
* Returns:
* array of slices into string[] representing matches
*/
public string[] exec(string s)
{
    debug(regexp) printf("regexp.exec(string = '%.*s')\n", s.length, s.ptr);
    // Rewind the overall match so the search starts at index 0 of the new
    // input, then defer to the continuing overload.
    pmatch[0].rm_eo = 0;
    pmatch[0].rm_so = 0;
    input = s;
    return exec();
}
/*************************************************
* Pick up where last exec(string) or exec() left off,
* searching string[] for next match.
* Returns:
* array of slices into string[] representing matches
*/
public string[] exec()
{
    if (!test())
        return null;

    // One entry per pmatch slot: [0] is the whole match, the rest are
    // capture groups. Slots with equal bounds become null.
    auto captures = new string[pmatch.length];
    foreach (idx, slot; pmatch)
    {
        captures[idx] = (slot.rm_so == slot.rm_eo)
            ? null
            : input[slot.rm_so .. slot.rm_eo];
    }
    return captures;
}
/************************************************
* Search s[] for match.
* Returns: 0 for no match, !=0 for match
* Example:
---
import std.stdio;
import undead.regexp;
import std.string;
int grep(int delegate(char[]) pred, char[][] list)
{
int count;
foreach (s; list)
{ if (pred(s))
++count;
}
return count;
}
void main()
{
auto x = grep(&RegExp("[Ff]oo").test,
std.string.split("mary had a foo lamb"));
writefln(x);
}
---
* which prints: 1
*/
//@@@
public bool test(string s)
{
    // Always searches from index 0 (not from the previous match end).
    immutable outcome = test(s, 0 /*pmatch[0].rm_eo*/);
    return outcome != 0;
}
/************************************************
* Pick up where last test(string) or test() left off, and search again.
* Returns: 0 for no match, !=0 for match
*/
public int test()
{
    // Continue in the current input right after the previous match.
    immutable resumeAt = pmatch[0].rm_eo;
    return test(input, resumeAt);
}
/************************************************
* Test s[] starting at startindex against regular expression.
* Returns: 0 for no match, !=0 for match
*/
public int test(string s, size_t startindex)
{
// Core search loop: tries the compiled program at successive positions of s,
// beginning at startindex. On success pmatch[0] holds the match bounds and 1
// is returned; otherwise 0 is returned.
char firstc;
input = s;
debug (regexp) printf("RegExp.test(input[] = '%.*s', startindex = %zd)\n", input.length, input.ptr, startindex);
pmatch[0].rm_so = 0;
pmatch[0].rm_eo = 0;
// startindex is a size_t, so the `< 0` half of this check can never be true;
// only the upper bound is effective.
if (startindex < 0 || startindex > input.length)
{
return 0; // fail
}
//debug(regexp) printProgram(program);
// First character optimization
// firstc stays 0 (disabled) unless the program begins with a literal REchar;
// it is also disabled for letters under the ignoreCase attribute.
firstc = 0;
if (program[0] == REchar)
{
firstc = program[1];
if (attributes & REA.ignoreCase && isAlpha(firstc))
firstc = 0;
}
for (auto si = startindex; ; si++)
{
if (firstc)
{
if (si == input.length)
break; // no match
if (input[si] != firstc)
{
// Skip ahead to the next occurrence of the required first character.
si++;
if (!chr(si, firstc)) // if first character not found
break; // no match
}
}
// Mark every capture slot as unset before this attempt. When the whole
// search fails after at least one attempt, these -1 values are what remains
// in pmatch.
for (size_t i = 0; i < re_nsub + 1; i++)
{
pmatch[i].rm_so = -1;
pmatch[i].rm_eo = -1;
}
src_start = src = si;
// trymatch is defined elsewhere in the class; on success `src` is read back
// below as the end of the match.
if (trymatch(0, program.length))
{
pmatch[0].rm_so = si;
pmatch[0].rm_eo = src;
//debug(regexp) printf("start = %d, end = %d\n", gmatch.rm_so, gmatch.rm_eo);
return 1;
}
// If possible match must start at beginning, we are done
if (program[0] == REbol || program[0] == REanystar)
{
if (attributes & REA.multiline)
{
// Scan for the next \n
if (!chr(si, '\n'))
break; // no match if '\n' not found
}
else
break;
}
// The attempt at si == input.length (empty tail) has been made; stop.
if (si == input.length)
break;
debug(regexp)
{
auto sss = input[si + 1 .. input.length];
printf("Starting new try: '%.*s'\n", sss.length, sss.ptr);
}
}
return 0; // no match
}
/**
Returns whether string $(D_PARAM s) matches $(D_PARAM this).
*/
alias test opEquals;
// bool opEquals(string s)
// {
// return test(s);
// }
unittest
{
// Exercises the opEquals alias of test(): a string compares equal to a
// RegExp when the pattern is found in it, and unequal when it is not.
assert("abc" == RegExp(".b."));
assert("abc" != RegExp(".b.."));
}
/**
 * Scan input forward from si for the code unit c.
 *
 * Params:
 *     si = starting index; on return it is the index of the first
 *          occurrence found, or input.length when there is none
 *     c  = code unit to look for
 *
 * Returns: 1 if c was found, 0 otherwise
 */
int chr(ref size_t si, rchar c)
{
    while (si < input.length)
    {
        if (input[si] == c)
            return 1;
        ++si;
    }
    return 0;
}
/**
 * Disassemble a compiled program to stdout, one opcode per line.
 *
 * Each line starts with the byte offset of the opcode. Decoding stops at
 * the first REend opcode or when the end of prog is reached; an unknown
 * opcode trips assert(0).
 *
 * Params:
 *     prog = compiled program bytes to print
 */
void printProgram(const(ubyte)[] prog)
{
    //debug(regexp)
    {
        size_t len;
        uint n;
        uint m;
        ushort *pu;
        uint *puint;
        char[] str;

        printf("printProgram()\n");
        for (size_t pc = 0; pc < prog.length; )
        {
            printf("%3d: ", pc);
            //printf("prog[pc] = %d, REchar = %d, REnmq = %d\n", prog[pc], REchar, REnmq);
            switch (prog[pc])
            {
                // Single-character opcodes: opcode byte followed by the character.
                case REchar:
                    printf("\tREchar '%c'\n", prog[pc + 1]);
                    pc += 1 + char.sizeof;
                    break;

                case REichar:
                    printf("\tREichar '%c'\n", prog[pc + 1]);
                    pc += 1 + char.sizeof;
                    break;

                case REdchar:
                    printf("\tREdchar '%c'\n", *cast(dchar *)&prog[pc + 1]);
                    pc += 1 + dchar.sizeof;
                    break;

                case REidchar:
                    printf("\tREidchar '%c'\n", *cast(dchar *)&prog[pc + 1]);
                    pc += 1 + dchar.sizeof;
                    break;

                case REanychar:
                    printf("\tREanychar\n");
                    pc++;
                    break;

                // String opcodes: opcode, size_t length, then the characters.
                case REstring:
                    len = *cast(size_t *)&prog[pc + 1];
                    str = (cast(char*)&prog[pc + 1 + size_t.sizeof])[0 .. len];
                    printf("\tREstring x%x, '%.*s'\n", len, str.length, str.ptr);
                    pc += 1 + size_t.sizeof + len * rchar.sizeof;
                    break;

                case REistring:
                    len = *cast(size_t *)&prog[pc + 1];
                    str = (cast(char*)&prog[pc + 1 + size_t.sizeof])[0 .. len];
                    printf("\tREistring x%x, '%.*s'\n", len, str.length, str.ptr);
                    pc += 1 + size_t.sizeof + len * rchar.sizeof;
                    break;

                // Bitmap opcodes: opcode, ushort max char, ushort byte count, bitmap bytes.
                case REtestbit:
                    pu = cast(ushort *)&prog[pc + 1];
                    printf("\tREtestbit %d, %d\n", pu[0], pu[1]);
                    len = pu[1];
                    pc += 1 + 2 * ushort.sizeof + len;
                    break;

                case REbit:
                    pu = cast(ushort *)&prog[pc + 1];
                    len = pu[1];
                    printf("\tREbit cmax=%02x, len=%d:", pu[0], len);
                    for (n = 0; n < len; n++)
                        printf(" %02x", prog[pc + 1 + 2 * ushort.sizeof + n]);
                    printf("\n");
                    pc += 1 + 2 * ushort.sizeof + len;
                    break;

                case REnotbit:
                    pu = cast(ushort *)&prog[pc + 1];
                    printf("\tREnotbit %d, %d\n", pu[0], pu[1]);
                    len = pu[1];
                    pc += 1 + 2 * ushort.sizeof + len;
                    break;

                // Range opcodes: opcode, uint length, then that many bytes.
                case RErange:
                    len = *cast(uint *)&prog[pc + 1];
                    printf("\tRErange %d\n", len);
                    // BUG: REAignoreCase?
                    pc += 1 + uint.sizeof + len;
                    break;

                case REnotrange:
                    len = *cast(uint *)&prog[pc + 1];
                    printf("\tREnotrange %d\n", len);
                    // BUG: REAignoreCase?
                    pc += 1 + uint.sizeof + len;
                    break;

                case REbol:
                    printf("\tREbol\n");
                    pc++;
                    break;

                case REeol:
                    printf("\tREeol\n");
                    pc++;
                    break;

                // Branch opcodes: only the header is skipped so that the
                // nested code is printed as well; "pc=>" is the jump target.
                case REor:
                    len = *cast(uint *)&prog[pc + 1];
                    printf("\tREor %d, pc=>%d\n", len, pc + 1 + uint.sizeof + len);
                    pc += 1 + uint.sizeof;
                    break;

                case REgoto:
                    len = *cast(uint *)&prog[pc + 1];
                    printf("\tREgoto %d, pc=>%d\n", len, pc + 1 + uint.sizeof + len);
                    pc += 1 + uint.sizeof;
                    break;

                case REanystar:
                    printf("\tREanystar\n");
                    pc++;
                    break;

                case REnm:
                case REnmq:
                    // len, n, m, ()
                    puint = cast(uint *)&prog[pc + 1];
                    len = puint[0];
                    n = puint[1];
                    m = puint[2];
                    printf("\tREnm%s len=%d, n=%u, m=%u, pc=>%d\n",
                        (prog[pc] == REnmq) ? "q".ptr : " ".ptr,
                        len, n, m, pc + 1 + uint.sizeof * 3 + len);
                    pc += 1 + uint.sizeof * 3;
                    break;

                case REparen:
                    // len, n, ()
                    puint = cast(uint *)&prog[pc + 1];
                    len = puint[0];
                    n = puint[1];
                    printf("\tREparen len=%d n=%d, pc=>%d\n", len, n, pc + 1 + uint.sizeof * 2 + len);
                    pc += 1 + uint.sizeof * 2;
                    break;

                case REend:
                    printf("\tREend\n");
                    return;

                case REwordboundary:
                    printf("\tREwordboundary\n");
                    pc++;
                    break;

                case REnotwordboundary:
                    printf("\tREnotwordboundary\n");
                    pc++;
                    break;

                case REdigit:
                    printf("\tREdigit\n");
                    pc++;
                    break;

                case REnotdigit:
                    printf("\tREnotdigit\n");
                    pc++;
                    break;

                case REspace:
                    printf("\tREspace\n");
                    pc++;
                    break;

                case REnotspace:
                    printf("\tREnotspace\n");
                    pc++;
                    break;

                case REword:
                    printf("\tREword\n");
                    pc++;
                    break;

                case REnotword:
                    printf("\tREnotword\n");
                    pc++;
                    break;

                case REbackref:
                    // The group index is the byte following THIS opcode; the
                    // previous code read prog[1], i.e. always the second byte
                    // of the whole program.
                    printf("\tREbackref %d\n", prog[pc + 1]);
                    pc += 2;
                    break;

                default:
                    assert(0);
            }
        }
    }
}
/**************************************************
* Match input against a section of the program[].
* Returns:
* 1 if successful match
* 0 no match
*/
int trymatch(size_t pc, size_t pcend)
{
// Interprets program[pc .. pcend] against input starting at the member
// index src. On success src is left just past the consumed text; on
// failure (label Lnomatch) src is restored to its value on entry.
// Capture slots in pmatch[] are written by REparen as a side effect.
size_t len;
size_t n;
size_t m;
size_t count;
size_t pop;
size_t ss;
regmatch_t *psave;
size_t c1;
size_t c2;
ushort* pu;
uint* puint;
debug(regexp)
{
auto sss = input[src .. input.length];
printf("RegExp.trymatch(pc = %zd, src = '%.*s', pcend = %zd)\n", pc, sss.length, sss.ptr, pcend);
}
auto srcsave = src;
// Scratch copy of pmatch[] used by REnm/REnmq; allocated lazily with alloca.
psave = null;
for (;;)
{
if (pc == pcend) // if done matching
// NOTE(review): this line is guarded by debug(regex); every other trace in this function uses debug(regexp).
{ debug(regex) printf("\tprogend\n");
return 1;
}
//printf("\top = %d\n", program[pc]);
switch (program[pc])
{
// Literal code unit, exact comparison.
case REchar:
if (src == input.length)
goto Lnomatch;
debug(regexp) printf("\tREchar '%c', src = '%c'\n", program[pc + 1], input[src]);
if (program[pc + 1] != input[src])
goto Lnomatch;
src++;
pc += 1 + char.sizeof;
break;
// Case-insensitive literal: only a lower-case input character is folded
// (to upper case) before the second comparison, so the operand is
// expected to be stored in upper case.
case REichar:
if (src == input.length)
goto Lnomatch;
debug(regexp) printf("\tREichar '%c', src = '%c'\n", program[pc + 1], input[src]);
c1 = program[pc + 1];
c2 = input[src];
if (c1 != c2)
{
if (isLower(cast(rchar)c2))
c2 = std.ascii.toUpper(cast(rchar)c2);
else
goto Lnomatch;
if (c1 != c2)
goto Lnomatch;
}
src++;
pc += 1 + char.sizeof;
break;
// Same as REchar but the operand is read as a dchar (dchar.sizeof bytes).
case REdchar:
// NOTE(review): in debug(regexp) builds this trace reads input[src] before the end-of-input check below.
debug(regexp) printf("\tREdchar '%c', src = '%c'\n", *(cast(dchar *)&program[pc + 1]), input[src]);
if (src == input.length)
goto Lnomatch;
if (*(cast(dchar *)&program[pc + 1]) != input[src])
goto Lnomatch;
src++;
pc += 1 + dchar.sizeof;
break;
// Same as REichar but the operand is read as a dchar.
case REidchar:
// NOTE(review): in debug(regexp) builds this trace reads input[src] before the end-of-input check below.
debug(regexp) printf("\tREidchar '%c', src = '%c'\n", *(cast(dchar *)&program[pc + 1]), input[src]);
if (src == input.length)
goto Lnomatch;
c1 = *(cast(dchar *)&program[pc + 1]);
c2 = input[src];
if (c1 != c2)
{
if (isLower(cast(rchar)c2))
c2 = std.ascii.toUpper(cast(rchar)c2);
else
goto Lnomatch;
if (c1 != c2)
goto Lnomatch;
}
src++;
pc += 1 + dchar.sizeof;
break;
// '.': any character, except '\n' unless REA.dotmatchlf is set.
// Advances by std.utf.stride, i.e. by a whole encoded character.
case REanychar:
debug(regexp) printf("\tREanychar\n");
if (src == input.length)
goto Lnomatch;
if (!(attributes & REA.dotmatchlf) && input[src] == cast(rchar)'\n')
goto Lnomatch;
src += std.utf.stride(input, src);
//src++;
pc++;
break;
// Literal string: size_t length followed by the characters, compared with memcmp.
case REstring:
len = *cast(size_t *)&program[pc + 1];
debug(regexp)
{
auto sss2 = (&program[pc + 1 + size_t.sizeof])[0 .. len];
printf("\tREstring x%x, '%.*s'\n", len, sss2.length, sss2.ptr);
}
if (src + len > input.length)
goto Lnomatch;
if (memcmp(&program[pc + 1 + size_t.sizeof], &input[src], len * rchar.sizeof))
goto Lnomatch;
src += len;
pc += 1 + size_t.sizeof + len * rchar.sizeof;
break;
// Case-insensitive literal string, compared with icmp.
case REistring:
len = *cast(size_t *)&program[pc + 1];
debug(regexp)
{
auto sss2 = (&program[pc + 1 + size_t.sizeof])[0 .. len];
printf("\tREistring x%x, '%.*s'\n", len, sss2.length, sss2.ptr);
}
if (src + len > input.length)
goto Lnomatch;
if (icmp((cast(char*)&program[pc + 1 + size_t.sizeof])[0..len],
input[src .. src + len]))
goto Lnomatch;
src += len;
pc += 1 + size_t.sizeof + len * rchar.sizeof;
break;
// First-character filter: rejects when the current character is within
// the bitmap's range and its bit is clear. Unlike REbit it does not
// consume the character; characters above pu[0] are let through.
case REtestbit:
pu = (cast(ushort *)&program[pc + 1]);
if (src == input.length)
goto Lnomatch;
debug(regexp) printf("\tREtestbit %d, %d, '%c', x%02x\n",
pu[0], pu[1], input[src], input[src]);
len = pu[1];
c1 = input[src];
//printf("[x%02x]=x%02x, x%02x\n", c1 >> 3, ((&program[pc + 1 + 4])[c1 >> 3] ), (1 << (c1 & 7)));
if (c1 <= pu[0] &&
!((&(program[pc + 1 + 4]))[c1 >> 3] & (1 << (c1 & 7))))
goto Lnomatch;
pc += 1 + 2 * ushort.sizeof + len;
break;
// Character class: the character must be <= pu[0] and have its bit set.
case REbit:
pu = (cast(ushort *)&program[pc + 1]);
if (src == input.length)
goto Lnomatch;
debug(regexp) printf("\tREbit %d, %d, '%c'\n",
pu[0], pu[1], input[src]);
len = pu[1];
c1 = input[src];
if (c1 > pu[0])
goto Lnomatch;
if (!((&program[pc + 1 + 4])[c1 >> 3] & (1 << (c1 & 7))))
goto Lnomatch;
src++;
pc += 1 + 2 * ushort.sizeof + len;
break;
// Negated character class: fails only when the bit is set; characters
// above pu[0] always match.
case REnotbit:
pu = (cast(ushort *)&program[pc + 1]);
if (src == input.length)
goto Lnomatch;
debug(regexp) printf("\tREnotbit %d, %d, '%c'\n",
pu[0], pu[1], input[src]);
len = pu[1];
c1 = input[src];
if (c1 <= pu[0] &&
((&program[pc + 1 + 4])[c1 >> 3] & (1 << (c1 & 7))))
goto Lnomatch;
src++;
pc += 1 + 2 * ushort.sizeof + len;
break;
// Explicit list of len bytes; the character must appear in the list.
case RErange:
len = *cast(uint *)&program[pc + 1];
debug(regexp) printf("\tRErange %d\n", len);
if (src == input.length)
goto Lnomatch;
// BUG: REA.ignoreCase?
if (memchr(cast(char*)&program[pc + 1 + uint.sizeof], input[src], len) == null)
goto Lnomatch;
src++;
pc += 1 + uint.sizeof + len;
break;
// Explicit list of len bytes; the character must NOT appear in the list.
case REnotrange:
len = *cast(uint *)&program[pc + 1];
debug(regexp) printf("\tREnotrange %d\n", len);
if (src == input.length)
goto Lnomatch;
// BUG: REA.ignoreCase?
if (memchr(cast(char*)&program[pc + 1 + uint.sizeof], input[src], len) != null)
goto Lnomatch;
src++;
pc += 1 + uint.sizeof + len;
break;
// '^': start of input, or (multiline) just after a '\n'.
case REbol:
debug(regexp) printf("\tREbol\n");
if (src == 0)
{
}
else if (attributes & REA.multiline)
{
if (input[src - 1] != '\n')
goto Lnomatch;
}
else
goto Lnomatch;
pc++;
break;
// '$': end of input, or (multiline) at a '\n', which is then consumed.
case REeol:
debug(regexp) printf("\tREeol\n");
if (src == input.length)
{
}
else if (attributes & REA.multiline && input[src] == '\n')
src++;
else
goto Lnomatch;
pc++;
break;
// Alternation: the first operand starts right after the header, the
// second at pop + len. The first operand is tried first; when this call
// only covers part of the program, the remainder of the program is also
// tried before committing to it.
case REor:
len = (cast(uint *)&program[pc + 1])[0];
debug(regexp) printf("\tREor %d\n", len);
pop = pc + 1 + uint.sizeof;
ss = src;
if (trymatch(pop, pcend))
{
if (pcend != program.length)
{
auto s = src;
if (trymatch(pcend, program.length))
{ debug(regexp) printf("\tfirst operand matched\n");
src = s;
return 1;
}
else
{
// If second branch doesn't match to end, take first anyway
src = ss;
if (!trymatch(pop + len, program.length))
{
debug(regexp) printf("\tfirst operand matched\n");
src = s;
return 1;
}
}
src = ss;
}
else
{ debug(regexp) printf("\tfirst operand matched\n");
return 1;
}
}
pc = pop + len; // proceed with 2nd branch
break;
// Unconditional relative jump past the header plus len bytes.
case REgoto:
debug(regexp) printf("\tREgoto\n");
len = (cast(uint *)&program[pc + 1])[0];
pc += 1 + uint.sizeof + len;
break;
// Fused ".*": consume one code unit at a time, stopping at end of input,
// at '\n' (unless dotmatchlf), or when the rest of the program matched
// before the last step but no longer matches after it.
case REanystar:
debug(regexp) printf("\tREanystar\n");
pc++;
for (;;)
{
auto s1 = src;
if (src == input.length)
break;
if (!(attributes & REA.dotmatchlf) && input[src] == '\n')
break;
src++;
auto s2 = src;
// If no match after consumption, but it
// did match before, then no match
if (!trymatch(pc, program.length))
{
src = s1;
// BUG: should we save/restore pmatch[]?
if (trymatch(pc, program.length))
{
src = s1; // no match
break;
}
}
src = s2;
}
break;
// Counted repetition of the sub-program program[pop .. pop + len]:
// at least n and at most m times. REnmq is the minimal-munch variant.
case REnm:
case REnmq:
// len, n, m, ()
puint = cast(uint *)&program[pc + 1];
len = puint[0];
n = puint[1];
m = puint[2];
debug(regexp) printf("\tREnm%s len=%d, n=%u, m=%u\n",
(program[pc] == REnmq) ? "q".ptr : "".ptr, len, n, m);
pop = pc + 1 + uint.sizeof * 3;
// The mandatory n repetitions must all succeed.
for (count = 0; count < n; count++)
{
if (!trymatch(pop, pop + len))
goto Lnomatch;
}
// psave is only needed (and only allocated) when optional repetitions remain.
if (!psave && count < m)
{
//version (Win32)
psave = cast(regmatch_t *)alloca((re_nsub + 1) * regmatch_t.sizeof);
//else
//psave = new regmatch_t[re_nsub + 1];
}
if (program[pc] == REnmq) // if minimal munch
{
// Stop repeating as soon as the rest of the program matches.
for (; count < m; count++)
{
memcpy(psave, pmatch.ptr, (re_nsub + 1) * regmatch_t.sizeof);
auto s1 = src;
if (trymatch(pop + len, program.length))
{
// The look-ahead was only a probe: undo its effect on src and pmatch[].
src = s1;
memcpy(pmatch.ptr, psave, (re_nsub + 1) * regmatch_t.sizeof);
break;
}
if (!trymatch(pop, pop + len))
{ debug(regexp) printf("\tdoesn't match subexpression\n");
break;
}
// If source is not consumed, don't
// infinite loop on the match
if (s1 == src)
{ debug(regexp) printf("\tsource is not consumed\n");
break;
}
}
}
else // maximal munch
{
// Keep repeating while the sub-program matches, but back off one
// repetition when the rest of the program matched before it and
// not after it.
for (; count < m; count++)
{
memcpy(psave, pmatch.ptr, (re_nsub + 1) * regmatch_t.sizeof);
auto s1 = src;
if (!trymatch(pop, pop + len))
{ debug(regexp) printf("\tdoesn't match subexpression\n");
break;
}
auto s2 = src;
// If source is not consumed, don't
// infinite loop on the match
if (s1 == s2)
{ debug(regexp) printf("\tsource is not consumed\n");
break;
}
// If no match after consumption, but it
// did match before, then no match
if (!trymatch(pop + len, program.length))
{
src = s1;
if (trymatch(pop + len, program.length))
{
src = s1; // no match
memcpy(pmatch.ptr, psave, (re_nsub + 1) * regmatch_t.sizeof);
break;
}
}
src = s2;
}
}
debug(regexp) printf("\tREnm len=%d, n=%u, m=%u, DONE count=%d\n", len, n, m, count);
pc = pop + len;
break;
// Capturing group n: match the nested program and record its span in
// pmatch[n + 1] (slot 0 is the whole match).
case REparen:
// len, ()
debug(regexp) printf("\tREparen\n");
puint = cast(uint *)&program[pc + 1];
len = puint[0];
n = puint[1];
pop = pc + 1 + uint.sizeof * 2;
ss = src;
if (!trymatch(pop, pop + len))
goto Lnomatch;
pmatch[n + 1].rm_so = ss;
pmatch[n + 1].rm_eo = src;
pc = pop + len;
break;
case REend:
debug(regexp) printf("\tREend\n");
return 1; // successful match
// \b: between a word and a non-word character. At either end of the
// input the test is skipped and the opcode succeeds.
case REwordboundary:
debug(regexp) printf("\tREwordboundary\n");
if (src > 0 && src < input.length)
{
c1 = input[src - 1];
c2 = input[src];
if (!(
(isword(cast(rchar)c1) && !isword(cast(rchar)c2)) ||
(!isword(cast(rchar)c1) && isword(cast(rchar)c2))
)
)
goto Lnomatch;
}
pc++;
break;
// \B: fails at either end of the input and wherever \b's condition holds.
case REnotwordboundary:
debug(regexp) printf("\tREnotwordboundary\n");
if (src == 0 || src == input.length)
goto Lnomatch;
c1 = input[src - 1];
c2 = input[src];
if (
(isword(cast(rchar)c1) && !isword(cast(rchar)c2)) ||
(!isword(cast(rchar)c1) && isword(cast(rchar)c2))
)
goto Lnomatch;
pc++;
break;
// \d \D \s \S \w \W: single-character class tests, one code unit each.
case REdigit:
debug(regexp) printf("\tREdigit\n");
if (src == input.length)
goto Lnomatch;
if (!isDigit(input[src]))
goto Lnomatch;
src++;
pc++;
break;
case REnotdigit:
debug(regexp) printf("\tREnotdigit\n");
if (src == input.length)
goto Lnomatch;
if (isDigit(input[src]))
goto Lnomatch;
src++;
pc++;
break;
case REspace:
debug(regexp) printf("\tREspace\n");
if (src == input.length)
goto Lnomatch;
if (!isWhite(input[src]))
goto Lnomatch;
src++;
pc++;
break;
case REnotspace:
debug(regexp) printf("\tREnotspace\n");
if (src == input.length)
goto Lnomatch;
if (isWhite(input[src]))
goto Lnomatch;
src++;
pc++;
break;
case REword:
debug(regexp) printf("\tREword\n");
if (src == input.length)
goto Lnomatch;
if (!isword(input[src]))
goto Lnomatch;
src++;
pc++;
break;
case REnotword:
debug(regexp) printf("\tREnotword\n");
if (src == input.length)
goto Lnomatch;
if (isword(input[src]))
goto Lnomatch;
src++;
pc++;
break;
// Back reference: the input at src must repeat the text captured in
// pmatch[n + 1] (compared case-insensitively under REA.ignoreCase).
// NOTE(review): &input[src] / &input[so] are evaluated even when len is 0,
// e.g. for a group that is still unset (-1) or when src == input.length —
// confirm this cannot trip bounds checking.
case REbackref:
{
n = program[pc + 1];
debug(regexp) printf("\tREbackref %d\n", n);
auto so = pmatch[n + 1].rm_so;
auto eo = pmatch[n + 1].rm_eo;
len = eo - so;
if (src + len > input.length)
goto Lnomatch;
else if (attributes & REA.ignoreCase)
{
if (icmp(input[src .. src + len], input[so .. eo]))
goto Lnomatch;
}
else if (memcmp(&input[src], &input[so], len * rchar.sizeof))
goto Lnomatch;
src += len;
pc += 2;
break;
}
default:
assert(0);
}
}
// Common failure exit: rewind src to where this call started.
Lnomatch:
debug(regexp) printf("\tnomatch pc=%d\n", pc);
src = srcsave;
return 0;
}
/* =================== Compiler ================== */
/**
 * Compile a sequence of pieces and '|' alternatives from pattern[p .. $]
 * into buf, stopping at end of pattern (after emitting REend) or at an
 * unconsumed ')'.
 *
 * Returns: always 1; errors are reported by the nested parsers via error()
 */
int parseRegexp()
{
size_t gotooffset;
uint len1;
uint len2;
debug(regexp)
{
auto sss = pattern[p .. pattern.length];
printf("parseRegexp() '%.*s'\n", sss.length, sss.ptr);
}
// Start of the code emitted by this call; an REor header is inserted here when '|' is seen.
auto offset = buf.offset;
for (;;)
{
assert(p <= pattern.length);
if (p == pattern.length)
{ buf.write(REend);
return 1;
}
switch (pattern[p])
{
case ')':
// Left for the caller to consume (parseAtom checks for it after a group).
return 1;
case '|':
p++;
// Close the first alternative with an REgoto whose distance is patched below.
gotooffset = buf.offset;
buf.write(REgoto);
buf.write(cast(uint)0);
// len1 = size of the first alternative including its trailing REgoto.
len1 = cast(uint)(buf.offset - offset);
// Open a gap for the REor header in front of the first alternative;
// everything after offset, including the REgoto, shifts by the same amount.
buf.spread(offset, 1 + uint.sizeof);
gotooffset += 1 + uint.sizeof;
// The recursive call compiles everything after the '|' as the second alternative.
parseRegexp();
len2 = cast(uint)(buf.offset - (gotooffset + 1 + uint.sizeof));
buf.data[offset] = REor;
(cast(uint *)&buf.data[offset + 1])[0] = len1;
(cast(uint *)&buf.data[gotooffset + 1])[0] = len2;
break;
default:
parsePiece();
break;
}
}
}
/**
 * Compile one atom together with an optional quantifier that follows it:
 * '*', '+', '?', "{n}", "{n,}" or "{n,m}", each optionally followed by
 * '?' for the minimal-munch variant.
 *
 * A quantified atom is wrapped in an REnm / REnmq header (len, n, m)
 * inserted in front of the atom's code.
 *
 * Returns: 1 on success. A malformed "{...}" quantifier, including a
 * count that does not fit in a uint, is reported through error().
 */
int parsePiece()
{
    uint len;
    uint n;
    uint m;
    ubyte op;
    auto plength = pattern.length;

    debug(regexp)
    {
        auto sss = pattern[p .. pattern.length];
        printf("parsePiece() '%.*s'\n", sss.length, sss.ptr);
    }
    // Start of the atom's code; a quantifier header is inserted here.
    auto offset = buf.offset;
    parseAtom();
    if (p == plength)
        return 1;
    switch (pattern[p])
    {
        case '*':
            // Special optimization: replace .* with REanystar
            // (only when another pattern character follows and it is not '?')
            if (buf.offset - offset == 1 &&
                buf.data[offset] == REanychar &&
                p + 1 < plength &&
                pattern[p + 1] != '?')
            {
                buf.data[offset] = REanystar;
                p++;
                break;
            }

            n = 0;
            m = inf;
            goto Lnm;

        case '+':
            n = 1;
            m = inf;
            goto Lnm;

        case '?':
            n = 0;
            m = 1;
            goto Lnm;

        case '{': // {n} {n,} {n,m}
            p++;
            if (p == plength || !isDigit(pattern[p]))
                goto Lerr;
            n = 0;
            do
            {
                // Reject a count that would overflow uint instead of
                // silently wrapping around to a smaller number.
                if (n > (uint.max - (pattern[p] - '0')) / 10)
                    goto Lerr;
                n = n * 10 + pattern[p] - '0';
                p++;
                if (p == plength)
                    goto Lerr;
            } while (isDigit(pattern[p]));
            if (pattern[p] == '}') // {n}
            {
                m = n;
                goto Lnm;
            }
            if (pattern[p] != ',')
                goto Lerr;
            p++;
            if (p == plength)
                goto Lerr;
            if (pattern[p] == /*{*/ '}') // {n,}
            {
                m = inf;
                goto Lnm;
            }
            if (!isDigit(pattern[p]))
                goto Lerr;
            m = 0; // {n,m}
            do
            {
                // Same overflow guard as for n.
                if (m > (uint.max - (pattern[p] - '0')) / 10)
                    goto Lerr;
                m = m * 10 + pattern[p] - '0';
                p++;
                if (p == plength)
                    goto Lerr;
            } while (isDigit(pattern[p]));
            if (pattern[p] != /*{*/ '}')
                goto Lerr;
            goto Lnm;

        Lnm:
            // p is on the last character of the quantifier; step past it.
            p++;
            op = REnm;
            if (p < plength && pattern[p] == '?')
            {
                op = REnmq; // minimal munch version
                p++;
            }
            len = cast(uint)(buf.offset - offset);
            // Make room for opcode + (len, n, m) in front of the atom.
            buf.spread(offset, 1 + uint.sizeof * 3);
            buf.data[offset] = op;
            uint* puint = cast(uint *)&buf.data[offset + 1];
            puint[0] = len;
            puint[1] = n;
            puint[2] = m;
            break;

        default:
            break;
    }
    return 1;

Lerr:
    error("badly formed {n,m}");
    assert(0);
}
/**
 * Compile a single atom at pattern[p]: a group, a character class, '.',
 * '^', '$', an escape sequence, or a literal character (a run of literals
 * is merged into one REstring / REistring).
 *
 * Returns: 1 on success (also when p is already at the end of the
 * pattern, in which case nothing is emitted); 0 after an error has been
 * reported through error().
 */
int parseAtom()
{
    ubyte op;
    size_t offset;
    rchar c;

    debug(regexp)
    {
        auto sss = pattern[p .. pattern.length];
        printf("parseAtom() '%.*s'\n", sss.length, sss.ptr);
    }
    if (p < pattern.length)
    {
        c = pattern[p];
        switch (c)
        {
            case '*':
            case '+':
            case '?':
                error("*+? not allowed in atom");
                p++;
                return 0;

            case '(':
                p++;
                buf.write(REparen);
                offset = buf.offset;
                buf.write(cast(uint)0); // reserve space for length
                buf.write(re_nsub);
                re_nsub++;
                parseRegexp();
                // Patch the reserved length with the size of the group's body.
                *cast(uint *)&buf.data[offset] =
                    cast(uint)(buf.offset - (offset + uint.sizeof * 2));
                if (p == pattern.length || pattern[p] != ')')
                {
                    error("')' expected");
                    return 0;
                }
                p++;
                break;

            case '[':
                if (!parseRange())
                    return 0;
                break;

            case '.':
                p++;
                buf.write(REanychar);
                break;

            case '^':
                p++;
                buf.write(REbol);
                break;

            case '$':
                p++;
                buf.write(REeol);
                break;

            case '\\':
                p++;
                if (p == pattern.length)
                {
                    error("no character past '\\'");
                    return 0;
                }
                c = pattern[p];
                switch (c)
                {
                    case 'b': op = REwordboundary; goto Lop;
                    case 'B': op = REnotwordboundary; goto Lop;
                    case 'd': op = REdigit; goto Lop;
                    case 'D': op = REnotdigit; goto Lop;
                    case 's': op = REspace; goto Lop;
                    case 'S': op = REnotspace; goto Lop;
                    case 'w': op = REword; goto Lop;
                    case 'W': op = REnotword; goto Lop;

                    Lop:
                        buf.write(op);
                        p++;
                        break;

                    case 'f':
                    case 'n':
                    case 'r':
                    case 't':
                    case 'v':
                    case 'c':
                    case 'x':
                    case 'u':
                    case '0':
                        // escape() decodes the sequence and advances p itself.
                        c = cast(char)escape();
                        goto Lbyte;

                    case '1': case '2': case '3':
                    case '4': case '5': case '6':
                    case '7': case '8': case '9':
                        // \1..\9: back reference, stored zero-based.
                        c -= '1';
                        if (c < re_nsub)
                        {
                            buf.write(REbackref);
                            buf.write(cast(ubyte)c);
                        }
                        else
                        {
                            error("no matching back reference");
                            return 0;
                        }
                        p++;
                        break;

                    default:
                        // Any other escaped character stands for itself.
                        p++;
                        goto Lbyte;
                }
                break;

            default:
                p++;
            Lbyte:
                op = REchar;
                if (attributes & REA.ignoreCase)
                {
                    if (isAlpha(c))
                    {
                        // REichar operands are stored in upper case.
                        op = REichar;
                        c = cast(char)std.ascii.toUpper(c);
                    }
                }
                if (op == REchar && c <= 0xFF)
                {
                    // Look ahead and see if we can make this into
                    // an REstring
                    auto q = p;
                    for (; q < pattern.length; ++q)
                    {
                        rchar qc = pattern[q];
                        switch (qc)
                        {
                            case '{':
                            case '*':
                            case '+':
                            case '?':
                                // A quantifier binds to the character just
                                // before it, so that character must stay out
                                // of the string.
                                if (q == p)
                                    goto Lchar;
                                q--;
                                break;

                            case '(': case ')':
                            case '|':
                            case '[': case ']':
                            case '.': case '^':
                            case '$': case '\\':
                            case '}':
                                break;

                            default:
                                continue;
                        }
                        break;
                    }
                    auto len = q - p;
                    if (len > 0)
                    {
                        debug(regexp) printf("writing string len %d, c = '%c', pattern[p] = '%c'\n", len+1, c, pattern[p]);
                        // opcode + size_t length + (1 + len) characters
                        buf.reserve(1 + size_t.sizeof + (1 + len) * rchar.sizeof);
                        buf.write((attributes & REA.ignoreCase) ? REistring : REstring);
                        buf.write(len + 1);
                        buf.write(c);
                        buf.write(pattern[p .. p + len]);
                        p = q;
                        break;
                    }
                }
                if (c >= 0x80)
                {
                    // Convert to dchar opcode
                    op = (op == REchar) ? REdchar : REidchar;
                    buf.write(op);
                    // The operand must occupy dchar.sizeof bytes: trymatch()
                    // and printProgram() both read a dchar here and step over
                    // 1 + dchar.sizeof. Writing the bare char emitted a
                    // single byte and misaligned everything after it.
                    buf.write(cast(dchar)c);
                }
                else
                {
                Lchar:
                    debug(regexp) printf("It's an REchar '%c'\n", c);
                    buf.write(op);
                    buf.write(cast(char)c);
                }
                break;
        }
    }
    return 1;
}
private:
/**
 * Growable bitmap of characters whose bytes are stored at the end of an
 * OutBuffer. Used to build character-class bitmaps and first-character
 * filters.
 */
class Range
{
// Current size of the bitmap in bits minus one (the highest addressable bit index).
size_t maxc;
// Number of bitmap bytes appended to buf so far.
size_t maxb;
// Buffer that owns the bitmap bytes.
OutBuffer buf;
// Pointer to the first bitmap byte inside buf.data (null until there is data).
ubyte* base;
// Bit-addressable view over the bytes at base.
BitArray bits;
// The bitmap starts at buf's current write offset.
this(OutBuffer buf)
{
this.buf = buf;
if (buf.data.length)
this.base = &buf.data[buf.offset];
}
// Ensure the bitmap can address bit indices up to and including u; never shrinks.
void setbitmax(size_t u)
{
//printf("setbitmax(x%x), maxc = x%x\n", u, maxc);
if (u > maxc)
{
maxc = u;
auto b = u / 8;
if (b >= maxb)
{
// Remember base as an offset and recompute it after the append —
// presumably because fill0 may reallocate buf.data.
auto u2 = base ? base - &buf.data[0] : 0;
buf.fill0(b - maxb + 1);
base = &buf.data[u2];
maxb = b + 1;
//bits = (cast(bit*)this.base)[0 .. maxc + 1];
bits = BitArray(maxc + 1, cast(size_t*)this.base);
}
bits.length = maxc + 1;
}
}
// Set bit u, growing the bitmap first. Note that it grows to u + 1, so
// maxc ends up one above the highest bit set through this method.
void setbit2(size_t u)
{
setbitmax(u + 1);
//printf("setbit2 [x%02x] |= x%02x\n", u >> 3, 1 << (u & 7));
bits[u] = 1;
}
};
/**
 * Compile a bracketed character class starting at the '[' at pattern[p]
 * into an REbit (or, for "[^...]", REnotbit) opcode followed by a
 * ushort maximum character, a ushort byte count and the bitmap bytes.
 *
 * Returns: 1 on success, 0 after an error has been reported via error()
 */
int parseRange()
{
// c holds the pending literal (possible start of an "a-z" range), c2 the current one.
int c;
int c2;
uint i;
uint cmax;
// Upper bound used when expanding \d \D \s \S \w \W inside the class.
cmax = 0x7F;
p++;
ubyte op = REbit;
if (p == pattern.length)
{
error("invalid range");
return 0;
}
if (pattern[p] == '^')
{ p++;
op = REnotbit;
if (p == pattern.length)
{
error("invalid range");
return 0;
}
}
buf.write(op);
auto offset = buf.offset;
// These 4 bytes are overwritten with the two ushort header fields at the end.
buf.write(cast(uint)0); // reserve space for length
buf.reserve(128 / 8);
auto r = new Range(buf);
// For a negated class bit 0 is set up front, so character 0 is always excluded.
if (op == REnotbit)
r.setbit2(0);
// A ']' or '-' in first position is taken literally.
switch (pattern[p])
{
case ']':
case '-':
c = pattern[p];
p++;
r.setbit2(c);
break;
default:
break;
}
// Parser state: start = nothing pending, rliteral = c is a pending
// literal, dash = c followed by '-' is pending (a range may follow).
enum RS { start, rliteral, dash }
RS rs;
rs = RS.start;
for (;;)
{
if (p == pattern.length)
goto Lerr;
switch (pattern[p])
{
case ']':
// End of class: flush whatever is still pending.
switch (rs)
{ case RS.dash:
r.setbit2('-');
goto case;
case RS.rliteral:
r.setbit2(c);
break;
case RS.start:
break;
default:
assert(0);
}
p++;
break;
case '\\':
p++;
r.setbitmax(cmax);
if (p == pattern.length)
goto Lerr;
switch (pattern[p])
{
// Class shorthands set their bits directly, then flush pending state at Lrs.
// NOTE(review): these cases jump to Lrs without advancing p past the
// shorthand letter — confirm how that letter is handled on the next iteration.
case 'd':
for (i = '0'; i <= '9'; i++)
r.bits[i] = 1;
goto Lrs;
case 'D':
for (i = 1; i < '0'; i++)
r.bits[i] = 1;
for (i = '9' + 1; i <= cmax; i++)
r.bits[i] = 1;
goto Lrs;
case 's':
for (i = 0; i <= cmax; i++)
if (isWhite(i))
r.bits[i] = 1;
goto Lrs;
case 'S':
for (i = 1; i <= cmax; i++)
if (!isWhite(i))
r.bits[i] = 1;
goto Lrs;
case 'w':
for (i = 0; i <= cmax; i++)
if (isword(cast(rchar)i))
r.bits[i] = 1;
goto Lrs;
case 'W':
for (i = 1; i <= cmax; i++)
if (!isword(cast(rchar)i))
r.bits[i] = 1;
goto Lrs;
Lrs:
switch (rs)
{ case RS.dash:
r.setbit2('-');
goto case;
case RS.rliteral:
r.setbit2(c);
break;
default:
break;
}
rs = RS.start;
continue;
default:
break;
}
// Any other escape denotes a single character, decoded by escape().
c2 = escape();
goto Lrange;
case '-':
p++;
// NOTE(review): in the RS.start case c2 is not assigned here before the
// jump to Lrange, so it keeps its previous value — confirm intent.
if (rs == RS.start)
goto Lrange;
else if (rs == RS.rliteral)
rs = RS.dash;
else if (rs == RS.dash)
{
r.setbit2(c);
r.setbit2('-');
rs = RS.start;
}
continue;
default:
c2 = pattern[p];
p++;
Lrange:
switch (rs)
{ case RS.rliteral:
// The previous literal was not the start of a range; emit it.
r.setbit2(c);
goto case;
case RS.start:
c = c2;
rs = RS.rliteral;
break;
case RS.dash:
// "c-c2": set every bit in the inclusive range.
if (c > c2)
{ error("inverted range in character class");
return 0;
}
r.setbitmax(c2);
//printf("c = %x, c2 = %x\n",c,c2);
for (; c <= c2; c++)
r.bits[c] = 1;
rs = RS.start;
break;
default:
assert(0);
}
continue;
}
break;
}
if (attributes & REA.ignoreCase)
{
// BUG: what about dchar?
// Mirror every ASCII letter bit onto its other-case counterpart.
r.setbitmax(0x7F);
for (c = 'a'; c <= 'z'; c++)
{
if (r.bits[c])
r.bits[c + 'A' - 'a'] = 1;
else if (r.bits[c + 'A' - 'a'])
r.bits[c] = 1;
}
}
//printf("maxc = %d, maxb = %d\n",r.maxc,r.maxb);
// Fill in the header reserved above: highest bit index and bitmap size in bytes.
(cast(ushort *)&buf.data[offset])[0] = cast(ushort)r.maxc;
(cast(ushort *)&buf.data[offset])[1] = cast(ushort)r.maxb;
return 1;
Lerr:
error("invalid range");
return 0;
}
/**
 * Record a pattern compilation error and abort by throwing.
 *
 * Params:
 *     msg = description of the error; becomes the exception message
 *
 * Throws: RegExpException, always
 */
void error(string msg)
{
    ++errors;
    debug(regexp) printf("error: %.*s\n", msg.length, msg.ptr);
    throw new RegExpException(msg);
}
// p is following the \ char
int escape()
in
{
assert(p < pattern.length);
}
body
{ int c;
// Decodes the escape whose first character is pattern[p] and returns the
// character value it denotes. p is advanced past what was consumed; on the
// early `return` paths it is left on the first character NOT consumed.
int i;
rchar tc;
c = pattern[p]; // none of the cases are multibyte
switch (c)
{
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'v': c = '\v'; break;
// BUG: Perl does \a and \e too, should we?
// \cX: control character, the letter's code masked to its low 5 bits.
case 'c':
++p;
// Pattern ends right after "\c": return 'c' itself.
if (p == pattern.length)
goto Lretc;
c = pattern[p];
// Note: we are deliberately not allowing dchar letters
if (!(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')))
{
// NOTE(review): no goto to Lcerr is visible in this function.
Lcerr:
error("letter expected following \\c");
return 0;
}
c &= 0x1F;
break;
// Octal escape: the first digit plus up to two more octal digits.
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
c -= '0';
for (i = 0; i < 2; i++)
{
p++;
if (p == pattern.length)
goto Lretc;
tc = pattern[p];
if ('0' <= tc && tc <= '7')
{ c = c * 8 + (tc - '0');
// Treat overflow as if last
// digit was not an octal digit
// NOTE(review): `>= 0xFF` also rejects the value 0xFF itself (\377) — confirm whether `>` was intended.
if (c >= 0xFF)
{ c >>= 3;
return c;
}
}
else
return c;
}
break;
// \xHH: up to two hex digits.
case 'x':
c = 0;
for (i = 0; i < 2; i++)
{
p++;
if (p == pattern.length)
goto Lretc;
tc = pattern[p];
if ('0' <= tc && tc <= '9')
c = c * 16 + (tc - '0');
else if ('a' <= tc && tc <= 'f')
c = c * 16 + (tc - 'a' + 10);
else if ('A' <= tc && tc <= 'F')
c = c * 16 + (tc - 'A' + 10);
else if (i == 0) // if no hex digits after \x
{
// Not a valid \xXX sequence
return 'x';
}
else
return c;
}
break;
// \uHHHH: exactly four hex digits, otherwise a literal 'u'.
case 'u':
c = 0;
for (i = 0; i < 4; i++)
{
p++;
if (p == pattern.length)
goto Lretc;
tc = pattern[p];
if ('0' <= tc && tc <= '9')
c = c * 16 + (tc - '0');
else if ('a' <= tc && tc <= 'f')
c = c * 16 + (tc - 'a' + 10);
else if ('A' <= tc && tc <= 'F')
c = c * 16 + (tc - 'A' + 10);
else
{
// Not a valid \uXXXX sequence
// Rewind to just after the 'u' so the digits read so far are reparsed as literals.
p -= i;
return 'u';
}
}
break;
default:
break;
}
// Step past the last character of the escape.
p++;
// Reached directly when the pattern ended inside the escape (p is already at the end).
Lretc:
return c;
}
/* ==================== optimizer ======================= */
/**
 * Try to speed up matching by inserting an REtestbit first-character
 * filter in front of a program that starts with a composite opcode.
 * Programs that start with a simple opcode are left unchanged.
 */
void optimize()
{ ubyte[] prog;
debug(regexp) printf("RegExp.optimize()\n");
prog = buf.toBytes();
// i skips over any leading REbol opcodes; the first other opcode decides what happens.
for (size_t i = 0; 1;)
{
//printf("\tprog[%d] = %d, %d\n", i, prog[i], REstring);
switch (prog[i])
{
// Simple leading opcodes: no filter is built.
case REend:
case REanychar:
case REanystar:
case REbackref:
case REeol:
case REchar:
case REichar:
case REdchar:
case REidchar:
case REstring:
case REistring:
case REtestbit:
case REbit:
case REnotbit:
case RErange:
case REnotrange:
case REwordboundary:
case REnotwordboundary:
case REdigit:
case REnotdigit:
case REspace:
case REnotspace:
case REword:
case REnotword:
return;
case REbol:
i++;
continue;
// Composite leading opcodes: collect the set of possible first characters.
case REor:
case REnm:
case REnmq:
case REparen:
case REgoto:
{
// The bitmap is built in a scratch buffer, then copied into the program.
auto bitbuf = new OutBuffer;
auto r = new Range(bitbuf);
auto offset = i;
if (starrchars(r, prog[i .. prog.length]))
{
debug(regexp) printf("\tfilter built\n");
// Open a gap for: opcode, two ushort header fields, r.maxb bitmap bytes.
buf.spread(offset, 1 + 4 + r.maxb);
buf.data[offset] = REtestbit;
(cast(ushort *)&buf.data[offset + 1])[0] = cast(ushort)r.maxc;
(cast(ushort *)&buf.data[offset + 1])[1] = cast(ushort)r.maxb;
i = offset + 1 + 4;
buf.data[i .. i + r.maxb] = r.base[0 .. r.maxb];
}
return;
}
default:
assert(0);
}
}
}
/////////////////////////////////////////
// OR the leading character bits into r.
// Limit the character range from 0..7F,
// trymatch() will allow through anything over maxc.
// Return 1 if success, 0 if we can't build a filter or
// if there is no point to one.
/**
 * Collect into r the set of characters (limited to 0..0x7F) that a match
 * of prog can start with.
 *
 * Params:
 *     r    = bitmap that receives the leading-character bits (OR-ed in)
 *     prog = program fragment to analyse
 *
 * Returns: 1 if a usable filter was built, 0 if none can be built or
 * there is no point to one
 */
int starrchars(Range r, const(ubyte)[] prog)
{
    rchar c;
    uint maxc;
    size_t maxb;
    size_t len;
    uint b;
    uint n;
    uint m;
    const(ubyte)* pop;

    //printf("RegExp.starrchars(prog = %p, progend = %p)\n", prog, progend);
    for (size_t i = 0; i < prog.length;)
    {
        switch (prog[i])
        {
            case REchar:
                c = prog[i + 1];
                if (c <= 0x7F)
                    r.setbit2(c);
                return 1;

            case REichar:
                // Both the stored character and its lower-case form can start a match.
                c = prog[i + 1];
                if (c <= 0x7F)
                {
                    r.setbit2(c);
                    r.setbit2(std.ascii.toLower(cast(rchar)c));
                }
                return 1;

            case REdchar:
            case REidchar:
                return 1;

            case REanychar:
                return 0; // no point

            case REstring:
                // Only the first character of the string matters.
                len = *cast(size_t *)&prog[i + 1];
                assert(len);
                c = *cast(rchar *)&prog[i + 1 + size_t.sizeof];
                debug(regexp) printf("\tREstring %d, '%c'\n", len, c);
                if (c <= 0x7F)
                    r.setbit2(c);
                return 1;

            case REistring:
                len = *cast(size_t *)&prog[i + 1];
                assert(len);
                c = *cast(rchar *)&prog[i + 1 + size_t.sizeof];
                debug(regexp) printf("\tREistring %d, '%c'\n", len, c);
                if (c <= 0x7F)
                {
                    r.setbit2(std.ascii.toUpper(cast(rchar)c));
                    r.setbit2(std.ascii.toLower(cast(rchar)c));
                }
                return 1;

            case REtestbit:
            case REbit:
                // OR the class bitmap into r.
                maxc = (cast(ushort *)&prog[i + 1])[0];
                maxb = (cast(ushort *)&prog[i + 1])[1];
                if (maxc <= 0x7F)
                    r.setbitmax(maxc);
                else
                    maxb = r.maxb;
                for (b = 0; b < maxb; b++)
                    r.base[b] |= prog[i + 1 + 4 + b];
                return 1;

            case REnotbit:
                // OR the complement of the class bitmap into r.
                maxc = (cast(ushort *)&prog[i + 1])[0];
                maxb = (cast(ushort *)&prog[i + 1])[1];
                if (maxc <= 0x7F)
                    r.setbitmax(maxc);
                else
                    maxb = r.maxb;
                for (b = 0; b < maxb; b++)
                    r.base[b] |= ~cast(int)prog[i + 1 + 4 + b];
                return 1;

            case REbol:
            case REeol:
                return 0;

            case REor:
                // Both alternatives must yield a filter.
                len = (cast(uint *)&prog[i + 1])[0];
                return starrchars(r, prog[i + 1 + uint.sizeof .. prog.length]) &&
                       starrchars(r, prog[i + 1 + uint.sizeof + len .. prog.length]);

            case REgoto:
                len = (cast(uint *)&prog[i + 1])[0];
                i += 1 + uint.sizeof + len;
                break;

            case REanystar:
                return 0;

            case REnm:
            case REnmq:
                // len, n, m, ()
                len = (cast(uint *)&prog[i + 1])[0];
                n   = (cast(uint *)&prog[i + 1])[1];
                m   = (cast(uint *)&prog[i + 1])[2];
                pop = &prog[i + 1 + uint.sizeof * 3];
                if (!starrchars(r, pop[0 .. len]))
                    return 0;
                // With at least one mandatory repetition the sub-program
                // decides the first character; otherwise what follows can
                // also start the match, so keep scanning.
                if (n)
                    return 1;
                i += 1 + uint.sizeof * 3 + len;
                break;

            case REparen:
                // len, ()
                len = (cast(uint *)&prog[i + 1])[0];
                n   = (cast(uint *)&prog[i + 1])[1];
                pop = &prog[0] + i + 1 + uint.sizeof * 2;
                return starrchars(r, pop[0 .. len]);

            case REend:
                return 0;

            case REwordboundary:
            case REnotwordboundary:
                return 0;

            case REdigit:
                r.setbitmax('9');
                for (c = '0'; c <= '9'; c++)
                    r.bits[c] = 1;
                return 1;

            case REnotdigit:
                r.setbitmax(0x7F);
                // Everything below '0' and above '9'. The bound is strict:
                // '0' is a digit and must not be marked (the previous
                // `c <= '0'` included it).
                for (c = 0; c < '0'; c++)
                    r.bits[c] = 1;
                for (c = '9' + 1; c <= r.maxc; c++)
                    r.bits[c] = 1;
                return 1;

            case REspace:
                r.setbitmax(0x7F);
                for (c = 0; c <= r.maxc; c++)
                    if (isWhite(c))
                        r.bits[c] = 1;
                return 1;

            case REnotspace:
                r.setbitmax(0x7F);
                for (c = 0; c <= r.maxc; c++)
                    if (!isWhite(c))
                        r.bits[c] = 1;
                return 1;

            case REword:
                r.setbitmax(0x7F);
                for (c = 0; c <= r.maxc; c++)
                    if (isword(cast(rchar)c))
                        r.bits[c] = 1;
                return 1;

            case REnotword:
                r.setbitmax(0x7F);
                for (c = 0; c <= r.maxc; c++)
                    if (!isword(cast(rchar)c))
                        r.bits[c] = 1;
                return 1;

            case REbackref:
                return 0;

            default:
                assert(0);
        }
    }
    return 1;
}
/* ==================== replace ======================= */
/***********************
* After a match is found with test(), this function
* will take the match results and, using the format
* string, generate and return a new string.
*/
public string replace(string format)
{
    // Hand the static formatter only the slots this pattern uses:
    // slot 0 (whole match) plus one per capture group.
    auto captures = pmatch[0 .. re_nsub + 1];
    return replace3(format, input, captures);
}
// Static version that doesn't require a RegExp object to be created
/**
 * Expand a replacement format string against the results of a match.
 *
 * Recognised sequences in format:
 *     $&          the matched text
 *     $`          the input preceding the match
 *     $'          the input following the match
 *     $n, $nn     capture group n / nn (1-based, decimal); a group index
 *                 beyond pmatch expands to nothing
 * "$0", "$00", a '$' followed by any other character, and a trailing '$'
 * are copied through literally.
 *
 * Params:
 *     format = replacement template
 *     input  = the string the match was made against
 *     pmatch = match offsets; pmatch[0] is the whole match
 *
 * Returns: the expanded string
 */
public static string replace3(string format, string input, regmatch_t[] pmatch)
{
    string result;
    size_t c2;
    sizediff_t rm_so, rm_eo, i;

    // printf("replace3(format = '%.*s', input = '%.*s')\n", format.length, format.ptr, input.length, input.ptr);
    result.length = format.length;
    result.length = 0;
    for (size_t f = 0; f < format.length; f++)
    {
        char c = format[f];
        if (c != '$')
        {
            result ~= c;
            continue;
        }
        ++f;
        if (f == format.length)
        {
            result ~= '$';
            break;
        }
        c = format[f];
        switch (c)
        {
            case '&':
                rm_so = pmatch[0].rm_so;
                rm_eo = pmatch[0].rm_eo;
                goto Lstring;

            case '`':
                rm_so = 0;
                rm_eo = pmatch[0].rm_so;
                goto Lstring;

            case '\'':
                rm_so = pmatch[0].rm_eo;
                rm_eo = input.length;
                goto Lstring;

            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                i = c - '0';
                if (f + 1 == format.length)
                {
                    if (i == 0)
                    {
                        result ~= '$';
                        result ~= c;
                        continue;
                    }
                }
                else
                {
                    c2 = format[f + 1];
                    if (c2 >= '0' && c2 <= '9')
                    {
                        i = (c - '0') * 10 + (c2 - '0');
                        f++;        // the second digit is consumed
                        if (i == 0)
                        {
                            // "$00" is not a group reference: copy it verbatim.
                            result ~= '$';
                            result ~= c;
                            result ~= cast(char)c2;
                            continue;
                        }
                    }
                    else if (i == 0)
                    {
                        // "$0" followed by a non-digit: copy "$0" and let the
                        // loop handle the next character normally. The old
                        // code re-dispatched on that character without
                        // consuming it, which emitted it twice and mis-parsed
                        // a following '$'.
                        result ~= '$';
                        result ~= c;
                        continue;
                    }
                }

                if (i < pmatch.length)
                {
                    rm_so = pmatch[i].rm_so;
                    rm_eo = pmatch[i].rm_eo;
                    goto Lstring;
                }
                break;

            Lstring:
                // Equal offsets mean an empty or unset span: nothing to copy.
                if (rm_so != rm_eo)
                    result ~= input[rm_so .. rm_eo];
                break;

            default:
                result ~= '$';
                result ~= c;
                break;
        }
    }
    return result;
}
/************************************
* Like replace(string format), but uses old style formatting:
*
* Format   Description
* &        replace with the match
* \n       replace with the nth parenthesized match, n is 1..9
* \c       replace with char c.
*/
public string replaceOld(string format)
{
    // Expands '&' to the whole match and '\' + digit 1..9 to the
    // corresponding capture group; '\' followed by any other character
    // yields that character; everything else is copied as is.
    string result;

    result.length = format.length;
    result.length = 0;
    for (size_t i; i < format.length; i++)
    {
        char c = format[i];

        if (c == '&')
        {
            result ~= input[pmatch[0].rm_so .. pmatch[0].rm_eo];
            continue;
        }
        if (c == '\\' && i + 1 < format.length)
        {
            c = format[++i];
            if (c >= '1' && c <= '9')
            {
                // Unknown or empty groups expand to nothing.
                uint j = c - '0';
                if (j <= re_nsub && pmatch[j].rm_so != pmatch[j].rm_eo)
                    result ~= input[pmatch[j].rm_so .. pmatch[j].rm_eo];
                continue;
            }
        }
        // Ordinary character, an escaped non-digit, or a trailing backslash.
        result ~= c;
    }
    return result;
}
}
unittest
{   // Created and placed in public domain by Don Clugston
    auto spaced = search("aBC r s", `bc\x20r[\40]s`, "i");
    assert(spaced.pre == "a");
    assert(spaced[0] == "BC r s");

    auto backref = search("7xxyxxx", `^\d([a-z]{2})\D\1`);
    assert(backref[0] == "7xxyxx");

    // Just check the parsing.
    auto parsed1 = search("dcbxx", `ca|b[\d\]\D\s\S\w-\W]`);
    auto parsed2 = search("xy", `[^\ca-\xFa\r\n\b\f\t\v\0123]{2,485}$`);
    auto parsed3 = search("xxx", `^^\r\n\b{13,}\f{4}\t\v\u02aF3a\w\W`);

    auto greedy = search("xxy", `.*y`);
    assert(greedy[0] == "xxy");

    auto repeated = search("QWDEfGH", "(ca|b|defg)+", "i");
    assert(repeated[0] == "DEfG");

    // More parse-only cases; the results are not inspected.
    auto parsed4 = search("dcbxx", `a?\B\s\S`);
    auto parsed5 = search("dcbxx", `[-w]`);
    auto parsed6 = search("dcbsfd", `aB[c-fW]dB|\d|\D|\u012356|\w|\W|\s|\S`, "i");
    auto parsed7 = search("dcbsfd", `[]a-]`);

    // Both replacement syntaxes must run against the first match.
    spaced.replaceOld(`a&b\1c`);
    spaced.replace(`a$&b$'$1c`);
}
// Andrei
//------------------------------------------------------------------------------
/// Wraps a string so that overloads such as splitter() can tell a
/// regular-expression pattern apart from ordinary text.
struct Pattern(Char)
{
    /// The pattern text exactly as supplied.
    immutable(Char)[] pattern;

    /// Stores `pattern`; nothing is copied or validated here.
    this(immutable(Char)[] pattern)
    {
        this.pattern = pattern;
    }
}
/// Convenience constructor: deduces Char from `pat` and wraps it in a Pattern.
Pattern!(Char) pattern(Char)(immutable(Char)[] pat)
{
    return Pattern!(Char)(pat);
}
// Range that yields the pieces of _input lying between matches of a
// regular-expression separator. The current piece is always the first
// _chunkLength elements of _input.
// NOTE(review): if the separator can match the empty string, advance() adds 0
// and popFront() presumably never makes progress (see the commented-out
// assert in advance) - confirm before using such patterns.
struct Splitter(Range)
{
// Text not yet consumed; shrinks from the front on every popFront().
Range _input;
// Length of the current chunk at the start of _input.
size_t _chunkLength;
// Compiled separator; also holds the most recent match (read by advance()).
RegExp _rx;
// Returns the tail of _input starting at the next separator match, or an
// empty tail when there is no further match.
private Range search()
{
//rx = undead.regexp.search(_input, "(" ~ _separator.pattern ~ ")");
auto i = undead.regexp.find(cast(string) _input, _rx);
return _input[i >= 0 ? i : _input.length .. _input.length];
}
// Extends _chunkLength over the separator last matched by search(), so the
// slice taken in popFront() drops both the chunk and its separator.
private void advance()
{
//writeln("(" ~ _separator.pattern ~ ")");
//writeln(_input);
//assert(_rx[0].length > 0);
_chunkLength += _rx[0].length;
}
// Compiles the separator and locates the end of the first chunk.
this(Range input, Pattern!(char) separator)
{
_input = input;
_rx = RegExp(separator.pattern);
_chunkLength = _input.length - search().length;
}
// Lets the splitter be used where a slice expression is expected.
ref auto opSlice()
{
return this;
}
// The current chunk (separator excluded).
@property Range front()
{
return _input[0 .. _chunkLength];
}
// True once all of _input has been consumed.
@property bool empty()
{
return _input.empty;
}
// Drops the current chunk and the separator after it, then measures the
// next chunk.
void popFront()
{
// The current chunk reaches the end of the input: it was the last one.
if (_chunkLength == _input.length)
{
_input = _input[_chunkLength .. _input.length];
return;
}
advance();
_input = _input[_chunkLength .. _input.length];
_chunkLength = _input.length - search().length;
}
}
/// Builds a Splitter over `r` using the regular expression in `pat`.
/// Rejected at compile time unless the elements of Range are char.
Splitter!(Range) splitter(Range)(Range r, Pattern!(char) pat)
{
    alias Elem = Unqual!(typeof(Range.init[0]));
    static assert(is(Elem == char), Elem.stringof);
    return Splitter!(Range)(cast(string) r, pat);
}
unittest
{
    // A leading separator yields a leading empty chunk; the expected list has
    // no empty chunk for the trailing separator.
    auto text = ", abc, de, fg, hi, ";
    auto pieces = splitter(text, pattern(", *"));
    string[] expected = ["", "abc", "de", "fg", "hi"];
    assert(equal(pieces, expected));
}
unittest
{
    // Every pattern here has an alternation with an empty branch and is
    // expected to match "foo".
    auto subject = "foo";

    void mustMatch(string re_str)
    {
        auto re = new RegExp(re_str);
        auto matched = cast(bool) re.test(subject);
        assert(matched);
    }

    string[] fixedPatterns = [
        r"^(h|a|)fo[oas]$",
        r"^(a|b|)fo[oas]$",
        r"^(a|)foo$",
        r"(a|)foo",
        r"^(h|)foo$",
        r"(h|)foo",
        r"(h|a|)fo[oas]",
        r"^(a|b|)fo[o]$",
        r"[abf][ops](o|oo|)(h|a|)",
        r"(h|)[abf][ops](o|oo|)",
        r"(c|)[abf][ops](o|oo|)"
    ];
    foreach (re_str; fixedPatterns)
        mustMatch(re_str);

    // Same shape with every lowercase letter as the non-empty alternative.
    for (char letter = 'a'; letter <= 'z'; ++letter)
        mustMatch("(" ~ letter ~ "|)foo");
}
undeaD-1.0.10/src/undead/socketstream.d 0000664 0000000 0000000 00000007764 13463741136 0017717 0 ustar 00root root 0000000 0000000 // Written in the D programming language
/*
Copyright (C) 2004 Christopher E. Miller
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
/**************
* $(RED Deprecated: This module is considered out-dated and not up to Phobos'
* current standards.)
*
* $(D SocketStream) is a stream for a blocking,
* connected $(D Socket).
*
* Example:
* See $(SAMPLESRC htmlget.d)
* Authors: Christopher E. Miller
* References:
* $(LINK2 std_stream.html, std.stream)
* Source: $(PHOBOSSRC std/_socketstream.d)
* Macros: WIKI=Phobos/StdSocketstream
*/
module undead.socketstream;
private import undead.stream;
private import std.socket;
/**************
* $(D SocketStream) is a stream for a blocking,
* connected $(D Socket).
*/
class SocketStream: Stream
{
    private:
    Socket sock;

    public:

    /**
     * Constructs a SocketStream with the specified Socket and FileMode flags.
     *
     * The stream is marked readable when mode contains FileMode.In and
     * writeable when it contains FileMode.Out.
     */
    this(Socket sock, FileMode mode)
    {
        if (mode & FileMode.In)
            readable = true;
        if (mode & FileMode.Out)
            writeable = true;

        this.sock = sock;
    }

    /**
     * Uses mode $(D FileMode.In | FileMode.Out).
     */
    this(Socket sock)
    {
        writeable = readable = true;
        this.sock = sock;
    }

    /**
     * Property to get the $(D Socket) that is being streamed.
     */
    Socket socket()
    {
        return sock;
    }

    /**
     * Receives up to size bytes from the socket into _buffer.
     *
     * Returns: the number of bytes received. 0 is returned for a zero-size
     * request, when the socket reports 0 bytes (which also sets readEOF),
     * and when the socket reports an error (which leaves readEOF false).
     */
    override size_t readBlock(void* _buffer, size_t size)
    {
        ubyte* buffer = cast(ubyte*)_buffer;
        assertReadable();

        if (size == 0)
            return size;

        auto len = sock.receive(buffer[0 .. size]);
        readEOF = cast(bool)(len == 0);
        if (len == sock.ERROR)
            len = 0;
        return len;
    }

    /**
     * Sends up to size bytes from _buffer over the socket.
     *
     * Returns: the number of bytes sent, or 0 for a zero-size request or
     * when the socket reports an error.
     */
    override size_t writeBlock(const void* _buffer, size_t size)
    {
        // Keep the buffer const: it is only read from.
        auto buffer = cast(const(ubyte)*)_buffer;
        assertWriteable();

        if (size == 0)
            return size;

        auto len = sock.send(buffer[0 .. size]);
        // readEOF is deliberately left alone here. It describes the read
        // side; assigning it from a send result would clear an end-of-stream
        // that readBlock had already observed.
        if (len == sock.ERROR)
            len = 0;
        return len;
    }

    /**
     * Socket streams do not support seeking. This disabled method throws
     * a $(D SeekException).
     */
    override ulong seek(long offset, SeekPos whence)
    {
        throw new SeekException("Cannot seek a socket.");
    }

    /**
     * Does not return the entire stream because that would
     * require the remote connection to be closed.
     * Returns the socket's own string representation instead.
     */
    override string toString()
    {
        return sock.toString();
    }

    /**
     * Close the $(D Socket), then the stream itself.
     */
    override void close()
    {
        sock.close();
        super.close();
    }
}
undeaD-1.0.10/src/undead/stream.d 0000664 0000000 0000000 00000261456 13463741136 0016506 0 ustar 00root root 0000000 0000000 // Written in the D programming language
/**
* $(RED Deprecated: This module is considered out-dated and not up to Phobos'
* current standards.)
*
* Source: $(PHOBOSSRC std/_stream.d)
* Macros:
* WIKI = Phobos/StdStream
*/
/*
* Copyright (c) 2001-2005
* Pavel "EvilOne" Minayev
* with buffering and endian support added by Ben Hinkle
* with buffered readLine performance improvements by Dave Fladebo
* with opApply inspired by (and mostly copied from) Regan Heath
* with bug fixes and MemoryStream/SliceStream enhancements by Derick Eddington
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Author makes no representations about
* the suitability of this software for any purpose. It is provided
* "as is" without express or implied warranty.
*/
module undead.stream;
import std.internal.cstring;
/* Class structure:
* InputStream interface for reading
* OutputStream interface for writing
* Stream abstract base of stream implementations
* File an OS file stream
* FilterStream a base-class for wrappers around another stream
* BufferedStream a buffered stream wrapping another stream
* BufferedFile a buffered File
* EndianStream a wrapper stream for swapping byte order and BOMs
* SliceStream a portion of another stream
* MemoryStream a stream entirely stored in main memory
* TArrayStream a stream wrapping an array-like buffer
*/
/// A base class for stream exceptions.
class StreamException: Exception {
    /**
     * Construct a StreamException with given error message.
     *
     * file, line and next are forwarded to Exception; their defaults record
     * the location of the `new` expression, so a report points at the throw
     * site rather than at this constructor.
     */
    this(string msg, string file = __FILE__, size_t line = __LINE__,
         Throwable next = null)
    {
        super(msg, file, line, next);
    }
}

/// Thrown when unable to read data from Stream.
class ReadException: StreamException {
    /// Construct a ReadException with given error message.
    this(string msg, string file = __FILE__, size_t line = __LINE__,
         Throwable next = null)
    {
        super(msg, file, line, next);
    }
}

/// Thrown when unable to write data to Stream.
class WriteException: StreamException {
    /// Construct a WriteException with given error message.
    this(string msg, string file = __FILE__, size_t line = __LINE__,
         Throwable next = null)
    {
        super(msg, file, line, next);
    }
}

/// Thrown when unable to move Stream pointer.
class SeekException: StreamException {
    /// Construct a SeekException with given error message.
    this(string msg, string file = __FILE__, size_t line = __LINE__,
         Throwable next = null)
    {
        super(msg, file, line, next);
    }
}
/// Reference point ("whence") that a seek offset is measured from.
enum SeekPos {
    Set     = 0,
    Current = 1,
    End     = 2
}
private {
import std.conv;
import std.algorithm;
import std.ascii;
//import std.format;
import std.system; // for Endian enumeration
import std.utf;
import undead.utf;
import core.bitop; // for bswap
import core.vararg;
static import std.file;
import undead.internal.file;
import undead.doformat;
}
/// InputStream is the interface for readable streams.
interface InputStream {
/***
* Read exactly size bytes into the buffer.
*
* Throws a ReadException if fewer than size bytes could be read.
*/
void readExact(void* buffer, size_t size);
/***
* Read a block of data big enough to fill the given array buffer.
*
* Returns: the actual number of bytes read. Unfilled bytes are not modified.
*/
size_t read(ubyte[] buffer);
/***
* Read a basic type or counted string.
*
* Throw a ReadException if it could not be read.
* Outside of byte, ubyte, and char, the format is
* implementation-specific and should not be used except as opposite actions
* to write.
*/
void read(out byte x);
void read(out ubyte x); /// ditto
void read(out short x); /// ditto
void read(out ushort x); /// ditto
void read(out int x); /// ditto
void read(out uint x); /// ditto
void read(out long x); /// ditto
void read(out ulong x); /// ditto
void read(out float x); /// ditto
void read(out double x); /// ditto
void read(out real x); /// ditto
void read(out ifloat x); /// ditto
void read(out idouble x); /// ditto
void read(out ireal x); /// ditto
void read(out cfloat x); /// ditto
void read(out cdouble x); /// ditto
void read(out creal x); /// ditto
void read(out char x); /// ditto
void read(out wchar x); /// ditto
void read(out dchar x); /// ditto
// reads a string, written earlier by write()
void read(out char[] s); /// ditto
// reads a Unicode string, written earlier by write()
void read(out wchar[] s); /// ditto
/***
* Read a line that is terminated with some combination of carriage return and
* line feed or end-of-file.
*
* The terminators are not included. The wchar version
* is identical. The optional buffer parameter is filled (reallocating
* it if necessary) and a slice of the result is returned.
*/
char[] readLine();
char[] readLine(char[] result); /// ditto
wchar[] readLineW(); /// ditto
wchar[] readLineW(wchar[] result); /// ditto
/***
* Overload foreach statements to read the stream line by line and call the
* supplied delegate with each line or with each line with line number.
*
* The string passed in line may be reused between calls to the delegate.
* Line numbering starts at 1.
* Breaking out of the foreach will leave the stream
* position at the beginning of the next line to be read.
* For example, to echo a file line-by-line with line numbers run:
* ------------------------------------
* Stream file = new BufferedFile("sample.txt");
* foreach(ulong n, char[] line; file)
* {
* writefln("line %d: %s", n, line);
* }
* file.close();
* ------------------------------------
*/
// iterate through the stream line-by-line
int opApply(scope int delegate(ref char[] line) dg);
int opApply(scope int delegate(ref ulong n, ref char[] line) dg); /// ditto
int opApply(scope int delegate(ref wchar[] line) dg); /// ditto
int opApply(scope int delegate(ref ulong n, ref wchar[] line) dg); /// ditto
/// Read a string of the given length,
/// throwing ReadException if there was a problem.
char[] readString(size_t length);
/***
* Read a string of the given length, throwing ReadException if there was a
* problem.
*
* The file format is implementation-specific and should not be used
* except as opposite actions to write.
*/
wchar[] readStringW(size_t length);
/***
* Read and return the next character in the stream.
*
* This is the only method that will handle ungetc properly.
* getcw's format is implementation-specific.
* If EOF is reached then getc returns char.init and getcw returns wchar.init.
*/
char getc();
wchar getcw(); /// ditto
/***
* Push a character back onto the stream.
*
* Pushed-back characters are returned by getc/getcw in last-in first-out
* order (the most recently pushed character comes back first).
* Only has effect on further calls to getc() and getcw().
*/
char ungetc(char c);
wchar ungetcw(wchar c); /// ditto
/***
* Scan a string from the input using a similar form to C's scanf
* and std.format.
*
* An argument of type string is interpreted as a format string.
* All other arguments must be pointer types.
* If a format string is not present a default will be supplied computed from
* the base type of the pointer type. An argument of type string* is filled
* (possibly with appending characters) and a slice of the result is assigned
* back into the argument. For example the following readf statements
* are equivalent:
* --------------------------
* int x;
* double y;
* string s;
* file.readf(&x, " hello ", &y, &s);
* file.readf("%d hello %f %s", &x, &y, &s);
* file.readf("%d hello %f", &x, &y, "%s", &s);
* --------------------------
*/
int vreadf(TypeInfo[] arguments, va_list args);
int readf(...); /// ditto
/// Retrieve the number of bytes available for immediate reading.
@property size_t available();
/***
* Return whether the current file position is the same as the end of the
* file.
*
* This does not require actually reading past the end, as with stdio. For
* non-seekable streams this might only return true after attempting to read
* past the end.
*/
@property bool eof();
@property bool isOpen(); /// Return true if the stream is currently open.
}
/// Interface for writable streams.
interface OutputStream {
/***
* Write exactly size bytes from buffer, or throw a WriteException if that
* could not be done.
*/
void writeExact(const void* buffer, size_t size);
/***
* Write as much of the buffer as possible,
* returning the number of bytes written.
*/
size_t write(const(ubyte)[] buffer);
/***
* Write a basic type.
*
* Outside of byte, ubyte, and char, the format is implementation-specific
* and should only be used in conjunction with read.
* Throw WriteException on error.
*/
void write(byte x);
void write(ubyte x); /// ditto
void write(short x); /// ditto
void write(ushort x); /// ditto
void write(int x); /// ditto
void write(uint x); /// ditto
void write(long x); /// ditto
void write(ulong x); /// ditto
void write(float x); /// ditto
void write(double x); /// ditto
void write(real x); /// ditto
void write(ifloat x); /// ditto
void write(idouble x); /// ditto
void write(ireal x); /// ditto
void write(cfloat x); /// ditto
void write(cdouble x); /// ditto
void write(creal x); /// ditto
void write(char x); /// ditto
void write(wchar x); /// ditto
void write(dchar x); /// ditto
/***
* Writes a string, together with its length.
*
* The format is implementation-specific
* and should only be used in conjunction with read.
* Throw WriteException on error.
*/
void write(const(char)[] s);
void write(const(wchar)[] s); /// ditto
/***
* Write a line of text,
* terminating the line with an operating-system-specific line ending.
*
* Throws WriteException on error.
*/
void writeLine(const(char)[] s);
/***
* Write a line of text,
* terminating the line with an operating-system-specific line ending.
*
* The format is implementation-specific.
* Throws WriteException on error.
*/
void writeLineW(const(wchar)[] s);
/***
* Write a string of text.
*
* Throws WriteException if it could not be fully written.
*/
void writeString(const(char)[] s);
/***
* Write a string of text.
*
* The format is implementation-specific.
* Throws WriteException if it could not be fully written.
*/
void writeStringW(const(wchar)[] s);
/***
* Print a formatted string into the stream using printf-style syntax,
* returning the number of bytes written.
*/
size_t vprintf(const(char)[] format, va_list args);
size_t printf(const(char)[] format, ...); /// ditto
/***
* Print a formatted string into the stream using writef-style syntax.
* References: std.format.
* Returns: self to chain with other stream commands like flush.
*/
OutputStream writef(...);
OutputStream writefln(...); /// ditto
OutputStream writefx(TypeInfo[] arguments, va_list argptr, int newline = false); /// ditto
void flush(); /// Flush pending output if appropriate.
void close(); /// Close the stream, flushing output if appropriate.
@property bool isOpen(); /// Return true if the stream is currently open.
}
/***
* Stream is the base abstract class from which the other stream classes derive.
*
* Stream's byte order is the format native to the computer.
*
* Reading:
* These methods require that the readable flag be set.
* Problems with reading result in a ReadException being thrown.
* Stream implements the InputStream interface in addition to the
* readBlock method.
*
* Writing:
* These methods require that the writeable flag be set. Problems with writing
* result in a WriteException being thrown. Stream implements the OutputStream
* interface in addition to the following methods:
* writeBlock
* copyFrom
* copyFrom
*
* Seeking:
* These methods require that the seekable flag be set.
* Problems with seeking result in a SeekException being thrown.
* seek, seekSet, seekCur, seekEnd, position, size, toString, toHash
*/
// not really abstract, but its instances will do nothing useful
class Stream : InputStream, OutputStream {
private import std.string, std.digest.crc, core.stdc.stdlib, core.stdc.stdio;
// stream abilities
bool readable = false; /// Indicates whether this stream can be read from.
bool writeable = false; /// Indicates whether this stream can be written to.
bool seekable = false; /// Indicates whether this stream can be sought within.
protected bool isopen = true; /// Indicates whether this stream is open.
protected bool readEOF = false; /** Indicates whether this stream is at eof
* after the last read attempt.
*/
protected bool prevCr = false; /** For a non-seekable stream indicates that
* the last readLine or readLineW ended on a
* '\r' character.
*/
this() {}
/***
* Read up to size bytes into the buffer and return the number of bytes
* actually read. A return value of 0 indicates end-of-file.
*/
abstract size_t readBlock(void* buffer, size_t size);
// fills buffer with exactly size bytes by calling readBlock repeatedly,
// throws ReadException if the stream ends first
void readExact(void* buffer, size_t size) {
    while (size != 0) {
        const got = readBlock(buffer, size); // 0 means eof
        if (got == 0)
            throw new ReadException("not enough data in stream");
        buffer += got;
        size -= got;
    }
}
// makes one readBlock call sized to the given array and
// returns the number of bytes it actually delivered
size_t read(ubyte[] buffer) {
    const capacity = buffer.length;
    return readBlock(buffer.ptr, capacity);
}
// read one value of the given type as raw bytes via readExact,
// which throws ReadException when the stream runs short
void read(out byte x)    { readExact(&x, byte.sizeof); }
void read(out ubyte x)   { readExact(&x, ubyte.sizeof); }
void read(out short x)   { readExact(&x, short.sizeof); }
void read(out ushort x)  { readExact(&x, ushort.sizeof); }
void read(out int x)     { readExact(&x, int.sizeof); }
void read(out uint x)    { readExact(&x, uint.sizeof); }
void read(out long x)    { readExact(&x, long.sizeof); }
void read(out ulong x)   { readExact(&x, ulong.sizeof); }
void read(out float x)   { readExact(&x, float.sizeof); }
void read(out double x)  { readExact(&x, double.sizeof); }
void read(out real x)    { readExact(&x, real.sizeof); }
void read(out ifloat x)  { readExact(&x, ifloat.sizeof); }
void read(out idouble x) { readExact(&x, idouble.sizeof); }
void read(out ireal x)   { readExact(&x, ireal.sizeof); }
void read(out cfloat x)  { readExact(&x, cfloat.sizeof); }
void read(out cdouble x) { readExact(&x, cdouble.sizeof); }
void read(out creal x)   { readExact(&x, creal.sizeof); }
void read(out char x)    { readExact(&x, char.sizeof); }
void read(out wchar x)   { readExact(&x, wchar.sizeof); }
void read(out dchar x)   { readExact(&x, dchar.sizeof); }
// reads a size_t length prefix, then that many chars
// (the layout produced by write(const(char)[]))
void read(out char[] s) {
    size_t count;
    read(count);
    s = readString(count);
}
// reads a size_t length prefix, then that many wchars
// (the layout produced by write(const(wchar)[]))
void read(out wchar[] s) {
    size_t count;
    read(count);
    s = readStringW(count);
}
// reads a line, terminated by either CR, LF, CR/LF, or EOF,
// starting from a null buffer
char[] readLine() {
    char[] noBuffer = null;
    return readLine(noBuffer);
}
// reads a line, terminated by either CR, LF, CR/LF, or EOF.
// Characters are stored into result while they fit and appended once they
// do not; the returned slice has exactly the line's length and excludes
// the terminator.
char[] readLine(char[] result) {
    size_t used = 0;
    for (char ch = getc(); readable; ch = getc()) {
        if (ch == '\r') {
            if (seekable) {
                // look one character ahead and swallow the LF of a CR/LF pair
                ch = getc();
                if (ch != '\n')
                    ungetc(ch);
            } else {
                // no look-ahead here: let the next getc drop a following LF
                prevCr = true;
            }
            break;
        }
        if (ch == '\n' || ch == char.init)
            break;
        if (used < result.length)
            result[used] = ch;
        else
            result ~= ch;
        used++;
    }
    result.length = used;
    return result;
}
// reads a Unicode line, terminated by either CR, LF, CR/LF,
// or EOF, starting from a null buffer; the wchar counterpart of readLine()
wchar[] readLineW() {
    wchar[] noBuffer = null;
    return readLineW(noBuffer);
}
// reads a Unicode line, terminated by either CR, LF, CR/LF, or EOF.
// Characters are stored into result while they fit and appended once they
// do not; the returned slice has exactly the line's length and excludes
// the terminator.
wchar[] readLineW(wchar[] result) {
    size_t used = 0;
    for (wchar ch = getcw(); readable; ch = getcw()) {
        if (ch == '\r') {
            if (seekable) {
                // look one character ahead and swallow the LF of a CR/LF pair
                ch = getcw();
                if (ch != '\n')
                    ungetcw(ch);
            } else {
                // no look-ahead here: let the next getcw drop a following LF
                prevCr = true;
            }
            break;
        }
        if (ch == '\n' || ch == wchar.init)
            break;
        if (used < result.length)
            result[used] = ch;
        else
            result ~= ch;
        used++;
    }
    result.length = used;
    return result;
}
// iterate through the stream line-by-line - due to Regan Heath
// Stops at eof or as soon as the delegate returns non-zero; that value is
// the result. Lines are read into a 128-char stack buffer first.
int opApply(scope int delegate(ref char[] line) dg) {
    char[128] scratch;
    int status = 0;
    while (status == 0 && !eof) {
        char[] text = readLine(scratch);
        status = dg(text);
    }
    return status;
}
// iterate through the stream line-by-line with line count and string
// The count starts at 1 and is passed by reference to the delegate.
int opApply(scope int delegate(ref ulong n, ref char[] line) dg) {
    char[128] scratch;
    int status = 0;
    for (ulong lineNo = 1; !eof; lineNo++) {
        auto text = readLine(scratch);
        status = dg(lineNo, text);
        if (status != 0)
            break;
    }
    return status;
}
// iterate through the stream line-by-line with wchar[]
// Stops at eof or as soon as the delegate returns non-zero.
int opApply(scope int delegate(ref wchar[] line) dg) {
    wchar[128] scratch;
    int status = 0;
    while (status == 0 && !eof) {
        auto text = readLineW(scratch);
        status = dg(text);
    }
    return status;
}
// iterate through the stream line-by-line with line count and wchar[]
// The count starts at 1 and is passed by reference to the delegate.
int opApply(scope int delegate(ref ulong n, ref wchar[] line) dg) {
    wchar[128] scratch;
    int status = 0;
    for (ulong lineNo = 1; !eof; lineNo++) {
        auto text = readLineW(scratch);
        status = dg(lineNo, text);
        if (status != 0)
            break;
    }
    return status;
}
// allocates a char array of the given length and fills it completely;
// readExact throws ReadException if the stream runs short
char[] readString(size_t length) {
    auto text = new char[](length);
    readExact(text.ptr, text.length);
    return text;
}
// allocates a wchar array of the given length and fills it completely
// (length counts wchars, not bytes);
// readExact throws ReadException if the stream runs short
wchar[] readStringW(size_t length) {
    wchar[] text = new wchar[](length);
    const byteCount = text.length * wchar.sizeof;
    readExact(text.ptr, byteCount);
    return text;
}
// unget buffer
private wchar[] unget;
// true when a pushed-back character is waiting; slot 0 of unget is a
// placeholder, so real entries start at index 1
final bool ungetAvailable() {
    return unget.length >= 2;
}
// reads and returns next character from the stream,
// handles characters pushed back by ungetc()
// returns char.init on eof.
char getc() {
    if (prevCr) {
        // The previous line ended on '\r' in a stream that cannot look
        // ahead; drop one '\n' if it follows, completing the CR/LF pair.
        prevCr = false;
        char pending = getc();
        if (pending != '\n')
            return pending;
    }
    // Start from char.init so that a readBlock call that delivers nothing
    // yields char.init, not the '\n' that was just discarded above.
    char c;
    if (unget.length > 1) {
        c = cast(char)unget[unget.length - 1];
        unget.length = unget.length - 1;
    } else {
        readBlock(&c,1);
    }
    return c;
}
// reads and returns next Unicode character from the
// stream, handles characters pushed back by ungetc()
// returns wchar.init on eof.
// Throws ReadException when only one of the two bytes is available.
wchar getcw() {
    if (prevCr) {
        // The previous line ended on '\r' in a stream that cannot look
        // ahead; drop one '\n' if it follows, completing the CR/LF pair.
        prevCr = false;
        wchar pending = getcw();
        if (pending != '\n')
            return pending;
    }
    // Start from wchar.init so that a readBlock call that delivers nothing
    // yields wchar.init, not the '\n' that was just discarded above.
    wchar c;
    if (unget.length > 1) {
        c = unget[unget.length - 1];
        unget.length = unget.length - 1;
    } else {
        void* buf = &c;
        size_t n = readBlock(buf,2);
        // A single byte arrived: fetch the second half or give up.
        if (n == 1 && readBlock(buf+1,1) == 0)
            throw new ReadException("not enough data in stream");
    }
    return c;
}
// pushes back character c into the stream; only has
// effect on further calls to getc() and getcw().
// char.init (the eof marker) is never stored.
char ungetc(char c) {
    if (c != c.init) {
        // slot 0 is a placeholder so the array length never returns to 0
        if (unget.length == 0)
            unget.length = 1;
        unget ~= c;
    }
    return c;
}
// pushes back Unicode character c into the stream; only
// has effect on further calls to getc() and getcw().
// wchar.init (the eof marker) is never stored.
wchar ungetcw(wchar c) {
    if (c != c.init) {
        // slot 0 is a placeholder so the array length never returns to 0
        if (unget.length == 0)
            unget.length = 1;
        unget ~= c;
    }
    return c;
}
// scanf-like reader. Walks the run-time argument list: a string argument
// becomes the current format, every other argument is a pointer that a
// conversion stores into. Input is consumed through getc() with one
// character of look-ahead kept in c, which is handed back with ungetc()
// before returning.
// Returns: count, which is incremented for characters consumed inside
// conversions.
// NOTE(review): characters consumed by the whitespace-skip and literal-match
// branches at the bottom of the loop are not added to count, so %n and the
// return value exclude them - confirm whether that is intended.
int vreadf(TypeInfo[] arguments, va_list args) {
string fmt;
int j = 0; // index of the next entry in arguments
int count = 0, i = 0; // i: position inside fmt
char c; // look-ahead character
bool firstCharacter = true;
while ((j < arguments.length || i < fmt.length) && !eof) {
// prime the look-ahead on the first iteration only
if(firstCharacter) {
c = getc();
firstCharacter = false;
}
// current format used up: take the next argument as a new format, or
// derive a default format from its pointer type
if (fmt.length == 0 || i == fmt.length) {
i = 0;
if (arguments[j] is typeid(string) || arguments[j] is typeid(char[])
|| arguments[j] is typeid(const(char)[])) {
fmt = va_arg!(string)(args);
j++;
continue;
} else if (arguments[j] is typeid(int*) ||
arguments[j] is typeid(byte*) ||
arguments[j] is typeid(short*) ||
arguments[j] is typeid(long*)) {
fmt = "%d";
} else if (arguments[j] is typeid(uint*) ||
arguments[j] is typeid(ubyte*) ||
arguments[j] is typeid(ushort*) ||
arguments[j] is typeid(ulong*)) {
// unsigned targets use the same "%d" default as signed ones
fmt = "%d";
} else if (arguments[j] is typeid(float*) ||
arguments[j] is typeid(double*) ||
arguments[j] is typeid(real*)) {
fmt = "%f";
} else if (arguments[j] is typeid(char[]*) ||
arguments[j] is typeid(wchar[]*) ||
arguments[j] is typeid(dchar[]*)) {
fmt = "%s";
} else if (arguments[j] is typeid(char*)) {
fmt = "%c";
}
// NOTE(review): any other argument type leaves fmt unchanged here
}
if (fmt[i] == '%') { // a field
i++;
bool suppress = false;
if (fmt[i] == '*') { // suppress assignment
// NOTE(review): suppress is set but not read again in this function
suppress = true;
i++;
}
// read field width
int width = 0;
while (isDigit(fmt[i])) {
width = width * 10 + (fmt[i] - '0');
i++;
}
// no width given: -1 never reaches 0 in the "&& width" loops below
if (width == 0)
width = -1;
// skip any modifier if present
if (fmt[i] == 'h' || fmt[i] == 'l' || fmt[i] == 'L')
i++;
// check the typechar and act accordingly
switch (fmt[i]) {
case 'd': // decimal/hexadecimal/octal integer
case 'D':
case 'u':
case 'U':
case 'o':
case 'O':
case 'x':
case 'X':
case 'i':
case 'I':
{
// skip leading whitespace, then an optional sign
while (isWhite(c)) {
c = getc();
count++;
}
bool neg = false;
if (c == '-') {
neg = true;
c = getc();
count++;
} else if (c == '+') {
c = getc();
count++;
}
// fold the type character to lower case
char ifmt = cast(char)(fmt[i] | 0x20);
if (ifmt == 'i') { // undetermined base
if (c == '0') { // octal or hex
c = getc();
count++;
if (c == 'x' || c == 'X') { // hex
ifmt = 'x';
c = getc();
count++;
} else { // octal
ifmt = 'o';
}
}
else // decimal
ifmt = 'd';
}
// accumulate digits in the selected base into a long
long n = 0;
switch (ifmt)
{
case 'd': // decimal
case 'u': {
while (isDigit(c) && width) {
n = n * 10 + (c - '0');
width--;
c = getc();
count++;
}
} break;
case 'o': { // octal
while (isOctalDigit(c) && width) {
n = n * 8 + (c - '0');
width--;
c = getc();
count++;
}
} break;
case 'x': { // hexadecimal
while (isHexDigit(c) && width) {
n *= 0x10;
if (isDigit(c))
n += c - '0';
else
n += 0xA + (c | 0x20) - 'a';
width--;
c = getc();
count++;
}
} break;
default:
assert(0);
}
if (neg)
n = -n;
// store through the pointer, narrowing with a cast to the target type
if (arguments[j] is typeid(int*)) {
int* p = va_arg!(int*)(args);
*p = cast(int)n;
} else if (arguments[j] is typeid(short*)) {
short* p = va_arg!(short*)(args);
*p = cast(short)n;
} else if (arguments[j] is typeid(byte*)) {
byte* p = va_arg!(byte*)(args);
*p = cast(byte)n;
} else if (arguments[j] is typeid(long*)) {
long* p = va_arg!(long*)(args);
*p = n;
} else if (arguments[j] is typeid(uint*)) {
uint* p = va_arg!(uint*)(args);
*p = cast(uint)n;
} else if (arguments[j] is typeid(ushort*)) {
ushort* p = va_arg!(ushort*)(args);
*p = cast(ushort)n;
} else if (arguments[j] is typeid(ubyte*)) {
ubyte* p = va_arg!(ubyte*)(args);
*p = cast(ubyte)n;
} else if (arguments[j] is typeid(ulong*)) {
ulong* p = va_arg!(ulong*)(args);
*p = cast(ulong)n;
}
j++;
i++;
} break;
case 'f': // float
case 'F':
case 'e':
case 'E':
case 'g':
case 'G':
{
// skip leading whitespace, then an optional sign
while (isWhite(c)) {
c = getc();
count++;
}
bool neg = false;
if (c == '-') {
neg = true;
c = getc();
count++;
} else if (c == '+') {
c = getc();
count++;
}
// integer part
real r = 0;
while (isDigit(c) && width) {
r = r * 10 + (c - '0');
width--;
c = getc();
count++;
}
// fraction: keep accumulating digits, then divide once by 10^digits
if (width && c == '.') {
width--;
c = getc();
count++;
double frac = 1;
while (isDigit(c) && width) {
r = r * 10 + (c - '0');
frac *= 10;
width--;
c = getc();
count++;
}
r /= frac;
}
// exponent: scale by repeated multiplication or division by 10
if (width && (c == 'e' || c == 'E')) {
width--;
c = getc();
count++;
if (width) {
bool expneg = false;
if (c == '-') {
expneg = true;
width--;
c = getc();
count++;
} else if (c == '+') {
width--;
c = getc();
count++;
}
real exp = 0;
while (isDigit(c) && width) {
exp = exp * 10 + (c - '0');
width--;
c = getc();
count++;
}
if (expneg) {
while (exp--)
r /= 10;
} else {
while (exp--)
r *= 10;
}
}
}
// the letters "nan", in any case, select real.nan
if(width && (c == 'n' || c == 'N')) {
width--;
c = getc();
count++;
if(width && (c == 'a' || c == 'A')) {
width--;
c = getc();
count++;
if(width && (c == 'n' || c == 'N')) {
width--;
c = getc();
count++;
r = real.nan;
}
}
}
// the letters "inf", in any case, select real.infinity
if(width && (c == 'i' || c == 'I')) {
width--;
c = getc();
count++;
if(width && (c == 'n' || c == 'N')) {
width--;
c = getc();
count++;
if(width && (c == 'f' || c == 'F')) {
width--;
c = getc();
count++;
r = real.infinity;
}
}
}
if (neg)
r = -r;
if (arguments[j] is typeid(float*)) {
float* p = va_arg!(float*)(args);
*p = r;
} else if (arguments[j] is typeid(double*)) {
double* p = va_arg!(double*)(args);
*p = r;
} else if (arguments[j] is typeid(real*)) {
real* p = va_arg!(real*)(args);
*p = r;
}
j++;
i++;
} break;
case 's': { // string
while (isWhite(c)) {
c = getc();
count++;
}
char[] s;
char[]* p;
size_t strlen;
// for a char[]* target, start from the caller's existing buffer
if (arguments[j] is typeid(char[]*)) {
p = va_arg!(char[]*)(args);
s = *p;
}
// collect characters up to the next whitespace or eof marker
while (!isWhite(c) && c != char.init) {
if (strlen < s.length) {
s[strlen] = c;
} else {
s ~= c;
}
strlen++;
c = getc();
count++;
}
s = s[0 .. strlen];
if (arguments[j] is typeid(char[]*)) {
*p = s;
} else if (arguments[j] is typeid(char*)) {
// char* target: append a 0 terminator and copy into the caller's memory
s ~= 0;
auto q = va_arg!(char*)(args);
q[0 .. s.length] = s[];
} else if (arguments[j] is typeid(wchar[]*)) {
auto q = va_arg!(const(wchar)[]*)(args);
*q = toUTF16(s);
} else if (arguments[j] is typeid(dchar[]*)) {
auto q = va_arg!(const(dchar)[]*)(args);
*q = toUTF32(s);
}
j++;
i++;
} break;
case 'c': { // character(s)
char* s = va_arg!(char*)(args);
// no width: read one character without skipping whitespace;
// explicit width: skip leading whitespace first
if (width < 0)
width = 1;
else
while (isWhite(c)) {
c = getc();
count++;
}
while (width-- && !eof) {
*(s++) = c;
c = getc();
count++;
}
j++;
i++;
} break;
case 'n': { // number of chars read so far
int* p = va_arg!(int*)(args);
*p = count;
j++;
i++;
} break;
default: // read character as is
goto nws;
}
} else if (isWhite(fmt[i])) { // skip whitespace
while (isWhite(c))
c = getc();
i++;
} else { // read character as is
nws:
// a literal in the format must equal the next input character;
// on a mismatch scanning stops
if (fmt[i] != c)
break;
c = getc();
i++;
}
}
// hand the unused look-ahead character back to the stream
ungetc(c);
return count;
}
// variadic front end: passes the run-time type list and argument
// pointer straight to vreadf and returns its result
int readf(...) {
    auto argTypes = _arguments;
    return vreadf(argTypes, _argptr);
}
// returns estimated number of bytes available for immediate reading;
// this base implementation always reports 0
@property size_t available() {
    enum size_t nothingBuffered = 0;
    return nothingBuffered;
}
/***
* Write up to size bytes from buffer in the stream, returning the actual
* number of bytes that were written.
*/
abstract size_t writeBlock(const void* buffer, size_t size);
// writes exactly size bytes by calling writeBlock repeatedly,
// throws WriteException as soon as a call accepts nothing
void writeExact(const void* buffer, size_t size) {
    const(void)* cursor = buffer;
    while (size != 0) {
        const sent = writeBlock(cursor, size);
        if (sent == 0)
            throw new WriteException("unable to write to stream");
        cursor += sent;
        size -= sent;
    }
}
// Hands the whole byte array to writeBlock in a single call and reports
// how many bytes that call accepted (possibly fewer than buffer.length).
size_t write(const(ubyte)[] buffer) {
  auto accepted = writeBlock(buffer.ptr, buffer.length);
  return accepted;
}
// write a single value of desired type,
// throw WriteException on error
//
// Every overload passes the address of its by-value parameter and the
// parameter's size to writeExact, so the bytes written are the value's
// in-memory representation (x.sizeof bytes).
void write(byte x) { writeExact(&x, x.sizeof); }
void write(ubyte x) { writeExact(&x, x.sizeof); }
void write(short x) { writeExact(&x, x.sizeof); }
void write(ushort x) { writeExact(&x, x.sizeof); }
void write(int x) { writeExact(&x, x.sizeof); }
void write(uint x) { writeExact(&x, x.sizeof); }
void write(long x) { writeExact(&x, x.sizeof); }
void write(ulong x) { writeExact(&x, x.sizeof); }
void write(float x) { writeExact(&x, x.sizeof); }
void write(double x) { writeExact(&x, x.sizeof); }
void write(real x) { writeExact(&x, x.sizeof); }
void write(ifloat x) { writeExact(&x, x.sizeof); }
void write(idouble x) { writeExact(&x, x.sizeof); }
void write(ireal x) { writeExact(&x, x.sizeof); }
void write(cfloat x) { writeExact(&x, x.sizeof); }
void write(cdouble x) { writeExact(&x, x.sizeof); }
void write(creal x) { writeExact(&x, x.sizeof); }
void write(char x) { writeExact(&x, x.sizeof); }
void write(wchar x) { writeExact(&x, x.sizeof); }
void write(dchar x) { writeExact(&x, x.sizeof); }
// writes a string, together with its length
//
// The length (s.length, a size_t) is written first through the scalar
// write overloads, then the characters follow via writeString.
void write(const(char)[] s) {
write(s.length);
writeString(s);
}
// writes a Unicode string, together with its length
//
// The value written first is s.length, i.e. the number of wchar
// elements (not bytes); the elements themselves follow via writeStringW.
void write(const(wchar)[] s) {
write(s.length);
writeStringW(s);
}
// Writes s followed by the line terminator selected at compile time:
// "\r\n" under version Windows, "\r" under version Mac, "\n" otherwise.
// Throws WriteException on error.
void writeLine(const(char)[] s) {
  version (Windows)
    enum string lineEnd = "\r\n";
  else version (Mac)
    enum string lineEnd = "\r";
  else
    enum string lineEnd = "\n";

  writeString(s);
  writeString(lineEnd);
}
// Writes the wchar string s followed by the line terminator selected at
// compile time: "\r\n" under version Windows, "\r" under version Mac,
// "\n" otherwise. Throws WriteException on error.
void writeLineW(const(wchar)[] s) {
  version (Windows)
    enum wstring lineEnd = "\r\n"w;
  else version (Mac)
    enum wstring lineEnd = "\r"w;
  else
    enum wstring lineEnd = "\n"w;

  writeStringW(s);
  writeStringW(lineEnd);
}
// writes a string, throws WriteException on error
// (only the s.length characters are written; no length prefix and no
// terminator are added)
void writeString(const(char)[] s) {
writeExact(s.ptr, s.length);
}
// writes a Unicode string, throws WriteException on error
// (the byte count handed to writeExact is s.length * wchar.sizeof; no
// length prefix and no terminator are added)
void writeStringW(const(wchar)[] s) {
writeExact(s.ptr, s.length * wchar.sizeof);
}
// writes data to stream using vprintf() syntax,
// returns number of bytes written
//
// The text is first formatted into a 1024-char stack buffer. When
// vsnprintf indicates the output did not fit, a larger scratch buffer is
// obtained from alloca() and the formatting is attempted again; the
// finished text is then sent to the stream with writeString.
// NOTE(review): the same va_list `args` is passed to vsnprintf again on
// every retry without being copied -- confirm this is valid on all
// supported targets.
size_t vprintf(const(char)[] format, va_list args) {
// shamelessly stolen from OutBuffer,
// by Walter's permission
char[1024] buffer;
char* p = buffer.ptr;
// Can't use `tempCString()` here as it will result in compilation error:
// "cannot mix core.std.stdlib.alloca() and exception handling".
auto f = toStringz(format);
size_t psize = buffer.length;
size_t count;
while (true) {
version (Windows) {
// here a result of -1 is treated as "buffer too small": double and retry
count = vsnprintf(p, psize, f, args);
if (count != -1)
break;
psize *= 2;
p = cast(char*) alloca(psize);
} else version (Posix) {
// -1: size unknown, so double; count >= psize: output was truncated and
// count is the required length, so ask for exactly count + 1
count = vsnprintf(p, psize, f, args);
if (count == -1)
psize *= 2;
else if (count >= psize)
psize = count + 1;
else
break;
p = cast(char*) alloca(psize);
} else
throw new Exception("unsupported platform");
}
writeString(p[0 .. count]);
return count;
}
// writes data to stream using printf() syntax,
// returns number of bytes written
//
// Collects the C-style variadic arguments into a va_list and delegates
// to vprintf; va_end is called before the result is returned.
size_t printf(const(char)[] format, ...) {
va_list ap;
va_start(ap, format);
auto result = vprintf(format, ap);
va_end(ap);
return result;
}
// Sink handed to doFormat by writefx: encodes one code point as UTF-8
// into a small stack buffer and writes the encoded bytes to the stream.
private void doFormatCallback(dchar c) {
  char[4] utf8Storage;
  writeString(undead.utf.toUTF8(utf8Storage, c));
}
// writes data to stream using writef() syntax,
// forwarding the variadic arguments to writefx with no trailing newline;
// returns whatever writefx returns
OutputStream writef(...) {
return writefx(_arguments,_argptr,0);
}
// writes data with trailing newline
// (same as writef, but asks writefx to append a line terminator)
OutputStream writefln(...) {
return writefx(_arguments,_argptr,1);
}
// Formats the given arguments through doFormat, sending every produced
// character to doFormatCallback. When newline is non-zero an empty line
// is written afterwards, which emits just the line terminator.
// Returns this stream so calls can be chained.
OutputStream writefx(TypeInfo[] arguments, va_list argptr, int newline=false) {
  doFormat(&doFormatCallback,arguments,argptr);
  if (newline != 0) {
    writeLine("");
  }
  return this;
}
/***
* Copies all data from s into this stream.
* This may throw ReadException or WriteException on failure.
* This restores the file position of s so that it is unchanged.
*
* If s is seekable it is rewound, copied in full and then put back at the
* position it had on entry (also when the copy throws). If s is not
* seekable, everything from its current position up to end of stream is
* copied and no position can be restored.
*/
void copyFrom(Stream s) {
  // The decision must depend on the *source*: it is s that gets
  // repositioned below, not this stream.
  if (s.seekable) {
    ulong pos = s.position;
    scope(exit) s.position = pos;
    s.position = 0;
    copyFrom(s, s.size);
  } else {
    ubyte[128] buf;
    while (!s.eof) {
      size_t m = s.readBlock(buf.ptr, buf.length);
      writeExact(buf.ptr, m);
    }
  }
}
/***
* Copy a specified number of bytes from the given stream into this one.
* This may throw ReadException or WriteException on failure.
* Unlike the previous form, this doesn't restore the file position of s.
*/
void copyFrom(Stream s, ulong count) {
ubyte[128] buf;
while (count > 0) {
size_t n = cast(size_t)(count 1)
unget.length = 1; // keep at least 1 so that data ptr stays
}
// Default close: flush if the stream is still open, then clear every
// state flag so the stream reports itself as closed and unusable.
void close() {
  if (isopen) {
    flush();
  }
  seekable = false;
  writeable = false;
  readable = false;
  isopen = false;
  prevCr = false;
  readEOF = false;
}
/***
* Read the entire stream and return it as a string.
* If the stream is not seekable the contents from the current position to eof
* is read and returned.
*
* For a seekable stream the position is saved, the stream is read from
* offset 0, and the position is restored on exit. If the stream is not
* readable, or anything is thrown while reading, the result of
* super.toString() is returned instead.
*/
override string toString() {
  if (!readable)
    return super.toString();
  try
  {
    size_t pos;        // number of bytes collected so far
    size_t rdlen;      // bytes delivered by the latest readBlock
    size_t blockSize;  // bytes requested from the next readBlock
    char[] result;
    if (seekable) {
      ulong orig_pos = position;
      scope(exit) position = orig_pos;
      position = 0;
      blockSize = cast(size_t)size;
      result = new char[blockSize];
      while (blockSize > 0) {
        rdlen = readBlock(&result[pos], blockSize);
        // A zero-length read means no more data is coming even though
        // size promised more; stop instead of spinning forever.
        if (rdlen == 0)
          break;
        pos += rdlen;
        blockSize -= rdlen;
      }
    } else {
      blockSize = 4096;
      result = new char[blockSize];
      while ((rdlen = readBlock(&result[pos], blockSize)) > 0) {
        pos += rdlen;
        blockSize += rdlen;
        // grow ahead of the next request so &result[pos] stays valid
        result.length = result.length + blockSize;
      }
    }
    return cast(string) result[0 .. pos];
  }
  catch (Throwable)
  {
    return super.toString();
  }
}
/***
* Get a hash of the stream by reading each byte and using it in a CRC-32
* checksum.
*
* Only streams that are both readable and seekable are hashed this way;
* the stream is read from offset 0 and its position is restored on exit.
* Otherwise, or when anything is thrown, super.toHash() is returned.
*/
override size_t toHash() @trusted {
  if (!(readable && seekable))
    return super.toHash();
  try
  {
    ulong savedPos = position;
    scope(exit) position = savedPos;
    CRC32 digest;
    digest.start();
    position = 0;
    immutable ulong total = size;
    for (ulong done = 0; done != total; ++done)
    {
      ubyte current;
      read(current);
      digest.put(current);
    }
    // Reinterpret the 4 checksum bytes as (the low part of) a size_t.
    union HashBits
    {
      size_t hash;
      ubyte[4] crcVal;
    }
    HashBits bits;
    bits.crcVal = digest.finish();
    return bits.hash;
  }
  catch (Throwable)
  {
    return super.toHash();
  }
}
// helper for checking that the stream is readable;
// throws ReadException when the readable flag is not set
final protected void assertReadable() {
if (!readable)
throw new ReadException("Stream is not readable");
}
// helper for checking that the stream is writeable;
// throws WriteException when the writeable flag is not set
final protected void assertWriteable() {
if (!writeable)
throw new WriteException("Stream is not writeable");
}
// helper for checking that the stream is seekable;
// throws SeekException when the seekable flag is not set
final protected void assertSeekable() {
if (!seekable)
throw new SeekException("Stream is not seekable");
}
unittest { // unit test for Issue 3363
// Writes "one two three" to a temporary file with std.stdio.File, then
// reads the three words back with readf("%s %s %s") into a
// const(char)[], a string and a char[] and checks each value.
import std.stdio;
immutable fileName = undead.internal.file.deleteme ~ "-issue3363.txt";
auto w = std.stdio.File(fileName, "w");
scope (exit) std.file.remove(fileName);
w.write("one two three");
w.close();
auto r = std.stdio.File(fileName, "r");
const(char)[] constChar;
string str;
char[] chars;
r.readf("%s %s %s", &constChar, &str, &chars);
assert (constChar == "one", constChar);
assert (str == "two", str);
assert (chars == "three", chars);
}
unittest { //unit tests for Issue 1668
// Round-trips floats through a MemoryStream: the value is written with
// writef (optionally with a format string and trailing padding), the
// stream is rewound, and readf must yield the same value back.
void tryFloatRoundtrip(float x, string fmt = "", string pad = "") {
auto s = new MemoryStream();
s.writef(fmt, x, pad);
s.position = 0;
float f;
assert(s.readf(&f));
assert(x == f || (x != x && f != f)); //either equal or both NaN
}
// ordinary values
tryFloatRoundtrip(1.0);
tryFloatRoundtrip(1.0, "%f");
tryFloatRoundtrip(1.0, "", " ");
tryFloatRoundtrip(1.0, "%f", " ");
tryFloatRoundtrip(3.14);
tryFloatRoundtrip(3.14, "%f");
tryFloatRoundtrip(3.14, "", " ");
tryFloatRoundtrip(3.14, "%f", " ");
// NaN
float nan = float.nan;
tryFloatRoundtrip(nan);
tryFloatRoundtrip(nan, "%f");
tryFloatRoundtrip(nan, "", " ");
tryFloatRoundtrip(nan, "%f", " ");
// positive and negative infinity
float inf = 1.0/0.0;
tryFloatRoundtrip(inf);
tryFloatRoundtrip(inf, "%f");
tryFloatRoundtrip(inf, "", " ");
tryFloatRoundtrip(inf, "%f", " ");
tryFloatRoundtrip(-inf);
tryFloatRoundtrip(-inf,"%f");
tryFloatRoundtrip(-inf, "", " ");
tryFloatRoundtrip(-inf, "%f", " ");
}
}
/***
* A base class for streams that wrap a source stream with additional
* functionality.
*
* The method implementations forward read/write/seek calls to the
* source stream. A FilterStream can change the position of the source stream
* arbitrarily and may not keep the source stream state in sync with the
* FilterStream, even upon flushing and closing the FilterStream. It is
* recommended to not make any assumptions about the state of the source position
* and read/write state after a FilterStream has acted upon it. Specific subclasses
* of FilterStream should document how they modify the source stream and if any
* invariants hold true between the source and filter.
*/
class FilterStream : Stream {
  private Stream s;              // source stream

  /// Controls whether closing this stream also closes the source stream.
  /// Defaults to true.
  bool nestClose = true;

  /// Construct a FilterStream for the given source.
  this(Stream source) {
    s = source;
    resetSource();
  }

  // source getter/setter

  /***
   * Get the current source stream.
   */
  final Stream source(){return s;}

  /***
   * Set the current source stream.
   *
   * This stream is closed first, then the new source is attached and the
   * stream state is re-derived from it via resetSource, so attaching an
   * open stream reopens this stream.
   */
  void source(Stream s) {
    close();
    this.s = s;
    resetSource();
  }

  /***
   * Indicates the source stream changed state and that this stream should
   * reset its readable, writeable, seekable and isopen flags. With no source
   * attached every flag is cleared. The end-of-file and pending-CR markers
   * are always cleared.
   */
  void resetSource() {
    immutable bool attached = s !is null;
    readable = attached && s.readable;
    writeable = attached && s.writeable;
    seekable = attached && s.seekable;
    isopen = attached && s.isOpen;
    prevCr = false;
    readEOF = false;
  }

  // read from source; a zero-length result marks end of file
  override size_t readBlock(void* buffer, size_t size) {
    immutable size_t got = s.readBlock(buffer,size);
    readEOF = (got == 0);
    return got;
  }

  // write to source
  override size_t writeBlock(const void* buffer, size_t size) {
    return s.writeBlock(buffer,size);
  }

  // close this stream and, when nestClose is set, the source as well;
  // does nothing if this stream is not open
  override void close() {
    if (!isopen)
      return;
    super.close();
    if (nestClose)
      s.close();
  }

  // seek on source; seeking clears the end-of-file marker
  override ulong seek(long offset, SeekPos whence) {
    readEOF = false;
    return s.seek(offset,whence);
  }

  // both of these simply defer to the source stream
  override @property size_t available() { return s.available; }
  override void flush() { super.flush(); s.flush(); }
}
/***
* This subclass is for buffering a source stream.
*
* A buffered stream must be
* closed explicitly to ensure the final buffer content is written to the source
* stream. The source stream position is changed according to the block size so
* reading or writing to the BufferedStream may not change the source stream
* position by the same amount.
*/
class BufferedStream : FilterStream {
  ubyte[] buffer;           // buffer, if any
  size_t bufferCurPos;      // current position in buffer
  size_t bufferLen;         // amount of data in buffer
  bool bufferDirty = false; // true when the buffer holds data not yet written to the source
  size_t bufferSourcePos;   // position in buffer of source stream position
  ulong streamPos;          // absolute position in source stream

  /* Example of relationship between fields:
   *
   *  s             ...01234567890123456789012EOF
   *  buffer                |--                     --|
   *  bufferCurPos                       |
   *  bufferLen             |--            --|
   *  bufferSourcePos                        |
   *
   */

  invariant() {
    assert(bufferSourcePos <= bufferLen);
    assert(bufferCurPos <= bufferLen);
    assert(bufferLen <= buffer.length);
  }

  enum size_t DefaultBufferSize = 8192;

  /***
   * Create a buffered stream for the stream source with the buffer size
   * bufferSize. A bufferSize of 0 leaves the stream without a buffer.
   */
  this(Stream source, size_t bufferSize = DefaultBufferSize) {
    super(source);
    if (bufferSize)
      buffer = new ubyte[bufferSize];
  }

  // Re-derives the stream flags from the source and discards all buffer
  // bookkeeping (the buffer is treated as empty and clean).
  override protected void resetSource() {
    super.resetSource();
    streamPos = 0;
    bufferLen = bufferSourcePos = bufferCurPos = 0;
    bufferDirty = false;
  }

  // reads block of data of specified size using any buffered data
  // returns actual number of bytes read
  override size_t readBlock(void* result, size_t len) {
    if (len == 0) return 0;

    assertReadable();

    ubyte* outbuf = cast(ubyte*)result;
    size_t readsize = 0;

    if (bufferCurPos + len < bufferLen) {
      // buffer has all the data so copy it
      outbuf[0 .. len] = buffer[bufferCurPos .. bufferCurPos+len];
      bufferCurPos += len;
      readsize = len;
      goto ExitRead;
    }

    readsize = bufferLen - bufferCurPos;
    if (readsize > 0) {
      // buffer has some data so copy what is left
      outbuf[0 .. readsize] = buffer[bufferCurPos .. bufferLen];
      outbuf += readsize;
      bufferCurPos += readsize;
      len -= readsize;
    }

    // write out pending data and empty the buffer before refilling
    flush();

    if (len >= buffer.length) {
      // buffer can't hold the data so fill output buffer directly
      size_t siz = super.readBlock(outbuf, len);
      readsize += siz;
      streamPos += siz;
    } else {
      // read a new block into buffer
      bufferLen = super.readBlock(buffer.ptr, buffer.length);
      if (bufferLen < len) len = bufferLen;
      outbuf[0 .. len] = buffer[0 .. len];
      bufferSourcePos = bufferLen;
      streamPos += bufferLen;
      bufferCurPos = len;
      readsize += len;
    }

  ExitRead:
    return readsize;
  }

  // write block of data of specified size
  // returns actual number of bytes written
  override size_t writeBlock(const void* result, size_t len) {
    assertWriteable();

    ubyte* buf = cast(ubyte*)result;
    size_t writesize = 0;

    if (bufferLen == 0) {
      // buffer is empty so fill it if possible
      if ((len < buffer.length) && (readable)) {
        // read in data if the buffer is currently empty
        bufferLen = s.readBlock(buffer.ptr, buffer.length);
        bufferSourcePos = bufferLen;
        streamPos += bufferLen;

      } else if (len >= buffer.length) {
        // buffer can't hold the data so write it directly and exit
        writesize = s.writeBlock(buf,len);
        streamPos += writesize;
        goto ExitWrite;
      }
    }

    if (bufferCurPos + len <= buffer.length) {
      // buffer has space for all the data so copy it and exit
      buffer[bufferCurPos .. bufferCurPos+len] = buf[0 .. len];
      bufferCurPos += len;
      bufferLen = bufferCurPos > bufferLen ? bufferCurPos : bufferLen;
      writesize = len;
      bufferDirty = true;
      goto ExitWrite;
    }

    writesize = buffer.length - bufferCurPos;
    if (writesize > 0) {
      // buffer can take some data
      buffer[bufferCurPos .. buffer.length] = buf[0 .. writesize];
      bufferCurPos = bufferLen = buffer.length;
      buf += writesize;
      len -= writesize;
      bufferDirty = true;
    }

    assert(bufferCurPos == buffer.length);
    assert(bufferLen == buffer.length);

    // buffer is full: write it out, then handle the rest recursively
    flush();

    writesize += writeBlock(buf,len);

  ExitWrite:
    return writesize;
  }

  // Seeks inside the buffer when a relative seek lands on buffered data;
  // every other seek flushes the buffer and is forwarded to the source.
  // Returns the resulting logical stream position.
  override ulong seek(long offset, SeekPos whence) {
    assertSeekable();

    // Signed index the seek would land on inside the buffer. Computed as
    // long so that "before the start of the buffer" is a real negative
    // number rather than a wrapped unsigned value.
    immutable long target = offset + cast(long)bufferCurPos;

    if ((whence != SeekPos.Current) ||
        (target < 0) ||
        (target >= cast(long)bufferLen)) {
      flush();
      streamPos = s.seek(offset,whence);
    } else {
      bufferCurPos = cast(size_t)target;
    }
    readEOF = false;
    return streamPos-bufferSourcePos+bufferCurPos;
  }

  // Buffered readLine - Dave Fladebo
  // reads a line, terminated by either CR, LF, CR/LF, or EOF
  // reusing the memory in buffer if result will fit, otherwise
  // will reallocate (using concatenation)
  template TreadLine(T) {
    T[] readLine(T[] inBuffer)
    {
      size_t    lineSize = 0;
      bool      haveCR = false;
      T         c = '\0';
      size_t    idx = 0;                  // next byte of c to fill
      ubyte*    pc = cast(ubyte*)&c;      // c is assembled byte by byte

    L0:
      for(;;) {
        size_t start = bufferCurPos;
      L1:
        foreach(ubyte b; buffer[start .. bufferLen]) {
          bufferCurPos++;
          pc[idx] = b;
          if(idx < T.sizeof - 1) {
            // character not complete yet
            idx++;
            continue L1;
          } else {
            idx = 0;
          }
          if(c == '\n' || haveCR) {
            if(haveCR && c != '\n') {
              // A lone CR ended the line and c already belongs to the
              // next one: give the whole character back, i.e. all
              // T.sizeof bytes of it. The amount is clamped because part
              // of the character may have come from the previous buffer
              // fill, in which case only the bytes still held in this
              // buffer can be returned.
              bufferCurPos -= (bufferCurPos < T.sizeof) ? bufferCurPos : T.sizeof;
            }
            break L0;
          } else {
            if(c == '\r') {
              haveCR = true;
            } else {
              if(lineSize < inBuffer.length) {
                inBuffer[lineSize] = c;
              } else {
                inBuffer ~= c;
              }
              lineSize++;
            }
          }
        }
        // buffer exhausted without finding the end of line: refill
        flush();
        size_t res = super.readBlock(buffer.ptr, buffer.length);
        if(!res) break L0; // EOF
        bufferSourcePos = bufferLen = res;
        streamPos += res;
      }
      return inBuffer[0 .. lineSize];
    }
  } // template TreadLine(T)

  // Uses the buffered line reader unless pushed-back (unget) characters
  // are pending, in which case the base class implementation is used.
  override char[] readLine(char[] inBuffer) {
    if (ungetAvailable())
      return super.readLine(inBuffer);
    else
      return TreadLine!(char).readLine(inBuffer);
  }
  alias readLine = Stream.readLine;

  // wchar counterpart of readLine
  override wchar[] readLineW(wchar[] inBuffer) {
    if (ungetAvailable())
      return super.readLineW(inBuffer);
    else
      return TreadLine!(wchar).readLine(inBuffer);
  }
  alias readLineW = Stream.readLineW;

  // Writes buffered modifications to the source (when the stream is
  // writeable and the buffer is dirty), moves the source position to the
  // logical position of this stream (when seekable) and empties the buffer.
  // Throws WriteException if the buffer cannot be written completely.
  override void flush()
  out {
    assert(bufferCurPos == 0);
    assert(bufferSourcePos == 0);
    assert(bufferLen == 0);
  }
  body {
    if (writeable && bufferDirty) {
      if (bufferSourcePos != 0 && seekable) {
        // move actual file pointer to front of buffer
        // (convert to a signed type *before* negating: negating the
        // unsigned size_t and widening it afterwards would produce a
        // large positive offset where size_t is narrower than long)
        streamPos = s.seek(-cast(long)bufferSourcePos, SeekPos.Current);
      }
      // write buffer out
      bufferSourcePos = s.writeBlock(buffer.ptr, bufferLen);
      if (bufferSourcePos != bufferLen) {
        throw new WriteException("Unable to write to stream");
      }
    }
    super.flush();
    long diff = cast(long)bufferCurPos-bufferSourcePos;
    if (diff != 0 && seekable) {
      // move actual file pointer to current position
      streamPos = s.seek(diff, SeekPos.Current);
    }
    // reset buffer data to be empty
    bufferSourcePos = bufferCurPos = bufferLen = 0;
    bufferDirty = false;
  }

  // returns true if end of stream is reached, false otherwise
  override @property bool eof() {
    if ((buffer.length == 0) || !readable) {
      return super.eof;
    }
    // some simple tests to avoid flushing
    if (ungetAvailable() || bufferCurPos != bufferLen)
      return false;
    if (bufferLen == buffer.length)
      flush();
    // try to pull more data into the free tail of the buffer; the read
    // updates readEOF, which is the answer
    size_t res = super.readBlock(&buffer[bufferLen],buffer.length-bufferLen);
    bufferSourcePos +=  res;
    bufferLen += res;
    streamPos += res;
    return readEOF;
  }

  // returns size of stream
  // (pending writes are flushed first so the source reports them)
  override @property ulong size() {
    if (bufferDirty) flush();
    return s.size;
  }

  // returns estimated number of bytes available for immediate reading
  // (the unread part of the buffer)
  override @property size_t available() {
    return bufferLen - bufferCurPos;
  }
}
/// An exception for File errors; base class of OpenException.
class StreamFileException: StreamException {
/// Construct a StreamFileException with given error message.
this(string msg) { super(msg); }
}
/// An exception for errors during File.open.
class OpenException: StreamFileException {
/// Construct an OpenException with given error message.
this(string msg) { super(msg); }
}
/// Specifies the $(LREF File) access mode used when opening the file.
/// The values are bit flags: OutNew and Append both include the Out bit.
enum FileMode {
In = 1, /// Opens the file for reading.
Out = 2, /// Opens the file for writing, creating it if it doesn't exist.
OutNew = 6, /// Opens the file for writing and resets its length to zero; creates the file if it doesn't exist.
Append = 10 /// Opens the file for writing, appending new data to the end of the file.
}
version (Windows) {
private import core.sys.windows.windows;
extern (Windows) {
void FlushFileBuffers(HANDLE hFile);
DWORD GetFileType(HANDLE hFile);
}
}
version (Posix) {
private import core.sys.posix.fcntl;
private import core.sys.posix.unistd;
alias HANDLE = int;
}
/// This subclass is for unbuffered file system streams.
class File: Stream {

  // OS file handle: null (Windows) / -1 (Posix) while no file is attached
  version (Windows) {
    private HANDLE hFile;
  }
  version (Posix) {
    private HANDLE hFile = -1;
  }

  /// Create the stream with no open file.
  this() {
    super();
    version (Windows) {
      hFile = null;
    }
    version (Posix) {
      hFile = -1;
    }
    isopen = false;
  }

  // opens existing handle; use with care!
  // The readable/writeable flags come from mode; seekability is probed
  // on the handle itself.
  this(HANDLE hFile, FileMode mode) {
    super();
    this.hFile = hFile;
    readable = cast(bool)(mode & FileMode.In);
    writeable = cast(bool)(mode & FileMode.Out);
    version(Windows) {
      seekable = GetFileType(hFile) == 1; // FILE_TYPE_DISK
    } else {
      // NOTE(review): whence 0 is presumably SEEK_SET, so this probe also
      // moves a seekable handle to offset 0 -- confirm that is intended.
      auto result = lseek(hFile, 0, 0);
      seekable = (result != ~0);
    }
  }

  /***
   * Create the stream with no open file, an open file in read mode, or an open
   * file with explicit file mode.
   * mode, if given, is a combination of FileMode.In
   * (indicating a file that can be read) and FileMode.Out (indicating a file
   * that can be written).
   * Opening a file for reading that doesn't exist will error.
   * Opening a file for writing that doesn't exist will create the file.
   * The FileMode.OutNew mode will open the file for writing and reset the
   * length to zero.
   * The FileMode.Append mode will open the file for writing and move the
   * file position to the end of the file.
   */
  this(string filename, FileMode mode = FileMode.In)
  {
    this();
    open(filename, mode);
  }


  /***
   * Open a file for the stream, in an identical manner to the constructors.
   * Any file that is currently open is closed first.
   * If an error occurs an OpenException is thrown and the stream is left
   * closed, with its readable, writeable and seekable flags cleared.
   */
  void open(string filename, FileMode mode = FileMode.In) {
    close();
    int access, share, createMode;
    parseMode(mode, access, share, createMode);
    seekable = true;
    readable = cast(bool)(mode & FileMode.In);
    writeable = cast(bool)(mode & FileMode.Out);
    version (Windows) {
      hFile = CreateFileW(filename.tempCString!wchar(), access, share,
                          null, createMode, 0, null);
      isopen = hFile != INVALID_HANDLE_VALUE;
    }
    version (Posix) {
      hFile = core.sys.posix.fcntl.open(filename.tempCString(), access | createMode, share);
      isopen = hFile != -1;
    }
    if (!isopen) {
      // do not advertise capabilities on a stream that failed to open
      readable = writeable = seekable = false;
      throw new OpenException(cast(string) ("Cannot open or create file '"
                                            ~ filename ~ "'"));
    }
    if ((mode & FileMode.Append) == FileMode.Append)
      seekEnd(0);
  }

  // Translates a FileMode combination into the platform specific values
  // used by open():
  //   access     - read/write access requested
  //   share      - sharing flags (Windows) / permission bits for newly
  //                created files (Posix)
  //   createMode - creation disposition (Windows) / creation flags (Posix)
  private void parseMode(int mode,
                         out int access,
                         out int share,
                         out int createMode) {
    version (Windows) {
      share |= FILE_SHARE_READ | FILE_SHARE_WRITE;
      if (mode & FileMode.In) {
        access |= GENERIC_READ;
        createMode = OPEN_EXISTING;
      }
      if (mode & FileMode.Out) {
        access |= GENERIC_WRITE;
        createMode = OPEN_ALWAYS; // will create if not present
      }
      if ((mode & FileMode.OutNew) == FileMode.OutNew) {
        createMode = CREATE_ALWAYS; // resets file
      }
    }
    version (Posix) {
      share = octal!666;
      immutable bool wantRead = (mode & FileMode.In) != 0;
      immutable bool wantWrite = (mode & FileMode.Out) != 0;
      // O_RDONLY, O_WRONLY and O_RDWR are distinct values rather than
      // combinable bit flags, so the access mode is selected from the
      // requested directions instead of being or-ed together.
      if (wantWrite) {
        createMode = O_CREAT; // will create if not present
        access = wantRead ? O_RDWR : O_WRONLY;
      } else {
        access = O_RDONLY;
      }
      if ((mode & FileMode.OutNew) == FileMode.OutNew) {
        access |= O_TRUNC; // resets file
      }
    }
  }

  /// Create a file for writing.
  void create(string filename) {
    create(filename, FileMode.OutNew);
  }

  /// ditto
  void create(string filename, FileMode mode) {
    close();
    open(filename, mode | FileMode.OutNew);
  }

  /// Close the current file if it is open; otherwise it does nothing.
  override void close() {
    if (isopen) {
      super.close();
      if (hFile) {
        version (Windows) {
          CloseHandle(hFile);
          hFile = null;
        } else version (Posix) {
          core.sys.posix.unistd.close(hFile);
          hFile = -1;
        }
      }
    }
  }

  // destructor, closes file if still opened
  ~this() { close(); }

  version (Windows) {
    // returns size of stream
    override @property ulong size() {
      assertSeekable();
      uint sizehi;
      uint sizelow = GetFileSize(hFile,&sizehi);
      return (cast(ulong)sizehi << 32) + sizelow;
    }
  }

  // Reads up to size bytes from the file into buffer and returns the
  // number of bytes read. On Posix a failed read is reported as 0 bytes.
  // A result of 0 marks end of file.
  override size_t readBlock(void* buffer, size_t size) {
    assertReadable();
    version (Windows) {
      auto dwSize = to!DWORD(size);
      ReadFile(hFile, buffer, dwSize, &dwSize, null);
      size = dwSize;
    } else version (Posix) {
      size = core.sys.posix.unistd.read(hFile, buffer, size);
      if (size == -1)
        size = 0;
    }
    readEOF = (size == 0);
    return size;
  }

  // Writes up to size bytes from buffer to the file and returns the
  // number of bytes written. On Posix a failed write is reported as 0.
  override size_t writeBlock(const void* buffer, size_t size) {
    assertWriteable();
    version (Windows) {
      auto dwSize = to!DWORD(size);
      WriteFile(hFile, buffer, dwSize, &dwSize, null);
      size = dwSize;
    } else version (Posix) {
      size = core.sys.posix.unistd.write(hFile, buffer, size);
      if (size == -1)
        size = 0;
    }
    return size;
  }

  // Moves the file pointer and returns the new absolute position.
  // Throws SeekException if the pointer cannot be moved.
  override ulong seek(long offset, SeekPos rel) {
    assertSeekable();
    version (Windows) {
      int hi = cast(int)(offset>>32);
      uint low = SetFilePointer(hFile, cast(int)offset, &hi, rel);
      if ((low == INVALID_SET_FILE_POINTER) && (GetLastError() != 0))
        throw new SeekException("unable to move file pointer");
      ulong result = (cast(ulong)hi << 32) + low;
    } else version (Posix) {
      auto result = lseek(hFile, cast(off_t)offset, rel);
      if (result == cast(typeof(result))-1)
        throw new SeekException("unable to move file pointer");
    }
    readEOF = false;
    return cast(ulong)result;
  }

  /***
   * For a seekable file returns the difference of the size and position and
   * otherwise returns 0.
   */
  override @property size_t available() {
    if (seekable) {
      ulong lavail = size - position;
      if (lavail > size_t.max) lavail = size_t.max;
      return cast(size_t)lavail;
    }
    return 0;
  }

  // OS-specific property, just in case somebody wants
  // to mess with underlying API
  HANDLE handle() { return hFile; }

  // run a few tests
  unittest {
    File file = new File;
    int i = 666;
    auto stream_file = undead.internal.file.deleteme ~ "-stream.$$$";
    file.create(stream_file);
    // should be ok to write
    assert(file.writeable);
    file.writeLine("Testing stream.d:");
    file.writeString("Hello, world!");
    file.write(i);
    // string#1 + string#2 + int should give exactly that
    version (Windows)
      assert(file.position == 19 + 13 + 4);
    version (Posix)
      assert(file.position == 18 + 13 + 4);
    // we must be at the end of file
    assert(file.eof);
    file.close();
    // no operations are allowed when file is closed
    assert(!file.readable && !file.writeable && !file.seekable);
    file.open(stream_file);
    // should be ok to read
    assert(file.readable);
    assert(file.available == file.size);
    char[] line = file.readLine();
    char[] exp = "Testing stream.d:".dup;
    assert(line[0] == 'T');
    assert(line.length == exp.length);
    assert(!std.algorithm.cmp(line, "Testing stream.d:"));
    // jump over "Hello, "
    file.seek(7, SeekPos.Current);
    version (Windows)
      assert(file.position == 19 + 7);
    version (Posix)
      assert(file.position == 18 + 7);
    assert(!std.algorithm.cmp(file.readString(6), "world!"));
    i = 0; file.read(i);
    assert(i == 666);
    // string#1 + string#2 + int should give exactly that
    version (Windows)
      assert(file.position == 19 + 13 + 4);
    version (Posix)
      assert(file.position == 18 + 13 + 4);
    // we must be at the end of file
    assert(file.eof);
    file.close();
    file.open(stream_file,FileMode.OutNew | FileMode.In);
    file.writeLine("Testing stream.d:");
    file.writeLine("Another line");
    file.writeLine("");
    file.writeLine("That was blank");
    file.position = 0;
    char[][] lines;
    foreach(char[] line; file) {
      lines ~= line.dup;
    }
    assert( lines.length == 4 );
    assert( lines[0] == "Testing stream.d:");
    assert( lines[1] == "Another line");
    assert( lines[2] == "");
    assert( lines[3] == "That was blank");
    file.position = 0;
    lines = new char[][4];
    foreach(ulong n, char[] line; file) {
      lines[cast(size_t)(n-1)] = line.dup;
    }
    assert( lines[0] == "Testing stream.d:");
    assert( lines[1] == "Another line");
    assert( lines[2] == "");
    assert( lines[3] == "That was blank");
    file.close();
    std.file.remove(stream_file);
  }
}
/***
* This subclass is for buffered file system streams.
*
* It is a convenience class for wrapping a File in a BufferedStream.
* A buffered stream must be closed explicitly to ensure the final buffer
* content is written to the file.
*/
class BufferedFile: BufferedStream {
/// wraps a new File that has no file opened yet; call open or create next
this() { super(new File()); }
/// opens file in requested mode and buffer size
this(string filename, FileMode mode = FileMode.In,
size_t bufferSize = DefaultBufferSize) {
super(new File(filename,mode),bufferSize);
}
/// wraps the given File object with requested buffer size
this(File file, size_t bufferSize = DefaultBufferSize) {
super(file,bufferSize);
}
/// opens existing handle; use with care!
this(HANDLE hFile, FileMode mode, size_t buffersize = DefaultBufferSize) {
super(new File(hFile,mode),buffersize);
}
/// opens file in requested mode
/// (the wrapped File is reopened, then this stream's flags and buffer
/// state are reset from it)
void open(string filename, FileMode mode = FileMode.In) {
File sf = cast(File)s;
sf.open(filename,mode);
resetSource();
}
/// creates file in requested mode
/// (delegates to File.create, then resets this stream's flags and buffer
/// state from the wrapped File)
void create(string filename, FileMode mode = FileMode.OutNew) {
File sf = cast(File)s;
sf.create(filename,mode);
resetSource();
}
// run a few tests same as File
unittest {
BufferedFile file = new BufferedFile;
int i = 666;
auto stream_file = undead.internal.file.deleteme ~ "-stream.$$$";
file.create(stream_file);
// should be ok to write
assert(file.writeable);
file.writeLine("Testing stream.d:");
file.writeString("Hello, world!");
file.write(i);
// string#1 + string#2 + int should give exactly that
version (Windows)
assert(file.position == 19 + 13 + 4);
version (Posix)
assert(file.position == 18 + 13 + 4);
// we must be at the end of file
assert(file.eof);
long oldsize = cast(long)file.size;
file.close();
// no operations are allowed when file is closed
assert(!file.readable && !file.writeable && !file.seekable);
file.open(stream_file);
// should be ok to read
assert(file.readable);
// test getc/ungetc and size
char c1 = file.getc();
file.ungetc(c1);
assert( file.size == oldsize );
assert(!std.algorithm.cmp(file.readLine(), "Testing stream.d:"));
// jump over "Hello, "
file.seek(7, SeekPos.Current);
version (Windows)
assert(file.position == 19 + 7);
version (Posix)
assert(file.position == 18 + 7);
assert(!std.algorithm.cmp(file.readString(6), "world!"));
i = 0; file.read(i);
assert(i == 666);
// string#1 + string#2 + int should give exactly that
version (Windows)
assert(file.position == 19 + 13 + 4);
version (Posix)
assert(file.position == 18 + 13 + 4);
// we must be at the end of file
assert(file.eof);
file.close();
std.file.remove(stream_file);
}
}
/// UTF byte-order-mark signatures.
/// The member values (0 .. 4) are used as indices into the BOMEndian and
/// ByteOrderMarks tables.
enum BOM {
UTF8, /// UTF-8
UTF16LE, /// UTF-16 Little Endian
UTF16BE, /// UTF-16 Big Endian
UTF32LE, /// UTF-32 Little Endian
UTF32BE, /// UTF-32 Big Endian
}
// number of entries in the two tables below (one per BOM member)
private enum int NBOMS = 5;
// Endianness associated with each BOM, indexed by BOM value.
// The UTF-8 entry uses the native byte order.
immutable Endian[NBOMS] BOMEndian =
[ std.system.endian,
Endian.littleEndian, Endian.bigEndian,
Endian.littleEndian, Endian.bigEndian
];
// Byte sequence of each BOM signature, indexed by BOM value.
immutable ubyte[][NBOMS] ByteOrderMarks =
[ [0xEF, 0xBB, 0xBF],
[0xFF, 0xFE],
[0xFE, 0xFF],
[0xFF, 0xFE, 0x00, 0x00],
[0x00, 0x00, 0xFE, 0xFF]
];
/***
* This subclass wraps a stream with big-endian or little-endian byte order
* swapping.
*
* UTF Byte-Order-Mark (BOM) signatures can be read and deduced or
* written.
* Note that an EndianStream should not be used as the source of another
* FilterStream since a FilterStream calls the source with byte-oriented
* read/write requests and the EndianStream will not perform any byte swapping.
* The EndianStream reads and writes binary data (non-getc functions) in a
* one-to-one
* manner with the source stream so the source stream's position and state will be
* kept in sync with the EndianStream if only non-getc functions are called.
*/
class EndianStream : FilterStream {
Endian endian; /// Endianness property of the source stream.
/***
* Create the endian stream for the source stream source with endianness end.
* The default endianness is the native byte order.
* The Endian type is defined
* in the std.system module.
*/
this(Stream source, Endian end = std.system.endian) {
super(source);
endian = end;
}
/***
* Return -1 if no BOM and otherwise read the BOM and return it.
*
* If there is no BOM or if bytes beyond the BOM are read then the bytes read
* are pushed back onto the ungetc buffer or ungetcw buffer.
* Pass ungetCharSize == 2 to use
* ungetcw instead of ungetc when no BOM is present.
*/
int readBOM(int ungetCharSize = 1) {
ubyte[4] BOM_buffer;
int n = 0; // the number of read bytes
int result = -1; // the last match or -1
for (int i=0; i < NBOMS; ++i) {
int j;
immutable ubyte[] bom = ByteOrderMarks[i];
for (j=0; j < bom.length; ++j) {
if (n <= j) { // have to read more
if (eof)
break;
readExact(&BOM_buffer[n++],1);
}
if (BOM_buffer[j] != bom[j])
break;
}
if (j == bom.length) // found a match
result = i;
}
ptrdiff_t m = 0;
if (result != -1) {
endian = BOMEndian[result]; // set stream endianness
m = ByteOrderMarks[result].length;
}
if ((ungetCharSize == 1 && result == -1) || (result == BOM.UTF8)) {
while (n-- > m)
ungetc(BOM_buffer[n]);
} else { // should eventually support unget for dchar as well
if (n & 1) // make sure we have an even number of bytes
readExact(&BOM_buffer[n++],1);
while (n > m) {
n -= 2;
wchar cw = *(cast(wchar*)&BOM_buffer[n]);
fixBO(&cw,2);
ungetcw(cw);
}
}
return result;
}
/***
* Correct the byte order of buffer to match native endianness.
* size must be even.
*/
final void fixBO(const(void)* buffer, size_t size) {
    // Reverses the `size` bytes at `buffer` in place when the stream's byte
    // order differs from the native one; otherwise does nothing.
    // Note: the parameter is const-qualified for caller convenience, but the
    // memory it points to IS modified.
    if (endian == std.system.endian)
        return;

    ubyte* bytes = cast(ubyte*)buffer;
    switch (size) {
        case 0:
            break;
        case 2: {
            // Fast path: swap the two bytes.
            ubyte x = bytes[0];
            bytes[0] = bytes[1];
            bytes[1] = x;
            break;
        }
        case 4: {
            // Fast path: single 32-bit byte swap.
            uint* word = cast(uint*)bytes;
            *word = bswap(*word);
            break;
        }
        default: {
            // General case: plain two-pointer byte reversal. The previous
            // word-at-a-time implementation computed the size of the
            // unswapped middle section in unsigned arithmetic, which wrapped
            // around (or never shrank) for sizes such as 1, 3, 6 or 14 and
            // then recursed without bound. Reversing byte by byte gives the
            // same result for every size that used to work (8, 10, 12,
            // 16, ...) and is well defined for all other sizes.
            ubyte* lo = bytes;
            ubyte* hi = bytes + size - 1;
            while (lo < hi) {
                ubyte x = *lo;
                *lo = *hi;
                *hi = x;
                ++lo;
                --hi;
            }
            break;
        }
    }
}
/***
* Correct the byte order of the given buffer in blocks of the given size and
* repeated the given number of times.
* size must be even.
*/
final void fixBlockBO(void* buffer, uint size, size_t repeat) {
    // Treat the buffer as `repeat` consecutive blocks of `size` bytes and
    // fix the byte order of each block independently.
    for (size_t block = 0; block < repeat; ++block) {
        fixBO(buffer + block * size, size);
    }
}
// Single-byte values are read verbatim; no byte-order fix is applied.
override void read(out byte x) {
    readExact(&x, x.sizeof);
}
override void read(out ubyte x) {
    readExact(&x, x.sizeof);
}

// Multi-byte scalars: read the raw bytes, then pass them through fixBO.
override void read(out short x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out ushort x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out int x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out uint x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out long x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out ulong x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out float x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out double x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out real x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out ifloat x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out idouble x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out ireal x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}

// Complex values are fixed as two blocks, each the size of the matching
// real-valued type.
override void read(out cfloat x) {
    readExact(&x, x.sizeof);
    fixBlockBO(&x, float.sizeof, 2);
}
override void read(out cdouble x) {
    readExact(&x, x.sizeof);
    fixBlockBO(&x, double.sizeof, 2);
}
override void read(out creal x) {
    readExact(&x, x.sizeof);
    fixBlockBO(&x, real.sizeof, 2);
}

// Character types: char is read verbatim, wchar and dchar go through fixBO.
override void read(out char x) {
    readExact(&x, x.sizeof);
}
override void read(out wchar x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override void read(out dchar x) {
    readExact(&x, x.sizeof);
    fixBO(&x, x.sizeof);
}
override wchar getcw() {
    // Returns the next 16-bit character, taking it from the unget buffer
    // when one is pending and otherwise reading it from the stream and
    // passing it through fixBO.
    wchar c;
    if (prevCr) {
        // prevCr is pending: fetch the next character and drop it only if
        // it is a line feed.
        prevCr = false;
        c = getcw();
        if (c != '\n')
            return c;
    }
    if (unget.length <= 1) {
        // Nothing has been pushed back, so read two bytes. A read that
        // yields a single byte is retried once for the second byte.
        void* raw = &c;
        const got = readBlock(raw, 2);
        if (got == 1 && readBlock(raw + 1, 1) == 0)
            throw new ReadException("not enough data in stream");
        fixBO(&c, c.sizeof);
        return c;
    }
    // Pop the most recently pushed-back character.
    c = unget[$ - 1];
    unget.length = unget.length - 1;
    return c;
}
override wchar[] readStringW(size_t length) {
    // Read exactly `length` 16-bit units into a fresh array, then fix the
    // byte order of every unit.
    auto units = new wchar[length];
    readExact(units.ptr, units.length * wchar.sizeof);
    fixBlockBO(units.ptr, wchar.sizeof, units.length);
    return units;
}
/// Write the specified BOM b to the source stream.
void writeBOM(BOM b) {
    // Look up the byte sequence for the requested mark and emit it as-is.
    immutable ubyte[] mark = ByteOrderMarks[b];
    writeBlock(mark.ptr, mark.length);
}
// Single-byte values are written verbatim; no byte-order fix is applied.
override void write(byte x) {
    writeExact(&x, x.sizeof);
}
override void write(ubyte x) {
    writeExact(&x, x.sizeof);
}

// Multi-byte scalars: pass the local copy through fixBO, then write it.
override void write(short x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(ushort x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(int x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(uint x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(long x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(ulong x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(float x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(double x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(real x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(ifloat x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(idouble x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(ireal x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}

// Complex values are fixed as two blocks, each the size of the matching
// real-valued type.
override void write(cfloat x) {
    fixBlockBO(&x, float.sizeof, 2);
    writeExact(&x, x.sizeof);
}
override void write(cdouble x) {
    fixBlockBO(&x, double.sizeof, 2);
    writeExact(&x, x.sizeof);
}
override void write(creal x) {
    fixBlockBO(&x, real.sizeof, 2);
    writeExact(&x, x.sizeof);
}

// Character types: char is written verbatim, wchar and dchar go through fixBO.
override void write(char x) {
    writeExact(&x, x.sizeof);
}
override void write(wchar x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void write(dchar x) {
    fixBO(&x, x.sizeof);
    writeExact(&x, x.sizeof);
}
override void writeStringW(const(wchar)[] str) {
    // Write the string one 16-bit unit at a time: each unit is copied to a
    // local, passed through fixBO, and written to the source stream.
    for (size_t i = 0; i < str.length; ++i) {
        wchar unit = str[i];
        fixBO(&unit, 2);
        s.writeExact(&unit, 2);
    }
}
override @property bool eof() {
    // End of stream only when the source reports eof AND nothing is
    // waiting in the unget buffer.
    if (!s.eof)
        return false;
    return !ungetAvailable();
}
override @property ulong size() {
    // The size is whatever the source stream reports.
    return s.size;
}
unittest {
// Exercises EndianStream over a MemoryStream: integer writes in both byte
// orders, fixBO on a 12-byte buffer, and BOM writing/reading.
MemoryStream m;
m = new MemoryStream ();
EndianStream em = new EndianStream(m,Endian.bigEndian);
// Big-endian writes: most significant byte first.
uint x = 0x11223344;
em.write(x);
assert( m.data[0] == 0x11 );
assert( m.data[1] == 0x22 );
assert( m.data[2] == 0x33 );
assert( m.data[3] == 0x44 );
em.position = 0;
ushort x2 = 0x5566;
em.write(x2);
assert( m.data[0] == 0x55 );
assert( m.data[1] == 0x66 );
em.position = 0;
// fixBO on a 12-byte buffer; the reversal is only checked on hosts whose
// native order differs from the stream's (big-endian here).
static ubyte[12] x3 = [1,2,3,4,5,6,7,8,9,10,11,12];
em.fixBO(x3.ptr,12);
if (std.system.endian == Endian.littleEndian) {
assert( x3[0] == 12 );
assert( x3[1] == 11 );
assert( x3[2] == 10 );
assert( x3[4] == 8 );
assert( x3[5] == 7 );
assert( x3[6] == 6 );
assert( x3[8] == 4 );
assert( x3[9] == 3 );
assert( x3[10] == 2 );
assert( x3[11] == 1 );
}
// Little-endian writes: least significant byte first.
em.endian = Endian.littleEndian;
em.write(x);
assert( m.data[0] == 0x44 );
assert( m.data[1] == 0x33 );
assert( m.data[2] == 0x22 );
assert( m.data[3] == 0x11 );
em.position = 0;
em.write(x2);
assert( m.data[0] == 0x66 );
assert( m.data[1] == 0x55 );
em.position = 0;
// Same fixBO check with the stream set to little-endian.
em.fixBO(x3.ptr,12);
if (std.system.endian == Endian.bigEndian) {
assert( x3[0] == 12 );
assert( x3[1] == 11 );
assert( x3[2] == 10 );
assert( x3[4] == 8 );
assert( x3[5] == 7 );
assert( x3[6] == 6 );
assert( x3[8] == 4 );
assert( x3[9] == 3 );
assert( x3[10] == 2 );
assert( x3[11] == 1 );
}
// UTF-8 BOM round trip: write it, then read it back and continue with text.
em.writeBOM(BOM.UTF8);
assert( m.position == 3 );
assert( m.data[0] == 0xEF );
assert( m.data[1] == 0xBB );
assert( m.data[2] == 0xBF );
em.writeString ("Hello, world");
em.position = 0;
assert( m.position == 0 );
assert( em.readBOM() == BOM.UTF8 );
assert( m.position == 3 );
assert( em.getc() == 'H' );
em.position = 0;
// UTF-16 BOM byte sequences.
em.writeBOM(BOM.UTF16BE);
assert( m.data[0] == 0xFE );
assert( m.data[1] == 0xFF );
em.position = 0;
em.writeBOM(BOM.UTF16LE);
assert( m.data[0] == 0xFF );
assert( m.data[1] == 0xFE );
em.position = 0;
// No BOM present: readBOM returns -1 and the probed bytes remain readable.
em.writeString ("Hello, world");
em.position = 0;
assert( em.readBOM() == -1 );
assert( em.getc() == 'H' );
assert( em.getc() == 'e' );
assert( em.getc() == 'l' );
assert( em.getc() == 'l' );
em.position = 0;
}
}
/***
* Parameterized subclass that wraps an array-like buffer with a stream
* interface.
*
* The type Buffer must support the length property, opIndex and opSlice.
* Compile in release mode when directly instantiating a TArrayStream to avoid
* link errors.
*/
class TArrayStream(Buffer): Stream {
    Buffer buf; // backing storage
    ulong len;  // number of valid data bytes in buf
    ulong cur;  // current read/write position

    /// Create the stream for the buffer buf. Non-copying.
    this(Buffer buf) {
        super ();
        this.buf = buf;
        this.len = buf.length;
        readable = true;
        writeable = true;
        seekable = true;
    }

    // Guards the relationship cur <= len <= buf.length, including for
    // subclasses.
    invariant() {
        assert(len <= buf.length);
        assert(cur <= len);
    }

    /// Copy up to size bytes from the current position into buffer and
    /// advance the position. Returns the number of bytes copied.
    override size_t readBlock(void* buffer, size_t size) {
        assertReadable();
        // Never read past the end of the valid data.
        const remaining = len - cur;
        if (remaining < size)
            size = cast(size_t)remaining;
        const from = cast(size_t)cur;
        ubyte[] src = cast(ubyte[])buf[from .. from + size];
        (cast(ubyte*)buffer)[0 .. size] = src[];
        cur += size;
        return size;
    }

    /// Copy up to size bytes from buffer to the current position, clipped to
    /// the buffer's capacity, and advance the position. Returns the number
    /// of bytes copied.
    override size_t writeBlock(const void* buffer, size_t size) {
        assertWriteable();
        // Clip the write so that it stays inside the backing buffer.
        const ulong capacity = buf.length;
        if (cur + size > capacity)
            size = cast(size_t)(capacity - cur);
        const from = cast(size_t)cur;
        ubyte[] dst = cast(ubyte[])buf[from .. from + size];
        dst[] = (cast(const(ubyte)*)buffer)[0 .. size];
        cur += size;
        // Writing past the old end of data extends the data length.
        if (len < cur)
            len = cur;
        return size;
    }

    /// Move the position; the result is clamped to the range [0, len].
    override ulong seek(long offset, SeekPos rel) {
        assertSeekable();
        long target; // signed so that a negative result can be clamped to 0
        switch (rel) {
            case SeekPos.Set:
                target = offset;
                break;
            case SeekPos.Current:
                target = cast(long)(cur + offset);
                break;
            case SeekPos.End:
                target = cast(long)(len + offset);
                break;
            default:
                assert(0);
        }
        cur = (target < 0) ? 0
            : (target > len) ? len
            : cast(ulong)target;
        return cur;
    }

    /// Number of data bytes between the current position and the end.
    override @property size_t available () {
        return cast(size_t)(len - cur);
    }

    /// Get the current memory data in total.
    @property ubyte[] data() {
        if (len > size_t.max)
            throw new StreamException("Stream too big");
        const(void)[] whole = buf[0 .. cast(size_t)len];
        return cast(ubyte[])whole;
    }

    override string toString() {
        // assume data is UTF8
        return to!(string)(cast(char[])data);
    }
}
/* Test the TArrayStream */
unittest {
// Wraps a fixed 100-char array: the stream size stays at 100 and writes
// land directly in buf.
char[100] buf;
TArrayStream!(char[]) m;
m = new TArrayStream!(char[]) (buf);
assert (m.isOpen);
m.writeString ("Hello, world");
assert (m.position == 12);
assert (m.available == 88);
// Seeking relative to start, current position and end.
assert (m.seekSet (0) == 0);
assert (m.available == 100);
assert (m.seekCur (4) == 4);
assert (m.available == 96);
assert (m.seekEnd (-8) == 92);
assert (m.available == 8);
assert (m.size == 100);
// Read four characters, then overwrite the next two in place.
assert (m.seekSet (4) == 4);
assert (m.readString (4) == "o, w");
m.writeString ("ie");
assert (buf[0..12] == "Hello, wield");
assert (m.position == 10);
assert (m.available == 90);
assert (m.size == 100);
// Formatted output also goes straight into the wrapped array.
m.seekSet (0);
assert (m.printf ("Answer is %d", 42) == 12);
assert (buf[0..12] == "Answer is 42");
}
/// This subclass reads and constructs an array of bytes in memory.
class MemoryStream: TArrayStream!(ubyte[]) {
    /// Create the output buffer and setup for reading, writing, and seeking.
    // clear to an empty buffer.
    this() { this(cast(ubyte[]) null); }

    /***
     * Create the output buffer and setup for reading, writing, and seeking.
     * Load it with specific input data.
     */
    this(ubyte[] buf) { super (buf); }
    this(byte[] buf) { this(cast(ubyte[]) buf); } /// ditto
    this(char[] buf) { this(cast(ubyte[]) buf); } /// ditto

    /// Ensure the stream can write count extra bytes from cursor position without an allocation.
    void reserve(size_t count) {
        // Grow to twice the required size so that repeated small writes do
        // not reallocate every time. The new length is computed as size_t:
        // the previous cast(uint) silently truncated it on 64-bit targets
        // once the buffer approached 4 GiB, leaving the buffer too small
        // for the write that follows.
        if (cur + count > buf.length)
            buf.length = cast(size_t)((cur + count) * 2);
    }

    /// Write size bytes from buffer at the cursor, growing the backing
    /// array first so the base-class write is never clipped.
    override size_t writeBlock(const void* buffer, size_t size) {
        reserve(size);
        return super.writeBlock(buffer,size);
    }

    unittest {
        MemoryStream m;
        m = new MemoryStream ();
        assert (m.isOpen);
        m.writeString ("Hello, world");
        assert (m.position == 12);
        assert (m.seekSet (0) == 0);
        assert (m.available == 12);
        assert (m.seekCur (4) == 4);
        assert (m.available == 8);
        assert (m.seekEnd (-8) == 4);
        assert (m.available == 8);
        assert (m.size == 12);
        assert (m.readString (4) == "o, w");
        m.writeString ("ie");
        assert (cast(char[]) m.data == "Hello, wield");
        m.seekEnd (0);
        m.writeString ("Foo");
        assert (m.position == 15);
        assert (m.available == 0);
        m.writeString ("Foo foo foo foo foo foo foo");
        assert (m.position == 42);
        m.position = 0;
        assert (m.available == 42);
        m.writef("%d %d %s",100,345,"hello");
        auto str = m.toString();
        assert (str[0..13] == "100 345 hello", str[0 .. 13]);
        assert (m.available == 29);
        assert (m.position == 13);
        MemoryStream m2;
        m.position = 3;
        m2 = new MemoryStream ();
        m2.writeString("before");
        m2.copyFrom(m,10);
        str = m2.toString();
        assert (str[0..16] == "before 345 hello");
        m2.position = 3;
        m2.copyFrom(m);
        auto str2 = m.toString();
        str = m2.toString();
        assert (str == ("bef" ~ str2));
    }
}
import std.mmfile;
/***
* This subclass wraps a memory-mapped file with the stream API.
* See std.mmfile module.
*/
class MmFileStream : TArrayStream!(MmFile) {
    /// Create stream wrapper for file.
    this(MmFile file) {
        super (file);
        // The stream is writeable only when the mapping was opened in a
        // mode above plain read.
        writeable = file.mode() > MmFile.Mode.read;
    }

    /// Flush the stream and then the underlying mapping; no-op when closed.
    override void flush() {
        if (!isopen)
            return;
        super.flush();
        buf.flush();
    }

    /// Close the stream, destroy the mapping and drop the reference to it;
    /// no-op when already closed.
    override void close() {
        if (!isopen)
            return;
        super.close();
        buf.destroy();
        buf = null;
    }
}
unittest {
// Round trip through a 100-byte memory-mapped scratch file: write via a
// read/write mapping, reopen with the default mode, verify, then delete it.
auto test_file = undead.internal.file.deleteme ~ "-testing.txt";
MmFile mf = new MmFile(test_file,MmFile.Mode.readWriteNew,100,null);
MmFileStream m;
m = new MmFileStream (mf);
m.writeString ("Hello, world");
assert (m.position == 12);
assert (m.seekSet (0) == 0);
assert (m.seekCur (4) == 4);
assert (m.seekEnd (-8) == 92);
assert (m.size == 100);
assert (m.seekSet (4));
assert (m.readString (4) == "o, w");
m.writeString ("ie");
ubyte[] dd = m.data;
assert ((cast(char[]) dd)[0 .. 12] == "Hello, wield");
m.position = 12;
m.writeString ("Foo");
assert (m.position == 15);
m.writeString ("Foo foo foo foo foo foo foo");
assert (m.position == 42);
m.close();
// Reopen the same file; the resulting stream must not be writeable.
mf = new MmFile(test_file);
m = new MmFileStream (mf);
assert (!m.writeable);
char[] str = m.readString(12);
assert (str == "Hello, wield");
m.close();
std.file.remove(test_file);
}
/***
* This subclass slices off a portion of another stream, making seeking relative
* to the boundaries of the slice.
*
* It could be used to section a large file into a
* set of smaller files, such as with tar archives. Reading and writing a
* SliceStream does not modify the position of the source stream if it is
* seekable.
*/
class SliceStream : FilterStream {
    private {
        ulong pos;    // our position relative to low
        ulong low;    // low stream offset.
        ulong high;   // high stream offset.
        bool bounded; // upper-bounded by high.
    }

    /***
     * Indicate both the source stream to use for reading from and the low part of
     * the slice.
     *
     * The high part of the slice is dependent upon the end of the source
     * stream, so that if you write beyond the end it resizes the stream normally.
     */
    this (Stream s, ulong low)
    in {
        assert (low <= s.size);
    }
    body {
        super(s);
        this.low = low;
        this.high = 0;
        this.bounded = false;
    }

    /***
     * Indicate the high index as well.
     *
     * Attempting to read or write past the high
     * index results in the end being clipped off.
     */
    this (Stream s, ulong low, ulong high)
    in {
        assert (low <= high);
        assert (high <= s.size);
    }
    body {
        super(s);
        this.low = low;
        this.high = high;
        this.bounded = true;
    }

    invariant() {
        if (bounded)
            assert (pos <= high - low);
        else
            // size() does not appear to be const, though it should be
            assert (pos <= (cast()s).size - low);
    }

    /// Read up to size bytes from the slice into buffer, clipped to the
    /// upper bound when the slice is bounded. Returns the bytes read.
    override size_t readBlock (void *buffer, size_t size) {
        assertReadable();
        if (bounded && size > high - low - pos)
            size = cast(size_t)(high - low - pos);
        // Only query and restore the source position when seeking is
        // possible. Querying it unconditionally made every read on a
        // non-seekable source fail inside the position lookup, even though
        // the repositioning itself was already guarded by `seekable`.
        if (!seekable)
            return super.readBlock(buffer, size);
        ulong bp = s.position;      // remember where the source was
        s.position = low + pos;     // jump to our place inside the slice
        size_t ret = super.readBlock(buffer, size);
        pos = s.position - low;     // track how far the read advanced
        s.position = bp;            // leave the source where we found it
        return ret;
    }

    /// Write up to size bytes from buffer into the slice, clipped to the
    /// upper bound when the slice is bounded. Returns the bytes written.
    override size_t writeBlock (const void *buffer, size_t size) {
        assertWriteable();
        if (bounded && size > high - low - pos)
            size = cast(size_t)(high - low - pos);
        // See readBlock: the source position is only touched when seekable.
        if (!seekable)
            return s.writeBlock(buffer, size);
        ulong bp = s.position;
        s.position = low + pos;
        size_t ret = s.writeBlock(buffer, size);
        pos = s.position - low;
        s.position = bp;
        return ret;
    }

    /// Seek within the slice; the resulting position is clamped to the
    /// slice boundaries and returned relative to low.
    override ulong seek(long offset, SeekPos rel) {
        assertSeekable();
        long spos;
        switch (rel) {
            case SeekPos.Set:
                spos = offset;
                break;
            case SeekPos.Current:
                spos = cast(long)(pos + offset);
                break;
            case SeekPos.End:
                if (bounded)
                    spos = cast(long)(high - low + offset);
                else
                    spos = cast(long)(s.size - low + offset);
                break;
            default:
                assert(0);
        }
        if (spos < 0)
            pos = 0;
        else if (bounded && spos > high - low)
            pos = high - low;
        else if (!bounded && spos > s.size - low)
            pos = s.size - low;
        else
            pos = cast(ulong)spos;
        readEOF = false;
        return pos;
    }

    /// Bytes available from the slice position, derived from what the
    /// source reports at its own position; 0 when the slice position lies
    /// outside that window.
    override @property size_t available() {
        size_t res = s.available;
        ulong bp = s.position;
        if (bp <= pos+low && pos+low <= bp+res) {
            if (!bounded || bp+res <= high)
                return cast(size_t)(bp + res - pos - low);
            else if (high <= bp+res)
                return cast(size_t)(high - pos - low);
        }
        return 0;
    }

    unittest {
        MemoryStream m;
        SliceStream s;
        m = new MemoryStream ((cast(char[])"Hello, world").dup);
        s = new SliceStream (m, 4, 8);
        assert (s.size == 4);
        assert (m.position == 0);
        assert (s.position == 0);
        assert (m.available == 12);
        assert (s.available == 4);
        assert (s.writeBlock (cast(char *) "Vroom", 5) == 4);
        assert (m.position == 0);
        assert (s.position == 4);
        assert (m.available == 12);
        assert (s.available == 0);
        assert (s.seekEnd (-2) == 2);
        assert (s.available == 2);
        assert (s.seekEnd (2) == 4);
        assert (s.available == 0);
        assert (m.position == 0);
        assert (m.available == 12);
        m.seekEnd(0);
        m.writeString("\nBlaho");
        assert (m.position == 18);
        assert (m.available == 0);
        assert (s.position == 4);
        assert (s.available == 0);
        s = new SliceStream (m, 4);
        assert (s.size == 14);
        assert (s.toString () == "Vrooorld\nBlaho");
        s.seekEnd (0);
        assert (s.available == 0);
        s.writeString (", etcetera.");
        assert (s.position == 25);
        assert (s.seekSet (0) == 0);
        assert (s.size == 25);
        assert (m.position == 18);
        assert (m.size == 29);
        assert (m.toString() == "HellVrooorld\nBlaho, etcetera.");
    }
}
undeaD-1.0.10/src/undead/string.d 0000664 0000000 0000000 00000015765 13463741136 0016521 0 ustar 00root root 0000000 0000000 /**
* Contains the obsolete pattern matching functions from Phobos'
* `std.string`.
*/
module undead.string;
import std.traits;
/***********************************************
* See if character c is in the pattern.
* Patterns:
*
* A $(I pattern) is an array of characters much like a $(I character
* class) in regular expressions. A sequence of characters
* can be given, such as "abcde". The '-' can represent a range
* of characters, as "a-e" represents the same pattern as "abcde".
* "a-fA-F0-9" represents all the hex characters.
* If the first character of a pattern is '^', then the pattern
* is negated, i.e. "^0-9" means any character except a digit.
* The functions inPattern, $(B countchars), $(B removechars),
* and $(B squeeze) use patterns.
*
* Note: In the future, the pattern syntax may be improved
* to be more like regular expression character classes.
*/
bool inPattern(S)(dchar c, in S pattern) @safe pure @nogc
if (isSomeString!S)
{
    bool negated = false;      // pattern starts with '^'
    bool rangePending = false; // previous element was a range '-'
    dchar rangeStart;          // last literal seen; low end of a range

    foreach (size_t i, dchar p; pattern)
    {
        if (p == '^' && i == 0)
        {
            negated = true;
            // A pattern consisting solely of '^' matches the caret itself.
            if (pattern.length == 1)
                return (c == p); // or should this be an error?
        }
        else if (rangePending)
        {
            // p closes the range opened by the preceding '-'. The extra
            // c == p test lets the end point match even for a reversed
            // range such as "z-a".
            rangePending = false;
            if ((rangeStart <= c && c <= p) || c == p)
                return !negated;
        }
        else if (p == '-' && i > (negated ? 1u : 0u) && i + 1 < pattern.length)
        {
            // A '-' only denotes a range when it has a character on both
            // sides; the leading '^' does not count as the left side.
            rangePending = true;
            continue;
        }
        else if (c == p)
        {
            return !negated;
        }
        rangeStart = p;
    }
    // No element matched: negated patterns succeed, plain patterns fail.
    return negated;
}
// Checks inPattern at compile time and at run time: literals, negation with
// '^', ranges with '-', and '-' / '^' appearing in literal positions.
@safe pure @nogc unittest
{
assertCTFEable!(
{
assert(inPattern('x', "x") == 1);
assert(inPattern('x', "y") == 0);
assert(inPattern('x', string.init) == 0);
assert(inPattern('x', "^y") == 1);
assert(inPattern('x', "yxxy") == 1);
assert(inPattern('x', "^yxxy") == 0);
assert(inPattern('x', "^abcd") == 1);
assert(inPattern('^', "^^") == 0);
assert(inPattern('^', "^") == 1);
assert(inPattern('^', "a^") == 1);
assert(inPattern('x', "a-z") == 1);
assert(inPattern('x', "A-Z") == 0);
assert(inPattern('x', "^a-z") == 0);
assert(inPattern('x', "^A-Z") == 1);
assert(inPattern('-', "a-") == 1);
assert(inPattern('-', "^A-") == 0);
// Reversed range: only the two end points match.
assert(inPattern('a', "z-a") == 1);
assert(inPattern('z', "z-a") == 1);
assert(inPattern('x', "z-a") == 0);
});
}
/**
* See if character c is in the intersection of the patterns.
*/
bool inPattern(S)(dchar c, S[] patterns) @safe pure @nogc
if (isSomeString!S)
{
    // c must match every pattern. The loop variable's type is inferred from
    // S: declaring it as `string` made this overload fail to compile for
    // any other string type accepted by the constraint (wstring, dstring,
    // char[], ...).
    foreach (pattern; patterns)
    {
        if (!inPattern(c, pattern))
        {
            return false;
        }
    }
    // An empty pattern list imposes no restriction.
    return true;
}
/**
* Count characters in s that match pattern.
*/
size_t countchars(S, S1)(S s, in S1 pattern) @safe pure @nogc
if (isSomeString!S && isSomeString!S1)
{
    // Decode s one code point at a time and tally those that inPattern
    // accepts.
    size_t matches = 0;
    foreach (dchar ch; s)
    {
        if (inPattern(ch, pattern))
            ++matches;
    }
    return matches;
}
// countchars with a range pattern and with a plain character set, checked
// at compile time and at run time.
@safe pure @nogc unittest
{
assertCTFEable!(
{
assert(countchars("abc", "a-c") == 3);
assert(countchars("hello world", "or") == 3);
});
}
/**
* Return string that is s with all characters removed that match pattern.
*/
S removechars(S)(S s, in S pattern) @safe pure
if (isSomeString!S)
{
    import std.utf : encode;

    // `kept` is only allocated once the first matching character is found;
    // until then the input is untouched and can be returned as-is.
    Unqual!(typeof(s[0]))[] kept;
    bool copying = false;

    foreach (size_t i, dchar c; s)
    {
        if (!inPattern(c, pattern))
        {
            // Character survives: append it once we are building a copy.
            if (copying)
                encode(kept, c);
            continue;
        }
        // Character is removed. On the first removal, start the copy with
        // everything that preceded it.
        if (!copying)
        {
            copying = true;
            kept = s[0 .. i].dup;
        }
    }

    if (copying)
        return kept;
    return s;
}
// removechars with range and set patterns, checked at compile time and at
// run time.
@safe pure unittest
{
assertCTFEable!(
{
assert(removechars("abc", "a-c").length == 0);
assert(removechars("hello world", "or") == "hell wld");
assert(removechars("hello world", "d") == "hello worl");
assert(removechars("hah", "h") == "a");
});
}
// When nothing matches, the result equals the input.
@safe pure unittest
{
assert(removechars("abc", "x") == "abc");
}
/***************************************************
* Return string where sequences of a character in s[] from pattern[]
* are replaced with a single instance of that character.
* If pattern is null, it defaults to all characters.
*/
S squeeze(S)(S s, in S pattern = null)
{
    import std.utf : encode, stride;

    Unqual!(typeof(s[0]))[] r; // rebuilt string, allocated lazily
    dchar lastc;               // last squeezable character seen
    size_t lasti;              // end of the prefix of s that is still valid output
    int run;                   // non-zero while inside a run of squeezable chars
    bool changed;              // a duplicate has been dropped

    foreach (size_t i, dchar c; s)
    {
        // Repeat of the previous squeezable character: drop it.
        if (run && lastc == c)
        {
            changed = true;
            continue;
        }

        const bool squeezable = pattern is null || inPattern(c, pattern);
        run = squeezable ? 1 : 0;

        if (changed)
        {
            // Output has diverged from the input: make sure the copy exists
            // (seeded with the valid prefix) and append the character.
            if (r is null)
                r = s[0 .. lasti].dup;
            encode(r, c);
        }
        else if (squeezable)
        {
            // Still identical to the input; remember how far that holds.
            lasti = i + stride(s, i);
        }

        if (squeezable)
            lastc = c;
    }

    if (!changed)
        return s;
    // Only trailing duplicates were dropped: a slice of the input suffices.
    if (r is null)
        return s[0 .. lasti];
    return cast(S) r;
}
// squeeze: default pattern, unchanged input returned by identity, a
// trailing-duplicate case returned as a slice, and an explicit pattern.
@system pure unittest
{
assertCTFEable!(
{
string s;
assert(squeeze("hello") == "helo");
s = "abcd";
assert(squeeze(s) is s);
s = "xyzz";
assert(squeeze(s).ptr == s.ptr); // should just be a slice
assert(squeeze("hello goodbyee", "oe") == "hello godbye");
});
}
/***************************************************************
Finds the position $(D_PARAM pos) of the first character in $(D_PARAM
s) that does not match $(D_PARAM pattern) (in the terminology used by
$(REF inPattern, std,string)). Updates $(D_PARAM s =
s[pos..$]). Returns the slice from the beginning of the original
(before update) string up to, and excluding, $(D_PARAM pos).
The $(D_PARAM munch) function is mostly convenient for skipping
a certain category of characters (e.g. whitespace) when parsing
strings. (In such cases, the return value is not used.)
*/
S1 munch(S1, S2)(ref S1 s, S2 pattern) @safe pure @nogc
{
    // Find the index of the first character that is NOT in the pattern;
    // if every character matches, the whole string is consumed.
    size_t cut = s.length;
    foreach (i, dchar c; s)
    {
        if (inPattern(c, pattern))
            continue;
        cut = i;
        break;
    }

    // Split at the cut: hand back the matching prefix and leave the rest
    // in the caller's variable.
    auto head = s[0 .. cut];
    s = s[cut .. $];
    return head;
}
///
@safe pure @nogc unittest
{
// Leading digits are consumed; a second call with nothing to consume
// returns an empty slice and leaves s unchanged.
string s = "123abc";
string t = munch(s, "0123456789");
assert(t == "123" && s == "abc");
t = munch(s, "0123456789");
assert(t == "" && s == "abc");
}
@safe pure @nogc unittest
{
// Same checks with a multi-byte character at the split point.
string s = "123€abc";
string t = munch(s, "0123456789");
assert(t == "123" && s == "€abc");
t = munch(s, "0123456789");
assert(t == "" && s == "€abc");
t = munch(s, "£$€¥");
assert(t == "€" && s == "abc");
}
// helper function for unit tests
// Runs dg twice: once inside a static assert, which forces evaluation at
// compile time, and once as an ordinary call at run time.
private @property void assertCTFEable(alias dg)()
{
static assert({ cast(void) dg(); return true; }());
cast(void) dg();
} undeaD-1.0.10/src/undead/utf.d 0000664 0000000 0000000 00000001117 13463741136 0015773 0 ustar 00root root 0000000 0000000 /**
* Contains the obsolete functions from Phobos' `std.utf`.
*/
module undead.utf;
import std.utf;
import std.typecons;
//deprecated("Removed October 2017. Please use std.utf.encode instead.")
char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe pure
{
    // Encode c into buf (with the replacement-character policy enabled) and
    // return the slice of buf that encode reports as written.
    return buf[0 .. encode!(Yes.useReplacementDchar)(buf, c)];
}
//deprecated("Removed October 2017. Please use std.utf.encode instead.")
wchar[] toUTF16(return ref wchar[2] buf, dchar c) nothrow @nogc @safe pure
{
    // Encode c into buf (with the replacement-character policy enabled) and
    // return the slice of buf that encode reports as written.
    return buf[0 .. encode!(Yes.useReplacementDchar)(buf, c)];
}
undeaD-1.0.10/win32.mak 0000664 0000000 0000000 00000001637 13463741136 0014444 0 ustar 00root root 0000000 0000000 #_ win32.mak
# Build win32 version of undead
# Needs Digital Mars D compiler to build, available free from:
# http://www.digitalmars.com/d/
#
# Targets:
#   all (default)  build the static library $B\$(TARGET).lib
#   unittest       build the unit-test executable with coverage and run it
#   clean          delete the files produced by the targets above
#   tolf, detab    run the tolf / detab tools over the sources
#   zip            package the sources into undead.zip

DMD=dmd
DEL=del

# Source, object and binary directories
S=src\undead
O=obj
B=bin

TARGET=undead

DFLAGS=-g -Isrc/
LFLAGS=-L/map/co
#DFLAGS=
#LFLAGS=

.d.obj :
	$(DMD) -c $(DFLAGS) $*

# Every module that goes into the library. utf.d is part of src\undead and
# was previously missing from this list; string.d now uses the same path
# separator as the other entries.
SRC= $S\bitarray.d $S\regexp.d $S\datebase.d $S\date.d $S\dateparse.d \
	$S\cstream.d $S\stream.d $S\socketstream.d $S\doformat.d $S\string.d \
	$S\utf.d $S\internal\file.d

SOURCE= $(SRC) win32.mak win64.mak posix.mak LICENSE README.md dub.json

all: $B\$(TARGET).lib

#################################################

$B\$(TARGET).lib : $(SRC)
	$(DMD) -lib -of$B\$(TARGET).lib $(SRC) $(DFLAGS)

unittest :
	$(DMD) -unittest -main -cov -of$O\unittest.exe $(SRC) $(DFLAGS)
	$O\unittest.exe

# Remove the test executable, the coverage listings and the built library.
clean:
	$(DEL) $O\unittest.exe *.lst
	$(DEL) $B\$(TARGET).lib

tolf:
	tolf $(SOURCE)

detab:
	detab $(SRC)

zip: detab tolf $(SOURCE)
	$(DEL) undead.zip
	zip32 undead $(SOURCE)
undeaD-1.0.10/win64.mak 0000664 0000000 0000000 00000001644 13463741136 0014447 0 ustar 00root root 0000000 0000000 #_ win32.mak
# Build win64 version of undead
# Needs Digital Mars D compiler to build, available free from:
# http://www.digitalmars.com/d/
#
# Targets:
#   all (default)  build the static library $B\$(TARGET).lib
#   unittest       build the unit-test executable with coverage and run it
#   clean          delete the files produced by the targets above
#   tolf, detab    run the tolf / detab tools over the sources
#   zip            package the sources into undead.zip

DMD=dmd
DEL=del

# Source, object and binary directories
S=src\undead
O=obj
B=bin

TARGET=undead

# -m64 selects the 64-bit target; it is the only difference from win32.mak.
DFLAGS=-m64 -g -Isrc/
LFLAGS=-L/map/co
#DFLAGS=
#LFLAGS=

.d.obj :
	$(DMD) -c $(DFLAGS) $*

# Every module that goes into the library. utf.d is part of src\undead and
# was previously missing from this list; string.d now uses the same path
# separator as the other entries.
SRC= $S\bitarray.d $S\regexp.d $S\datebase.d $S\date.d $S\dateparse.d \
	$S\cstream.d $S\stream.d $S\socketstream.d $S\doformat.d $S\string.d \
	$S\utf.d $S\internal\file.d

SOURCE= $(SRC) win32.mak win64.mak posix.mak LICENSE README.md dub.json

all: $B\$(TARGET).lib

#################################################

$B\$(TARGET).lib : $(SRC)
	$(DMD) -lib -of$B\$(TARGET).lib $(SRC) $(DFLAGS)

unittest :
	$(DMD) -unittest -main -cov -of$O\unittest.exe $(SRC) $(DFLAGS)
	$O\unittest.exe

# Remove the test executable, the coverage listings and the built library.
clean:
	$(DEL) $O\unittest.exe *.lst
	$(DEL) $B\$(TARGET).lib

tolf:
	tolf $(SOURCE)

detab:
	detab $(SRC)

zip: detab tolf $(SOURCE)
	$(DEL) undead.zip
	zip32 undead $(SOURCE)