agrep-4.17/0040755001123100001460000000000007742744447010745 5ustar friurzagrep-4.17/COPYRIGHT0100644001123100001460000000173007010116362012207 0ustar friurzThis material was developed by Sun Wu, Udi Manber and Burra Gopal at the University of Arizona, Department of Computer Science. Permission is granted to copy this software, to redistribute it on a nonprofit basis, and to use it for any purpose, subject to the following restrictions and understandings. 1. Any copy made of this software must include this copyright notice in full. 2. All materials developed as a consequence of the use of this software shall duly acknowledge such use, in accordance with the usual standards of acknowledging credit in academic research. 3. The authors have made no warranty or representation that the operation of this software will be error-free or suitable for any application, and they are under under no obligation to provide any services, by way of maintenance, update, or otherwise. The software is an experimental prototype offered on an as-is basis. 4. Redistribution for profit requires the express, written permission of the authors. agrep-4.17/Makefile.NeXT0100644001123100001460000001061107010116362013167 0ustar friurz# Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. # You might have to change these depending on your machine configuration. # AR and RANLIB are the library-archive programs. On IRIX, RANLIB is not # required (define it to true) and AR is in /usr/ccs/bin/ar (on our machine!). AR = /bin/ar RANLIB = /bin/ranlib # Define HAVE_DIRENT_H to be 1 when you don't have else define it to be 0 (in this case, one of the other 3 flags may need to be defined to be 1). HAVE_DIRENT_H = 0 HAVE_SYS_DIR_H = 0 HAVE_SYS_NDIR_H = 0 HAVE_NDIR_H = 0 # Define UTIME to be 1 if you have the utime() routine on your system. Else define it to be 0. UTIME = 1 # Define ISO_CHAR_SET to be 1 if you want to use the international 8bit character set. Else define it to be 0. 
ISO_CHAR_SET = 0 # You might have to change this depending on your machine configuration. CC = gcc SHELL = /bin/sh # YOU DON'T HAVE TO CHANGE ANYTHING BELOW THIS LINE # The binaries will be made in ../bin/. and the agrep library in ../lib # You normally don't have to change them. BINDIR = ../bin LIBDIR = ../lib TCOMP = cast TCOMPDIR = ../compress AGREPDIR = ../agrep TEMPLATEDIR = ../libtemplate # You can change the target to use the "cast" (compression) library by changing: # all: $(NOTCPROG) # to: # all: $(PROG) # You must also define DOTCOMPRESSED below to be 1 instead of 0. DOTCOMPRESSED = 0 # Include flags is not a part of CLFAGS and LINKFLAGS since path names from subdirs can be different OPTIMIZEFLAGS = -O #PROFILEFLAGS = -p #DEBUGFLAGS = -g -DBG_DEBUG=1 -DDEBUG=1 INCLUDEFLAGS = -I$(AGREPDIR) -I$(TEMPLATEDIR)/include DEFINEFLAGS = -DHAVE_DIRENT_H=$(HAVE_DIRENT_H) -DHAVE_SYS_DIR_H=$(HAVE_SYS_DIR_H) -DHAVE_SYS_NDIR_H=$(HAVE_SYS_NDIR_H) -DHAVE_NDIR_H=$(HAVE_NDIR_H) \ -DUTIME=$(UTIME) -DISO_CHAR_SET=$(ISO_CHAR_SET) SUBDIRCFLAGS = -c $(DEFINEFLAGS) $(OPTIMIZEFLAGS) $(PROFILEFLAGS) $(DEBUGFLAGS) MYDEFINEFLAGS = -DMEASURE_TIMES=0 -DAGREP_POINTER=1 -DDOTCOMPRESSED=$(DOTCOMPRESSED) CFLAGS = $(MYDEFINEFLAGS) $(INCLUDEFLAGS) $(SUBDIRCFLAGS) SUBDIRLINKFLAGS = $(PROFILEFLAGS) LINKFLAGS = $(INCLUDEFLAGS) $(SUBDIRLINKFLAGS) OTHERLIBS = PROG = agrep NOTCPROG = notc$(PROG) all: $(NOTCPROG) cp $(PROG) $(BINDIR)/. 
LIB = $(LIBDIR)/lib$(PROG).a HDRS = agrep.h checkfile.h re.h defs.h config.h TCOMPLIBOBJ = \ $(TCOMPDIR)/hash.o \ $(TCOMPDIR)/string.o \ $(TCOMPDIR)/misc.o \ $(TCOMPDIR)/quick.o \ $(TCOMPDIR)/cast.o \ $(TCOMPDIR)/uncast.o \ $(TCOMPDIR)/tsimpletest.o \ $(TCOMPDIR)/tbuild.o\ $(TCOMPDIR)/tmemlook.o OBJS = \ follow.o \ asearch.o \ asearch1.o \ agrep.o \ bitap.o \ checkfile.o \ compat.o \ maskgen.o \ parse.o \ checksg.o \ preprocess.o \ delim.o \ asplit.o \ recursive.o \ sgrep.o \ newmgrep.o \ utilities.o $(PROG): $(OBJS) main.o $(LIBDIR)/lib$(TCOMP).a $(CC) -L$(LIBDIR) $(LINKFLAGS) -o $@ $(OBJS) main.o -l$(TCOMP) $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) $(TCOMPLIBOBJ) $(RANLIB) $(LIB) $(LIBDIR)/lib$(TCOMP).a: cd $(TCOMPDIR) ; $(MAKE) -f Makefile.NeXT CC="$(CC)" SUBDIRCFLAGS="$(SUBDIRCFLAGS)" SUBDIRLINKFLAGS="$(SUBDIRLINKFLAGS)" SHELL="$(SHELL)" HAVE_DIRENT_H="$(HAVE_DIRENT_H)" HAVE_SYS_DIR_H="$(HAVE_SYS_DIR_H)" HAVE_SYS_NDIR_H="$(HAVE_SYS_NDIR_H)" HAVE_NDIR_H="$(HAVE_NDIR_H)" UTIME="$(UTIME)" STRUCTURED_QUERIES="$(STRUCTURED_QUERIES)" ISO_CHAR_SET="$(ISO_CHAR_SET)" SFS_COMPAT="$(SFS_COMPAT)" $(NOTCPROG): $(OBJS) dummyfilters.o main.o $(CC) $(LINKFLAGS) -o $(PROG) $(OBJS) dummyfilters.o main.o $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) dummyfilters.o $(RANLIB) $(LIB) clean: -rm -f $(LIB) $(OBJS) dummyfilters.o main.o core a.out $(PROG) compat.o: agrep.h defs.h config.h asearch.o: agrep.h defs.h config.h asearch1.o: agrep.h defs.h config.h bitap.o: agrep.h defs.h config.h checkfile.o: agrep.h checkfile.h defs.h config.h follow.o: re.h agrep.h defs.h config.h main.o: agrep.h checkfile.h defs.h config.h dummysyscalls.c agrep.o: agrep.h checkfile.h defs.h config.h newmgrep.o: agrep.h defs.h config.h maskgen.o: agrep.h defs.h config.h next.o: agrep.h defs.h config.h parse.o: re.h agrep.h defs.h config.h preprocess.o: agrep.h defs.h config.h checksg.o: agrep.h checkfile.h defs.h config.h delim.o: agrep.h defs.h config.h asplit.o: agrep.h defs.h config.h sgrep.o: agrep.h defs.h 
config.h abm.o: agrep.h defs.h config.h utilities.o: re.h agrep.h defs.h config.h dummyfilters.o: dummyfilters.c agrep-4.17/Makefile.alpha0100644001123100001460000001073407010116362013444 0ustar friurz# Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. # You might have to change these depending on your machine configuration. # AR and RANLIB are the library-archive programs. On Solaris, RANLIB is not # required (define it to true) and AR is in /usr/ccs/bin/ar (on our machine!). AR = ar #/usr/ccs/bin/ar #for Solaris RANLIB = ranlib #true #for Solaris # Define HAVE_DIRENT_H to be 1 when you don't have else define it to be 0 (in this case, one of the other 3 flags may need to be defined to be 1). HAVE_DIRENT_H = 1 HAVE_SYS_DIR_H = 0 HAVE_SYS_NDIR_H = 0 HAVE_NDIR_H = 0 # Define UTIME to be 1 if you have the utime() routine on your system. Else define it to be 0. UTIME = 1 # Define ISO_CHAR_SET to be 1 if you want to use the international 8bit character set. Else define it to be 0. ISO_CHAR_SET = 0 # You might have to change this depending on your machine configuration. CC = cc #gcc -traditional #cc SHELL = /bin/sh # YOU DON'T HAVE TO CHANGE ANYTHING BELOW THIS LINE # The binaries will be made in ../bin/. and the agrep library in ../lib # You normally don't have to change them. BINDIR = ../bin LIBDIR = ../lib TCOMP = cast TCOMPDIR = ../compress AGREPDIR = ../agrep TEMPLATEDIR = ../libtemplate # You can change the target to use the "cast" (compression) library by changing: # all: $(NOTCPROG) # to: # all: $(PROG) # You must also define DOTCOMPRESSED below to be 1 instead of 0. 
DOTCOMPRESSED = 0 # Include flags is not a part of CLFAGS and LINKFLAGS since path names from subdirs can be different OPTIMIZEFLAGS = -O -Olimit 3000 #PROFILEFLAGS = -p #DEBUGFLAGS = -g -DBG_DEBUG=1 -DDEBUG=1 INCLUDEFLAGS = -I$(AGREPDIR) -I$(TEMPLATEDIR)/include DEFINEFLAGS = -DHAVE_DIRENT_H=$(HAVE_DIRENT_H) -DHAVE_SYS_DIR_H=$(HAVE_SYS_DIR_H) -DHAVE_SYS_NDIR_H=$(HAVE_SYS_NDIR_H) -DHAVE_NDIR_H=$(HAVE_NDIR_H) \ -DUTIME=$(UTIME) -DISO_CHAR_SET=$(ISO_CHAR_SET) SUBDIRCFLAGS = -c $(DEFINEFLAGS) $(OPTIMIZEFLAGS) $(PROFILEFLAGS) $(DEBUGFLAGS) MYDEFINEFLAGS = -DMEASURE_TIMES=0 -DAGREP_POINTER=1 -DDOTCOMPRESSED=$(DOTCOMPRESSED) CFLAGS = $(MYDEFINEFLAGS) $(INCLUDEFLAGS) $(SUBDIRCFLAGS) SUBDIRLINKFLAGS = $(PROFILEFLAGS) LINKFLAGS = $(INCLUDEFLAGS) $(SUBDIRLINKFLAGS) OTHERLIBS = PROG = agrep NOTCPROG = notc$(PROG) all: $(NOTCPROG) cp $(PROG) $(BINDIR)/. LIB = $(LIBDIR)/lib$(PROG).a HDRS = agrep.h checkfile.h re.h defs.h config.h TCOMPLIBOBJ = \ $(TCOMPDIR)/hash.o \ $(TCOMPDIR)/string.o \ $(TCOMPDIR)/misc.o \ $(TCOMPDIR)/quick.o \ $(TCOMPDIR)/cast.o \ $(TCOMPDIR)/uncast.o \ $(TCOMPDIR)/tsimpletest.o \ $(TCOMPDIR)/tbuild.o\ $(TCOMPDIR)/tmemlook.o OBJS = \ follow.o \ asearch.o \ asearch1.o \ agrep.o \ bitap.o \ checkfile.o \ compat.o \ maskgen.o \ parse.o \ checksg.o \ preprocess.o \ delim.o \ asplit.o \ recursive.o \ sgrep.o \ newmgrep.o \ utilities.o $(PROG): $(OBJS) main.o $(LIBDIR)/lib$(TCOMP).a $(CC) -L$(LIBDIR) $(LINKFLAGS) -o $@ $(OBJS) main.o -l$(TCOMP) $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) $(TCOMPLIBOBJ) $(RANLIB) $(LIB) $(LIBDIR)/lib$(TCOMP).a: cd $(TCOMPDIR) ; $(MAKE) -f Makefile.alpha CC="$(CC)" SUBDIRCFLAGS="$(SUBDIRCFLAGS)" SUBDIRLINKFLAGS="$(SUBDIRLINKFLAGS)" SHELL="$(SHELL)" HAVE_DIRENT_H="$(HAVE_DIRENT_H)" HAVE_SYS_DIR_H="$(HAVE_SYS_DIR_H)" HAVE_SYS_NDIR_H="$(HAVE_SYS_NDIR_H)" HAVE_NDIR_H="$(HAVE_NDIR_H)" UTIME="$(UTIME)" STRUCTURED_QUERIES="$(STRUCTURED_QUERIES)" ISO_CHAR_SET="$(ISO_CHAR_SET)" SFS_COMPAT="$(SFS_COMPAT)" $(NOTCPROG): $(OBJS) dummyfilters.o main.o 
$(CC) $(LINKFLAGS) -o $(PROG) $(OBJS) dummyfilters.o main.o $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) dummyfilters.o $(RANLIB) $(LIB) clean: -rm -f $(LIB) $(OBJS) dummyfilters.o main.o core a.out $(PROG) compat.o: agrep.h defs.h config.h asearch.o: agrep.h defs.h config.h asearch1.o: agrep.h defs.h config.h bitap.o: agrep.h defs.h config.h checkfile.o: agrep.h checkfile.h defs.h config.h follow.o: re.h agrep.h defs.h config.h main.o: agrep.h checkfile.h defs.h config.h dummysyscalls.c agrep.o: agrep.h checkfile.h defs.h config.h newmgrep.o: agrep.h defs.h config.h maskgen.o: agrep.h defs.h config.h next.o: agrep.h defs.h config.h parse.o: re.h agrep.h defs.h config.h preprocess.o: agrep.h defs.h config.h checksg.o: agrep.h checkfile.h defs.h config.h delim.o: agrep.h defs.h config.h asplit.o: agrep.h defs.h config.h sgrep.o: agrep.h defs.h config.h abm.o: agrep.h defs.h config.h utilities.o: re.h agrep.h defs.h config.h dummyfilters.o: dummyfilters.c agrep-4.17/Makefile.hp0100644001123100001460000001063607010116362012767 0ustar friurz# Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. # You might have to change these depending on your machine configuration. # AR and RANLIB are the library-archive programs. On Solaris, RANLIB is not # required (define it to true) and AR is in /usr/ccs/bin/ar (on our machine!). AR = ar #/usr/ccs/bin/ar #for Solaris RANLIB = : # Define HAVE_DIRENT_H to be 1 when you don't have else define it to be 0 (in this case, one of the other 3 flags may need to be defined to be 1). HAVE_DIRENT_H = 0 HAVE_SYS_DIR_H = 1 HAVE_SYS_NDIR_H = 0 HAVE_NDIR_H = 0 # Define UTIME to be 1 if you have the utime() routine on your system. Else define it to be 0. UTIME = 1 # Define ISO_CHAR_SET to be 1 if you want to use the international 8bit character set. Else define it to be 0. ISO_CHAR_SET = 0 # You might have to change this depending on your machine configuration. 
CC = cc SHELL = /bin/sh # YOU DON'T HAVE TO CHANGE ANYTHING BELOW THIS LINE # The binaries will be made in ../bin/. and the agrep library in ../lib # You normally don't have to change them. BINDIR = ../bin LIBDIR = ../lib TCOMP = cast TCOMPDIR = ../compress AGREPDIR = ../agrep TEMPLATEDIR = ../libtemplate # You can change the target to use the "cast" (compression) library by changing: # all: $(NOTCPROG) # to: # all: $(PROG) # You must also define DOTCOMPRESSED below to be 1 instead of 0. DOTCOMPRESSED = 0 # Include flags is not a part of CLFAGS and LINKFLAGS since path names from subdirs can be different OPTIMIZEFLAGS = -O #PROFILEFLAGS = -p #DEBUGFLAGS = -g -DBG_DEBUG=1 -DDEBUG=1 INCLUDEFLAGS = -I$(AGREPDIR) -I$(TEMPLATEDIR)/include DEFINEFLAGS = -DHAVE_DIRENT_H=$(HAVE_DIRENT_H) -DHAVE_SYS_DIR_H=$(HAVE_SYS_DIR_H) -DHAVE_SYS_NDIR_H=$(HAVE_SYS_NDIR_H) -DHAVE_NDIR_H=$(HAVE_NDIR_H) \ -DUTIME=$(UTIME) -DISO_CHAR_SET=$(ISO_CHAR_SET) SUBDIRCFLAGS = -c $(DEFINEFLAGS) $(OPTIMIZEFLAGS) $(PROFILEFLAGS) $(DEBUGFLAGS) MYDEFINEFLAGS = -DMEASURE_TIMES=0 -DAGREP_POINTER=1 -DDOTCOMPRESSED=$(DOTCOMPRESSED) CFLAGS = $(MYDEFINEFLAGS) $(INCLUDEFLAGS) $(SUBDIRCFLAGS) SUBDIRLINKFLAGS = $(PROFILEFLAGS) LINKFLAGS = $(INCLUDEFLAGS) $(SUBDIRLINKFLAGS) OTHERLIBS = PROG = agrep NOTCPROG = notc$(PROG) all: $(NOTCPROG) cp $(PROG) $(BINDIR)/. 
LIB = $(LIBDIR)/lib$(PROG).a HDRS = agrep.h checkfile.h re.h defs.h config.h TCOMPLIBOBJ = \ $(TCOMPDIR)/hash.o \ $(TCOMPDIR)/string.o \ $(TCOMPDIR)/misc.o \ $(TCOMPDIR)/quick.o \ $(TCOMPDIR)/cast.o \ $(TCOMPDIR)/uncast.o \ $(TCOMPDIR)/tsimpletest.o \ $(TCOMPDIR)/tbuild.o\ $(TCOMPDIR)/tmemlook.o OBJS = \ follow.o \ asearch.o \ asearch1.o \ agrep.o \ bitap.o \ checkfile.o \ compat.o \ maskgen.o \ parse.o \ checksg.o \ preprocess.o \ delim.o \ asplit.o \ recursive.o \ sgrep.o \ newmgrep.o \ utilities.o $(PROG): $(OBJS) main.o $(LIBDIR)/lib$(TCOMP).a $(CC) -L$(LIBDIR) $(LINKFLAGS) -o $@ $(OBJS) main.o -l$(TCOMP) $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) $(TCOMPLIBOBJ) $(RANLIB) $(LIB) $(LIBDIR)/lib$(TCOMP).a: cd $(TCOMPDIR) ; $(MAKE) -f Makefile.hp CC="$(CC)" SUBDIRCFLAGS="$(SUBDIRCFLAGS)" SUBDIRLINKFLAGS="$(SUBDIRLINKFLAGS)" SHELL="$(SHELL)" HAVE_DIRENT_H="$(HAVE_DIRENT_H)" HAVE_SYS_DIR_H="$(HAVE_SYS_DIR_H)" HAVE_SYS_NDIR_H="$(HAVE_SYS_NDIR_H)" HAVE_NDIR_H="$(HAVE_NDIR_H)" UTIME="$(UTIME)" STRUCTURED_QUERIES="$(STRUCTURED_QUERIES)" ISO_CHAR_SET="$(ISO_CHAR_SET)" SFS_COMPAT="$(SFS_COMPAT)" $(NOTCPROG): $(OBJS) dummyfilters.o main.o $(CC) $(LINKFLAGS) -o $(PROG) $(OBJS) dummyfilters.o main.o $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) dummyfilters.o $(RANLIB) $(LIB) clean: -rm -f $(LIB) $(OBJS) dummyfilters.o main.o core a.out $(PROG) compat.o: agrep.h defs.h config.h asearch.o: agrep.h defs.h config.h asearch1.o: agrep.h defs.h config.h bitap.o: agrep.h defs.h config.h checkfile.o: agrep.h checkfile.h defs.h config.h follow.o: re.h agrep.h defs.h config.h main.o: agrep.h checkfile.h defs.h config.h dummysyscalls.c agrep.o: agrep.h checkfile.h defs.h config.h newmgrep.o: agrep.h defs.h config.h maskgen.o: agrep.h defs.h config.h next.o: agrep.h defs.h config.h parse.o: re.h agrep.h defs.h config.h preprocess.o: agrep.h defs.h config.h checksg.o: agrep.h checkfile.h defs.h config.h delim.o: agrep.h defs.h config.h asplit.o: agrep.h defs.h config.h sgrep.o: agrep.h defs.h config.h 
abm.o: agrep.h defs.h config.h utilities.o: re.h agrep.h defs.h config.h dummyfilters.o: dummyfilters.c agrep-4.17/Makefile.in0100644001123100001460000000647307302117062012774 0ustar friurz# Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. srcdir = @srcdir@ VPATH = @srcdir@ SHELL = /bin/sh CC = @CC@ AR = @AR@ RANLIB = @RANLIB@ CP = @CP@ STRIP = @STRIP@ INSTALL = @INSTALL@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_MAN = ${INSTALL} -m 444 DEFS = prefix = @prefix@ exec_prefix = @exec_prefix@ binprefix = manprefix = bindir = $(exec_prefix)/bin libdir = $(exec_prefix)/lib mandir = $(prefix)/man/man1 manext = 1 MAN1 = agrep.1 # YOU DON'T HAVE TO CHANGE ANYTHING BELOW THIS LINE # The binaries will be made in ../bin/. and the agrep library in ../lib # You normally don't have to change them. BINDIR = ../bin LIBDIR = ../lib TCOMP = cast TCOMPDIR = ../compress AGREPDIR = ../agrep TEMPLATEDIR = ../libtemplate # You can change the target to use the "cast" (compression) library by changing: # all: $(NOTCPROG) # to: # all: $(PROG) # You must also define DOTCOMPRESSED below to be 1 instead of 0. 
DOTCOMPRESSED = 0 OPTIMIZEFLAGS = -O2 INCLUDEFLAGS = -I$(AGREPDIR) -I$(TEMPLATEDIR)/include # AGREP_POINTER is defined in autoconf.h MYDEFINEFLAGS = -DMEASURE_TIMES=0 -DDOTCOMPRESSED=$(DOTCOMPRESSED) CFLAGS = $(MYDEFINEFLAGS) $(INCLUDEFLAGS) $(DEFS) LDFLAGS = OTHERLIBS = PROG = agrep NOTCPROG = notc$(PROG) LIB = $(LIBDIR)/lib$(PROG).a all: $(LIB) $(NOTCPROG) install: all install-man $(INSTALL) $(PROG) $(bindir) install-man: $(MAN1) $(INSTALL_MAN) $(MAN1) $(mandir) clean: rm -f $(LIB) $(OBJS) dummyfilters.o main.o core a.out $(PROG) distclean: clean rm -f Makefile HDRS = agrep.h checkfile.h re.h defs.h config.h TCOMPLIBOBJ = \ $(TCOMPDIR)/hash.o \ $(TCOMPDIR)/string.o \ $(TCOMPDIR)/misc.o \ $(TCOMPDIR)/quick.o \ $(TCOMPDIR)/cast.o \ $(TCOMPDIR)/uncast.o \ $(TCOMPDIR)/tsimpletest.o \ $(TCOMPDIR)/tbuild.o\ $(TCOMPDIR)/tmemlook.o OBJS = \ follow.o \ asearch.o \ asearch1.o \ agrep.o \ bitap.o \ checkfile.o \ compat.o \ maskgen.o \ parse.o \ checksg.o \ preprocess.o \ delim.o \ asplit.o \ recursive.o \ sgrep.o \ newmgrep.o \ utilities.o $(PROG): $(OBJS) main.o $(LIBDIR)/lib$(TCOMP).a $(CC) -L$(LIBDIR) $(LDFLAGS) -o $@ $(OBJS) main.o -l$(TCOMP) $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) $(TCOMPLIBOBJ) $(RANLIB) $(LIB) $(LIBDIR)/lib$(TCOMP).a: cd $(TCOMPDIR) ; $(MAKE) $(LIB): $(OBJS) dummyfilters.o $(AR) rcv $(LIB) $(OBJS) dummyfilters.o $(RANLIB) $(LIB) $(NOTCPROG): $(OBJS) dummyfilters.o main.o $(CC) $(LDFLAGS) -o $(PROG) $(OBJS) dummyfilters.o main.o $(OTHERLIBS) compat.o: agrep.h defs.h config.h asearch.o: agrep.h defs.h config.h asearch1.o: agrep.h defs.h config.h bitap.o: agrep.h defs.h config.h checkfile.o: agrep.h checkfile.h defs.h config.h follow.o: re.h agrep.h defs.h config.h main.o: agrep.h checkfile.h defs.h config.h dummysyscalls.c agrep.o: agrep.h checkfile.h defs.h config.h newmgrep.o: agrep.h defs.h config.h maskgen.o: agrep.h defs.h config.h next.o: agrep.h defs.h config.h parse.o: re.h agrep.h defs.h config.h preprocess.o: agrep.h defs.h config.h checksg.o: 
agrep.h checkfile.h defs.h config.h delim.o: agrep.h defs.h config.h asplit.o: agrep.h defs.h config.h sgrep.o: agrep.h defs.h config.h abm.o: agrep.h defs.h config.h utilities.o: re.h agrep.h defs.h config.h dummyfilters.o: dummyfilters.c agrep-4.17/Makefile.rs60000100644001123100001460000001062707010116362013312 0ustar friurz# Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. # You might have to change these depending on your machine configuration. # AR and RANLIB are the library-archive programs. On IRIX, RANLIB is not # required (define it to true) and AR is in /usr/ccs/bin/ar (on our machine!). AR = /usr/bin/ar RANLIB = true #for IRIX # Define HAVE_DIRENT_H to be 1 when you don't have else define it to be 0 (in this case, one of the other 3 flags may need to be defined to be 1). HAVE_DIRENT_H = 1 HAVE_SYS_DIR_H = 0 HAVE_SYS_NDIR_H = 0 HAVE_NDIR_H = 0 # Define UTIME to be 1 if you have the utime() routine on your system. Else define it to be 0. UTIME = 1 # Define ISO_CHAR_SET to be 1 if you want to use the international 8bit character set. Else define it to be 0. ISO_CHAR_SET = 0 # You might have to change this depending on your machine configuration. CC = cc SHELL = /bin/sh # YOU DON'T HAVE TO CHANGE ANYTHING BELOW THIS LINE # The binaries will be made in ../bin/. and the agrep library in ../lib # You normally don't have to change them. BINDIR = ../bin LIBDIR = ../lib TCOMP = cast TCOMPDIR = ../compress AGREPDIR = ../agrep TEMPLATEDIR = ../libtemplate # You can change the target to use the "cast" (compression) library by changing: # all: $(NOTCPROG) # to: # all: $(PROG) # You must also define DOTCOMPRESSED below to be 1 instead of 0. 
DOTCOMPRESSED = 0 # Include flags is not a part of CLFAGS and LINKFLAGS since path names from subdirs can be different OPTIMIZEFLAGS = -O #PROFILEFLAGS = -p #DEBUGFLAGS = -g -DBG_DEBUG=1 -DDEBUG=1 INCLUDEFLAGS = -I$(AGREPDIR) -I$(TEMPLATEDIR)/include DEFINEFLAGS = -DHAVE_DIRENT_H=$(HAVE_DIRENT_H) -DHAVE_SYS_DIR_H=$(HAVE_SYS_DIR_H) -DHAVE_SYS_NDIR_H=$(HAVE_SYS_NDIR_H) -DHAVE_NDIR_H=$(HAVE_NDIR_H) \ -DUTIME=$(UTIME) -DISO_CHAR_SET=$(ISO_CHAR_SET) SUBDIRCFLAGS = -c $(DEFINEFLAGS) $(OPTIMIZEFLAGS) $(PROFILEFLAGS) $(DEBUGFLAGS) MYDEFINEFLAGS = -DMEASURE_TIMES=0 -DAGREP_POINTER=1 -DDOTCOMPRESSED=$(DOTCOMPRESSED) CFLAGS = $(MYDEFINEFLAGS) $(INCLUDEFLAGS) $(SUBDIRCFLAGS) SUBDIRLINKFLAGS = $(PROFILEFLAGS) LINKFLAGS = $(INCLUDEFLAGS) $(SUBDIRLINKFLAGS) OTHERLIBS = PROG = agrep NOTCPROG = notc$(PROG) all: $(NOTCPROG) cp $(PROG) $(BINDIR)/. LIB = $(LIBDIR)/lib$(PROG).a HDRS = agrep.h checkfile.h re.h defs.h config.h TCOMPLIBOBJ = \ $(TCOMPDIR)/hash.o \ $(TCOMPDIR)/string.o \ $(TCOMPDIR)/misc.o \ $(TCOMPDIR)/quick.o \ $(TCOMPDIR)/cast.o \ $(TCOMPDIR)/uncast.o \ $(TCOMPDIR)/tsimpletest.o \ $(TCOMPDIR)/tbuild.o\ $(TCOMPDIR)/tmemlook.o OBJS = \ follow.o \ asearch.o \ asearch1.o \ agrep.o \ bitap.o \ checkfile.o \ compat.o \ maskgen.o \ parse.o \ checksg.o \ preprocess.o \ delim.o \ asplit.o \ recursive.o \ sgrep.o \ newmgrep.o \ utilities.o $(PROG): $(OBJS) main.o $(LIBDIR)/lib$(TCOMP).a $(CC) -L$(LIBDIR) $(LINKFLAGS) -o $@ $(OBJS) main.o -l$(TCOMP) $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) $(TCOMPLIBOBJ) $(RANLIB) $(LIB) $(LIBDIR)/lib$(TCOMP).a: cd $(TCOMPDIR) ; $(MAKE) -f Makefile.rs6000 CC="$(CC)" SUBDIRCFLAGS="$(SUBDIRCFLAGS)" SUBDIRLINKFLAGS="$(SUBDIRLINKFLAGS)" SHELL="$(SHELL)" HAVE_DIRENT_H="$(HAVE_DIRENT_H)" HAVE_SYS_DIR_H="$(HAVE_SYS_DIR_H)" HAVE_SYS_NDIR_H="$(HAVE_SYS_NDIR_H)" HAVE_NDIR_H="$(HAVE_NDIR_H)" UTIME="$(UTIME)" STRUCTURED_QUERIES="$(STRUCTURED_QUERIES)" ISO_CHAR_SET="$(ISO_CHAR_SET)" SFS_COMPAT="$(SFS_COMPAT)" $(NOTCPROG): $(OBJS) dummyfilters.o main.o $(CC) 
$(LINKFLAGS) -o $(PROG) $(OBJS) dummyfilters.o main.o $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) dummyfilters.o $(RANLIB) $(LIB) clean: -rm -f $(LIB) $(OBJS) dummyfilters.o main.o core a.out $(PROG) compat.o: agrep.h defs.h config.h asearch.o: agrep.h defs.h config.h asearch1.o: agrep.h defs.h config.h bitap.o: agrep.h defs.h config.h checkfile.o: agrep.h checkfile.h defs.h config.h follow.o: re.h agrep.h defs.h config.h main.o: agrep.h checkfile.h defs.h config.h dummysyscalls.c agrep.o: agrep.h checkfile.h defs.h config.h newmgrep.o: agrep.h defs.h config.h maskgen.o: agrep.h defs.h config.h next.o: agrep.h defs.h config.h parse.o: re.h agrep.h defs.h config.h preprocess.o: agrep.h defs.h config.h checksg.o: agrep.h checkfile.h defs.h config.h delim.o: agrep.h defs.h config.h asplit.o: agrep.h defs.h config.h sgrep.o: agrep.h defs.h config.h abm.o: agrep.h defs.h config.h utilities.o: re.h agrep.h defs.h config.h dummyfilters.o: dummyfilters.c agrep-4.17/Makefile.sgi0100644001123100001460000001062407010116362013137 0ustar friurz# Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. # You might have to change these depending on your machine configuration. # AR and RANLIB are the library-archive programs. On IRIX, RANLIB is not # required (define it to true) and AR is in /usr/ccs/bin/ar (on our machine!). AR = /usr/bin/ar RANLIB = true #for IRIX # Define HAVE_DIRENT_H to be 1 when you don't have else define it to be 0 (in this case, one of the other 3 flags may need to be defined to be 1). HAVE_DIRENT_H = 1 HAVE_SYS_DIR_H = 0 HAVE_SYS_NDIR_H = 0 HAVE_NDIR_H = 0 # Define UTIME to be 1 if you have the utime() routine on your system. Else define it to be 0. UTIME = 1 # Define ISO_CHAR_SET to be 1 if you want to use the international 8bit character set. Else define it to be 0. ISO_CHAR_SET = 0 # You might have to change this depending on your machine configuration. 
CC = cc SHELL = /bin/sh # YOU DON'T HAVE TO CHANGE ANYTHING BELOW THIS LINE # The binaries will be made in ../bin/. and the agrep library in ../lib # You normally don't have to change them. BINDIR = ../bin LIBDIR = ../lib TCOMP = cast TCOMPDIR = ../compress AGREPDIR = ../agrep TEMPLATEDIR = ../libtemplate # You can change the target to use the "cast" (compression) library by changing: # all: $(NOTCPROG) # to: # all: $(PROG) # You must also define DOTCOMPRESSED below to be 1 instead of 0. DOTCOMPRESSED = 0 # Include flags is not a part of CLFAGS and LINKFLAGS since path names from subdirs can be different OPTIMIZEFLAGS = -O #PROFILEFLAGS = -p #DEBUGFLAGS = -g -DBG_DEBUG=1 -DDEBUG=1 INCLUDEFLAGS = -I$(AGREPDIR) -I$(TEMPLATEDIR)/include DEFINEFLAGS = -DHAVE_DIRENT_H=$(HAVE_DIRENT_H) -DHAVE_SYS_DIR_H=$(HAVE_SYS_DIR_H) -DHAVE_SYS_NDIR_H=$(HAVE_SYS_NDIR_H) -DHAVE_NDIR_H=$(HAVE_NDIR_H) \ -DUTIME=$(UTIME) -DISO_CHAR_SET=$(ISO_CHAR_SET) SUBDIRCFLAGS = -c $(DEFINEFLAGS) $(OPTIMIZEFLAGS) $(PROFILEFLAGS) $(DEBUGFLAGS) MYDEFINEFLAGS = -DMEASURE_TIMES=0 -DAGREP_POINTER=1 -DDOTCOMPRESSED=$(DOTCOMPRESSED) CFLAGS = $(MYDEFINEFLAGS) $(INCLUDEFLAGS) $(SUBDIRCFLAGS) SUBDIRLINKFLAGS = $(PROFILEFLAGS) LINKFLAGS = $(INCLUDEFLAGS) $(SUBDIRLINKFLAGS) OTHERLIBS = PROG = agrep NOTCPROG = notc$(PROG) all: $(NOTCPROG) cp $(PROG) $(BINDIR)/. 
LIB = $(LIBDIR)/lib$(PROG).a HDRS = agrep.h checkfile.h re.h defs.h config.h TCOMPLIBOBJ = \ $(TCOMPDIR)/hash.o \ $(TCOMPDIR)/string.o \ $(TCOMPDIR)/misc.o \ $(TCOMPDIR)/quick.o \ $(TCOMPDIR)/cast.o \ $(TCOMPDIR)/uncast.o \ $(TCOMPDIR)/tsimpletest.o \ $(TCOMPDIR)/tbuild.o\ $(TCOMPDIR)/tmemlook.o OBJS = \ follow.o \ asearch.o \ asearch1.o \ agrep.o \ bitap.o \ checkfile.o \ compat.o \ maskgen.o \ parse.o \ checksg.o \ preprocess.o \ delim.o \ asplit.o \ recursive.o \ sgrep.o \ newmgrep.o \ utilities.o $(PROG): $(OBJS) main.o $(LIBDIR)/lib$(TCOMP).a $(CC) -L$(LIBDIR) $(LINKFLAGS) -o $@ $(OBJS) main.o -l$(TCOMP) $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) $(TCOMPLIBOBJ) $(RANLIB) $(LIB) $(LIBDIR)/lib$(TCOMP).a: cd $(TCOMPDIR) ; $(MAKE) -f Makefile.sgi CC="$(CC)" SUBDIRCFLAGS="$(SUBDIRCFLAGS)" SUBDIRLINKFLAGS="$(SUBDIRLINKFLAGS)" SHELL="$(SHELL)" HAVE_DIRENT_H="$(HAVE_DIRENT_H)" HAVE_SYS_DIR_H="$(HAVE_SYS_DIR_H)" HAVE_SYS_NDIR_H="$(HAVE_SYS_NDIR_H)" HAVE_NDIR_H="$(HAVE_NDIR_H)" UTIME="$(UTIME)" STRUCTURED_QUERIES="$(STRUCTURED_QUERIES)" ISO_CHAR_SET="$(ISO_CHAR_SET)" SFS_COMPAT="$(SFS_COMPAT)" $(NOTCPROG): $(OBJS) dummyfilters.o main.o $(CC) $(LINKFLAGS) -o $(PROG) $(OBJS) dummyfilters.o main.o $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) dummyfilters.o $(RANLIB) $(LIB) clean: -rm -f $(LIB) $(OBJS) dummyfilters.o main.o core a.out $(PROG) compat.o: agrep.h defs.h config.h asearch.o: agrep.h defs.h config.h asearch1.o: agrep.h defs.h config.h bitap.o: agrep.h defs.h config.h checkfile.o: agrep.h checkfile.h defs.h config.h follow.o: re.h agrep.h defs.h config.h main.o: agrep.h checkfile.h defs.h config.h dummysyscalls.c agrep.o: agrep.h checkfile.h defs.h config.h newmgrep.o: agrep.h defs.h config.h maskgen.o: agrep.h defs.h config.h next.o: agrep.h defs.h config.h parse.o: re.h agrep.h defs.h config.h preprocess.o: agrep.h defs.h config.h checksg.o: agrep.h checkfile.h defs.h config.h delim.o: agrep.h defs.h config.h asplit.o: agrep.h defs.h config.h sgrep.o: agrep.h defs.h 
config.h abm.o: agrep.h defs.h config.h utilities.o: re.h agrep.h defs.h config.h dummyfilters.o: dummyfilters.c agrep-4.17/Makefile.solaris0100644001123100001460000001070107010116362014025 0ustar friurz# Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. # You might have to change these depending on your machine configuration. # AR and RANLIB are the library-archive programs. On Solaris, RANLIB is not # required (define it to true) and AR is in /usr/ccs/bin/ar (on our machine!). AR = /usr/ccs/bin/ar #for Solaris RANLIB = true #for Solaris # Define HAVE_DIRENT_H to be 1 when you don't have else define it to be 0 (in this case, one of the other 3 flags may need to be defined to be 1). HAVE_DIRENT_H = 1 HAVE_SYS_DIR_H = 0 HAVE_SYS_NDIR_H = 0 HAVE_NDIR_H = 0 # Define UTIME to be 1 if you have the utime() routine on your system. Else define it to be 0. UTIME = 1 # Define ISO_CHAR_SET to be 1 if you want to use the international 8bit character set. Else define it to be 0. ISO_CHAR_SET = 0 # You might have to change this depending on your machine configuration. CC = gcc -traditional #cc SHELL = /bin/sh # YOU DON'T HAVE TO CHANGE ANYTHING BELOW THIS LINE # The binaries will be made in ../bin/. and the agrep library in ../lib # You normally don't have to change them. BINDIR = ../bin LIBDIR = ../lib TCOMP = cast TCOMPDIR = ../compress AGREPDIR = ../agrep TEMPLATEDIR = ../libtemplate # You can change the target to use the "cast" (compression) library by changing: # all: $(NOTCPROG) # to: # all: $(PROG) # You must also define DOTCOMPRESSED below to be 1 instead of 0. 
DOTCOMPRESSED = 0 # Include flags is not a part of CLFAGS and LINKFLAGS since path names from subdirs can be different OPTIMIZEFLAGS = -O #PROFILEFLAGS = -p #DEBUGFLAGS = -g -DBG_DEBUG=1 -DDEBUG=1 INCLUDEFLAGS = -I$(AGREPDIR) -I$(TEMPLATEDIR)/include DEFINEFLAGS = -DHAVE_DIRENT_H=$(HAVE_DIRENT_H) -DHAVE_SYS_DIR_H=$(HAVE_SYS_DIR_H) -DHAVE_SYS_NDIR_H=$(HAVE_SYS_NDIR_H) -DHAVE_NDIR_H=$(HAVE_NDIR_H) \ -DUTIME=$(UTIME) -DISO_CHAR_SET=$(ISO_CHAR_SET) SUBDIRCFLAGS = -c $(DEFINEFLAGS) $(OPTIMIZEFLAGS) $(PROFILEFLAGS) $(DEBUGFLAGS) MYDEFINEFLAGS = -DMEASURE_TIMES=0 -DAGREP_POINTER=1 -DDOTCOMPRESSED=$(DOTCOMPRESSED) CFLAGS = $(MYDEFINEFLAGS) $(INCLUDEFLAGS) $(SUBDIRCFLAGS) SUBDIRLINKFLAGS = $(PROFILEFLAGS) LINKFLAGS = $(INCLUDEFLAGS) $(SUBDIRLINKFLAGS) OTHERLIBS = PROG = agrep NOTCPROG = notc$(PROG) all: $(NOTCPROG) cp $(PROG) $(BINDIR)/. LIB = $(LIBDIR)/lib$(PROG).a HDRS = agrep.h checkfile.h re.h defs.h config.h TCOMPLIBOBJ = \ $(TCOMPDIR)/hash.o \ $(TCOMPDIR)/string.o \ $(TCOMPDIR)/misc.o \ $(TCOMPDIR)/quick.o \ $(TCOMPDIR)/cast.o \ $(TCOMPDIR)/uncast.o \ $(TCOMPDIR)/tsimpletest.o \ $(TCOMPDIR)/tbuild.o\ $(TCOMPDIR)/tmemlook.o OBJS = \ follow.o \ asearch.o \ asearch1.o \ agrep.o \ bitap.o \ checkfile.o \ compat.o \ maskgen.o \ parse.o \ checksg.o \ preprocess.o \ delim.o \ asplit.o \ recursive.o \ sgrep.o \ newmgrep.o \ utilities.o $(PROG): $(OBJS) main.o $(LIBDIR)/lib$(TCOMP).a $(CC) -L$(LIBDIR) $(LINKFLAGS) -o $@ $(OBJS) main.o -l$(TCOMP) $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) $(TCOMPLIBOBJ) $(RANLIB) $(LIB) $(LIBDIR)/lib$(TCOMP).a: cd $(TCOMPDIR) ; $(MAKE) -f Makefile.solaris CC="$(CC)" SUBDIRCFLAGS="$(SUBDIRCFLAGS)" SUBDIRLINKFLAGS="$(SUBDIRLINKFLAGS)" SHELL="$(SHELL)" HAVE_DIRENT_H="$(HAVE_DIRENT_H)" HAVE_SYS_DIR_H="$(HAVE_SYS_DIR_H)" HAVE_SYS_NDIR_H="$(HAVE_SYS_NDIR_H)" HAVE_NDIR_H="$(HAVE_NDIR_H)" UTIME="$(UTIME)" STRUCTURED_QUERIES="$(STRUCTURED_QUERIES)" ISO_CHAR_SET="$(ISO_CHAR_SET)" SFS_COMPAT="$(SFS_COMPAT)" $(NOTCPROG): $(OBJS) dummyfilters.o main.o $(CC) 
$(LINKFLAGS) -o $(PROG) $(OBJS) dummyfilters.o main.o $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) dummyfilters.o $(RANLIB) $(LIB) clean: -rm -f $(LIB) $(OBJS) dummyfilters.o main.o core a.out $(PROG) compat.o: agrep.h defs.h config.h asearch.o: agrep.h defs.h config.h asearch1.o: agrep.h defs.h config.h bitap.o: agrep.h defs.h config.h checkfile.o: agrep.h checkfile.h defs.h config.h follow.o: re.h agrep.h defs.h config.h main.o: agrep.h checkfile.h defs.h config.h dummysyscalls.c agrep.o: agrep.h checkfile.h defs.h config.h newmgrep.o: agrep.h defs.h config.h maskgen.o: agrep.h defs.h config.h next.o: agrep.h defs.h config.h parse.o: re.h agrep.h defs.h config.h preprocess.o: agrep.h defs.h config.h checksg.o: agrep.h checkfile.h defs.h config.h delim.o: agrep.h defs.h config.h asplit.o: agrep.h defs.h config.h sgrep.o: agrep.h defs.h config.h abm.o: agrep.h defs.h config.h utilities.o: re.h agrep.h defs.h config.h dummyfilters.o: dummyfilters.c agrep-4.17/Makefile.sunos0100644001123100001460000001067207010116362013527 0ustar friurz# Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. # You might have to change these depending on your machine configuration. # AR and RANLIB are the library-archive programs. On Solaris, RANLIB is not # required (define it to true) and AR is in /usr/ccs/bin/ar (on our machine!). AR = ar #/usr/ccs/bin/ar #for Solaris RANLIB = ranlib #true #for Solaris # Define HAVE_DIRENT_H to be 1 when you don't have else define it to be 0 (in this case, one of the other 3 flags may need to be defined to be 1). HAVE_DIRENT_H = 1 HAVE_SYS_DIR_H = 0 HAVE_SYS_NDIR_H = 0 HAVE_NDIR_H = 0 # Define UTIME to be 1 if you have the utime() routine on your system. Else define it to be 0. UTIME = 1 # Define ISO_CHAR_SET to be 1 if you want to use the international 8bit character set. Else define it to be 0. ISO_CHAR_SET = 0 # You might have to change this depending on your machine configuration. 
CC = gcc SHELL = /bin/sh # YOU DON'T HAVE TO CHANGE ANYTHING BELOW THIS LINE # The binaries will be made in ../bin/. and the agrep library in ../lib # You normally don't have to change them. BINDIR = ../bin LIBDIR = ../lib TCOMP = cast TCOMPDIR = ../compress AGREPDIR = ../agrep TEMPLATEDIR = ../libtemplate # You can change the target to use the "cast" (compression) library by changing: # all: $(NOTCPROG) # to: # all: $(PROG) # You must also define DOTCOMPRESSED below to be 1 instead of 0. DOTCOMPRESSED = 0 # Include flags is not a part of CLFAGS and LINKFLAGS since path names from subdirs can be different OPTIMIZEFLAGS = -O #PROFILEFLAGS = -p #DEBUGFLAGS = -g -DBG_DEBUG=1 -DDEBUG=1 INCLUDEFLAGS = -I$(AGREPDIR) -I$(TEMPLATEDIR)/include DEFINEFLAGS = -DHAVE_DIRENT_H=$(HAVE_DIRENT_H) -DHAVE_SYS_DIR_H=$(HAVE_SYS_DIR_H) -DHAVE_SYS_NDIR_H=$(HAVE_SYS_NDIR_H) -DHAVE_NDIR_H=$(HAVE_NDIR_H) \ -DUTIME=$(UTIME) -DISO_CHAR_SET=$(ISO_CHAR_SET) SUBDIRCFLAGS = -c $(DEFINEFLAGS) $(OPTIMIZEFLAGS) $(PROFILEFLAGS) $(DEBUGFLAGS) MYDEFINEFLAGS = -DMEASURE_TIMES=0 -DAGREP_POINTER=1 -DDOTCOMPRESSED=$(DOTCOMPRESSED) CFLAGS = $(MYDEFINEFLAGS) $(INCLUDEFLAGS) $(SUBDIRCFLAGS) SUBDIRLINKFLAGS = $(PROFILEFLAGS) LINKFLAGS = $(INCLUDEFLAGS) $(SUBDIRLINKFLAGS) OTHERLIBS = PROG = agrep NOTCPROG = notc$(PROG) all: $(NOTCPROG) cp $(PROG) $(BINDIR)/. 
LIB = $(LIBDIR)/lib$(PROG).a HDRS = agrep.h checkfile.h re.h defs.h config.h TCOMPLIBOBJ = \ $(TCOMPDIR)/hash.o \ $(TCOMPDIR)/string.o \ $(TCOMPDIR)/misc.o \ $(TCOMPDIR)/quick.o \ $(TCOMPDIR)/cast.o \ $(TCOMPDIR)/uncast.o \ $(TCOMPDIR)/tsimpletest.o \ $(TCOMPDIR)/tbuild.o\ $(TCOMPDIR)/tmemlook.o OBJS = \ follow.o \ asearch.o \ asearch1.o \ agrep.o \ bitap.o \ checkfile.o \ compat.o \ maskgen.o \ parse.o \ checksg.o \ preprocess.o \ delim.o \ asplit.o \ recursive.o \ sgrep.o \ newmgrep.o \ utilities.o $(PROG): $(OBJS) main.o $(LIBDIR)/lib$(TCOMP).a $(CC) -L$(LIBDIR) $(LINKFLAGS) -o $@ $(OBJS) main.o -l$(TCOMP) $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) $(TCOMPLIBOBJ) $(RANLIB) $(LIB) $(LIBDIR)/lib$(TCOMP).a: cd $(TCOMPDIR) ; $(MAKE) -f Makefile.sunos CC="$(CC)" SUBDIRCFLAGS="$(SUBDIRCFLAGS)" SUBDIRLINKFLAGS="$(SUBDIRLINKFLAGS)" SHELL="$(SHELL)" HAVE_DIRENT_H="$(HAVE_DIRENT_H)" HAVE_SYS_DIR_H="$(HAVE_SYS_DIR_H)" HAVE_SYS_NDIR_H="$(HAVE_SYS_NDIR_H)" HAVE_NDIR_H="$(HAVE_NDIR_H)" UTIME="$(UTIME)" STRUCTURED_QUERIES="$(STRUCTURED_QUERIES)" ISO_CHAR_SET="$(ISO_CHAR_SET)" SFS_COMPAT="$(SFS_COMPAT)" $(NOTCPROG): $(OBJS) dummyfilters.o main.o $(CC) $(LINKFLAGS) -o $(PROG) $(OBJS) dummyfilters.o main.o $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) dummyfilters.o $(RANLIB) $(LIB) clean: -rm -f $(LIB) $(OBJS) dummyfilters.o main.o core a.out $(PROG) compat.o: agrep.h defs.h config.h asearch.o: agrep.h defs.h config.h asearch1.o: agrep.h defs.h config.h bitap.o: agrep.h defs.h config.h checkfile.o: agrep.h checkfile.h defs.h config.h follow.o: re.h agrep.h defs.h config.h main.o: agrep.h checkfile.h defs.h config.h dummysyscalls.c agrep.o: agrep.h checkfile.h defs.h config.h newmgrep.o: agrep.h defs.h config.h maskgen.o: agrep.h defs.h config.h next.o: agrep.h defs.h config.h parse.o: re.h agrep.h defs.h config.h preprocess.o: agrep.h defs.h config.h checksg.o: agrep.h checkfile.h defs.h config.h delim.o: agrep.h defs.h config.h asplit.o: agrep.h defs.h config.h sgrep.o: agrep.h defs.h 
config.h abm.o: agrep.h defs.h config.h utilities.o: re.h agrep.h defs.h config.h dummyfilters.o: dummyfilters.c agrep-4.17/README0100644001123100001460000001403307010116362011574 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ This is version 2.04 of agrep - a new tool for fast text searching allowing errors. agrep is similar to egrep (or grep or fgrep), but it is much more general (and usually faster). The main changes from version 1.1 are 1) incorporating Boyer-Moore type filtering to speed up search considerably, 2) allowing multi patterns via the -f option; this is similar to fgrep, but from our experience agrep is much faster, 3) searching for "best match" without having to specify the number of errors allowed, and 4) ascii is no longer required. Several more options were added. To compile, simply run make in the agrep directory after untar'ing the tar file (tar -xf agrep-2.04.tar will do it). The three most significant features of agrep that are not supported by the grep family are 1) the ability to search for approximate patterns; for example, "agrep -2 homogenos foo" will find homogeneous as well as any other word that can be obtained from homogenos with at most 2 substitutions, insertions, or deletions. "agrep -B homogenos foo" will generate a message of the form best match has 2 errors, there are 5 matches, output them? (y/n) 2) agrep is record oriented rather than just line oriented; a record is by default a line, but it can be user defined; for example, "agrep -d '^From ' 'pizza' mbox" outputs all mail messages that contain the keyword "pizza". Another example: "agrep -d '$$' pattern foo" will output all paragraphs (separated by an empty line) that contain pattern. 3) multiple patterns with AND (or OR) logic queries. For example, "agrep -d '^From ' 'burger,pizza' mbox" outputs all mail messages containing at least one of the two keywords (, stands for OR). 
"agrep -d '^From ' 'good;pizza' mbox" outputs all mail messages containing both keywords. Putting these options together one can ask queries like agrep -d '$$' -2 '<CACM>;TheAuthor;Curriculum;<198[5-9]>' bib which outputs all paragraphs referencing articles in CACM between 1985 and 1989 by TheAuthor dealing with curriculum. Two errors are allowed, but they cannot be in either CACM or the year (the <> brackets forbid errors in the pattern between them). Other features include searching for regular expressions (with or without errors), unlimited wild cards, limiting the errors to only insertions or only substitutions or any combination, allowing each deletion, for example, to be counted as, say, 2 substitutions or 3 insertions, restricting parts of the query to be exact and parts to be approximate, and many more. agrep is available by anonymous ftp from cs.arizona.edu (IP 192.12.69.5) as agrep/agrep-2.04.tar.Z (or in uncompressed form as agrep/agrep-2.04.tar). The tar file contains the source code (in C), man pages (agrep.1), and two additional files, agrep.algorithms and agrep.chronicle, giving more information. The agrep directory also includes two postscript files: agrep.ps.1 is a technical report from June 1991 describing the design and implementation of agrep; agrep.ps.2 is a copy of the paper as appeared in the 1992 Winter USENIX conference. Please mail bug reports (or any other comments) to sw@cs.arizona.edu or to udi@cs.arizona.edu. We would appreciate if users notify us (at the address above) of any extensions, improvements, or interesting uses of this software. January 17, 1992 BUGS_fixed/option_update 1. remove multiple definitions of some global variables. 2. fix a bug in -G option. 3. fix a bug in -w option. January 23, 1992 4. fix a bug in pipeline input. 5. make the definition of word-delimiter consistent. March 16, 1992 6. add option '-y' which, if specified with -B option, will always output the best-matches without a prompt. April 10, 1992 7.
fix a bug regarding exit status. April 15, 1992 ------------------------------------------------------------------------------- REVISIONS TO AGREP, FALL '93 8. Options can now be specified in a single group of characters after one '-'. - Sept 3rd 1993 9. Made agrep callable as a library routine from a separate function. The interface is: memagrep(argc, argv, searchbufferlen, searchbuffer), the pattern to be searched for and the options being specified EXACTLY as if they are being specified on the command line. The only difference is that instead of the file-names to look at, the user should specify a buffer and its length. Sample user programs are in ../user directory. In memagrep(), there are TWO peculiarities: 1. Peculiarity #1 -- at the end of the buffer, the user must have N bytes of valid virtual memory, where N is the length of the pattern to be searched. This space is used by agrep to speed up the checking of the termination condition. Its contents are restored before memagrep() returns -- however, some space must be there... else you'll get SIGSEGV. I might trap segv and do a longjmp, but that'll be in a new version! 2. The search buffer must begin with a newline so that it is easy for agrep to output matched lines. This also avoids some copying. Of course, if we copied the user's search buffer into another buffer which meets both the above conditions, memagrep() will no longer be fast -- and speed is the primary goal. - Sept 27th 1993 10. Added some filter-programs to make agrep search thru compressed files. Also added some features in the Makefile which allow the user to build an agrep with a dummyfilter so that agrep remains independent of tcompress. The definitions needed in agrep to interface with tcompress are in defs.h - Nov 10th 1993 11. Added a library interface for searching thru a specified set of files, fileagrep(), which is similar to memagrep(). This is used by glimpse. Had to modify some other things and fix some bugs (see CHANGES).
- Dec 1993 (coding), Jan 1993 (debugging). ------------------------------------------------------------------------------- CODING NOTE: sgrep.c and newmgrep.c use a similar while(fill_buf) loop with start and end, while others use loops with an internal variable i. agrep-4.17/agrep.10100644001123100001460000003107107010116362012075 0ustar friurz.TH AGREP l "Jan 17, 1992" .SH NAME agrep \- search a file for a string or regular expression, with approximate matching capabilities .SH SYNOPSIS .B agrep [ .B \-#cdehiklnpstvwxBDGIS ] .I pattern [ -f .I patternfile ] [ .IR filename ".\|.\|. ]" .SH DESCRIPTION .B agrep searches the input .IR filenames (standard input is the default, but see a warning under LIMITATIONS) for records containing strings which either \fIexactly\fP or \fIapproximately\fP match a pattern. A record is by default a line, but it can be defined differently using the -d option (see below). Normally, each record found is copied to the standard output. Approximate matching allows finding records that contain the pattern with several errors including substitutions, insertions, and deletions. For example, Massechusets matches Massachusetts with two errors (one substitution and one insertion). Running .B agrep -2 Massechusets foo outputs all lines in foo containing any string with at most 2 errors from Massechusets. .LP .B agrep supports many kinds of queries including arbitrary wild cards, sets of patterns, and in general, regular expressions. See PATTERNS below. It supports most of the options supported by the .B grep family plus several more (but it is not 100% compatible with grep). 
For more information on the algorithms used by agrep see Wu and Manber, "Fast Text Searching With Errors," Technical report #91-11, Department of Computer Science, University of Arizona, June 1991 (available by anonymous ftp from cs.arizona.edu in agrep/agrep.ps.1), and Wu and Manber, "Agrep -- A Fast Approximate Pattern Searching Tool", To appear in USENIX Conference 1992 January (available by anonymous ftp from cs.arizona.edu in agrep/agrep.ps.2). .LP As with the rest of the \fBgrep\fP family, the characters .RB ` $ ', .RB `^ ', .RB ` \(** ', .RB ` [ ' , .RB ` ] ' , .RB ` \s+2^\s0 ', .RB ` | ', .RB ` ( ', .RB ` ) ', .RB ` ! ', and .RB ` \e ' can cause unexpected results when included in the .IR pattern , as these characters are also meaningful to the shell. To avoid these problems, one should always enclose the entire pattern argument in single quotes, i.e., 'pattern'. Do not use double quotes ("). .LP When .B agrep is applied to more than one input file, the name of the file is displayed preceding each line which matches the pattern. The filename is not displayed when processing a single file, so if you actually want the filename to appear, use .B /dev/null as a second file in the list. .SH OPTIONS .TP .B \-\fI#\fP \fI#\fP is a non-negative integer (at most 8) specifying the maximum number of errors permitted in finding the approximate matches (defaults to zero). Generally, each insertion, deletion, or substitution counts as one error. It is possible to adjust the relative cost of insertions, deletions and substitutions (see -I -D and -S options). .TP .B \-c Display only the count of matching records. .TP .B \-d "'\fIdelim\fP'" Define \fIdelim\fP to be the separator between two records. The default value is '$', namely a record is by default a line. \fIdelim\fP can be a string of size at most 8 (with possible use of ^ and $), but not a regular expression. 
Text between two \fIdelim\fP's, before the first \fIdelim\fP, and after the last \fIdelim\fP is considered as one record. For example, -d '$$' defines paragraphs as records and -d '^From\ ' defines mail messages as records. .B agrep matches each record separately. This option does not currently work with regular expressions. .TP .BI \-e " pattern" Same as a simple .I pattern argument, but useful when the .I pattern begins with a .RB ` \- '. .TP .BI \-f " patternfile" .I patternfile contains a set of (simple) patterns. The output is all lines that match at least one of the patterns in .I patternfile. Currently, the \-f option works only for exact match and for simple patterns (any meta symbol is interpreted as a regular character); it is compatible only with \-c, \-h, \-i, \-l, \-s, \-v, \-w, and \-x options. see LIMITATIONS for size bounds. .TP .B \-h Do not display filenames. .TP .B \-i Case-insensitive search \(em e.g., "A" and "a" are considered equivalent. .TP .B \-k No symbol in the pattern is treated as a meta character. For example, agrep -k 'a(b|c)*d' foo will find the occurrences of a(b|c)*d in foo whereas agrep 'a(b|c)*d' foo will find substrings in foo that match the regular expression 'a(b|c)*d'. .TP .B \-l List only the files that contain a match. This option is useful for looking for files containing a certain pattern. For example, " agrep -l 'wonderful' * " will list the names of those files in current directory that contain the word 'wonderful'. .TP .B \-n Each line that is printed is prefixed by its record number in the file. .TP .B \-p Find records in the text that contain a supersequence of the pattern. For example, \fB agrep \-p DCS foo will match "Department of Computer Science." .TP .B \-s Work silently, that is, display nothing except error messages. This is useful for checking the error status. .TP .B \-t Output the record starting from the end of .I delim to (and including) the next .I delim. 
This is useful for cases where .I delim should come at the end of the record. .TP .B \-v Inverse mode \(em display only those records that .I do not contain the pattern. .TP .B \-w Search for the pattern as a word \(em i.e., surrounded by non-alphanumeric characters. The non-alphanumeric .B must surround the match; they cannot be counted as errors. For example, .B agrep -w -1 car will match cars, but not characters. .TP .B \-x The pattern must match the whole line. .TP .B \-y Used with \-B option. When \-y is on, agrep will always output the best matches without giving a prompt. .TP .B \-B Best match mode. When \-B is specified and no exact matches are found, agrep will continue to search until the closest matches (i.e., the ones with minimum number of errors) are found, at which point the following message will be shown: "the best match contains x errors, there are y matches, output them? (y/n)" The best match mode is not supported for standard input, e.g., pipeline input. When the \-#, \-c, or \-l options are specified, the \-B option is ignored. In general, \-B may be slower than \-#, but not by very much. .TP .B \-D\fIk\fP Set the cost of a deletion to \fIk\fP (\fIk\fP is a positive integer). This option does not currently work with regular expressions. .TP .B \-G Output the files that contain a match. .TP .B \-I\fIk\fP Set the cost of an insertion to \fIk\fP (\fIk\fP is a positive integer). This option does not currently work with regular expressions. .TP .B \-S\fIk\fP Set the cost of a substitution to \fIk\fP (\fIk\fP is a positive integer). This option does not currently work with regular expressions. .ne 4 .SH PATTERNS .LP \fIagrep\fP supports a large variety of patterns, including simple strings, strings with classes of characters, sets of strings, wild cards, and regular expressions. .TP \fBStrings\fP any sequence of characters, including the special symbols `^' for beginning of line and `$' for end of line. 
The special characters listed above ( .RB ` $ ', .RB `^ ', .RB ` \(** ', .RB ` [ ' , .RB ` \s+2^\s0 ', .RB ` | ', .RB ` ( ', .RB ` ) ', .RB ` ! ', and .RB ` \e ' ) should be preceded by `\\' if they are to be matched as regular characters. For example, \\^abc\\\\ corresponds to the string ^abc\\, whereas ^abc corresponds to the string abc at the beginning of a line. .TP \fBClasses of characters\fP a list of characters inside [] (in order) corresponds to any character from the list. For example, [a-ho-z] is any character between a and h or between o and z. The symbol `^' inside [] complements the list. For example, [^i-n] denotes any character in the character set except character 'i' to 'n'. The symbol `^' thus has two meanings, but this is consistent with egrep. The symbol `.' (don't care) stands for any symbol (except for the newline symbol). .TP \fBBoolean operations\fP .B agrep supports an `and' operation `;' and an `or' operation `,', but not a combination of both. For example, 'fast;network' searches for all records containing both words. .TP \fBWild cards\fP The symbol '#' is used to denote a wild card. # matches zero or any number of arbitrary characters. For example, ex#e matches example. The symbol # is equivalent to .* in egrep. In fact, .* will work too, because it is a valid regular expression (see below), but unless this is part of an actual regular expression, # will work faster. .TP \fBCombination of exact and approximate matching\fP any pattern inside angle brackets <> must match the text exactly even if the match is with errors. For example, <mathemat>ics matches mathematical with one error (replacing the last s with an a), but mathe<matics> does not match mathematical no matter how many errors we allow. .TP \fBRegular expressions\fP The syntax of regular expressions in \fBagrep\fP is in general the same as that for \fBegrep\fP. The union operation `|', Kleene closure `*', and parentheses () are all supported. Currently '+' is not supported.
Regular expressions are currently limited to approximately 30 characters (generally excluding meta characters). Some options (\-d, \-w, \-f, \-t, \-x, \-D, \-I, \-S) do not currently work with regular expressions. The maximal number of errors for regular expressions that use '*' or '|' is 4. .SH EXAMPLES .LP .TP agrep -2 -c ABCDEFG foo gives the number of lines in file foo that contain ABCDEFG within two errors. .TP agrep -1 -D2 -S2 'ABCD#YZ' foo outputs the lines containing ABCD followed, within arbitrary distance, by YZ, with up to one additional insertion (-D2 and -S2 make deletions and substitutions too "expensive"). .TP agrep -5 -p abcdefghij /usr/dict/words outputs the list of all words containing at least 5 of the first 10 letters of the alphabet \fIin order\fR. (Try it: any list starting with academia and ending with sacrilegious must mean something!) .TP agrep -1 'abc[0-9](de|fg)*[x-z]' foo outputs the lines containing, within up to one error, the string that starts with abc followed by one digit, followed by zero or more repetitions of either de or fg, followed by either x, y, or z. .TP agrep -d '^From\ ' 'breakdown;internet' mbox outputs all mail messages (the pattern '^From\ ' separates mail messages in a mail file) that contain keywords 'breakdown' and 'internet'. .TP agrep -d '$$' -1 '<word1> <word2>' foo finds all paragraphs that contain word1 followed by word2 with one error in place of the blank. In particular, if word1 is the last word in a line and word2 is the first word in the next line, then the space will be substituted by a newline symbol and it will match. Thus, this is a way to overcome separation by a newline. Note that -d '$$' (or another delim which spans more than one line) is necessary, because otherwise agrep searches only one line at a time. .TP agrep '^agrep' outputs all the examples of the use of agrep in this man page. .PD .SH "SEE ALSO" .BR ed (1), .BR ex (1), .BR grep (1V), .BR sh (1), .BR csh (1).
.SH BUGS/LIMITATIONS Any bug reports or comments will be appreciated! Please mail them to sw@cs.arizona.edu or udi@cs.arizona.edu .LP Regular expressions do not support the '+' operator (match 1 or more instances of the preceding token). These can be searched for by using this syntax in the pattern: .sp .in 1.0i \&'\fIpattern\fB(\fIpattern\fB)*\fR' .in .sp (search for strings containing one instance of the pattern, followed by 0 or more instances of the pattern). .LP The following can cause an infinite loop: .B agrep pattern * > output_file. If the number of matches is high, they may be deposited in output_file before it is completely read leading to more matches of the pattern within output_file (the matches are against the whole directory). It's not clear whether this is a "bug" (grep will do the same), but be warned. .LP The maximum size of the .I patternfile is limited to be 250Kb, and the maximum number of patterns is limited to be 30,000. .LP Standard input is the default if no input file is given. However, if standard input is keyed in directly (as opposed to through a pipe, for example) agrep may not work for some non-simple patterns. .LP There is no size limit for simple patterns. More complicated patterns are currently limited to approximately 30 characters. Lines are limited to 1024 characters. Records are limited to 48K, and may be truncated if they are larger than that. The limit of record length can be changed by modifying the parameter Max_record in agrep.h. .SH DIAGNOSTICS Exit status is 0 if any matches are found, 1 if none, 2 for syntax errors or inaccessible files. .SH AUTHORS Sun Wu and Udi Manber, Department of Computer Science, University of Arizona, Tucson, AZ 85721. {sw|udi}@cs.arizona.edu. agrep-4.17/agrep.algorithms0100644001123100001460000000511007010116362014101 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ The implementation of agrep includes the following algorithms. 
Except for exact matching of simple patterns, for which we use a simple variation of the Boyer-Moore algorithm, all the algorithms (listed below) were designed by Sun Wu and Udi Manber. 1. bitap: The most general algorithm inside agrep. It supports many extensions such as approximate regular expression pattern matching, non-uniform costs, simultaneous matching of multiple patterns, mixed exact/approximate matching, etc. The algorithm is described in agrep.ps.1. 2. mgrep: A sub-linear expected-time algorithm for matching a set of patterns. It assumes that the set of patterns contains k patterns, and that the shortest pattern is of size m. See agrep.ps.2 for a brief description of the algorithm. 3. amonkey: a Boyer-Moore style algorithm for approximate pattern matching. Let b = log_c (2*m), where c is the size of the alphabet set. In the preprocessing, a table is built to determine whether a given substring of size b is in the pattern. Suppose we are looking for matches with at most k errors. The search is done in two passes. In the first pass (the filtering pass), the areas in the text that have a possibility to contain the matches are marked. The second pass finds the matches in those marked areas. The search in the first pass is done in the following way. Suppose the end position of the pattern is currently aligned with position tx in the text. The algorithm scans backward from tx until either (k+1) blocks that do not occur in the pattern have been scanned, or the scan has passed position (tx-m+k). In the former case, the pattern is shifted forward to align the beginning position of the pattern with one character after the position in the text where the scan was stopped. In the latter case, we mark tx-m to tx+m as a candidate area. 4. mmonkey: Combining the mgrep algorithm with a partition technique, we have an algorithm with the same time complexity as amonkey. For ASCII text and pattern, this algorithm is faster than amonkey.
The principle of the partition technique is as follows. Let A and B be two strings of size m. If we partition A into (k+1) blocks, then the distance between A and B is > k if none of the blocks of A occur in B. This implies that to match A with no more than k errors, B has to contain a substring that matches exactly one block of A. A brief description can be found in agrep.ps.2. agrep-4.17/agrep.c0100644001123100001460000036036407614570046012206 0ustar friurz /* * bgopal: (1993-4) added a library interface and removed some bugs: also * selectively modified many routines to work with our text-compression algo. */ #include #include "agrep.h" #include "checkfile.h" #include #define PRINT(s) extern char **environ; extern int errno; int pattern_index; /* index in argv where the pattern is */ int glimpse_isserver=0; /* so that there is no user interaction */ int glimpse_call = 0; /* So that usage message is not printed twice */ int glimpse_clientdied=0;/* to quit search if glimpseserver's client dies */ int agrep_initialfd; /* Where does input come from? File/Memory? */ CHAR *agrep_inbuffer; int agrep_inlen; int agrep_inpointer; FILE *agrep_finalfp; /* Where does output go to? File/Memory? */ CHAR *agrep_outbuffer; int agrep_outlen; int agrep_outpointer; int execfd; /* used by exec called within agrep_search, set in agrep_init */ int multifd = -1; /* fd for multipattern search used in ^^ , set in ^^^^^^^^ */ extern char *pat_spool; #if DOTCOMPRESSED extern char *tc_pat_spool; #endif /* DOTCOMPRESSED */ char *multibuf=NULL; /* buffer to put the multiple patterns in */ int multilen = 0; /* length of the multibuf: not the #of multi-patterns! 
*/ extern int pos_cnt; /* to re-initialize it to 0 for reg-exp search */ unsigned Mask[MAXSYM]; unsigned Init1, NO_ERR_MASK, Init[MaxError]; unsigned Bit[WORD+1]; CHAR buffer[BlockSize+Maxline+1]; /* should not be used anywhere: 10/18/93 */ unsigned Next[MaxNext], Next1[MaxNext]; unsigned wildmask, endposition, D_endpos; int LIMITOUTPUT; /* maximum number of matches we are going to allow */ int LIMITPERFILE; /* maximum number of matches per file we are going to allow */ int LIMITTOTALFILE; /* maximum number of files we are going to allow */ int EXITONERROR; /* return -1 or exit on error? */ int REGEX, FASTREGEX, RE_ERR, FNAME, WHOLELINE, SIMPLEPATTERN; int COUNT, HEAD, TAIL, LINENUM, INVERSE, I, S, DD, AND, SGREP, JUMP; int NOOUTPUTZERO; int Num_Pat, PSIZE, prev_num_of_matched, num_of_matched, files_matched, SILENT, NOPROMPT, BESTMATCH, NOUPPER; int NOMATCH, TRUNCATE, FIRST_IN_RE, FIRSTOUTPUT; int WORDBOUND, DELIMITER, D_length, tc_D_length, original_D_length; int EATFIRST, OUTTAIL; int BYTECOUNT; int PRINTOFFSET; int PRINTRECORD; int PRINTNONEXISTENTFILE; int FILEOUT; int DNA; int APPROX; int PAT_FILE; /* multiple patterns from a given file */ char PAT_FILE_NAME[MAX_LINE_LEN]; int PAT_BUFFER; /* multiple patterns from a given buffer */ int CONSTANT; int RECURSIVE; int total_line; /* used in mgrep */ int D; int M; int TCOMPRESSED; int EASYSEARCH; /* 1 used only for compressed files: LITTLE/BIG */ int ALWAYSFILENAME = OFF; int POST_FILTER = OFF; int NEW_FILE = OFF; /* only when post-filter is used */ int PRINTFILENUMBER = OFF; int PRINTFILETIME = OFF; int PRINTPATTERN = OFF; int MULTI_OUTPUT = OFF; /* should mgrep print the matched line multiple times for each matched pattern or just once? 
*/ /* invisible to the user, used only by glimpse: cannot use -l since it is incompatible with stdin and -A is used for the index search (done next) */ /* Stuff to handle complicated boolean patterns */ int AComplexBoolean = 0; ParseTree *AParse = NULL; int anum_terminals = 0; ParseTree aterminals[MAXNUM_PAT]; char amatched_terminals[MAXNUM_PAT]; char aduplicates[MAXNUM_PAT][MAXNUM_PAT]; /* tells what other patterns are exactly equal to the i-th one */ char tc_aduplicates[MAXNUM_PAT][MAXNUM_PAT]; /* tells what other patterns are exactly equal to the i-th one */ #if MEASURE_TIMES /* timing variables */ int OUTFILTER_ms; int FILTERALGO_ms; int INFILTER_ms; #endif /*MEASURE_TIMES*/ CHAR **Textfiles = NULL; /* array of filenames to be searched */ int Numfiles = 0; /* indicates how many files in Textfiles */ int copied_from_argv = 0; /* were filenames copied from argv (should I free 'em)? */ CHAR old_D_pat[MaxDelimit * 2] = "\n"; /* to hold original D_pattern */ CHAR original_old_D_pat[MaxDelimit * 2] = "\n"; CHAR Pattern[MAXPAT], OldPattern[MAXPAT]; CHAR CurrentFileName[MAX_LINE_LEN]; long CurrentFileTime; int SetCurrentFileName = 0; /* dirty glimpse trick to make filters work: output seems to come from another file */ int SetCurrentFileTime = 0; /* dirty glimpse trick to avoid doing a stat to find the time */ int CurrentByteOffset; int SetCurrentByteOffset = 0; CHAR Progname[MAXNAME]; CHAR D_pattern[MaxDelimit * 2] = "\n; "; /* string which delimits records -- defaults to newline */ CHAR tc_D_pattern[MaxDelimit * 2] = "\n"; CHAR original_D_pattern[MaxDelimit * 2] = "\n; "; char COMP_DIR[MAX_LINE_LEN]; char FREQ_FILE[MAX_LINE_LEN], HASH_FILE[MAX_LINE_LEN], STRING_FILE[MAX_LINE_LEN]; /* interfacing with tcompress */ int NOFILENAME, /* Boolean flag, set for -h option */ FILENAMEONLY;/* Boolean flag, set for -l option */ extern int init(); int table[WORD][WORD]; CHAR *agrep_saved_pattern = NULL; /* to prevent multiple prepfs for each boolean search: crd@hplb.hpl.hp.com */ 
long aget_file_time(stbuf, name) struct stat *stbuf; char *name; { long ret = 0; struct stat mystbuf; if (stbuf != NULL) ret = stbuf->st_mtime; else { if (my_stat(name, &mystbuf) == -1) ret = 0; else ret = mystbuf.st_mtime; } return ret; } char * aprint_file_time(thetime) time_t thetime; { #if 0 char s[256], s1[16], s2[16], s3[16], s4[16], s5[16]; static char buffer[256]; strcpy(s, ctime(&thetime)); /* of the form: Sun Sep 16 01:03:52 1973\n\0 */ s[strlen(s) - 1] = '\0'; sscanf(s, "%s%s%s%s%s", s1, s2, s3, s4, s5); sprintf(buffer, ": %s %s %s", s2, s3, s5); /* ditch Sun 01:03:52 */ #else static char buffer[256]; buffer[0] = ':'; buffer[1] = ' '; strftime(&buffer[2], 256, "%h %e %Y", gmtime(&thetime)); #endif return &buffer[0]; } /* Called when multipattern search and pattern has not changed */ void reinit_value_partial() { num_of_matched = prev_num_of_matched = 0; errno = 0; FIRST_IN_RE = ON; } /* This must be called before every agrep_search to reset agrep globals */ void reinit_value() { int i, j; /* Added on 7th Oct 194 */ if (AParse) { if (AComplexBoolean) destroy_tree(AParse); AComplexBoolean = 0; AParse = 0; PAT_BUFFER = 0; if (multibuf != NULL) free(multibuf); /* this was allocated for arbit booleans, not multipattern search */ multibuf = NULL; multilen = 0; /* Cannot free multifd here since that is always allocated for multipattern search */ } for (i=0; i 0 ; i--) Bit[i] = Bit[i+1] << 1; for (i=0; i< MAXSYM; i++) Mask[i] = 0; /* bg: new things added on Mar 13 94 */ Init1 = 0; NO_ERR_MASK = 0; memset(Init, '\0', MaxError * sizeof(unsigned)); memset(Next, '\0', MaxNext * sizeof(unsigned)); memset(Next1, '\0', MaxNext * sizeof(unsigned)); wildmask = endposition = D_endpos = 0; for (i=0; i 0 && j < 10) { V[i] = V[i] | Bit[base + table[i][j++]]; } } Bit[base]=temp; if(M <= SHORTREG) { k = exponen(M); pp = 2*k; for(i=k; i>1); for(j=M; j>=1; j--) { if(n & Bit[WORD]) Next[i] = Next[i] | V[j]; n = (n>>1); } } return; } if(M > MAXREG) fprintf(stderr, "%s: regular 
expression too long\n", Progname); MM = M; if(M & 1) M=M+1; k = exponen(M/2); pp = 2*k; mid = MM/2; for(i=k; i>1); for(j=MM; j>mid ; j--) { if(n & Bit[WORD]) Next[i] = Next[i] | V[j-mid]; n = (n>>1); } n=i-k; Next1[i-k] = 0; for(j = 0; j>1); } } return; } int exponen(m) int m; { int i, ex; ex= 1; for (i=0; i 30) { fprintf(stderr, "%s: regular expression too long\n", Progname); if (!EXITONERROR){ errno = AGREP_ERROR; return -1; } else exit(2); } base = WORD - M; hh = M/2; for(i=WORD, j=0; j < hh ; i--, j++) LL = LL | Bit[i]; if(FIRST_IN_RE) compute_next(M, Next, Next1); /*SUN: try: change to memory allocation */ FIRST_IN_RE = 0; Newline = '\n'; Init[0] = Bit[base]; if(HEAD) Init[0] = Init[0] | Bit[base+1]; for(i=1; i<= D; i++) Init[i] = Init[i-1] | Next[Init[i-1]>>hh] | Next1[Init[i-1]&LL]; Init1 = Init[0] | 1; Init0 = Init[0]; r2 = r3 = Init[0]; for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; } if ( D == 0 ) { #if AGREP_POINTER if (Text != -1) { #endif /*AGREP_POINTER*/ alloc_buf(Text, &buffer, BlockSize+Maxline+1); while ((num_read = fill_buf(Text, buffer + Maxline, BlockSize)) > 0) { i=Maxline; end = num_read + Maxline; #if 0 /* pab: Don't do this here; it's done in bitap.fill_buf, * where we can handle eof on a block boundary right */ if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end++] = '\n'; #endif /* 0 */ if(FIRST_LOOP) { /* if first time in the loop add a newline */ buffer[i-1] = '\n'; /* in front the text. 
*/ i--; CurrentByteOffset --; FIRST_LOOP = 0; } /* RE1_PROCESS_WHEN_DZERO: the while-loop below */ while ( i < end ) { c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; if(c != Newline) { if(CMask != 0) { r1 = Init1 & r3; r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1; } else { r2 = r3 & Init1; } } else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & r3; /* match against endofline */ r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1; if(TAIL) r2 = (Next[r2>>hh] | Next1[r2&LL]) | r2; /* epsilon move */ if(( r2 & 1 ) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } r3 = Init0; r2 = (Next[r3>>hh] | Next1[r3&LL]) & CMask | Init0; /* match begin of line */ if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; if(c != Newline) { if(CMask != 0) { r1 = Init1 
& r2; r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1; } else r3 = r2 & Init1; } /* if(NOT Newline) */ else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & r2; /* match against endofline */ r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1; if(TAIL) r3 = ( Next[r3>>hh] | Next1[r3&LL] ) | r3; /* epsilon move */ if(( r3 & 1 ) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } r2 = Init0; r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | Init0; /* match begin of line */ if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } } /* while i < end ... */ strncpy(buffer, buffer+num_read, Maxline); } /* end while fill_buf()... 
*/ free_buf(Text, buffer); return 0; #if AGREP_POINTER } else { /* within the memory buffer: assume it starts with a newline at position 0, the actual pattern follows that, and it ends with a '\n' */ num_read = agrep_inlen; buffer = (CHAR *)agrep_inbuffer; end = num_read; /* buffer[end-1] = '\n';*/ /* at end of the text. */ /* buffer[0] = '\n';*/ /* in front of the text. */ i = 0; /* An exact copy of the above RE1_PROCESS_WHEN_DZERO: the while-loop below */ while ( i < end ) { c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; if(c != Newline) { if(CMask != 0) { r1 = Init1 & r3; r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1; } else { r2 = r3 & Init1; } } else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & r3; /* match against endofline */ r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1; if(TAIL) r2 = (Next[r2>>hh] | Next1[r2&LL]) | r2; /* epsilon move */ if(( r2 & 1 ) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= 
num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } r3 = Init0; r2 = (Next[r3>>hh] | Next1[r3&LL]) & CMask | Init0; /* match begin of line */ if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; if(c != Newline) { if(CMask != 0) { r1 = Init1 & r2; r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1; } else r3 = r2 & Init1; } /* if(NOT Newline) */ else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & r2; /* match against endofline */ r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1; if(TAIL) r3 = ( Next[r3>>hh] | Next1[r3&LL] ) | r3; /* epsilon move */ if(( r3 & 1 ) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } r2 = Init0; r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | Init0; /* match begin of line */ if (DELIMITER) 
CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } } /* while i < end ... */ return 0; } #endif /*AGREP_POINTER*/ } /* end if (D == 0) */ #if AGREP_POINTER if (Text != -1) { #endif /*AGREP_POINTER*/ while ((num_read = fill_buf(Text, buffer + Maxline, BlockSize)) > 0) { i=Maxline; end = Maxline + num_read; #if 0 /* pab: Don't do this here; it's done in bitap.fill_buf, * where we can handle eof on a block boundary right */ if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end++] = '\n'; #endif /* 0 */ if(FIRST_TIME) { /* if first time in the loop add a newline */ buffer[i-1] = '\n'; /* in front the text. */ i--; CurrentByteOffset --; FIRST_TIME = 0; } /* RE1_PROCESS_WHEN_DNOTZERO: the while loop below */ while (i < end ) { c = buffer[i]; CMask = Mask[c]; if(c != Newline) { if(CMask != 0) { r2 = B[0]; r1 = Init1 & r2; A[0] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1; r3 = B[1]; r1 = Init1 & r3; r0 = r2 | A[0]; /* A[0] | B[0] */ A[1] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | (( r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 1) goto Nextcharfile; r2 = B[2]; r1 = Init1 & r2; r0 = r3 | A[1]; A[2] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 2) goto Nextcharfile; r3 = B[3]; r1 = Init1 & r3; r0 = r2 | A[2]; A[3] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 3) goto Nextcharfile; r2 = B[4]; r1 = Init1 & r2; r0 = r3 | A[3]; A[4] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 4) goto Nextcharfile; } /* if(CMask) */ else { r2 = B[0]; A[0] = r2 & Init1; r3 = B[1]; r1 = Init1 & r3; r0 = r2 | A[0]; A[1] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 1) goto Nextcharfile; r2 = B[2]; r1 = Init1 & r2; r0 = r3 | A[1]; A[2] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 2) goto Nextcharfile; r3 = B[3]; r1 = Init1 & r3; r0 = r2 | A[2]; A[3] 
= ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 3) goto Nextcharfile; r2 = B[4]; r1 = Init1 & r2; r0 = r3 | A[3]; A[4] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 4) goto Nextcharfile; } } else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & B[D]; /* match against endofline */ A[D] = ((Next[B[D]>>hh] | Next1[B[D]&LL]) & CMask) | r1; if(TAIL) A[D] = ( Next[A[D]>>hh] | Next1[A[D]&LL] ) | A[D]; /* epsilon move */ if(( A[D] & 1 ) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } for(k=0; k<=D; k++) B[k] = Init[0]; r1 = Init1 & B[0]; A[0] = (( Next[B[0]>>hh] | Next1[B[0]&LL]) & CMask) | r1; for(k=1; k<=D; k++) { r3 = B[k]; r1 = Init1 & r3; r2 = A[k-1] | B[k-1]; A[k] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((B[k-1] | Next[r2>>hh] | Next1[r2&LL]) & r_NO_ERR) | r1; } if (DELIMITER) CurrentByteOffset += 1*D_length; 
else CurrentByteOffset += 1*1; } Nextcharfile: i=i+1; CurrentByteOffset ++; c = buffer[i]; CMask = Mask[c]; if(c != Newline) { if(CMask != 0) { r2 = A[0]; r1 = Init1 & r2; B[0] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1; r3 = A[1]; r1 = Init1 & r3; r0 = B[0] | r2; B[1] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL]) & r_NO_ERR) | r1 ; if(D == 1) goto Nextchar1file; r2 = A[2]; r1 = Init1 & r2; r0 = B[1] | r3; B[2] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 2) goto Nextchar1file; r3 = A[3]; r1 = Init1 & r3; r0 = B[2] | r2; B[3] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 3) goto Nextchar1file; r2 = A[4]; r1 = Init1 & r2; r0 = B[3] | r3; B[4] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 4) goto Nextchar1file; } /* if(CMask) */ else { r2 = A[0]; B[0] = r2 & Init1; r3 = A[1]; r1 = Init1 & r3; r0 = B[0] | r2; B[1] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 1) goto Nextchar1file; r2 = A[2]; r1 = Init1 & r2; r0 = B[1] | r3; B[2] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 2) goto Nextchar1file; r3 = A[3]; r1 = Init1 & r3; r0 = B[2] | r2; B[3] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 3) goto Nextchar1file; r2 = A[4]; r1 = Init1 & r2; r0 = B[3] | r3; B[4] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 4) goto Nextchar1file; } } /* if(NOT Newline) */ else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & A[D]; /* match against endofline */ B[D] = ((Next[A[D]>>hh] | Next1[A[D]&LL]) & CMask) | r1; if(TAIL) B[D] = ( Next[B[D]>>hh] | Next1[B[D]&LL] ) | B[D]; /* epsilon move */ if(( B[D] & 1 ) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); 
else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } for(k=0; k<=D; k++) A[k] = Init0; r1 = Init1 & A[0]; B[0] = ((Next[A[0]>>hh] | Next1[A[0]&LL]) & CMask) | r1; for(k=1; k<=D; k++) { r3 = A[k]; r1 = Init1 & r3; r2 = A[k-1] | B[k-1]; B[k] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((A[k-1] | Next[r2>>hh] | Next1[r2&LL]) & r_NO_ERR) | r1; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } Nextchar1file: i=i+1; CurrentByteOffset ++; } /* while i < end */ strncpy(buffer, buffer+num_read, Maxline); } /* while fill_buf... */ free_buf(Text, buffer); return 0; #if AGREP_POINTER } else { /* within the memory buffer: assume it starts with a newline at position 0, the actual pattern follows that, and it ends with a '\n' */ num_read = agrep_inlen; buffer = (CHAR *)agrep_inbuffer; end = num_read; /* buffer[end-1] = '\n';*/ /* at end of the text. */ /* buffer[0] = '\n';*/ /* in front of the text. 
*/ i = 0; /* An exact copy of the above RE1_PROCESS_WHEN_DNOTZERO: the while loop below */ while (i < end ) { c = buffer[i]; CMask = Mask[c]; if(c != Newline) { if(CMask != 0) { r2 = B[0]; r1 = Init1 & r2; A[0] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1; r3 = B[1]; r1 = Init1 & r3; r0 = r2 | A[0]; /* A[0] | B[0] */ A[1] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | (( r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 1) goto Nextcharmem; r2 = B[2]; r1 = Init1 & r2; r0 = r3 | A[1]; A[2] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 2) goto Nextcharmem; r3 = B[3]; r1 = Init1 & r3; r0 = r2 | A[2]; A[3] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 3) goto Nextcharmem; r2 = B[4]; r1 = Init1 & r2; r0 = r3 | A[3]; A[4] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 4) goto Nextcharmem; } /* if(CMask) */ else { r2 = B[0]; A[0] = r2 & Init1; r3 = B[1]; r1 = Init1 & r3; r0 = r2 | A[0]; A[1] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 1) goto Nextcharmem; r2 = B[2]; r1 = Init1 & r2; r0 = r3 | A[1]; A[2] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 2) goto Nextcharmem; r3 = B[3]; r1 = Init1 & r3; r0 = r2 | A[2]; A[3] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 3) goto Nextcharmem; r2 = B[4]; r1 = Init1 & r2; r0 = r3 | A[3]; A[4] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 4) goto Nextcharmem; } } else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & B[D]; /* match against endofline */ A[D] = ((Next[B[D]>>hh] | Next1[B[D]&LL]) & CMask) | r1; if(TAIL) A[D] = ( Next[A[D]>>hh] | Next1[A[D]&LL] ) | A[D]; /* epsilon move */ if(( A[D] & 1 ) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", 
CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } for(k=0; k<=D; k++) B[k] = Init[0]; r1 = Init1 & B[0]; A[0] = (( Next[B[0]>>hh] | Next1[B[0]&LL]) & CMask) | r1; for(k=1; k<=D; k++) { r3 = B[k]; r1 = Init1 & r3; r2 = A[k-1] | B[k-1]; A[k] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((B[k-1] | Next[r2>>hh] | Next1[r2&LL]) & r_NO_ERR) | r1; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } Nextcharmem: i=i+1; CurrentByteOffset ++; c = buffer[i]; CMask = Mask[c]; if(c != Newline) { if(CMask != 0) { r2 = A[0]; r1 = Init1 & r2; B[0] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1; r3 = A[1]; r1 = Init1 & r3; r0 = B[0] | r2; B[1] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL]) & r_NO_ERR) | r1 ; if(D == 1) goto Nextchar1mem; r2 = A[2]; r1 = Init1 & r2; r0 = B[1] | r3; B[2] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 2) goto Nextchar1mem; r3 = A[3]; r1 = Init1 & r3; r0 = B[2] | r2; B[3] = ((Next[r3>>hh] | 
Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 3) goto Nextchar1mem; r2 = A[4]; r1 = Init1 & r2; r0 = B[3] | r3; B[4] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 4) goto Nextchar1mem; } /* if(CMask) */ else { r2 = A[0]; B[0] = r2 & Init1; r3 = A[1]; r1 = Init1 & r3; r0 = B[0] | r2; B[1] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 1) goto Nextchar1mem; r2 = A[2]; r1 = Init1 & r2; r0 = B[1] | r3; B[2] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 2) goto Nextchar1mem; r3 = A[3]; r1 = Init1 & r3; r0 = B[2] | r2; B[3] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 3) goto Nextchar1mem; r2 = A[4]; r1 = Init1 & r2; r0 = B[3] | r3; B[4] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ; if(D == 4) goto Nextchar1mem; } } /* if(NOT Newline) */ else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & A[D]; /* match against endofline */ B[D] = ((Next[A[D]>>hh] | Next1[A[D]&LL]) & CMask) | r1; if(TAIL) B[D] = ( Next[B[D]>>hh] | Next1[B[D]&LL] ) | B[D]; /* epsilon move */ if(( B[D] & 1 ) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else 
agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } for(k=0; k<=D; k++) A[k] = Init0; r1 = Init1 & A[0]; B[0] = ((Next[A[0]>>hh] | Next1[A[0]&LL]) & CMask) | r1; for(k=1; k<=D; k++) { r3 = A[k]; r1 = Init1 & r3; r2 = A[k-1] | B[k-1]; B[k] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((A[k-1] | Next[r2>>hh] | Next1[r2&LL]) & r_NO_ERR) | r1; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } Nextchar1mem: i=i+1; CurrentByteOffset ++; } /* while i < end */ return 0; } #endif /*AGREP_POINTER*/ } /* re1 */ int re(Text, M, D) int Text, M, D; { register unsigned i, c, r1, r2, r3, CMask, k, Newline, Init0, Init1, end; register unsigned r_even, r_odd, r_NO_ERR ; unsigned RMask[MAXSYM]; unsigned A[MaxRerror+1], B[MaxRerror+1]; int num_read, j=0, lasti, base, ResidueSize; int FIRST_TIME; /* Flag */ CHAR *buffer; base = WORD - M; k = 2*exponen(M); if(FIRST_IN_RE) { compute_next(M, Next, Next1); FIRST_IN_RE = 0; } for(i=0; i< MAXSYM; i++) RMask[i] = Mask[i]; r_NO_ERR = NO_ERR_MASK; Newline = '\n'; Init0 = Init[0] = Bit[base]; if(HEAD) Init0 = Init[0] = Init0 | Bit[base+1] ; for(i=1; i<= D; i++) Init[i] = Init[i-1] | Next[Init[i-1]]; /* can be out? */ Init1 = Init0 | 1; r2 = r3 = Init0; for(k=0; k<= D; k++) { A[k] = B[k] = Init[0]; } /* can be out? 
*/ FIRST_TIME = ON; alloc_buf(Text, &buffer, BlockSize+Maxline+1); if ( D == 0 ) { #if AGREP_POINTER if(Text != -1) { #endif /*AGREP_POINTER*/ lasti = Maxline; while ((num_read = fill_buf(Text, buffer + Maxline, BlockSize)) > 0) { i=Maxline; end = Maxline + num_read ; #if 0 /* pab: Don't do this here; it's done in bitap.fill_buf, * where we can handle eof on a block boundary right */ if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end++] = '\n'; #endif /* 0 */ if(FIRST_TIME) { buffer[i-1] = '\n'; i--; CurrentByteOffset --; FIRST_TIME = 0; } /* RE_PROCESS_WHEN_DZERO: the while-loop below */ while (i < end) { c = buffer[i++]; CurrentByteOffset ++; CMask = RMask[c]; if(c != Newline) { r1 = Init1 & r3; r2 = (Next[r3] & CMask) | r1; } else { r1 = Init1 & r3; /* match against '\n' */ r2 = Next[r3] & CMask | r1; j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(TAIL) r2 = Next[r2] | r2 ; /* epsilon move */ if(( r2 & 1) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || 
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } lasti = i - 1; r3 = Init0; r2 = (Next[r3] & CMask) | Init0; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i++]; CurrentByteOffset ++; CMask = RMask[c]; if(c != Newline) { r1 = Init1 & r2; r3 = (Next[r2] & CMask) | r1; } else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & r2; /* match against endofline */ r3 = Next[r2] & CMask | r1; if(TAIL) r3 = Next[r3] | r3; if(( r3 & 1) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } lasti = i - 1; r2 = Init0; r3 = (Next[r2] & CMask) | Init0; /* match the newline */ if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } } /* while */ ResidueSize = Maxline + num_read - lasti; if(ResidueSize > Maxline) { 
ResidueSize = Maxline; } strncpy(buffer+Maxline-ResidueSize, buffer+lasti, ResidueSize); lasti = Maxline - ResidueSize; } /* while fill_buf() */ free_buf(Text, buffer); return 0; #if AGREP_POINTER } else { num_read = agrep_inlen; buffer = (CHAR *)agrep_inbuffer; end = num_read; /* buffer[end-1] = '\n';*/ /* at end of the text. */ /* buffer[0] = '\n';*/ /* in front of the text. */ i = 0; lasti = 1; /* An exact copy of the above RE_PROCESS_WHEN_DZERO: the while-loop below */ while (i < end) { c = buffer[i++]; CurrentByteOffset ++; CMask = RMask[c]; if(c != Newline) { r1 = Init1 & r3; r2 = (Next[r3] & CMask) | r1; } else { r1 = Init1 & r3; /* match against '\n' */ r2 = Next[r3] & CMask | r1; j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(TAIL) r2 = Next[r2] | r2 ; /* epsilon move */ if(( r2 & 1) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } lasti = i - 
1; r3 = Init0; r2 = (Next[r3] & CMask) | Init0; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i++]; CurrentByteOffset ++; CMask = RMask[c]; if(c != Newline) { r1 = Init1 & r2; r3 = (Next[r2] & CMask) | r1; } else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & r2; /* match against endofline */ r3 = Next[r2] & CMask | r1; if(TAIL) r3 = Next[r3] | r3; if(( r3 & 1) ^ INVERSE) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } lasti = i - 1; r2 = Init0; r3 = (Next[r2] & CMask) | Init0; /* match the newline */ if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } } /* while */ /* If a residue is left for within-memory-buffer, since nothing can be "read" after that, we can ignore it: as if only 1 iteration of while */ return 0; } #endif /*AGREP_POINTER*/ } /* end if(D==0) */ #if 
AGREP_POINTER if (Text != -1) { #endif /*AGREP_POINTER*/ while ((num_read = fill_buf(Text, buffer + Maxline, BlockSize)) > 0) { i=Maxline; end = Maxline+num_read; #if 0 /* pab: Don't do this here; it's done in bitap.fill_buf, * where we can handle eof on a block boundary right */ if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end++] = '\n'; #endif /* 0 */ if(FIRST_TIME) { buffer[i-1] = '\n'; i--; CurrentByteOffset --; FIRST_TIME = 0; } /* RE_PROCESS_WHEN_DNOTZERO: the while-loop below */ while (i < end) { c = buffer[i++]; CurrentByteOffset ++; CMask = RMask[c]; if (c != Newline) { r_even = B[0]; r1 = Init1 & r_even; A[0] = (Next[r_even] & CMask) | r1; r_odd = B[1]; r1 = Init1 & r_odd; r2 = (r_even | Next[r_even|A[0]]) &r_NO_ERR; A[1] = (Next[r_odd] & CMask) | r2 | r1 ; if(D == 1) goto Nextcharfile; r_even = B[2]; r1 = Init1 & r_even; r2 = (r_odd | Next[r_odd|A[1]]) &r_NO_ERR; A[2] = (Next[r_even] & CMask) | r2 | r1 ; if(D == 2) goto Nextcharfile; r_odd = B[3]; r1 = Init1 & r_odd; r2 = (r_even | Next[r_even|A[2]]) &r_NO_ERR; A[3] = (Next[r_odd] & CMask) | r2 | r1 ; if(D == 3) goto Nextcharfile; r_even = B[4]; r1 = Init1 & r_even; r2 = (r_odd | Next[r_odd|A[3]]) &r_NO_ERR; A[4] = (Next[r_even] & CMask) | r2 | r1 ; goto Nextcharfile; } /* if NOT Newline */ else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & B[D]; /* match endofline */ A[D] = (Next[B[D]] & CMask) | r1; if(TAIL) A[D] = Next[A[D]] | A[D]; if((A[D] & 1) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; 
(outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i-1, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; } r1 = Init1 & B[0]; A[0] = (Next[B[0]] & CMask) | r1; for(k=1; k<= D; k++) { r1 = Init1 & B[k]; r2 = (B[k-1] | Next[A[k-1]|B[k-1]]) &r_NO_ERR; A[k] = (Next[B[k]] & CMask) | r1 | r2; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } Nextcharfile: c = buffer[i]; CMask = RMask[c]; if(c != Newline) { r1 = Init1 & A[0]; B[0] = (Next[A[0]] & CMask) | r1; r1 = Init1 & A[1]; B[1] = (Next[A[1]] & CMask) | ((A[0] | Next[A[0] | B[0]]) & r_NO_ERR) | r1 ; if(D == 1) goto Nextchar1file; r1 = Init1 & A[2]; B[2] = (Next[A[2]] & CMask) | ((A[1] | Next[A[1] | B[1]]) &r_NO_ERR) | r1 ; if(D == 2) goto Nextchar1file; r1 = Init1 & A[3]; B[3] = (Next[A[3]] & CMask) | ((A[2] | Next[A[2] | B[2]])&r_NO_ERR) | r1 ; if(D == 3) goto Nextchar1file; r1 = Init1 & A[4]; B[4] = (Next[A[4]] & CMask) | ((A[3] | Next[A[3] | B[3]])&r_NO_ERR) | r1 ; goto Nextchar1file; } /* if(NOT Newline) */ else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & A[D]; /* match endofline */ B[D] = (Next[A[D]] & CMask) | r1; if(TAIL) B[D] = Next[B[D]] | B[D]; if((B[D] & 1) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; 
for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; } r1 = Init1 & A[0]; B[0] = (Next[A[0]] & CMask) | r1; for(k=1; k<= D; k++) { r1 = Init1 & A[k]; r2 = (A[k-1] | Next[A[k-1]|B[k-1]])&r_NO_ERR; B[k] = (Next[A[k]] & CMask) | r1 | r2; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } Nextchar1file: i++; CurrentByteOffset ++; } /* while i < end */ strncpy(buffer, buffer+num_read, Maxline); } /* while fill_buf() */ free_buf(Text, buffer); return 0; #if AGREP_POINTER } else { num_read = agrep_inlen; buffer = (CHAR *)agrep_inbuffer; end = num_read; /* buffer[end-1] = '\n';*/ /* at end of the text. */ /* buffer[0] = '\n';*/ /* in front of the text. 
*/ i = 0; /* An exact copy of the above RE_PROCESS_WHEN_DNOTZERO: the while-loop below */ while (i < end) { c = buffer[i++]; CurrentByteOffset ++; CMask = RMask[c]; if (c != Newline) { r_even = B[0]; r1 = Init1 & r_even; A[0] = (Next[r_even] & CMask) | r1; r_odd = B[1]; r1 = Init1 & r_odd; r2 = (r_even | Next[r_even|A[0]]) &r_NO_ERR; A[1] = (Next[r_odd] & CMask) | r2 | r1 ; if(D == 1) goto Nextcharmem; r_even = B[2]; r1 = Init1 & r_even; r2 = (r_odd | Next[r_odd|A[1]]) &r_NO_ERR; A[2] = (Next[r_even] & CMask) | r2 | r1 ; if(D == 2) goto Nextcharmem; r_odd = B[3]; r1 = Init1 & r_odd; r2 = (r_even | Next[r_even|A[2]]) &r_NO_ERR; A[3] = (Next[r_odd] & CMask) | r2 | r1 ; if(D == 3) goto Nextcharmem; r_even = B[4]; r1 = Init1 & r_even; r2 = (r_odd | Next[r_odd|A[3]]) &r_NO_ERR; A[4] = (Next[r_even] & CMask) | r2 | r1 ; goto Nextcharmem; } /* if NOT Newline */ else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & B[D]; /* match endofline */ A[D] = (Next[B[D]] & CMask) | r1; if(TAIL) A[D] = Next[A[D]] | A[D]; if((A[D] & 1) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i-1, end, j)) 
{free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; } r1 = Init1 & B[0]; A[0] = (Next[B[0]] & CMask) | r1; for(k=1; k<= D; k++) { r1 = Init1 & B[k]; r2 = (B[k-1] | Next[A[k-1]|B[k-1]]) &r_NO_ERR; A[k] = (Next[B[k]] & CMask) | r1 | r2; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } Nextcharmem: c = buffer[i]; CMask = RMask[c]; if(c != Newline) { r1 = Init1 & A[0]; B[0] = (Next[A[0]] & CMask) | r1; r1 = Init1 & A[1]; B[1] = (Next[A[1]] & CMask) | ((A[0] | Next[A[0] | B[0]]) & r_NO_ERR) | r1 ; if(D == 1) goto Nextchar1mem; r1 = Init1 & A[2]; B[2] = (Next[A[2]] & CMask) | ((A[1] | Next[A[1] | B[1]]) &r_NO_ERR) | r1 ; if(D == 2) goto Nextchar1mem; r1 = Init1 & A[3]; B[3] = (Next[A[3]] & CMask) | ((A[2] | Next[A[2] | B[2]])&r_NO_ERR) | r1 ; if(D == 3) goto Nextchar1mem; r1 = Init1 & A[4]; B[4] = (Next[A[4]] & CMask) | ((A[3] | Next[A[3] | B[3]])&r_NO_ERR) | r1 ; goto Nextchar1mem; } /* if(NOT Newline) */ else { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = Init1 & A[D]; /* match endofline */ B[D] = (Next[A[D]] & CMask) | r1; if(TAIL) B[D] = Next[B[D]] | B[D]; if((B[D] & 1) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } 
if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if (-1 == r_output(buffer, i, end, j)) {free_buf(Text, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; } r1 = Init1 & A[0]; B[0] = (Next[A[0]] & CMask) | r1; for(k=1; k<= D; k++) { r1 = Init1 & A[k]; r2 = (A[k-1] | Next[A[k-1]|B[k-1]])&r_NO_ERR; B[k] = (Next[A[k]] & CMask) | r1 | r2; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } Nextchar1mem: i++; CurrentByteOffset ++; } /* while i < end */ return 0; } #endif /*AGREP_POINTER*/ } /* re */ int r_output (buffer, i, end, j) int i, end, j; CHAR *buffer; { int PRINTED = 0; int bp; if(i >= end) return 0; if ((j < 1) || (CurrentByteOffset < 0)) return 0; num_of_matched++; if(COUNT) return 0; if (SILENT) return 0; if(FNAME && (NEW_FILE || !POST_FILTER)) { char nextchar = (POST_FILTER == ON)?'\n':' '; char *prevstring = (POST_FILTER == ON)?"\n":""; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName); else { int outindex; if (prevstring[0] != '\0') { if(agrep_outpointer + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else agrep_outbuffer[agrep_outpointer ++] = prevstring[0]; } for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != 
NULL) fprintf(agrep_finalfp, ":%c", nextchar); else { if (agrep_outpointer+2>= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else { agrep_outbuffer[agrep_outpointer++] = ':'; agrep_outbuffer[agrep_outpointer++] = nextchar; } } NEW_FILE = OFF; PRINTED = 1; } bp = i-1; while ((buffer[bp] != '\n') && (bp > 0)) bp--; if(LINENUM) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%d: ", j-1); else { char s[32]; int outindex; sprintf(s, "%d: ", j-1); for(outindex=0; (outindex+agrep_outpointer= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } while(bp <= i) agrep_outbuffer[agrep_outpointer ++] = buffer[bp++]; } } else if (PRINTED) { if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp); else agrep_outbuffer[agrep_outpointer ++] = '\n'; PRINTED = 0; } return 0; } /* * Processes the options specified in argc and argv, and fetches the pattern. * Also sets the set of filenames to be searched for internally. Returns: -1 * if there is a serious error, 0 if there is no pattern or an error in getting * the file names, the length (> 0) of the pattern if there is no error. When a * 0 is returned, it means that at least the options were processed correctly. */ int agrep_init(argc, argv, initialfd, pattern_len, pattern_buffer) int argc; char *argv[]; int initialfd; int pattern_len; CHAR *pattern_buffer; { int i, j, seenlsq = 0; char c, *p; int filetype; char **original_argv = argv; char *home; int quitwhile; int NOOUTTAIL=OFF; initial_value(); if (pattern_len < 1) { fprintf(stderr, "agrep_init: pattern length %d too small\n", pattern_len); errno = 3; return -1; } agrep_initialfd = initialfd; strncpy(Progname, argv[0], MAXNAME); if (argc < 2) return agrep_usage(); printf(""); /* dummy statement which avoids program crash with SYS3175 when piping the output of complex AGREP results into a file. This bug is regarded as COMPILER-UNSPECIFIC. For sure, the problem SHOULD BE FIXED somewhere else in AGREP, later. 
[TG] 16.09.96 Thomas Gries gries@epo.e-mail.com, gries@ibm.net */ Pattern[0] = '\0'; while(--argc > 0 && (*++argv)[0] == '-') { /* argv is incremented automatically here */ p = argv[0]+1; /* ptr to first character after '-' */ c = *(argv[0]+1); quitwhile = OFF; while(!quitwhile && (*p != '\0')) { c = *p; switch(c) { case 'z' : NOOUTPUTZERO = ON; /* don't output files with 0 matches */ PRINT(printf("z\n"); ) break; case 'c' : COUNT = ON; /* output the # of matches */ PRINT(printf("c\n"); ) break; case 's' : SILENT = ON; /* silent mode */ PRINT(printf("s\n"); ) break; case 'p' : I = 0; /* insertion cost is 0 */ PRINT(printf("p\n"); ) break; case 'P' : PRINTPATTERN = 1; /* print pattern before every matched line */ PRINT(printf("p\n"); ) break; case 'x' : WHOLELINE = ON; /* match the whole line */ PRINT(printf("x\n"); ) if(WORDBOUND) { fprintf(stderr, "%s: illegal option combination (-x and -w)\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } break; case 'b' : BYTECOUNT = ON; PRINT(printf("b\n"); ) break; case 'q' : PRINTOFFSET = ON; PRINT(printf("q\n"); ) break; case 'u' : PRINTRECORD = OFF; PRINT(printf("u\n"); ) break; case 'X' : PRINTNONEXISTENTFILE = ON; PRINT(printf("X\n"); ) break; case 'g' : PRINTFILENUMBER = ON; PRINT(printf("g\n"); ) break; case 'j' : PRINTFILETIME = ON; PRINT(printf("@\n"); ) break; case 'L' : if ( *(p + 1) == '\0') {/* space after -L option */ if(argc <= 1) { fprintf(stderr, "%s: the -L option must have an output-limit argument\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } argv++; LIMITOUTPUT = LIMITTOTALFILE = LIMITPERFILE = 0; sscanf(argv[0], "%d:%d:%d", &LIMITOUTPUT, &LIMITTOTALFILE, &LIMITPERFILE); if ((LIMITOUTPUT < 0) || (LIMITTOTALFILE < 0) || (LIMITPERFILE < 0)) { fprintf(stderr, "%s: invalid output limit %s\n", Progname, argv[0]); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } argc--; } else { LIMITOUTPUT = LIMITTOTALFILE = 
LIMITPERFILE = 0; sscanf(p+1, "%d:%d:%d", &LIMITOUTPUT, &LIMITTOTALFILE, &LIMITPERFILE); if ((LIMITOUTPUT < 0) || (LIMITTOTALFILE < 0) || (LIMITPERFILE < 0)) { fprintf(stderr, "%s: invalid output limit %s\n", Progname, p+1); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } } /* else */ PRINT(printf("L\n"); ) quitwhile = ON; break; case 'd' : DELIMITER = ON; /* user defines delimiter */ PRINT(printf("d\n"); ) if ( *(p + 1) == '\0') {/* space after -d option */ if(argc <= 1) { fprintf(stderr, "%s: the -d option must have a delimiter argument\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } argv++; if ((D_length = strlen(argv[0])) > MaxDelimit) { fprintf(stderr, "%s: delimiter pattern too long (has > %d chars)\n", Progname, MaxDelimit); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } D_pattern[0] = '<'; strcpy(D_pattern+1, argv[0]); if (((argv[0][D_length-1] == '\n') || (argv[0][D_length-1] == '$') || (argv[0][D_length-1] == '^')) && (D_length == 1)) OUTTAIL = ON; argc--; PRINT(printf("space\n"); ) } else { if ((D_length = strlen(p + 1)) > MaxDelimit) { fprintf(stderr, "%s: delimiter pattern too long (has > %d chars)\n", Progname, MaxDelimit); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } D_pattern[0] = '<'; strcpy(D_pattern+1, p + 1); if ((((p+1)[D_length-1] == '\n') || ((p+1)[D_length-1] == '$') || ((p+1)[D_length-1] == '^')) && (D_length == 1)) OUTTAIL = ON; } /* else */ strcat(D_pattern, ">; "); D_length++; /* to count '<' as one */ PRINT(printf("D_pattern=%s\n", D_pattern); ) strcpy(original_D_pattern, D_pattern); original_D_length = D_length; quitwhile = ON; break; case 'H': if (*(p + 1) == '\0') {/* space after - option */ if (argc <= 1) { fprintf(stderr, "%s: a directory name must follow the -H option\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return agrep_usage(); } else exit(2); } argv ++; strcpy(COMP_DIR, argv[0]); argc --; } else { 
strcpy(COMP_DIR, p+1); } quitwhile = ON; break; case 'e' : if ( *(p + 1) == '\0') {/* space after -e option */ if(argc <= 1) { fprintf(stderr, "%s: the -e option must have a pattern argument\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } argv++; if(argv[0][0] == '-') { /* not strictly necessary but no harm done */ Pattern[0] = '\\'; strcat(Pattern, (argv)[0]); } else strcat(Pattern, argv[0]); argc--; } else { if (*(p+1) == '-') { /* not strictly necessary but no harm done */ Pattern[0] = '\\'; strcat(Pattern, p+1); } else strcat (Pattern, p+1); } /* else */ PRINT(printf("Pattern=%s\n", Pattern); ) pattern_index = abs(argv - original_argv); quitwhile = ON; break; case 'k' : CONSTANT = ON; if ( *(p + 1) == '\0') {/* space after -e option */ if(argc <= 1) { fprintf(stderr, "%s: the -k option must have a pattern argument\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } argv++; strcat(Pattern, argv[0]); if((argc > 2) && (argv[1][0] == '-')) { fprintf(stderr, "%s: -k should be the last option in the command\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } argc--; } else { if((argc > 1) && (argv[1][0] == '-')) { fprintf(stderr, "%s: -k should be the last option in the command\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } strcat (Pattern, p+1); } /* else */ pattern_index = abs(argv - original_argv); quitwhile = ON; break; case 'f' : if (PAT_FILE == ON) { fprintf(stderr, "%s: multiple -f options\n", Progname); if (multifd >= 0) close(multifd); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } if (PAT_BUFFER == ON) { fprintf(stderr, "%s: -f and -m are incompatible\n", Progname); if (multibuf != NULL) free(multibuf); multibuf = NULL; multilen = 0; if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } PAT_FILE = ON; PRINT(printf("f\n"); ) argv++; argc--; if (argv[0] == NULL) { /* A -f option 
with a NULL file name is a NO-OP: stupid, but simplifies glimpse :-) */ PAT_FILE = OFF; quitwhile = ON; break; } if((multifd = open(argv[0], O_RDONLY)) < 0) { PAT_FILE = OFF; fprintf(stderr, "%s: can't open pattern file for reading: %s\n", Progname, argv[0]); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } PRINT(printf("file=%s\n", argv[0]); ) strcpy(PAT_FILE_NAME, argv[0]); if (prepf(multifd, NULL, 0) <= -1) { close(multifd); PAT_FILE = OFF; fprintf(stderr, "%s: error in processing pattern file: %s\n", Progname, argv[0]); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } quitwhile = ON; break; case 'm' : if (PAT_BUFFER == ON) { fprintf(stderr, "%s: multiple -m options\n", Progname); if (multibuf != NULL) free(multibuf); multibuf = NULL; multilen = 0; if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } if (PAT_FILE == ON) { fprintf(stderr, "%s: -f and -m are incompatible\n", Progname); if (multifd >= 0) close(multifd); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } PAT_BUFFER = ON; PRINT(printf("m\n"); ) argv ++; argc --; if ((argv[0] == NULL) || ((multilen = strlen(argv[0])) <= 0)) { /* A -m option with a NULL or empty pattern buffer is a NO-OP: stupid, but simplifies glimpse :-) */ PAT_BUFFER = OFF; if (multibuf != NULL) free(multibuf); multilen = 0; multibuf = NULL; } else { multibuf = (char *)malloc(multilen + 2); strcpy(multibuf, argv[0]); PRINT(printf("patterns=%s\n", multibuf); ) if (prepf(-1, multibuf, multilen) <= -1) { free(multibuf); multibuf = NULL; multilen = 0; PAT_BUFFER = OFF; fprintf(stderr, "%s: error in processing pattern buffer\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } } quitwhile = ON; break; case 'h' : NOFILENAME = ON; PRINT(printf("h\n"); ) break; case 'i' : NOUPPER = ON; PRINT(printf("i\n"); ) break; case 'l' : FILENAMEONLY = ON; PRINT(printf("l\n"); ) break; case 'n' : LINENUM = ON; /* output prefixed by line no*/ 
PRINT(printf("n\n"); ) break; case 'r' : RECURSIVE = ON; PRINT(printf("r\n"); ) break; case 'V' : printf("\nThis is agrep version %s, %s.\n\n", AGREP_VERSION, AGREP_DATE); return 0; case 'v' : INVERSE = ON; /* output no-matched lines */ PRINT(printf("v\n"); ) break; case 't' : OUTTAIL = ON; /* output from tail of delimiter */ PRINT(printf("t\n"); ) break; case 'o' : NOOUTTAIL = ON; /* output from front of delimiter */ PRINT(printf("t\n"); ) break; case 'B' : BESTMATCH = ON; PRINT(printf("B\n"); ) break; case 'w' : WORDBOUND = ON;/* match to words */ PRINT(printf("w\n"); ) if(WHOLELINE) { fprintf(stderr, "%s: illegal option combination (-w and -x)\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } break; case 'y' : NOPROMPT = ON; PRINT(printf("y\n"); ) break; case 'I' : I = atoi(p + 1); /* Insertion Cost */ JUMP = ON; quitwhile = ON; break; case 'S' : S = atoi(p + 1); /* Substitution Cost */ JUMP = ON; quitwhile = ON; break; case 'D' : DD = atoi(p + 1); /* Deletion Cost */ JUMP = ON; quitwhile = ON; break; case 'G' : FILEOUT = ON; COUNT = ON; break; case 'A': ALWAYSFILENAME = ON; break; case 'O': POST_FILTER = ON; break; case 'M': MULTI_OUTPUT = ON; break; case 'Z': break; /* no-op: used by glimpse */ default : if (isdigit(c)) { APPROX = ON; D = atoi(p); if (D > MaxError) { fprintf(stderr,"%s: the maximum number of errors is %d\n", Progname, MaxError); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } quitwhile = ON; /* note that even a number should occur at the end of a group of options, as f & e */ } else { fprintf(stderr, "%s: illegal option -%c\n",Progname, c); return agrep_usage(); } } /* switch(c) */ p ++; } } /* while (--argc > 0 && (*++argv)[0] == '-') */ if (NOOUTTAIL == ON) OUTTAIL = OFF; if (COMP_DIR[0] == '\0') { if ((home = (char *)getenv("HOME")) == NULL) { getcwd(COMP_DIR, MAX_LINE_LEN-1); fprintf(stderr, "using working-directory '%s' to locate dictionaries\n", COMP_DIR); } else strncpy(COMP_DIR, 
home, MAX_LINE_LEN); } strcpy(FREQ_FILE, COMP_DIR); strcat(FREQ_FILE, "/"); strcat(FREQ_FILE, DEF_FREQ_FILE); strcpy(HASH_FILE, COMP_DIR); strcat(HASH_FILE, "/"); strcat(HASH_FILE, DEF_HASH_FILE); strcpy(STRING_FILE, COMP_DIR); strcat(STRING_FILE, "/"); strcat(STRING_FILE, DEF_STRING_FILE); initialize_common(FREQ_FILE, 0); /* no error msgs */ if (FILENAMEONLY && NOFILENAME) { fprintf(stderr, "%s: -h and -l options are mutually exclusive\n",Progname); } if (COUNT && (FILENAMEONLY || NOFILENAME)) { FILENAMEONLY = OFF; if(!FILEOUT) NOFILENAME = OFF; } if (SILENT) { FILEOUT = 0; NOFILENAME = 1; PRINTRECORD = 0; FILENAMEONLY = 0; PRINTFILETIME = 0; BYTECOUNT = 0; PRINTOFFSET = 0; } if (!(PAT_FILE || PAT_BUFFER) && Pattern[0] == '\0') { /* Pattern not set with -e option */ if (argc <= 0) { agrep_usage(); return 0; } strcpy(Pattern, *argv); pattern_index = abs(argv - original_argv); argc--; argv++; } /* if multi-pattern search, just ignore any specified pattern altogether: treat it as a filename */ if (copied_from_argv) { for (i=0; i USERRANGE_MIN) && ( ((unsigned char *)Pattern)[i] <= USERRANGE_MAX)) { fprintf(stderr, "Warning: pattern has some meta-characters interpreted by agrep!\n"); break; } else if (Pattern[i] == '\\') i++; /* extra */ else if (Pattern[i] == '[') seenlsq = 1; else if ((Pattern[i] == '-') && !seenlsq) { for (j=M; j>=i; j--) Pattern[j+1] = Pattern[j]; /* right shift including '\0' */ Pattern[i] = '\\'; /* escape the - */ M ++; i++; } else if (Pattern[i] == ']') seenlsq = 0; } if (M > pattern_len - 1) { fprintf(stderr, "%s: pattern '%s' does not fit in specified buffer\n", Progname, Pattern); errno = 3; return 0; } if (pattern_buffer != Pattern) /* not from mem/file-agrep() */ strncpy(pattern_buffer, Pattern, M+1); /* copy \0 */ return M; } /* * User need not bother about initialfd. * Both functions return -1 on error, 0 if there was no pattern, * length (>=1) of pattern otherwise. 
*/ int memagrep_init(argc, argv, pattern_len, pattern_buffer) int argc; char *argv[]; int pattern_len; char *pattern_buffer; { return (agrep_init(argc, argv, -1, pattern_len, pattern_buffer)); } int fileagrep_init(argc, argv, pattern_len, pattern_buffer) int argc; char *argv[]; int pattern_len; char *pattern_buffer; { return (agrep_init(argc, argv, 3, pattern_len, pattern_buffer)); } /* returns -1 on error, num of matches (>=0) otherwise */ int agrep_search(pattern_len, pattern_buffer, initialfd, input_len, input, output_len, output) int pattern_len; CHAR *pattern_buffer; int initialfd; int input_len; void *input; int output_len; void *output; { int i; int filetype; int ret; int pattern_has_changed = 1; if ((multifd == -1) && (multibuf == NULL) && (pattern_len < 1)) { fprintf(stderr, "%s: pattern length %d too small\n", Progname, pattern_len); errno = 3; return -1; } if (pattern_len >= MAXPAT) { fprintf(stderr, "%s: pattern '%s' too long\n", Progname, pattern_buffer); errno = 3; return -1; } /* courtesy: crd@hplb.hpl.hp.com */ if (agrep_saved_pattern) { if (strcmp(agrep_saved_pattern, pattern_buffer)) { free(agrep_saved_pattern); agrep_saved_pattern = NULL; } else { pattern_has_changed = 0; } } if (! 
agrep_saved_pattern) { agrep_saved_pattern = (CHAR *)malloc(pattern_len+1); memcpy(agrep_saved_pattern, pattern_buffer, pattern_len); agrep_saved_pattern[pattern_len] = '\0'; } if (!pattern_has_changed) { reinit_value_partial(); } else { reinit_value(); if (pattern_buffer != Pattern) /* not from mem/file-agrep() */ strncpy(Pattern, pattern_buffer, pattern_len+1); /* copy \0 */ M = strlen(Pattern); } if (output == NULL) { fprintf(stderr, "%s: invalid output descriptor\n", Progname); return -1; } if (output_len <= 0) { agrep_finalfp = (FILE *)output; agrep_outlen = 0; agrep_outbuffer = NULL; agrep_outpointer = 0; } else { agrep_finalfp = NULL; agrep_outlen = output_len; agrep_outbuffer = (CHAR *)output; agrep_outpointer = 0; } agrep_initialfd = initialfd; execfd = initialfd; if (initialfd == -1) { agrep_inbuffer = (CHAR *)input; agrep_inlen = input_len; agrep_inpointer = 0; } else if ((input_len > 0) && (input != NULL)) { /* Copy the set of filenames into Textfiles */ if (copied_from_argv) { for (i=0; i; ", D_length is 1 + length of string PAT: see agrep.c/'d' */ preprocess_delimiter(D_pattern+1, D_length - 1, D_pattern, &D_length); /* D_pattern is the exact stuff we want to match, D_length is its strlen */ if ((tc_D_length = quick_tcompress(FREQ_FILE,HASH_FILE,D_pattern,D_length,tc_D_pattern,MaxDelimit*2,TC_EASYSEARCH)) <= 0) { strcpy(tc_D_pattern, D_pattern); tc_D_length = D_length; } /* printf("sgrep's delim=%s,%d tc_delim=%s,%d\n", D_pattern, D_length, tc_D_pattern, tc_D_length); */ } M = strlen(OldPattern); } } if (AParse) { /* boolean converted to multi-pattern search */ int prepf_ret= 0; if (pattern_has_changed) prepf_ret= prepf(-1, multibuf, multilen); if (prepf_ret <= -1) { if (AComplexBoolean) destroy_tree(AParse); AParse = 0; PAT_BUFFER = 0; if (multibuf != NULL) free(multibuf); /* this was allocated for arbit booleans, not multipattern search */ multibuf = NULL; multilen = 0; /* Cannot free multifd here since that is always allocated for multipattern 
search */ return -1; } } if (Numfiles > 1) FNAME = ON; if (NOFILENAME) FNAME = 0; if (ALWAYSFILENAME) FNAME = ON; /* used by glimpse ONLY: 15/dec/93 */ if (agrep_initialfd == -1) ret = exec(execfd, NULL); else if(RECURSIVE) ret = (recursive(Numfiles, Textfiles)); else ret = (exec(execfd, Textfiles)); return ret; } /* * User need not bother about initialfd. * Both functions return -1 on error, 0 otherwise. */ int memagrep_search(pattern_len, pattern_buffer, input_len, input_buffer, output_len, output) int pattern_len; char *pattern_buffer; int input_len; char *input_buffer; int output_len; void *output; { return(agrep_search(pattern_len, pattern_buffer, -1, input_len, input_buffer, output_len, output)); } int fileagrep_search(pattern_len, pattern_buffer, file_num, file_buffer, output_len, output) int pattern_len; char *pattern_buffer; int file_num; char **file_buffer; int output_len; void *output; { return(agrep_search(pattern_len, pattern_buffer, 3, file_num, file_buffer, output_len, output)); } /* * The original agrep_run() routine was split into agrep_search and agrep_init * so that the interface with glimpse could be made cleaner: see glimpse. * Now, the user can specify an initial set of options, and use them in future * searches. If agrep_init does not find the pattern, options are still SET. * In fileagrep_search, the user can specify a NEW set of files to be searched * after the options are processed (this is used in glimpse). * * Both functions return -1 on error, 0 otherwise. * * The arguments are self explanatory. The pattern should be specified in * one of the argvs. Options too can be specified in one of the argvs -- it * is exactly as if the options are being given to agrep at run time. * The only restrictions are that the input_buffer should begin with a '\n' * and after its end, there must be valid memory to store a copy of the pattern. 
*/ int memagrep(argc, argv, input_len, input_buffer, output_len, output) int argc; char *argv[]; int input_len; char *input_buffer; int output_len; void *output; { int ret; if ((ret = memagrep_init(argc, argv, MAXPAT, Pattern)) < 0) return -1; else if ((ret == 0) && (multifd == -1) && (multibuf == NULL)) return -1; /* ^^^ because one need not specify the pattern on the cmd line if -f OR -m */ return memagrep_search(ret, Pattern, input_len, input_buffer, output_len, output); } int fileagrep(argc, argv, output_len, output) int argc; char *argv[]; int output_len; void *output; { int ret; if ((ret = fileagrep_init(argc, argv, MAXPAT, Pattern)) < 0) return -1; else if ((ret == 0) && (multifd == -1) && (multibuf == NULL)) return -1; /* ^^^ because one need not specify the pattern on the cmd line if -f OR -m */ return fileagrep_search(ret, Pattern, 0, NULL, output_len, output); } /* * RETURNS: total number of matched lines in all files that were searched. * * The pattern(s) remain(s) constant irrespective of the number of files. * Hence, essentially, all the interface routines below have to be changed * so that they DONT do that preprocessing again and again for multiple * files. This bug was found while interfacing agrep with cast. * * At present, sgrep() has been modified to have another parameter, * "samepattern" that tells it whether the pattern is the same as before. * Other funtions too should have such a parameter and should not repeat * preprocessing for all patterns. Since preprocessing for a pattern to * be searched in compressed files is siginificant, this bug was found. * * - bgopal on 15/Nov/93. 
*/ int exec(fd, file_list) int fd; char **file_list; { int i; char c[8]; int ret = 0; /* no error */ if ((Numfiles > 1) && (NOFILENAME == OFF)) FNAME = ON; if ((-1 == compat())) return -1; /* check compatibility between options */ if (fd <= 0) { TCOMPRESSED = ON; /* there is a possibility that the data might be tuncompressible */ if (!SetCurrentByteOffset) CurrentByteOffset = 0; if((fd == 0) && FILENAMEONLY) { fprintf(stderr, "%s: -l option is not compatible with standard input\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } if(PAT_FILE || PAT_BUFFER) mgrep(fd, AParse); else { if(SGREP) ret = sgrep(OldPattern, strlen(OldPattern), fd, D, 0); else ret = bitap(old_D_pat, Pattern, fd, M, D); } if (ret <= -1) return -1; if (COUNT /* && ret */) { /* dirty solution for glimpse's -b! */ if(INVERSE && (PAT_FILE || PAT_BUFFER)) { /* inverse will never be set in glimpse */ if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%d\n", total_line-(num_of_matched - prev_num_of_matched)); else { char s[32]; int outindex; sprintf(s, "%d\n", total_line-(num_of_matched - prev_num_of_matched)); for(outindex=0; (outindex+agrep_outpointer 0 => Numfiles > 0 */ for (i = 0; i < Numfiles; i++, close(fd)) { prev_num_of_matched = num_of_matched; if (!SetCurrentByteOffset) CurrentByteOffset = 0; if (!SetCurrentFileName) { if (PRINTFILENUMBER) sprintf(CurrentFileName, "%d", i); else strcpy(CurrentFileName, file_list[i]); } if (!SetCurrentFileTime) { if (PRINTFILETIME) CurrentFileTime = aget_file_time(NULL, file_list[i]); } TCOMPRESSED = ON; if (!tuncompressible_filename(file_list[i], strlen(file_list[i]))) TCOMPRESSED = OFF; NEW_FILE = ON; if ((fd = my_open(file_list[i], O_RDONLY)) < /*=*/ 0) { if (PRINTNONEXISTENTFILE) printf("%s\n", CurrentFileName); else if (!glimpse_call) fprintf(stderr, "%s: can't open file for reading: %s\n",Progname, file_list[i]); } else { if(PAT_FILE || PAT_BUFFER) mgrep(fd, AParse); else { if(SGREP) ret = sgrep(OldPattern, 
strlen(OldPattern), fd, D, i); else ret = bitap(old_D_pat, Pattern, fd, M, D); } if (ret <= -1) { close(fd); return -1; } if (num_of_matched - prev_num_of_matched > 0) { NOMATCH = OFF; files_matched ++; } if (COUNT && !FILEOUT) { if( (INVERSE && (PAT_FILE || PAT_BUFFER)) && ((total_line - (num_of_matched - prev_num_of_matched)> 0) || !NOOUTPUTZERO) ) { if(FNAME && (NEW_FILE || !POST_FILTER)) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; close(fd); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; close(fd); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, ": %d\n", total_line - (num_of_matched - prev_num_of_matched)); else { char s[32]; int outindex; sprintf(s, ": %d\n", total_line - (num_of_matched - prev_num_of_matched)); for(outindex=0; (outindex+agrep_outpointer 0) || !NOOUTPUTZERO) ) { /* inverse is always 0 in glimpse, so we always come here */ if(FNAME && (NEW_FILE || !POST_FILTER)) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; close(fd); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; close(fd); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, ": %d\n", (num_of_matched - prev_num_of_matched)); else { char s[32]; int outindex; sprintf(s, ": %d\n", (num_of_matched - 
prev_num_of_matched)); for(outindex=0; (outindex+agrep_outpointer 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITTOTALFILE > 0) && (LIMITTOTALFILE <= files_matched))) { close(fd); break; /* done */ } } /* for i < Numfiles */ if(NOMATCH && BESTMATCH) { if(WORDBOUND || WHOLELINE || INVERSE) { SGREP = 0; if(-1 == preprocess(D_pattern, Pattern)) return -1; strcpy(old_D_pat, D_pattern); if((M = maskgen(Pattern, D)) == -1) return -1; } COUNT=ON; D=1; while(D 0) { if(PAT_FILE || PAT_BUFFER) mgrep(fd, AParse); else { if(SGREP) ret = sgrep(OldPattern,strlen(OldPattern),fd,D, i); else ret = bitap(old_D_pat,Pattern,fd,M,D); } if (ret <= -1) return -1; } /* else don't have to process PRINTNONEXISTENTFILE since must print only once */ if (glimpse_clientdied) { close(fd); return -1; } if (agrep_finalfp != NULL) fflush(agrep_finalfp); if ((((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITTOTALFILE > 0) && (LIMITTOTALFILE <= files_matched))) && (num_of_matched > prev_num_of_matched)) { close(fd); break; } } /* for i < Numfiles */ D++; } /* while */ if(num_of_matched - prev_num_of_matched > 0) { D--; errno = D; /* #of errors if proper return */ COUNT = 0; if(num_of_matched - prev_num_of_matched == 1) fprintf(stderr,"%s: 1 word matches ", Progname); else fprintf(stderr,"%s: %d words match ", Progname, num_of_matched - prev_num_of_matched); if(D==1) fprintf(stderr, "within 1 error"); else fprintf(stderr, "within %d errors", D); fflush(stderr); if(NOPROMPT) fprintf(stderr, "\n"); else { if(num_of_matched - prev_num_of_matched == 1) fprintf(stderr,"; search for it? (y/n)"); else fprintf(stderr,"; search for them? 
(y/n)"); c[0] = 'y'; if (!glimpse_isserver && (fgets(c, 4, stdin) == NULL)) goto CONT; if(c[0] != 'y') goto CONT; } for (i = 0; i < Numfiles; i++, close(fd)) { prev_num_of_matched = num_of_matched; CurrentByteOffset = 0; if (PRINTFILENUMBER) sprintf(CurrentFileName, "%d", i); else strcpy(CurrentFileName, file_list[i]); if (!SetCurrentFileTime) if (PRINTFILETIME) CurrentFileTime = aget_file_time(NULL, file_list[i]); NEW_FILE = ON; if ((fd = my_open(Textfiles[i], O_RDONLY)) > 0) { if(PAT_FILE || PAT_BUFFER) mgrep(fd, AParse); else { if(SGREP) ret = sgrep(OldPattern,strlen(OldPattern),fd,D, i); else ret = bitap(old_D_pat,Pattern,fd,M,D); } if (ret <= -1) { close(fd); return -1; } } /* else don't have to process PRINTNONEXISTENTFILE since must print only once */ if (glimpse_clientdied) { close(fd); return -1; } if (agrep_finalfp != NULL) fflush(agrep_finalfp); if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITTOTALFILE > 0) && (LIMITTOTALFILE <= files_matched))) { close(fd); break; /* done */ } } /* for i < Numfiles */ NOMATCH = 0; } } } CONT: if(EATFIRST) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else if (agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; EATFIRST = OFF; } if(num_of_matched - prev_num_of_matched > 0) NOMATCH = OFF; /* if(NOMATCH) return(0); */ /*printf("exec=%d\n", num_of_matched);*/ return(num_of_matched); } /* end of exec() */ /* Just output the contents of the file fname onto the std output */ int file_out(fname) char *fname; { int num_read; int fd; int i, len; CHAR buf[SIZE+2]; if(FNAME) { len = strlen(fname); if (agrep_finalfp != NULL) { fputc('\n', agrep_finalfp); for(i=0; i< len; i++) fputc(':', agrep_finalfp); fputc('\n', agrep_finalfp); fprintf(agrep_finalfp, "%s\n", fname); for(i=0; i< len; i++) fputc(':', agrep_finalfp); fputc('\n', agrep_finalfp); fflush(agrep_finalfp); } else { if (1+len+1+len+1+len+1+agrep_outpointer >= agrep_outlen) { 
OUTPUT_OVERFLOW; return -1; } agrep_outbuffer[agrep_outpointer++] = '\n'; for (i=0; i 0) write(1, buf, num_read); if (glimpse_clientdied) { close(fd); return -1; } } else { if ((num_read = fill_buf(fd, agrep_outbuffer + agrep_outpointer, agrep_outlen - agrep_outpointer)) > 0) agrep_outpointer += num_read; } close(fd); return 0; } int output(buffer, i1, i2, j) register CHAR *buffer; int i1, i2, j; { int PRINTED = 0; register CHAR *bp, *outend; if(i1 > i2) return 0; num_of_matched++; if(COUNT) return 0; if(SILENT) return 0; if(OUTTAIL || (!DELIMITER && (D_length == 1) && (D_pattern[0] == '\n')) ) { if (j>1) i1 = i1 + D_length; i2 = i2 + D_length; } if(DELIMITER) j = j+1; if(FIRSTOUTPUT) { if (buffer[i1] == '\n') { i1++; EATFIRST = ON; } FIRSTOUTPUT = 0; } if(TRUNCATE) { fprintf(stderr, "WARNING! some lines have been truncated in output record #%d\n", num_of_matched-1); } /* Why do we have to do this? */ while ((buffer[i1] == '\n') && (i1 <= i2)) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer < agrep_outlen) agrep_outbuffer[agrep_outpointer ++] = '\n'; else { OUTPUT_OVERFLOW; return -1; } } i1++; } if(FNAME && (NEW_FILE || !POST_FILTER)) { char nextchar = (POST_FILTER == ON)?'\n':' '; char *prevstring = (POST_FILTER == ON)?"\n":""; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName); else { int outindex; if (prevstring[0] != '\0') { if(agrep_outpointer + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else agrep_outbuffer[agrep_outpointer ++] = prevstring[0]; } for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) 
fprintf(agrep_finalfp, ":%c", nextchar); else { if (agrep_outpointer+2>= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else { agrep_outbuffer[agrep_outpointer++] = ':'; agrep_outbuffer[agrep_outpointer++] = nextchar; } } NEW_FILE = OFF; PRINTED = 1; } if(LINENUM) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%d: ", j-1); else { char s[32]; int outindex; sprintf(s, "%d: ", j-1); for(outindex=0; (outindex+agrep_outpointer= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } while(bp <= outend) agrep_outbuffer[agrep_outpointer ++] = *bp++; } } else if (PRINTED) { if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp); else agrep_outbuffer[agrep_outpointer ++] = '\n'; PRINTED = 0; } return 0; } int agrep_usage() { if (glimpse_call) return -1; fprintf(stderr, "usage: %s [-@#abcdehiklnoprstvwxyBDGIMSV] [-f patternfile] [-H dir] pattern [files]\n", Progname); fprintf(stderr, "\n"); fprintf(stderr, "summary of frequently used options:\n"); fprintf(stderr, "(For a more detailed listing see 'man agrep'.)\n"); fprintf(stderr, "-#: find matches with at most # errors\n"); fprintf(stderr, "-c: output the number of matched records\n"); fprintf(stderr, "-d: define record delimiter\n"); fprintf(stderr, "-h: do not output file names\n"); fprintf(stderr, "-i: case-insensitive search, e.g., 'a' = 'A'\n"); fprintf(stderr, "-l: output the names of files that contain a match\n"); fprintf(stderr, "-n: output record prefixed by record number\n"); fprintf(stderr, "-v: output those records that have no matches\n"); fprintf(stderr, "-w: pattern has to match as a word, e.g., 'win' will not match 'wind'\n"); fprintf(stderr, "-B: best match mode. 
find the closest matches to the pattern\n"); fprintf(stderr, "-G: output the files that contain a match\n"); fprintf(stderr, "-H 'dir': the cast-dictionary is located in directory 'dir'\n"); fprintf(stderr, "\n"); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } agrep-4.17/agrep.chronicle0100644001123100001460000002066407010116362013711 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ Started in Feb 1991. This chronicle briefly describes the progress of agrep. Feb/91: The approximate pattern matching algorithm called 'bitap' (bit-parallel approximate pattern matching) is designed. The algorithm is a generalization of Baeza-Yates' "shift-or" algorithm for exact matching. Mar/91: Many extensions of the algorithm 'bitap' are found, especially for approximate regular expression pattern matching. Preliminary implementation of the algorithm showed a strong promise for a general-purpose fast approximate pattern-matching tool. Apr/91: Approximate regular expression pattern matching was implemented. The result is even better than expected. The design of the software tool is pinned down. (For example, record oriented, multi-pattern, AND/OR logic queries.) A partition technique for approximate pattern matching is used. May/91: The prototype of "agrep" is completed. A lot of debugging/optimization in this month. Jun/91: The first version of agrep is released. agrep 1.0 was announced and made available by anonymous ftp from cs.arizona.edu. Jul/91: A sub-linear expected-time algorithm, called "amonkey" for approximate pattern matching (for simple pattern) is designed. The algorithm has the same time complexity as that of Chang&Lawler but is much much faster in practice. The algorithm is based on a variation of Boyer-Moore technique, which we call "block-shifting." 
A sub-linear expected-time algorithm, called "mgrep" for matching a set of patterns is designed based on the "block-shifting" technique with a hashing technique. Aug/91: "amonkey" is implemented and incorporated into agrep. It is very fast for long patterns like DNA patterns. (But roughly the same for matching English words as the bitap algorithm using the partition technique.) Prototype of "mgrep" is implemented. Sep/91: "mgrep" is incorporated into agrep to support the -f option. An algorithm for approximate pattern matching that combines the 'partition' technique with the sub-linear expected-time algorithm for multi-patterns is designed. Implementation shows it to be the fastest for ASCII text (and pattern). The Boyer-Moore technique for exact matching is incorporated. Nov/91: The final paper of "agrep" that is to appear in the USENIX conference (Jan 1992) is finished. Jan/92: Some new options are added, such as find best matches (-B), and file outputs (-G). The man pages are revised. agrep version 2.0 is released. Fixed the following bugs and changed the version to be 2.01. 1. -G option doesn't work correctly. 2. multiple definition of some global variables. 3. -# with -w forced the first character of the pattern to be matched. Mar/92: Fixed the following bugs and changed the version to be 2.02. 1. agrep sometimes misses some matches for pipeline input. 2. the word delimiter was not defined consistently. ------------------------------------------------------------------------------ bgopal: The following changes were made to the original agrep during 1993-94: 1. Modifications to make main() take multiple options from the same '-' group: - the only modifications were in main.c. 2. Now, to make agrep take input from a buffer so that it can be used as a procedure from another program.
Places where changes have to be done: - asearch.c/fill_buf(), bitap.c/fill_buf() - main.c/read() statements - mgrep.c/read() statements - sgrep.c/read() statements - probably don't have to change scanf in main.c where a y/n is asked. - probably don't have to change readdir in recursive.c. I have used fill_buf everywhere for reading things from a file. I have to verify whether this is actually used to take input in which it has to search for patterns or to read things REALLY from a file (-f option, file_out, etc.). If the former, then I can simply modify fill_buf to read from an fd or from an input string. How to specify that string / area of memory is a separate issue to be resolved during the weekend. I have resolved it. I've also made a library interface for agrep. So 2 is done. 3. Make errno = exit code whenever you return -1 instead of exiting. 4. See if there is a way to avoid copying of memory bytes in agrep by using pointer manipulation instead of fill_buf: a part of making agrep a callable routine. It is important to make it really fast, which is why this is being done. Solution: --------- I think I've solved the problem: but there is a restriction for in-memory pattern matching: THE SEARCHBUFFER HAS TO BEGIN WITH A NEWLINE -- otherwise we cannot avoid the copying. This fact can be checked in the library interface. There are some more problems whose solution I'm not sure of: ask Udi. The problem is: a. In asearch(), asearch0() and asearch1(), some data is copied after the data read in the buffer. Is that crucial? The same thing can be seen in bitap(). This is done when num_read < BlockSize -- why? b. In sgrep(), the whole buffer is filled with pat[m-1] so that bm() does not enter an infinite-loop. Is that crucial if there is an equivalent of a single iteration of the while-fill_buf-loop? I have not modified prepf() to read the multi-pattern from memory instead of a file. I have to modify it later (including agrep.c).
Function fill_buf now simply reads from the fd given: it does not bother about pointer manipulation. Note: wherever there is a while(i lots of problems! *). **** These were completed and added into glimpse/glimpseindex in Spring 1994. 7. One other problems with agrep as a callable routine: the variable names used by agrep can clash with user defined variable names. Making agrep variables static is not going to help since they are accessed throughout agrep code. Making code reentrant is not the issue (it is almost impossible!). agrep-4.17/agrep.h0100644001123100001460000001243607302117062012172 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ #ifndef _AGREP_H_ #define _AGREP_H_ #include #include #include #include #include #include "re.h" #include "defs.h" #include "config.h" #include #include #include #include #define MAXNUM_PAT 16 /* 32 parts of a pattern = width of expression-tree */ #define CHAR unsigned char #define MAXPAT 256 #define MAXPATT 256 #define MAXDELIM 8 /* Max size of a delimiter pattern */ #define SHORTREG 15 #define MAXREG 30 #define MAXNAME 256 #define Max_Pats 12 /* max num of patterns */ #define Max_Keys 12 /* max num of keywords */ #define Max_Psize 128 /* max size of a pattern counting all the characters */ #define Max_Keyword 31 /* the max size of a keyword */ #define WORD 32 /* the size of a word */ #define MaxError 8 /* the max number of errors allowed */ #define MaxRerror 4 /* the max number of erros for regular expression */ #define MaxDelimit 16 /* the max raw length of a user defined delimiter */ #define BlockSize 49152/* BlockSize is always >= Max_record */ #define Max_record 49152 #define SIZE 16384 /* BlockSIze in sgrep */ #define MAXLINE 1024 /* maxline in sgrep */ #define MAX_LINE_LEN 1024 #define Maxline 1024 #define RBLOCK 8192 #define RMAXLINE 1024 #define MaxNext 66000 #define ON 1 #define OFF 0 #define Compl 1 #define Maxresult 10000 #define MaxCan 2500 #define MAX_DASHF_FILES 40000 
#if 1 #define MAXSYM 256 /* ASCII */ #define WORDB 133 /* -w option */ #define LPARENT 134 /* ( */ #define RPARENT 135 /* ) */ #define LRANGE 136 /* [ */ #define RRANGE 137 /* ] */ #define LANGLE 138 /* < */ #define RANGLE 139 /* > */ #define NOTSYM 140 /* ^ */ #define WILDCD 141 /* wildcard */ #define ORSYM 142 /* | */ #define ORPAT 143 /* , */ #define ANDPAT 144 /* ; */ #define STAR 145 /* closure */ #define HYPHEN 129 /* - */ #define NOCARE 130 /* . */ #define NNLINE 131 /* special symbol for newline in begin of pattern*/ /* matches '\n' and NNLINE */ #define USERRANGE_MIN 128 /* min char in pattern of user: give warning */ #define USERRANGE_MAX 145 /* max char in pattern of user: give warning */ #else #define MAXSYM 256 /* ASCII */ #define WORDB 241 /* -w option */ #define LPARENT 242 /* ( */ #define RPARENT 243 /* ) */ #define LRANGE 244 /* [ */ #define RRANGE 245 /* ] */ #define LANGLE 246 /* < */ #define RANGLE 247 /* > */ #define NOTSYM 248 /* ^ */ #define WILDCD 249 /* wildcard */ #define ORSYM 250 /* | */ #define ORPAT 251 /* , */ #define ANDPAT 252 /* ; */ #define STAR 253 /* closure */ #define HYPHEN 237 /* - */ #define NOCARE 238 /* . 
*/ #define NNLINE 239 /* special symbol for newline in begin of pattern*/ /* matches '\n' and NNLINE */ #define USERRANGE_MIN 236 /* min char in pattern of user: give warning */ #define USERRANGE_MAX 255 /* max char in pattern of user: give warning */ #endif #define OUTPUT_OVERFLOW \ { /* fprintf(stderr, "Output buffer overflow after %d bytes @ %s:%d !!\n", agrep_outpointer, __FILE__, __LINE__) */\ errno = ERANGE;\ } extern unsigned char *forward_delimiter(), *backward_delimiter(); extern int exists_delimiter(); extern void preprocess_delimiter(); unsigned char *forward_delimiter(), *backward_delimiter(); int exists_tcompressed_word(); unsigned char * forward_tcompressed_word(), *backward_tcompressed_word(); void alloc_buf(), free_buf(); extern char *aprint_file_time(); #define AGREP_VERSION "3.0" #define AGREP_DATE "1994" /* To parse patterns in asplit.c */ #define AND_EXP 0x1 /* boolean ; -- remains set throughout */ #define OR_EXP 0x2 /* boolean , -- remains set throughout */ #define ATTR_EXP 0x4 /* set when = is next non-alpha char, remains set until next , or ; --> never used in agrep */ #define VAL_EXP 0x8 /* set all the time except when = is seen for first time --> never used in agrep */ #define ENDSUB_EXP 0x10 /* set when , or ; is seen: must unset ATTR_EXP now --> never used in agrep */ #define INTERNAL 1 #define LEAF 2 #define NOTPAT 0x1000 #define OPMASK 0x00ff typedef struct _ParseTree { short op; char type; char terminalindex; union { struct { struct _ParseTree *left, *right; } internal; struct { int attribute; /* never used in agrep */ unsigned char *value; } leaf; } data; } ParseTree; #define unget_token_bool(bufptr, tokenlen) (*(bufptr)) -= (tokenlen) #define dd(a,b) 1 #define AGREP_ERROR 123 /* errno = 123 means that glimpse should quit searching files: used for errors glimpse itself cannot detect but agrep can */ #if ISO_CHAR_SET /* From Henrik.Martin@eua.ericsson.se (Henrik Martin) */ #define IS_LOCALE_CHAR(c) ((isalnum((c)) || isxdigit((c)) || \ 
isspace((c)) || ispunct((c)) || iscntrl((c))) ? 1 : 0) #define ISASCII(c) IS_LOCALE_CHAR(c) #else #define ISASCII(c) isascii(c) #endif extern int my_open(); extern FILE *my_fopen(); extern int my_stat(); extern int my_fstat(); extern int my_lstat(); extern int special_get_name(); #endif /* _AGREP_H_ */ agrep-4.17/asearch.c0100644001123100001460000010617007010116362012472 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ #include "agrep.h" #include extern unsigned Init1, Init[], Mask[], endposition, D_endpos, AND, NO_ERR_MASK; extern int DELIMITER, FILENAMEONLY, INVERSE, PRINTFILETIME; extern CHAR CurrentFileName[]; extern long CurrentFileTime; extern int I, num_of_matched, prev_num_of_matched, TRUNCATE; extern int CurrentByteOffset; extern int errno; extern CHAR *agrep_inbuffer; extern int agrep_inlen; extern int agrep_initialfd; extern int EXITONERROR; extern int agrep_inpointer; extern FILE *agrep_finalfp; extern CHAR *agrep_outbuffer; extern int agrep_outlen; extern int agrep_outpointer; extern int NEW_FILE, POST_FILTER; extern int LIMITOUTPUT, LIMITPERFILE; int asearch(old_D_pat, text, D) CHAR old_D_pat[]; int text; register unsigned D; { register unsigned i, c, r1, r2, CMask, r_NO_ERR, r_Init1; register unsigned A0, B0, A1, B1, endpos; unsigned A2, B2, A3, B3, A4, B4; unsigned A[MaxError+1], B[MaxError+1]; unsigned D_Mask; int end; int D_length, FIRSTROUND, ResidueSize, lasti, l, k, j=0; int printout_end; CHAR *buffer; /* CHAR *tempbuf = NULL; */ /* used only when text == -1 */ if (I == 0) Init1 = (unsigned)037777777777; if(D > 4) { return asearch0(old_D_pat, text, D); } D_length = strlen(old_D_pat); D_Mask = D_endpos; for ( i=1; i 0) { i = Max_record; end = Max_record + l ; if (FIRSTROUND) { i = Max_record - 1; if(DELIMITER) { for(k=0; k=D_length) j--; } FIRSTROUND = OFF; } if (l < BlockSize) { /* copy pattern and '\0' at end of buffer */ strncpy(buffer+end, old_D_pat, D_length); buffer[end+D_length] = '\0'; end = end 
+ D_length; } /* ASEARCH_PROCESS: the while-loop below */ while (i < end ) { c = buffer[i]; CMask = Mask[c]; r1 = r_Init1 & B0; A0 = ((B0 >>1 ) & CMask) | r1; r1 = r_Init1 & B1; r2 = B0 | (((A0 | B0) >> 1) & r_NO_ERR); A1 = ((B1 >>1 ) & CMask) | r2 | r1 ; if(D == 1) goto Nextcharfile; r1 = r_Init1 & B2; r2 = B1 | (((A1 | B1) >> 1) & r_NO_ERR); A2 = ((B2 >>1 ) & CMask) | r2 | r1 ; if(D == 2) goto Nextcharfile; r1 = r_Init1 & B3; r2 = B2 | (((A2 | B2) >> 1) & r_NO_ERR); A3 = ((B3 >>1 ) & CMask) | r2 | r1 ; if(D == 3) goto Nextcharfile; r1 = r_Init1 & B4; r2 = B3 | (((A3 | B3) >> 1) & r_NO_ERR); A4 = ((B4 >>1 ) & CMask) | r2 | r1 ; if(D == 4) goto Nextcharfile; Nextcharfile: i=i+1; CurrentByteOffset ++; if(A0 & endpos) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = A0; if ( D == 1) r1 = A1; if ( D == 2) r1 = A2; if ( D == 3) r1 = A3; if ( D == 4) r1 = A4; if(((AND == 1) && ((r1 & endposition) == endposition)) || ((AND == 0) && (r1 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(text, buffer); NEW_FILE = OFF; return 0; } printout_end = i - D_length - 1 ; if ((text != 
-1) && !(lasti >= Max_record + l - 1)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } else if ((text == -1) && !(lasti >= l)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(text, buffer); return 0; /* done */ } } lasti = i - D_length; /* point to starting position of D_pat */ TRUNCATE = OFF; for(k=0; k<= D; k++) { B[k] = Init[0]; } r1 = B[0] & Init1; A[0] = (((B[0]>>1) & CMask) | r1) & D_Mask; for(k=1; k<= D; k++) { r1 = Init1 & B[k]; r2 = B[k-1] | (((A[k-1] | B[k-1])>>1)&r_NO_ERR); A[k] = (((B[k]>>1)&CMask) | r1 | r2) ; } A0 = A[0]; B0 = B[0]; A1 = A[1]; B1 = B[1]; A2 = A[2]; B2 = B[2]; A3 = A[3]; B3 = B[3]; A4 = A[4]; B4 = B[4]; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i]; CMask = Mask[c]; r1 = r_Init1 & A0; B0 = ((A0 >> 1 ) & CMask) | r1; /* printf("Mask = %o, B0 = %on", CMask, B0); */ r1 = r_Init1 & A1; r2 = A0 | (((A0 | B0) >> 1) & r_NO_ERR); B1 = ((A1 >>1 ) & CMask) | r2 | r1 ; if(D == 1) goto Nextchar1file; r1 = r_Init1 & A2; r2 = A1 | (((A1 | B1) >> 1) & r_NO_ERR); B2 = ((A2 >>1 ) & CMask) | r2 | r1 ; if(D == 2) goto Nextchar1file; r1 = r_Init1 & A3; r2 = A2 | (((A2 | B2) >> 1) & r_NO_ERR); B3 = ((A3 >>1 ) & CMask) | r2 | r1 ; if(D == 3) goto Nextchar1file; r1 = r_Init1 & A4; r2 = A3 | (((A3 | B3) >> 1) & r_NO_ERR); B4 = ((A4 >>1 ) & CMask) | r2 | r1 ; if(D == 4) goto Nextchar1file; Nextchar1file: i=i+1; CurrentByteOffset ++; if(B0 & endpos) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = B0; if ( D == 1) r1 = B1; if ( D == 2) r1 = B2; if ( D == 3) r1 = B3; if ( D == 4) r1 = B4; if(((AND == 1) && ((r1 & endposition) == endposition)) || ((AND == 0) && (r1 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { 
num_of_matched++; free_buf(text, buffer); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(text, buffer); NEW_FILE = OFF; return 0; } printout_end = i - D_length - 1 ; if((text != -1) && !(lasti >= Max_record + l - 1)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } else if ((text == -1) && !(lasti >= l)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(text, buffer); return 0; /* done */ } } lasti = i - D_length ; TRUNCATE = OFF; for(k=0; k<= D; k++) { A[k] = Init[0]; } r1 = A[0] & Init1; B[0] = (((A[0]>>1)&CMask) | r1) & D_Mask; for(k=1; k<= D; k++) { r1 = Init1 & A[k]; r2 = A[k-1] | (((A[k-1] | B[k-1])>>1)&r_NO_ERR); B[k] = (((A[k]>>1)&CMask) | r1 | r2) ; } A0 = A[0]; B0 = B[0]; A1 = A[1]; B1 = B[1]; A2 = A[2]; B2 = B[2]; A3 = A[3]; B3 = B[3]; A4 = A[4]; B4 = B[4]; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } } if(l < BlockSize) { lasti = Max_record ; } else { ResidueSize = Max_record + l - lasti; if(ResidueSize > Max_record) { 
ResidueSize = Max_record; TRUNCATE = ON; } strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize); lasti = Max_record - ResidueSize; if(lasti == 0) lasti = 1; } } free_buf(text, buffer); return 0; #if AGREP_POINTER } else { lasti = 1; /* if (DELIMITER) tempbuf = (CHAR*)malloc(D_length + 1); */ buffer = (CHAR *)agrep_inbuffer; l = agrep_inlen; end = l; /* buffer[end-1] = '\n'; */ /* at end of the text. */ /* buffer[0] = '\n'; */ /* in front of the text. */ i = 0; if(DELIMITER) { for(k=0; k=D_length) j--; /* memcpy(tempbuf, buffer+end, D_length+1); strncpy(buffer+end, old_D_pat, D_length); buffer[end+D_length] = '\0'; end = end + D_length; */ } /* An exact copy of the above ASEARCH_PROCESS: the while-loop below */ while (i < end ) { c = buffer[i]; CMask = Mask[c]; r1 = r_Init1 & B0; A0 = ((B0 >>1 ) & CMask) | r1; r1 = r_Init1 & B1; r2 = B0 | (((A0 | B0) >> 1) & r_NO_ERR); A1 = ((B1 >>1 ) & CMask) | r2 | r1 ; if(D == 1) goto Nextcharmem; r1 = r_Init1 & B2; r2 = B1 | (((A1 | B1) >> 1) & r_NO_ERR); A2 = ((B2 >>1 ) & CMask) | r2 | r1 ; if(D == 2) goto Nextcharmem; r1 = r_Init1 & B3; r2 = B2 | (((A2 | B2) >> 1) & r_NO_ERR); A3 = ((B3 >>1 ) & CMask) | r2 | r1 ; if(D == 3) goto Nextcharmem; r1 = r_Init1 & B4; r2 = B3 | (((A3 | B3) >> 1) & r_NO_ERR); A4 = ((B4 >>1 ) & CMask) | r2 | r1 ; if(D == 4) goto Nextcharmem; Nextcharmem: i=i+1; CurrentByteOffset ++; if(A0 & endpos) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = A0; if ( D == 1) r1 = A1; if ( D == 2) r1 = A2; if ( D == 3) r1 = A3; if ( D == 4) r1 = A4; if(((AND == 1) && ((r1 & endposition) == endposition)) || ((AND == 0) && (r1 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } if 
(PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(text, buffer); NEW_FILE = OFF; return 0; } printout_end = i - D_length - 1 ; if ((text != -1) && !(lasti >= Max_record + l - 1)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } else if ((text == -1) && !(lasti >= l)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(text, buffer); return 0; /* done */ } } lasti = i - D_length; /* point to starting position of D_pat */ TRUNCATE = OFF; for(k=0; k<= D; k++) { B[k] = Init[0]; } r1 = B[0] & Init1; A[0] = (((B[0]>>1) & CMask) | r1) & D_Mask; for(k=1; k<= D; k++) { r1 = Init1 & B[k]; r2 = B[k-1] | (((A[k-1] | B[k-1])>>1)&r_NO_ERR); A[k] = (((B[k]>>1)&CMask) | r1 | r2) ; } A0 = A[0]; B0 = B[0]; A1 = A[1]; B1 = B[1]; A2 = A[2]; B2 = B[2]; A3 = A[3]; B3 = B[3]; A4 = A[4]; B4 = B[4]; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i]; CMask = Mask[c]; r1 = r_Init1 & A0; B0 = ((A0 >> 1 ) & CMask) | r1; /* printf("Mask = %o, B0 = %on", CMask, B0); */ r1 = r_Init1 & A1; r2 = A0 | (((A0 | B0) >> 1) & r_NO_ERR); B1 = ((A1 >>1 ) & CMask) | r2 | r1 ; if(D == 1) goto Nextchar1mem; r1 = r_Init1 & A2; r2 = A1 | (((A1 | B1) >> 1) & r_NO_ERR); B2 = ((A2 >>1 ) & CMask) | r2 | r1 ; if(D == 
2) goto Nextchar1mem; r1 = r_Init1 & A3; r2 = A2 | (((A2 | B2) >> 1) & r_NO_ERR); B3 = ((A3 >>1 ) & CMask) | r2 | r1 ; if(D == 3) goto Nextchar1mem; r1 = r_Init1 & A4; r2 = A3 | (((A3 | B3) >> 1) & r_NO_ERR); B4 = ((A4 >>1 ) & CMask) | r2 | r1 ; if(D == 4) goto Nextchar1mem; Nextchar1mem: i=i+1; CurrentByteOffset ++; if(B0 & endpos) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = B0; if ( D == 1) r1 = B1; if ( D == 2) r1 = B2; if ( D == 3) r1 = B3; if ( D == 4) r1 = B4; if(((AND == 1) && ((r1 & endposition) == endposition)) || ((AND == 0) && (r1 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; free_buf(text, buffer); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(text, buffer); NEW_FILE = OFF; return 0; } printout_end = i - D_length - 1 ; if((text != -1) && !(lasti >= Max_record + l - 1)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } else if ((text == -1) && !(lasti >= l)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= 
num_of_matched - prev_num_of_matched))) { free_buf(text, buffer); return 0; /* done */ } } lasti = i - D_length ; TRUNCATE = OFF; for(k=0; k<= D; k++) { A[k] = Init[0]; } r1 = A[0] & Init1; B[0] = (((A[0]>>1)&CMask) | r1) & D_Mask; for(k=1; k<= D; k++) { r1 = Init1 & A[k]; r2 = A[k-1] | (((A[k-1] | B[k-1])>>1)&r_NO_ERR); B[k] = (((A[k]>>1)&CMask) | r1 | r2) ; } A0 = A[0]; B0 = B[0]; A1 = A[1]; B1 = B[1]; A2 = A[2]; B2 = B[2]; A3 = A[3]; B3 = B[3]; A4 = A[4]; B4 = B[4]; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } } /* if (DELIMITER) { memcpy(buffer+end, tempbuf, D_length+1); free(tempbuf); } */ return 0; } #endif /*AGREP_POINTER*/ } int asearch0(old_D_pat, text, D) CHAR old_D_pat[]; int text; register unsigned D; { register unsigned i, c, r1, r2, CMask, r_NO_ERR, r_Init1, end, endpos; unsigned A[MaxError+2], B[MaxError+2]; unsigned D_Mask; int D_length, FIRSTROUND, ResidueSize, lasti, l, k, j=0; int printout_end; CHAR *buffer; /* CHAR *tempbuf = NULL;*/ /* used only when text == -1 */ D_length = strlen(old_D_pat); D_Mask = D_endpos; for ( i=1; i 0) { i = Max_record; end = Max_record + l ; if (FIRSTROUND) { i = Max_record - 1; FIRSTROUND = OFF; } if (l < BlockSize) { strncpy(buffer+end, old_D_pat, D_length); buffer[end+D_length] = '\0'; end = end + D_length; } /* ASEARCH0_PROCESS: the while-loop below */ while (i < end ) { c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = B[0] & r_Init1; A[0] = (((B[0] >> 1)) & CMask | r1 ) ; for(k=1; k<=D; k++) { r1 = r_Init1 & B[k]; r2 = B[k-1] | (((A[k-1]|B[k-1])>>1) & r_NO_ERR); A[k] = ((B[k] >> 1) & CMask) | r2 | r1; } if(A[0] & endpos) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = A[D]; if(((AND == 1) && ((r1 & endposition) == endposition)) || ((AND == 0) && (r1 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int 
outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(text, buffer); NEW_FILE = OFF; return 0; } printout_end = i - D_length - 1; if((text != -1) && !(lasti >= Max_record + l - 1)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } else if ((text == -1) && !(lasti >= l)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(text, buffer); return 0; /* done */ } } lasti = i - D_length; /* point to starting position of D_pat */ for(k=0; k<= D; k++) { B[k] = Init[0]; } r1 = B[0] & r_Init1; A[0] = (((B[0]>>1) & CMask) | r1) & D_Mask; for(k=1; k<= D; k++) { r1 = Init1 & B[k]; r2 = B[k-1] | (((A[k-1] | B[k-1])>>1)&r_NO_ERR); A[k] = (((B[k]>>1)&CMask) | r1 | r2) ; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = r_Init1 & A[0]; B[0] = ((A[0] >> 1 ) & CMask) | r1; for(k=1; k<=D; k++) { r1 = r_Init1 & A[k]; r2 = A[k-1] | (((A[k-1]|B[k-1])>>1) & r_NO_ERR); B[k] = ((A[k] >> 1) & CMask) | r2 | r1; } if(B[0] & endpos) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else 
CurrentByteOffset -= 1; r1 = B[D]; if(((AND == 1) && ((r1 & endposition) == endposition)) || ((AND == 0) && (r1 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(text, buffer); NEW_FILE = OFF; return 0; } printout_end = i - D_length -1 ; if((text != -1) && !(lasti >= Max_record + l - 1)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } else if ((text == -1) && !(lasti >= l)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(text, buffer); return 0; /* done */ } } lasti = i - D_length ; for(k=0; k<= D; k++) { A[k] = Init[0]; } r1 = A[0] & r_Init1; B[0] = (((A[0]>>1)&CMask) | r1) & D_Mask; for(k=1; k<= D; k++) { r1 = r_Init1 & A[k]; r2 = A[k-1] | (((A[k-1] | B[k-1])>>1)&r_NO_ERR); B[k] = (((A[k]>>1)&CMask) | r1 | r2) ; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } } if(l < BlockSize) { lasti = Max_record; } else { ResidueSize = Max_record 
+ l - lasti; if(ResidueSize > Max_record) { ResidueSize = Max_record; TRUNCATE = ON; } strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize); lasti = Max_record - ResidueSize; if(lasti == 0) lasti = 1; } } free_buf(text, buffer); return 0; #if AGREP_POINTER } else { lasti = 1; /* if (DELIMITER) tempbuf = (CHAR*)malloc(D_length + 1); */ buffer = (CHAR *)agrep_inbuffer; l = agrep_inlen; end = l; /* buffer[end-1] = '\n';*/ /* at end of the text. */ /* buffer[0] = '\n';*/ /* in front of the text. */ i = 0; if(DELIMITER) { for(k=0; k=D_length) j--; /* memcpy(tempbuf, buffer+end, D_length+1); strncpy(buffer+end, old_D_pat, D_length); buffer[end+D_length] = '\0'; end = end + D_length; */ } /* An exact copy of the above ASEARCH0_PROCESS: the while-loop below */ while (i < end ) { c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = B[0] & r_Init1; A[0] = (((B[0] >> 1)) & CMask | r1 ) ; for(k=1; k<=D; k++) { r1 = r_Init1 & B[k]; r2 = B[k-1] | (((A[k-1]|B[k-1])>>1) & r_NO_ERR); A[k] = ((B[k] >> 1) & CMask) | r2 | r1; } if(A[0] & endpos) { if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; j++; r1 = A[D]; if(((AND == 1) && ((r1 & endposition) == endposition)) || ((AND == 0) && (r1 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; 
free_buf(text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(text, buffer); NEW_FILE = OFF; return 0; } printout_end = i - D_length - 1; if((text != -1) && !(lasti >= Max_record + l - 1)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } else if ((text == -1) && !(lasti >= l)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(text, buffer); return 0; /* done */ } } lasti = i - D_length; /* point to starting position of D_pat */ for(k=0; k<= D; k++) { B[k] = Init[0]; } r1 = B[0] & r_Init1; A[0] = (((B[0]>>1) & CMask) | r1) & D_Mask; for(k=1; k<= D; k++) { r1 = Init1 & B[k]; r2 = B[k-1] | (((A[k-1] | B[k-1])>>1)&r_NO_ERR); A[k] = (((B[k]>>1)&CMask) | r1 | r2) ; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = r_Init1 & A[0]; B[0] = ((A[0] >> 1 ) & CMask) | r1; for(k=1; k<=D; k++) { r1 = r_Init1 & A[k]; r2 = A[k-1] | (((A[k-1]|B[k-1])>>1) & r_NO_ERR); B[k] = ((A[k] >> 1) & CMask) | r2 | r1; } if(B[0] & endpos) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; r1 = B[D]; if(((AND == 1) && ((r1 & endposition) == endposition)) || ((AND == 0) && (r1 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); 
else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(text, buffer); NEW_FILE = OFF; return 0; } printout_end = i - D_length -1 ; if((text != -1) && !(lasti >= Max_record + l - 1)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } else if ((text == -1) && !(lasti >= l)) { if (-1 == output(buffer, lasti, printout_end, j)) {free_buf(text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(text, buffer); return 0; /* done */ } } lasti = i - D_length ; for(k=0; k<= D; k++) { A[k] = Init[0]; } r1 = A[0] & r_Init1; B[0] = (((A[0]>>1)&CMask) | r1) & D_Mask; for(k=1; k<= D; k++) { r1 = r_Init1 & A[k]; r2 = A[k-1] | (((A[k-1] | B[k-1])>>1)&r_NO_ERR); B[k] = (((A[k]>>1)&CMask) | r1 | r2) ; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } } /* if (DELIMITER) { memcpy(buffer+end, tempbuf, D_length+1); free(tempbuf); } */ return 0; } #endif /*AGREP_POINTER*/ } agrep-4.17/asearch1.c0100644001123100001460000004100407010116362012545 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. 
*/ #include "agrep.h" #include extern unsigned Init1, Init[], Mask[], endposition, D_endpos; extern unsigned NO_ERR_MASK; extern int TRUNCATE, DELIMITER, AND, I, S, DD, INVERSE, FILENAMEONLY, PRINTFILETIME ; extern char CurrentFileName[]; extern long CurrentFileTime; extern int num_of_matched, prev_num_of_matched; extern int CurrentByteOffset; extern CHAR *agrep_inbuffer; extern int agrep_inlen; extern FILE *agrep_finalfp; extern CHAR *agrep_outbuffer; extern int agrep_outlen; extern int agrep_outpointer; extern int NEW_FILE, POST_FILTER; extern int LIMITOUTPUT, LIMITPERFILE; int asearch1(old_D_pat, Text, D) char old_D_pat[]; int Text; register unsigned D; { register unsigned end, i, r1, r3, r4, r5, CMask, D_Mask, k, endpos; register unsigned r_NO_ERR; unsigned A[MaxError*2+1], B[MaxError*2+1]; int D_length, ResidueSize, lasti, num_read, FIRSTROUND=1, j=0; CHAR *buffer; /* CHAR *tempbuf = NULL;*/ /* used only when Text == -1 */ if(I == 0) Init1 = (unsigned)037777777777; if(DD > D) DD = D+1; if(I > D) I = D+1; if(S > D) S = D+1; D_length = strlen(old_D_pat); r_NO_ERR = NO_ERR_MASK; D_Mask = D_endpos; for(i=1; i 0) { i=Max_record; end = Max_record + num_read; if(FIRSTROUND) { i = Max_record -1 ; if(DELIMITER) { for(k=0; k=D_length) j--; } FIRSTROUND = 0; } if(num_read < BlockSize) { strncpy(buffer+Max_record+num_read, old_D_pat, D_length); end = end + D_length; buffer[end] = '\0'; } /* ASEARCH1_PROCESS: the while-loop below */ while (i < end) { CMask = Mask[buffer[i++]]; CurrentByteOffset ++; r1 = Init1 & B[D]; A[D] = ((B[D] >> 1) & CMask ) | r1; for(k = r3; k <= r4; k++) /* r3 = D+1, r4 = 2*D */ { r5 = B[k]; r1 = Init1 & r5; A[k] = ((r5 >> 1) & CMask) | B[k-I] | (((A[k-DD] | B[k-S]) >>1) & r_NO_ERR) | r1 ; } if(A[D] & endpos) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((A[D*2] & endposition) == endposition)) || ((AND == 0) && (A[D*2] & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) 
{ num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (Text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if((Text != -1) && !(lasti >= Max_record + num_read - 1)) { if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;} } else if ((Text == -1) && !(lasti >= num_read)) { if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } lasti = i - D_length; TRUNCATE = OFF; for(k = D; k <= r4 ; k++) A[k] = B[k] = Init[0]; r1 = Init1 & B[D]; A[D] = (((B[D] >> 1) & CMask ) | r1) & D_Mask; for(k = r3; k <= r4; k++) /* r3 = D+1, r4 = 2*D */ { r5 = B[k]; r1 = Init1 & r5; A[k] = ((r5 >> 1) & CMask) | B[k-I] | (((A[k-DD] | B[k-S]) >>1) & r_NO_ERR) | r1 ; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } /* end if (A[D]&endpos) */ CMask = Mask[buffer[i++]]; CurrentByteOffset ++; r1 = A[D] & Init1; B[D] = ((A[D] >> 1) & CMask) | r1; for(k = r3; k <= r4; k++) { r1 = A[k] & Init1; B[k] = ((A[k] >> 1) & CMask) | A[k-I] | 
(((B[k-DD] | A[k-S]) >>1)&r_NO_ERR) | r1 ; } if(B[D] & endpos) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((B[r4] & endposition) == endposition)) || ((AND == 0) && (B[r4] & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (Text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if((Text != -1) && !(lasti >= Max_record + num_read - 1)) { if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;} } else if ((Text == -1) && !(lasti >= num_read)) { if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } lasti = i-D_length; TRUNCATE = OFF; for(k=D; k <= r4; k++) A[k] = B[k] = Init[0]; r1 = Init1 & A[D]; B[D] = (((A[D] >> 1) & CMask ) | r1) & D_Mask; for(k = r3; k <= r4; k++) /* r3 = D+1, r4 = 2*D */ { r5 = A[k]; r1 = Init1 & r5; B[k] = ((r5 >> 1) & CMask) | A[k-I] | (((B[k-DD] | A[k-S]) >>1) & r_NO_ERR) | r1 ; } if 
(DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } /* end if (B[D]&endpos) */ } ResidueSize = Max_record + num_read - lasti; if(ResidueSize > Max_record) { ResidueSize = Max_record; TRUNCATE = ON; } strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize); lasti = Max_record - ResidueSize; if(lasti < 0) lasti = 1; if(num_read < BlockSize) lasti = Max_record; } free_buf(Text, buffer); return 0; #if AGREP_POINTER } else { lasti = 1; /* if (DELIMITER) tempbuf = (CHAR*)malloc(D_length + 1); */ buffer = (CHAR *)agrep_inbuffer; num_read = agrep_inlen; end = num_read; /* buffer[end-1] = '\n';*/ /* at end of the text. */ /* buffer[0] = '\n';*/ /* in front of the text. */ i = 0; if(DELIMITER) { for(k=0; k=D_length) j--; /* memcpy(tempbuf, buffer+end, D_length+1); strncpy(buffer+end, old_D_pat, D_length); buffer[end+D_length] = '\0'; end = end + D_length; */ } /* An exact copy of the above ASEARCH1_PROCESS: the while-loop below */ while (i < end) { CMask = Mask[buffer[i++]]; CurrentByteOffset ++; r1 = Init1 & B[D]; A[D] = ((B[D] >> 1) & CMask ) | r1; for(k = r3; k <= r4; k++) /* r3 = D+1, r4 = 2*D */ { r5 = B[k]; r1 = Init1 & r5; A[k] = ((r5 >> 1) & CMask) | B[k-I] | (((A[k-DD] | B[k-S]) >>1) & r_NO_ERR) | r1 ; } if(A[D] & endpos) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((A[D*2] & endposition) == endposition)) || ((AND == 0) && (A[D*2] & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { 
OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (Text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if((Text != -1) && !(lasti >= Max_record + num_read - 1)) { if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;} } else if ((Text == -1) && !(lasti >= num_read)) { if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } lasti = i - D_length; TRUNCATE = OFF; for(k = D; k <= r4 ; k++) A[k] = B[k] = Init[0]; r1 = Init1 & B[D]; A[D] = (((B[D] >> 1) & CMask ) | r1) & D_Mask; for(k = r3; k <= r4; k++) /* r3 = D+1, r4 = 2*D */ { r5 = B[k]; r1 = Init1 & r5; A[k] = ((r5 >> 1) & CMask) | B[k-I] | (((A[k-DD] | B[k-S]) >>1) & r_NO_ERR) | r1 ; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } /* end if (A[D]&endpos) */ CMask = Mask[buffer[i++]]; CurrentByteOffset ++; r1 = A[D] & Init1; B[D] = ((A[D] >> 1) & CMask) | r1; for(k = r3; k <= r4; k++) { r1 = A[k] & Init1; B[k] = ((A[k] >> 1) & CMask) | A[k-I] | (((B[k-DD] | A[k-S]) >>1)&r_NO_ERR) | r1 ; } if(B[D] & endpos) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((B[r4] & endposition) == endposition)) || ((AND == 0) && (B[r4] & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) 
{ OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(Text, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } /* if (Text == -1) { memcpy(buffer+end-D_length, tempbuf, D_length+1); } */ free_buf(Text, buffer); NEW_FILE = OFF; return 0; } if((Text != -1) && !(lasti >= Max_record + num_read - 1)) { if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;} } else if ((Text == -1) && !(lasti >= num_read)) { if (-1 == output(buffer, lasti, i-D_length-1, j)) {free_buf(Text, buffer); return -1;} } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(Text, buffer); return 0; /* done */ } } lasti = i-D_length; TRUNCATE = OFF; for(k=D; k <= r4; k++) A[k] = B[k] = Init[0]; r1 = Init1 & A[D]; B[D] = (((A[D] >> 1) & CMask ) | r1) & D_Mask; for(k = r3; k <= r4; k++) /* r3 = D+1, r4 = 2*D */ { r5 = A[k]; r1 = Init1 & r5; B[k] = ((r5 >> 1) & CMask) | A[k-I] | (((B[k-DD] | A[k-S]) >>1) & r_NO_ERR) | r1 ; } if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } /* end if (B[D]&endpos) */ } /* if (DELIMITER) { memcpy(buffer+end, tempbuf, D_length+1); free(tempbuf); } */ return 0; } #endif /*AGREP_POINTER*/ } agrep-4.17/asplit.c0100644001123100001460000002624507010116362012364 0ustar friurz/* Copyright (c) 1994 Burra Gopal, Udi Manber. All Rights Reserved. 
*/ #include "agrep.h" #include "putils.c" extern int checksg(); extern int D; extern FILE *debug; /* All borrowed from agrep.c and are needed for searching the index */ extern ParseTree aterminals[MAXNUM_PAT]; extern int AComplexBoolean; /* returns where it found the distinguishing token: until that from prev value of begin is the current pattern (not just the "words" in it) */ CHAR * aparse_flat(begin, end, prev, next) CHAR *begin; CHAR *end; int prev; int *next; { if (begin > end) { *next = prev; return end; } if (prev & ENDSUB_EXP) prev &= ~ATTR_EXP; if ((prev & ATTR_EXP) && !(prev & VAL_EXP)) prev |= VAL_EXP; while (begin <= end) { if (*begin == ',') { prev |= OR_EXP; prev |= VAL_EXP; prev |= ENDSUB_EXP; if (prev & AND_EXP) { fprintf(stderr, "asplit.c: parse error at character '%c'\n", *begin); return NULL; } *next = prev; return begin; } else if (*begin == ';') { prev |= AND_EXP; prev |= VAL_EXP; prev |= ENDSUB_EXP; if (prev & OR_EXP) { fprintf(stderr, "asplit.c: parse error at character '%c'\n", *begin); return NULL; } *next = prev; return begin; } else if (*begin == '\\') begin ++; /* skip two things */ begin++; } *next = prev; return begin; } int asplit_pattern_flat(APattern, AM, terminals, pnum_terminals, pAParse) CHAR *APattern; int AM; ParseTree terminals[MAXNUM_PAT]; int *pnum_terminals; int *pAParse; { CHAR *buffer; CHAR *buffer_pat; CHAR *buffer_end; buffer = APattern; buffer_end = buffer + AM; *pAParse = 0; /* * buffer is the runnning pointer, buffer_pat is the place where * the distinguishing delimiter was found, buffer_end is the end. 
*/ while (buffer_pat = aparse_flat(buffer, buffer_end, *pAParse, pAParse)) { /* there is no pattern until after the distinguishing delimiter position: some agrep garbage */ if (buffer_pat <= buffer) { buffer = buffer_pat+1; if (buffer_pat >= buffer_end) break; continue; } if (*pnum_terminals >= MAXNUM_PAT) { fprintf(stderr, "boolean expression has too many terms\n"); return -1; } terminals[*pnum_terminals].op = 0; terminals[*pnum_terminals].type = LEAF; terminals[*pnum_terminals].terminalindex = *pnum_terminals; terminals[*pnum_terminals].data.leaf.attribute = 0; /* default is no structure */ terminals[*pnum_terminals].data.leaf.value = (CHAR *)malloc(buffer_pat - buffer + 2); memcpy(terminals[*pnum_terminals].data.leaf.value, buffer, buffer_pat - buffer); /* without distinguishing delimiter */ terminals[*pnum_terminals].data.leaf.value[buffer_pat - buffer] = '\0'; (*pnum_terminals)++; if (buffer_pat >= buffer_end) break; buffer = buffer_pat+1; } if (buffer_pat == NULL) return -1; /* got out of while loop because of NULL rather than break */ return(*pnum_terminals); } /* * Recursive descent; C-style => AND + OR have equal priority => must bracketize expressions appropriately or will go left->right. * Grammar: * E = {E} | ~a | ~{E} | E ; E | E , E | a * Parser: * One look ahead at each literal will tell you what to do. * ~ has highest priority, ; and , have equal priority (left to right associativity), ~~ is not allowed. 
*/ ParseTree * aparse_tree(buffer, len, bufptr, terminals, pnum_terminals) CHAR *buffer; int len; int *bufptr; ParseTree terminals[]; int *pnum_terminals; { int token, tokenlen; CHAR tokenbuf[MAXNAME]; int oldtokenlen; CHAR oldtokenbuf[MAXNAME]; ParseTree *t, *n, *leftn; token = get_token_bool(buffer, len, bufptr, tokenbuf, &tokenlen); switch(token) { case '{': /* (exp) */ if ((t = aparse_tree(buffer, len, bufptr, terminals, pnum_terminals)) == NULL) return NULL; if ((token = get_token_bool(buffer, len, bufptr, tokenbuf, &tokenlen)) != '}') { fprintf(stderr, "asplit.c: parse error at offset %d\n", *bufptr); destroy_tree(t); return (NULL); } if ((token = get_token_bool(buffer, len, bufptr, tokenbuf, &tokenlen)) == 'e') return t; switch(token) { /* must find boolean infix operator */ case ',': case ';': leftn = t; if ((t = aparse_tree(buffer, len, bufptr, terminals, pnum_terminals)) == NULL) return NULL; n = (ParseTree *)malloc(sizeof(ParseTree)); n->op = (token == ';') ? ANDPAT : ORPAT ; n->type = INTERNAL; n->data.internal.left = leftn; n->data.internal.right = t; return n; /* or end of parent sub expression */ case '}': unget_token_bool(bufptr, tokenlen); /* part of someone else who called me */ return t; default: destroy_tree(t); fprintf(stderr, "asplit.c: parse error at offset %d\n", *bufptr); return NULL; } /* Go one level deeper */ case '~': /* not exp */ if ((token = get_token_bool(buffer, len, bufptr, tokenbuf, &tokenlen)) == 'e') return NULL; switch(token) { case 'a': if (*pnum_terminals >= MAXNUM_PAT) { fprintf(stderr, "Pattern expression too large (> %d)\n", MAXNUM_PAT); return NULL; } n = &terminals[*pnum_terminals]; n->op = 0; n->type = LEAF; n->terminalindex = (*pnum_terminals); n->data.leaf.attribute = 0; n->data.leaf.value = (unsigned char*)malloc(tokenlen + 2); memcpy(n->data.leaf.value, tokenbuf, tokenlen); n->data.leaf.value[tokenlen] = '\0'; (*pnum_terminals)++; n->op |= NOTPAT; t = n; break; case '{': if ((t = aparse_tree(buffer, len, bufptr, 
terminals, pnum_terminals)) == NULL) return NULL; if (t->op & NOTPAT) t->op &= ~NOTPAT; else t->op |= NOTPAT; if ((token = get_token_bool(buffer, len, bufptr, tokenbuf, &tokenlen)) != '}') { fprintf(stderr, "asplit.c: parse error at offset %d\n", *bufptr); destroy_tree(t); return NULL; } break; default: fprintf(stderr, "asplit.c: parse error at offset %d\n", *bufptr); return NULL; } /* The resulting tree is in t. Now do another lookahead at this level */ if ((token = get_token_bool(buffer, len, bufptr, tokenbuf, &tokenlen)) == 'e') return t; switch(token) { /* must find boolean infix operator */ case ',': case ';': leftn = t; if ((t = aparse_tree(buffer, len, bufptr, terminals, pnum_terminals)) == NULL) return NULL; n = (ParseTree *)malloc(sizeof(ParseTree)); n->op = (token == ';') ? ANDPAT : ORPAT ; n->type = INTERNAL; n->data.internal.left = leftn; n->data.internal.right = t; return n; case '}': unget_token_bool(bufptr, tokenlen); return t; default: destroy_tree(t); fprintf(stderr, "asplit.c: parse error at offset %d\n", *bufptr); return NULL; } case 'a': /* individual term (attr=val) */ if (tokenlen == 0) return NULL; memcpy(oldtokenbuf, tokenbuf, tokenlen); oldtokenlen = tokenlen; oldtokenbuf[oldtokenlen] = '\0'; token = get_token_bool(buffer, len, bufptr, tokenbuf, &tokenlen); switch(token) { case '}': /* part of case '{' above: else syntax error not detected but semantics ok */ unget_token_bool(bufptr, tokenlen); case 'e': /* endof input */ case ',': case ';': if (*pnum_terminals >= MAXNUM_PAT) { fprintf(stderr, "Pattern expression too large (> %d)\n", MAXNUM_PAT); return NULL; } n = &terminals[*pnum_terminals]; n->op = 0; n->type = LEAF; n->terminalindex = (*pnum_terminals); n->data.leaf.attribute = 0; n->data.leaf.value = (unsigned char*)malloc(oldtokenlen + 2); strcpy(n->data.leaf.value, oldtokenbuf); (*pnum_terminals)++; if ((token == 'e') || (token == '}')) return n; /* nothing after terminal in expression */ leftn = n; if ((t = aparse_tree(buffer, len, 
bufptr, terminals, pnum_terminals)) == NULL) return NULL; n = (ParseTree *)malloc(sizeof(ParseTree)); n->op = (token == ';') ? ANDPAT : ORPAT ; n->type = INTERNAL; n->data.internal.left = leftn; n->data.internal.right = t; return n; default: fprintf(stderr, "asplit.c: parse error at offset %d\n", *bufptr); return NULL; } case 'e': /* can't happen as I always do a lookahead above and return current tree if e */ default: fprintf(stderr, "asplit.c: parse error at offset %d\n", *bufptr); return NULL; } } int asplit_pattern(APattern, AM, terminals, pnum_terminals, pAParse) CHAR *APattern; int AM; ParseTree terminals[]; int *pnum_terminals; ParseTree **pAParse; { int bufptr = 0, ret, i, j; if (is_complex_boolean(APattern, AM)) { AComplexBoolean = 1; *pnum_terminals = 0; if ((*pAParse = aparse_tree(APattern, AM, &bufptr, terminals, pnum_terminals)) == NULL) return -1; /* print_tree(*pAParse, 0); */ return *pnum_terminals; } else { for (i=0; itype == LEAF) return ((tree->op & NOTPAT) ? (!matched_terminals[tree->terminalindex]) : (matched_terminals[tree->terminalindex])); else if (tree->type == INTERNAL) { if ((tree->op & OPMASK) == ANDPAT) { /* sequential evaluation */ if ((res = eval_tree(tree->data.internal.left, matched_terminals)) != 0) res = eval_tree(tree->data.internal.right, matched_terminals); return (tree->op & NOTPAT) ? !res : res; } else { /* sequential evaluation */ if ((res = eval_tree(tree->data.internal.left, matched_terminals)) == 0) res = eval_tree(tree->data.internal.right, matched_terminals); return (tree->op & NOTPAT) ? !res : res; } } else { fprintf(stderr, "Eval on bad tree: returning false\n"); return 0; /* safety sake, but cannot happen! 
*/ } } /* [first, last) = C-style range for which we want the words in terminal-values' patterns: 0..num_terminals for !ComplexBoolean, term/term otherwise */ int asplit_terminal(first, last, pat_buf, pat_ptr) int first, last; char *pat_buf; int *pat_ptr; { int word_length; int type; int num_pat; *pat_ptr = 0; num_pat = 0; for (; first= MAXNUM_PAT) { fprintf(stderr, "Warning: too many words in pattern (> %d): ignoring...\n", MAXNUM_PAT); break; } } return num_pat; } agrep-4.17/bitap.c0100644001123100001460000004367007571015510012175 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ /* if the pattern is not simple fixed pattern, then after preprocessing */ /* and generating the masks, the program goes here. four cases: 1. */ /* the pattern is simple regular expression and no error, then do the */ /* matching here. 2. the pattern is simple regular expression and */ /* unit cost errors are allowed: then go to asearch(). */ /* 3. the pattern is simple regular expression, and the edit cost is */ /* not uniform, then go to asearch1(). */ /* if the pattern is regular expression then go to re() if M < 14, */ /* else go to re1() */ /* input parameters: old_D_pat: delimiter pattern. */ /* fd, input file descriptor, M: size of pattern, D: # of errors. 
*/ #include #include "agrep.h" #include "memory.h" #include extern int CurrentByteOffset; extern unsigned Init1, D_endpos, endposition, Init[], Mask[], Bit[]; extern int LIMITOUTPUT, LIMITPERFILE; extern int DELIMITER, FILENAMEONLY, D_length, I, AND, REGEX, JUMP, INVERSE, PRINTFILETIME; extern char D_pattern[]; extern int TRUNCATE, DD, S; extern char Progname[], CurrentFileName[]; extern long CurrentFileTime; extern int num_of_matched, prev_num_of_matched; extern int agrep_initialfd; extern int EXITONERROR; extern int agrep_inlen; extern CHAR *agrep_inbuffer; extern int agrep_inpointer; extern CHAR *agrep_outbuffer; extern int agrep_outlen; extern int agrep_outpointer; extern FILE *agrep_finalfp; extern int errno; extern int NEW_FILE, POST_FILTER; /* bitap dispatches job */ int bitap(old_D_pat, Pattern, fd, M, D) char old_D_pat[], *Pattern; int fd, M, D; { unsigned char c; /* Patch to fix -n with ISO characters, "O.Bartunov" , S.Nazin (leng@sai.msu.su) */ register unsigned r1, r2, r3, CMask, i; register unsigned end, endpos, r_Init1; register unsigned D_Mask; int ResidueSize , FIRSTROUND, lasti, print_end, j, num_read; int k; CHAR *buffer; int NumBufferFills; D_length = strlen(old_D_pat); for(i=0; i 4) { fprintf(stderr, "%s: the maximum number of erorrs allowed for full regular expressions is 4\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } if (M <= SHORTREG) { return re(fd, M, D); /* SUN: need to find a even point */ } else { return re1(fd, M, D); } } if (D > 0 && JUMP == ON) { return asearch1(old_D_pat, fd, D); } if (D > 0) { return asearch(old_D_pat, fd, D); } if(I == 0) Init1 = (unsigned)037777777777; j=0; r_Init1 = Init1; r1 = r2 = r3 = Init[0]; endpos = D_endpos; D_Mask = D_endpos; for(i=1 ; i 0) { NumBufferFills++; i=Max_record; end = Max_record + num_read; if(FIRSTROUND) { i = Max_record - 1 ; if(DELIMITER) { for(k=0; k=D_length) j--; } FIRSTROUND = OFF; } if(num_read < BlockSize) { strncpy(buffer+Max_record+num_read, 
old_D_pat, D_length); end = end + D_length; buffer[end] = '\0'; } /* BITAP_PROCESS: the while-loop below */ while (i < end) { c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = r_Init1 & r3; r2 = (( r3 >> 1 ) & CMask) | r1; if ( r2 & endpos ) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, buffer); NEW_FILE = OFF; return 0; } print_end = i - D_length - 1; if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) ) if (-1 == output(buffer, lasti, print_end, j - (NumBufferFills - 1))) { free_buf(fd, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(fd, buffer); return 0; /* done */ } } lasti = i - D_length; TRUNCATE = OFF; r2 = r3 = r1 = Init[0]; r1 = r_Init1 & r3; r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = r_Init1 
& r2; r3 = (( r2 >> 1 ) & CMask) | r1; if ( r3 & endpos ) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, buffer); NEW_FILE = OFF; return 0; } print_end = i - D_length - 1; if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) ) if (-1 == output(buffer, lasti, print_end, j - (NumBufferFills - 1))) { free_buf(fd, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(fd, buffer); return 0; /* done */ } } lasti = i - D_length ; TRUNCATE = OFF; r2 = r3 = r1 = Init[0]; r1 = r_Init1 & r2; r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } } ResidueSize = num_read + Max_record - lasti; if(ResidueSize > Max_record) { ResidueSize = Max_record; TRUNCATE = ON; } strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize); lasti = Max_record - ResidueSize; if(lasti < 0) { lasti = 1; } if 
(((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(fd, buffer); return 0; /* done */ } } free_buf(fd, buffer); return 0; #if AGREP_POINTER } else { buffer = agrep_inbuffer; num_read = agrep_inlen; end = num_read; /* buffer[end-1] = '\n';*/ /* at end of the text. */ /* buffer[0] = '\n';*/ /* in front of the text. */ i = 0; lasti = 1; if(DELIMITER) { for(k=0; k=D_length) j--; } /* An exact copy of the above: BITAP_PROCESS: the while-loop below */ while (i < end) { c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = r_Init1 & r3; r2 = (( r3 >> 1 ) & CMask) | r1; if ( r2 & endpos ) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, buffer); NEW_FILE = OFF; return 0; } print_end = i - D_length - 1; if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) ) if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= 
num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(fd, buffer); return 0; /* done */ } } lasti = i - D_length; TRUNCATE = OFF; r2 = r3 = r1 = Init[0]; r1 = r_Init1 & r3; r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = r_Init1 & r2; r3 = (( r2 >> 1 ) & CMask) | r1; if ( r3 & endpos ) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, buffer); NEW_FILE = OFF; return 0; } print_end = i - D_length - 1; if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) ) if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(fd, buffer); return 0; /* done */ } } lasti = i - D_length ; TRUNCATE = OFF; r2 = r3 = r1 = Init[0]; r1 = 
r_Init1 & r2; r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } } return 0; } #endif /*AGREP_POINTER*/ } fill_buf(fd, buf, record_size) int fd, record_size; unsigned char *buf; { int num_read=1; int total_read=0; extern int glimpse_clientdied; static int havePending = 0; static int pendingChar = 0; if (fd >= 0) { /* Decrement record size so we have room for an appended * newline, if we might need one. */ if (0 == DELIMITER) { --record_size; } if (havePending) { havePending = 0; buf [total_read++] = pendingChar; } while(total_read < record_size && num_read > 0) { if (glimpse_clientdied) return 0; num_read = read(fd, buf+total_read, record_size - total_read); total_read = total_read + num_read; } if (0 < num_read) { /* We're stopping because the buffer is full. Save * the last char for the next time through. This * guarantees, if we just read the last char, that * on the next call we'll know that we still need to * append a delimiter, even though we didn't "read" * anything. */ havePending = 1; pendingChar = buf [--total_read]; } else { /* Stopping because we read the last char. This * resets state for the next call. */ havePending = 0; } if ((0 == num_read) && /* Reached end-of-file */ (0 < total_read) && /* Got something, maybe from pending */ (0 == DELIMITER) && /* Not expecting special delimiter */ ('\n' != buf [total_read-1])) { /* Default delimiter not present */ /* Add the default delimiter, so the last line of the * file (terminated with EOF instead of newline) isn't * quietly dropped. */ buf [total_read] = '\n'; ++total_read; } } #if AGREP_POINTER else return 0; /* should not call this function if buffer is a pointer to a user-specified region! */ #else /*AGREP_POINTER*/ else { /* simulate a file */ total_read = (record_size > (agrep_inlen - agrep_inpointer)) ? 
(agrep_inlen - agrep_inpointer) : record_size; memcpy(buf, agrep_inbuffer + agrep_inpointer, total_read); agrep_inpointer += total_read; /* printf("agrep_inpointer %d total_read %d\n", agrep_inpointer, total_read);*/ } #endif /*AGREP_POINTER*/ if (glimpse_clientdied) return 0; return(total_read); } /* * In these functions no allocs/copying is done when * fd == -1, i.e., agrep is called to search within memory. */ void alloc_buf(fd, buf, size) int fd; char **buf; int size; { #if AGREP_POINTER if (fd != -1) #endif /*AGREP_POINTER*/ *buf = (char *)malloc(size); } void free_buf(fd, buf) int fd; char *buf; { #if AGREP_POINTER if (fd != -1) #endif /*AGREP_POINTER*/ free(buf); } agrep-4.17/bitap.c.orig0100644001123100001460000004105607010116362013123 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ /* if the pattern is not simple fixed pattern, then after preprocessing */ /* and generating the masks, the program goes here. four cases: 1. */ /* the pattern is simple regular expression and no error, then do the */ /* matching here. 2. the pattern is simple regular expression and */ /* unit cost errors are allowed: then go to asearch(). */ /* 3. the pattern is simple regular expression, and the edit cost is */ /* not uniform, then go to asearch1(). */ /* if the pattern is regular expression then go to re() if M < 14, */ /* else go to re1() */ /* input parameters: old_D_pat: delimiter pattern. */ /* fd, input file descriptor, M: size of pattern, D: # of errors. 
*/ #include "agrep.h" #include "memory.h" #include extern int CurrentByteOffset; extern unsigned Init1, D_endpos, endposition, Init[], Mask[], Bit[]; extern int LIMITOUTPUT, LIMITPERFILE; extern int DELIMITER, FILENAMEONLY, D_length, I, AND, REGEX, JUMP, INVERSE, PRINTFILETIME; extern char D_pattern[]; extern int TRUNCATE, DD, S; extern char Progname[], CurrentFileName[]; extern long CurrentFileTime; extern int num_of_matched, prev_num_of_matched; extern int agrep_initialfd; extern int EXITONERROR; extern int agrep_inlen; extern CHAR *agrep_inbuffer; extern int agrep_inpointer; extern CHAR *agrep_outbuffer; extern int agrep_outlen; extern int agrep_outpointer; extern FILE *agrep_finalfp; extern int errno; extern int NEW_FILE, POST_FILTER; /* bitap dispatches job */ int bitap(old_D_pat, Pattern, fd, M, D) char old_D_pat[], *Pattern; int fd, M, D; { unsigned char c; /* Patch to fix -n with ISO characters, "O.Bartunov" , S.Nazin (leng@sai.msu.su) */ register unsigned r1, r2, r3, CMask, i; register unsigned end, endpos, r_Init1; register unsigned D_Mask; int ResidueSize , FIRSTROUND, lasti, print_end, j, num_read; int k; CHAR *buffer; D_length = strlen(old_D_pat); for(i=0; i 4) { fprintf(stderr, "%s: the maximum number of erorrs allowed for full regular expressions is 4\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } if (M <= SHORTREG) { return re(fd, M, D); /* SUN: need to find a even point */ } else { return re1(fd, M, D); } } if (D > 0 && JUMP == ON) { return asearch1(old_D_pat, fd, D); } if (D > 0) { return asearch(old_D_pat, fd, D); } if(I == 0) Init1 = (unsigned)037777777777; j=0; r_Init1 = Init1; r1 = r2 = r3 = Init[0]; endpos = D_endpos; D_Mask = D_endpos; for(i=1 ; i 0) { i=Max_record; end = Max_record + num_read; if(FIRSTROUND) { i = Max_record - 1 ; if(DELIMITER) { for(k=0; k=D_length) j--; } FIRSTROUND = OFF; } if(num_read < BlockSize) { strncpy(buffer+Max_record+num_read, old_D_pat, D_length); end = end + D_length; 
buffer[end] = '\0'; } /* BITAP_PROCESS: the while-loop below */ while (i < end) { c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = r_Init1 & r3; r2 = (( r3 >> 1 ) & CMask) | r1; if ( r2 & endpos ) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, buffer); NEW_FILE = OFF; return 0; } print_end = i - D_length - 1; if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) ) if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(fd, buffer); return 0; /* done */ } } lasti = i - D_length; TRUNCATE = OFF; r2 = r3 = r1 = Init[0]; r1 = r_Init1 & r3; r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = r_Init1 & r2; r3 = (( r2 >> 1 ) & CMask) | r1; if ( r3 & endpos ) { j++; if 
(DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, buffer); NEW_FILE = OFF; return 0; } print_end = i - D_length - 1; if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) ) if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(fd, buffer); return 0; /* done */ } } lasti = i - D_length ; TRUNCATE = OFF; r2 = r3 = r1 = Init[0]; r1 = r_Init1 & r2; r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } } ResidueSize = num_read + Max_record - lasti; if(ResidueSize > Max_record) { ResidueSize = Max_record; TRUNCATE = ON; } strncpy(buffer+Max_record-ResidueSize, buffer+lasti, ResidueSize); lasti = Max_record - ResidueSize; if(lasti < 0) { lasti = 1; } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= 
num_of_matched - prev_num_of_matched))) { free_buf(fd, buffer); return 0; /* done */ } } free_buf(fd, buffer); return 0; #if AGREP_POINTER } else { buffer = agrep_inbuffer; num_read = agrep_inlen; end = num_read; /* buffer[end-1] = '\n';*/ /* at end of the text. */ /* buffer[0] = '\n';*/ /* in front of the text. */ i = 0; lasti = 1; if(DELIMITER) { for(k=0; k=D_length) j--; } /* An exact copy of the above: BITAP_PROCESS: the while-loop below */ while (i < end) { c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = r_Init1 & r3; r2 = (( r3 >> 1 ) & CMask) | r1; if ( r2 & endpos ) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((r2 & endposition) == endposition)) || ((AND == 0) && (r2 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, buffer); NEW_FILE = OFF; return 0; } print_end = i - D_length - 1; if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) ) if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { 
free_buf(fd, buffer); return 0; /* done */ } } lasti = i - D_length; TRUNCATE = OFF; r2 = r3 = r1 = Init[0]; r1 = r_Init1 & r3; r2 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask; if (DELIMITER) CurrentByteOffset += 1*D_length; else CurrentByteOffset += 1*1; } c = buffer[i++]; CurrentByteOffset ++; CMask = Mask[c]; r1 = r_Init1 & r2; r3 = (( r2 >> 1 ) & CMask) | r1; if ( r3 & endpos ) { j++; if (DELIMITER) CurrentByteOffset -= D_length; else CurrentByteOffset -= 1; if(((AND == 1) && ((r3 & endposition) == endposition)) || ((AND == 0) && (r3 & endposition)) ^ INVERSE ) { if(FILENAMEONLY && (NEW_FILE || !POST_FILTER)) { num_of_matched++; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, buffer); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, buffer); NEW_FILE = OFF; return 0; } print_end = i - D_length - 1; if ( ((fd != -1) && !(lasti >= Max_record+num_read - 1)) || ((fd == -1) && !(lasti >= num_read)) ) if (-1 == output(buffer, lasti, print_end, j)) { free_buf(fd, buffer); return -1;} if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(fd, buffer); return 0; /* done */ } } lasti = i - D_length ; TRUNCATE = OFF; r2 = r3 = r1 = Init[0]; r1 = r_Init1 & r2; r3 = ((( r2 >> 1) & CMask) | r1 ) & D_Mask; if (DELIMITER) CurrentByteOffset += 
1*D_length; else CurrentByteOffset += 1*1; } } return 0; } #endif /*AGREP_POINTER*/ } fill_buf(fd, buf, record_size) int fd, record_size; unsigned char *buf; { int num_read=1; int total_read=0; extern int glimpse_clientdied; if (fd >= 0) { while(total_read < record_size && num_read > 0) { if (glimpse_clientdied) return 0; num_read = read(fd, buf+total_read, record_size - total_read); total_read = total_read + num_read; } } #if AGREP_POINTER else return 0; /* should not call this function if buffer is a pointer to a user-specified region! */ #else /*AGREP_POINTER*/ else { /* simulate a file */ total_read = (record_size > (agrep_inlen - agrep_inpointer)) ? (agrep_len - agrep_inpointer) : record_size; memcpy(buf, agrep_inbuffer + agrep_inpointer, total_read); agrep_inpointer += total_read; /* printf("agrep_inpointer %d total_read %d\n", agrep_inpointer, total_read);*/ } #endif /*AGREP_POINTER*/ if (glimpse_clientdied) return 0; return(total_read); } /* * In these functions no allocs/copying is done when * fd == -1, i.e., agrep is called to search within memory. */ void alloc_buf(fd, buf, size) int fd; char **buf; int size; { #if AGREP_POINTER if (fd != -1) #endif /*AGREP_POINTER*/ *buf = (char *)malloc(size); } void free_buf(fd, buf) int fd; char *buf; { #if AGREP_POINTER if (fd != -1) #endif /*AGREP_POINTER*/ free(buf); } agrep-4.17/checkfile.c0100644001123100001460000000451007010116362012774 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. 
*/ /* * checkfile.c * takes a file name and checks to see if a file is a regular ascii file * */ #include #include #include #include #include #include #include "checkfile.h" #ifndef S_ISREG #define S_ISREG(mode) (0100000&(mode)) #endif #ifndef S_ISDIR #define S_ISDIR(mode) (0040000&(mode)) #endif #define MAXLINE 512 extern char Progname[]; extern int errno; unsigned char ibuf[MAXLINE]; /************************************************************************** * * check_file * input: filename or path (null-terminated character string) * returns: int (0 if file is a regular file, non-0 if not) * * uses stat(2) to see if a file is a regular file. * ***************************************************************************/ int check_file(fname) char *fname; { struct stat buf; if (my_stat(fname, &buf) != 0) { if (errno == ENOENT) return NOSUCHFILE; else return STATFAILED; } else { /* int ftype; if (S_ISREG(buf.st_mode)) { if ((ftype = samplefile(fname)) == ISASCIIFILE) { return ISASCIIFILE; } else if (ftype == ISBINARYFILE) { return ISBINARYFILE; } else if (ftype == OPENFAILED) { return OPENFAILED; } } if (S_ISDIR(buf.st_mode)) { return ISDIRECTORY; } if (S_ISBLK(buf.st_mode)) { return ISBLOCKFILE; } if (S_ISSOCK(buf.st_mode)) { return ISSOCKET; } */ return 0; } } /*************************************************************************** * * samplefile * reads in the first part of a file, and checks to see that it is * all ascii. 
* ***************************************************************************/ /* int samplefile(fname) char *fname; { char *p; int numread; int fd; if ((fd = open(fname, O_RDONLY)) == -1) { fprintf(stderr, "open failed on filename %s\n", fname); return OPENFAILED; } -comment- No need to use alloc_buf and free_buf here since always read from non-ve fd -tnemmoc- if (numread = fill_buf(fd, ibuf, MAXLINE)) { close(fd); p = ibuf; while (ISASCII(*p++) && --numread); if (!numread) { return(ISASCIIFILE); } else { return(ISBINARYFILE); } } else { close(fd); return(ISASCIIFILE); } } */ agrep-4.17/checkfile.h0100644001123100001460000000037607010116362013007 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ #define NOSUCHFILE -3 #define OPENFAILED -2 #define STATFAILED -1 #define ISASCIIFILE 0 #define ISDIRECTORY 1 #define ISBLOCKFILE 2 #define ISSOCKET 3 #define ISBINARYFILE 4 agrep-4.17/checksg.c0100644001123100001460000000771307545142260012507 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ #include "agrep.h" #include "checkfile.h" #include extern int errno; extern CHAR Progname[MAXNAME]; extern int SGREP, PAT_FILE, PAT_BUFFER, EXITONERROR, SIMPLEPATTERN, CONSTANT, D, NOUPPER, JUMP, I, LINENUM, INVERSE, WORDBOUND, WHOLELINE, SILENT, DNA, BESTMATCH, DELIMITER; /* Make it an interface routine that tells you whether mgrep can be used for the pattern or not: must sneak and access global variable D though... */ int checksg(Pattern, D, set) CHAR *Pattern; int D; int set; /* should I set flags SGREP and DNA? 
not if called from glimpse via library */ { char c; int i, m; int NOTSGREP = 0; if (set) SGREP = OFF; m = strlen(Pattern); #if DEBUG fprintf(stderr, "checksg: len=%d, pat=%s, pat[len]=%d\n", m, Pattern, Pattern[m]); #endif if(!(PAT_FILE || PAT_BUFFER) && (m <= D)) { fprintf(stderr, "%s: size of pattern '%s' must be > #of errors %d\n", Progname, Pattern, D); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } SIMPLEPATTERN = ON; for (i=0; i < m; i++) { switch(Pattern[i]) { case ';' : SIMPLEPATTERN = OFF; goto outoffor; case ',' : SIMPLEPATTERN = OFF; goto outoffor; case '.' : SIMPLEPATTERN = OFF; goto outoffor; case '*' : SIMPLEPATTERN = OFF; goto outoffor; case '-' : SIMPLEPATTERN = OFF; goto outoffor; case '[' : SIMPLEPATTERN = OFF; goto outoffor; case ']' : SIMPLEPATTERN = OFF; goto outoffor; case '(' : SIMPLEPATTERN = OFF; goto outoffor; case ')' : SIMPLEPATTERN = OFF; goto outoffor; case '<' : SIMPLEPATTERN = OFF; goto outoffor; case '>' : SIMPLEPATTERN = OFF; goto outoffor; case '^' : /* NOTSGREP = 1; sgrep does it; bg 4/27/97 */ if(D > 0) { SIMPLEPATTERN = OFF; goto outoffor; } break; case '$' : /* NOTSGREP = 1; sgrep does it; bg 4/27/97 */ if(D > 0) { SIMPLEPATTERN = OFF; goto outoffor; } break; case '|' : SIMPLEPATTERN = OFF; goto outoffor; case '#' : SIMPLEPATTERN = OFF; goto outoffor; case '{': SIMPLEPATTERN = OFF; goto outoffor; case '}': SIMPLEPATTERN = OFF; goto outoffor; case '~': SIMPLEPATTERN = OFF; goto outoffor; case '\\' : { /* Should I DO the left shift Pattern including Pattern[m] which is '\0', or just ignore the next character after '\\'????? 
*/ if (set) { /* preprocess and maskgen figure out what to do */ i++; /* in addition to for loop ++ */ } else { /* maskgen won't be called if we can help it, so shift it to make it verbatim */ /* int j; for (j=i; j0)) return 0; /* errors, not simple */ if (NOUPPER && (D>0)) return 0; /* errors, not simple */ if (JUMP == ON) return 0; /* I, S, D costs, not simple */ if (DELIMITER) return 0; /* delimiters avoid mgrep */ if (I == 0) return 0; /* I has 0 cost not 1, not simple */ if (LINENUM) return 0; /* can't use mgrep, so not simple */ if (WORDBOUND && (D > 0)) return 0; /* errors, not simple */ if (WHOLELINE && (D > 0)) return 0; /* errors, not simple */ if (SILENT) return 1; /* dont care output, so dont care pat */ if (set) { if (!NOTSGREP || CONSTANT) SGREP = ON; if (m >= 16) DNA = ON; for(i=0; i set MUST be on */ for (i=0; i < m; i++) { switch(Pattern[i]) { case '\\' : for (j=i; j #include "agrep.h" #include extern int D; extern int FILENAMEONLY, APPROX, PAT_FILE, PAT_BUFFER, MULTI_OUTPUT, COUNT, INVERSE, BESTMATCH; extern FILEOUT; extern REGEX; extern DELIMITER; extern WHOLELINE; extern LINENUM; extern I, S, DD; extern JUMP; extern char Progname[MAXNAME]; extern int agrep_initialfd; extern int EXITONERROR; extern int errno; int compat() { if(BESTMATCH) { if(COUNT || FILENAMEONLY || APPROX || PAT_FILE) { BESTMATCH = 0; fprintf(stderr, "%s: -B option ignored when -c, -l, -f, or -# is on\n", Progname); } /* if (LINENUM) { BESTMATCH = 0; fprintf(stderr, "%s: -B option ignored when -n is on", Progname); *//* Currently, the BESTMATCH option disables -n but there * doesn't seem to be a reason for it. 
* compat.c modified while testing continues 10-26-2002 KAM *//* } */ } if (COUNT && LINENUM) { LINENUM = 0; fprintf(stderr, "%s: -n option ignored with -c\n", Progname); } if(PAT_FILE || PAT_BUFFER) { if(APPROX && (D > 0)) { fprintf(stderr, "%s: approximate matching is not supported with -f option\n", Progname); } /* if(INVERSE) { fprintf(stderr, "%s: -f and -v are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } */ if(LINENUM) { fprintf(stderr, "%s: -f and -n are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } /* if(DELIMITER) { fprintf(stderr, "%s: -f and -d are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } */ } if (MULTI_OUTPUT && LINENUM) { fprintf(stderr, "%s: -M and -n are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } if(JUMP) { if(REGEX) { fprintf(stderr, "%s: -D#, -I#, or -S# option is ignored for regular expression pattern\n", Progname); JUMP = 0; } if(I == 0 || S == 0 || DD == 0) { fprintf(stderr, "%s: the error cost cannot be 0\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } } if(DELIMITER) { if(WHOLELINE) { fprintf(stderr, "%s: -d and -x are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } } if (INVERSE && (PAT_FILE || PAT_BUFFER) && MULTI_OUTPUT) { fprintf(stderr, "%s: -v and -M are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } return 0; } agrep-4.17/config.h0100644001123100001460000000076607010116362012342 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ /* * Definitions in this file will be visible throughout glimpse source code. * Any global flags or macros can should be defined here. 
*/ #if defined(__NeXT__) #define getcwd(buf,size) getwd(buf) /* NB: unchecked target size--could overflow; BG: Ok since buffers are usually >= 256B */ #define S_ISREG(mode) (((mode) & (_S_IFMT)) == (_S_IFREG)) #define S_ISDIR(mode) (((mode) & (_S_IFMT)) == (_S_IFDIR)) #endif agrep-4.17/contribution.list0100644001123100001460000000064307010116362014332 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ List of people (other than authors) who have contributed to agrep: Chunghwa H. Rao Gene Myers Ricardo Baeza-Yates Cliff Hathaway Ric Anderson Su-Ing Tsuei Raphael Finkel Andrew Hume David W. Sanderson William I. Chang Jack Kirman Dave Lutz Tony Plate Ken Lalonde Mark Christopher Dieter Becker Ian Young James M. Winget John F. Stoffel agrep-4.17/defs.h0100644001123100001460000000113307010116362012003 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ /* Must be the same as those defined in compress/defs.h */ #define SIGNATURE_LEN 16 #define TC_LITTLE_ENDIAN 1 #define TC_BIG_ENDIAN 0 #define TC_EASYSEARCH 0x1 #define TC_UNTILNEWLINE 0x2 #define TC_REMOVE 0x4 #define TC_OVERWRITE 0x8 #define TC_RECURSIVE 0x10 #define TC_ERRORMSGS 0x20 #define TC_SILENT 0x40 #define TC_NOPROMPT 0x80 #define TC_FILENAMESONSTDIN 0x100 #define COMP_SUFFIX ".CZ" #define DEF_FREQ_FILE ".glimpse_quick" #define DEF_HASH_FILE ".glimpse_compress" #define DEF_STRING_FILE ".glimpse_uncompress" agrep-4.17/delim.c0100644001123100001460000000647707010116362012167 0ustar friurz/* Copyright (c) 1994 Burra Gopal, Udi Manber. All Rights Reserved. 
*/ #include "agrep.h" extern int EASYSEARCH, TCOMPRESSED; /* Accesses src completely before dest, so that dest can be = src */ void preprocess_delimiter(src, srclen, dest, pdestlen) unsigned char *src, *dest; int srclen, *pdestlen; { CHAR temp[Maxline]; int i, j; strcpy(temp, src); temp[srclen] = '\0'; for (i=0, j=0; i end) return -1; if (TCOMPRESSED == ON) return (exists_tcompressed_word(delim, len, begin, end - begin, EASYSEARCH)); for (curbegin = begin; curbegin + len <= end; curbegin ++) { for (curbuf = curbegin, curdelim = delim; curbuf < curbegin + len; curbuf ++, curdelim++) if (*curbuf != *curdelim) break; if (curbuf >= curbegin + len) return (curbegin - begin); } return -1; } /* return where delimiter begins or ends (=outtail): range = [begin, end) */ unsigned char * forward_delimiter(begin, end, delim, len, outtail) unsigned char *begin, *end, *delim; int len, outtail; { register unsigned char *curbegin, *curbuf, *curdelim; unsigned char *oldbegin = begin, *retval = begin; if (begin + len > end) { retval = end + 1; goto _ret; } if ((len == 1) && (*delim == '\n')) { begin ++; while ((begin < end) && (*begin != '\n')) begin ++; if (outtail && (*begin == '\n')) begin++; retval = begin; goto _ret; } if (TCOMPRESSED == ON) return forward_tcompressed_word(begin, end, delim, len, outtail, EASYSEARCH); for (curbegin = begin; curbegin + len <= end; curbegin ++) { for (curbuf = curbegin, curdelim = delim; curbuf < curbegin + len; curbuf ++, curdelim++) if (*curbuf != *curdelim) break; if (curbuf >= curbegin + len) break; } if (!outtail) retval = (curbegin <= end - len ? curbegin: end + 1); else retval = (curbegin <= end - len ? 
curbegin + len : end + 1); _ret: /* Gurantee that this skips at least one character */ if (retval <= oldbegin) return oldbegin + 1; else return retval; } /* return where the delimiter begins or ends (=outtail): range = [begin, end) */ unsigned char * backward_delimiter(end, begin, delim, len, outtail) unsigned char *end, *begin, *delim; int len, outtail; { register unsigned char *curbegin, *curbuf, *curdelim; if (end - len < begin) return begin; if ((len == 1) && (*delim == '\n')) { end --; while ((end > begin) && (*end != '\n')) end --; if (outtail && (*end == '\n')) end++; return end; } if (TCOMPRESSED == ON) return backward_tcompressed_word(end, begin, delim, len, outtail, EASYSEARCH); for (curbegin = end-len; curbegin >= begin; curbegin --) { for (curbuf = curbegin, curdelim = delim; curbuf < curbegin + len; curbuf ++, curdelim++) if (*curbuf != *curdelim) break; if (curbuf >= curbegin + len) break; } if (!outtail) return (curbegin >= begin ? curbegin : begin); else return (curbegin >= begin ? curbegin + len : begin); } agrep-4.17/dummyfilters.c0100644001123100001460000000266707010116362013616 0ustar friurz/* Copyright (c) 1994 Burra Gopal, Udi Manber. All Rights Reserved. 
*/ /* bgopal: used if search in compressed text files is not being performed */ /* Always say could not be compressed */ int quick_tcompress() { return 0; } /* Always say could not be uncompressed */ int quick_tuncompress() { return 0; } /* Always return uncompressible */ int tuncompressible() { return 0; } /* Always return uncompressible */ int tuncompressible_filename() { return 0; } /* Always return uncompressible */ int tuncompressible_file() { return 0; } /* Always return uncompressible */ int tuncompressible_fp() { return 0; } int exists_tcompressed_word() { return -1; } unsigned char * forward_tcompressed_word(begin, end, delim, len, outtail, flags) unsigned char *begin, *end, *delim; int len, outtail, flags; { return begin; } unsigned char * backward_tcompressed_word(end, begin, delim, len, outtail, flags) unsigned char *begin, *end, *delim; int len, outtail, flags; { return end; } int tcompress_file() { return 0; } int tuncompress_file() { return 0; } int initialize_tcompress() { return 0; } int initialize_tuncompress() { return 0; } int initialize_common() { return 0; } int uninitialize_tuncompress() { return 0; } int compute_dictionary() { return 0; } int uninitialize_common() { return 0; } int uninitialize_tcompress() { return 0; } int usemalloc = 0; int set_usemalloc() { return 0; } int unset_usemalloc() { return 0; } agrep-4.17/dummysyscalls.c0100644001123100001460000000110207010116362013762 0ustar friurz /* These functions have been added here so that agrep/cast binaries will work independent of glimpse */ int my_open(name, flags, mode) char *name; int flags, mode; { return open(name, flags, mode); } FILE * my_fopen(name, flags) char *name; char *flags; { return fopen(name, flags); } int my_lstat(name, buf) char *name; struct stat *buf; { return lstat(name, buf); } int my_stat(name, buf) char *name; struct stat *buf; { return stat(name, buf); } int special_get_name(name, len, temp) char *name; int len; char *temp; { strcpy(temp, name); return 0; } 
agrep-4.17/follow.c0100644001123100001460000001167707010116362012375 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ /* the functions in this file take a syntax tree for a regular expression and produce a DFA using the McNaughton-Yamada construction. */ #include #include #include #include #include "re.h" #define TRUE 1 extern Pset pset_union(); extern int pos_cnt; extern Re_node parse(); Re_lit_array lpos; /* extend_re() extends the RE by adding a ".*(" at the front and a "(" at the back. */ char *extend_re(s) char *s; { char *s1; s1 = malloc((unsigned) strlen(s)+4+1); return strcat(strcat(strcpy(s1, ".*("), s), ")"); } void free_pos(fpos, pos_cnt) Pset_array fpos; int pos_cnt; { Pset tpos, pos; int i; if ((fpos == NULL) || (*fpos == NULL)) return; for (i=0; i<=pos_cnt; i++) { pos = (*fpos)[i]; while (pos != NULL) { tpos = pos; pos = pos->nextpos; free(tpos); } } free(fpos); } /* Function to clear out a Ch_Set */ void free_cset(cset) Ch_Set cset; { Ch_Set tset; while (cset != NULL) { tset = cset; cset = cset->rest; free(tset->elt); free(tset); } } /* Function to clear out the tree of re-nodes */ void free_re(e) Re_node e; { if (e == NULL) return; /* * Was creating "reading freed memory", "freeing unallocated/freed memory" * errors. So abandoned it. Leaks are now up by 60B/call to 80B/call * -bg * Enabled on 26/Aug/1996 after changing pos routines to copy stuff rather than link up parents/children/etc. */ { Pset tpos, pos; int tofree = 0; if ((Lastpos(e)) != (Firstpos(e))) tofree = 1; pos = Lastpos(e); while (pos != NULL) { tpos = pos; pos = pos->nextpos; free(tpos); } Lastpos(e) = NULL; if (tofree) { pos = Firstpos(e); while (pos != NULL) { tpos = pos; pos = pos->nextpos; free(tpos); } Firstpos(e) = NULL; } } /* Enabled on 26/Aug/1996 after changing pos routines to copy stuff rather than link up parents/children/etc. 
*/ switch (Op(e)) { case EOS: if (lit_type(Lit(e)) == C_SET) free_cset(lit_cset(Lit(e))); free(Lit(e)); break; case OPSTAR: free_re(Child(e)); break; case OPCAT: free_re(Lchild(e)); free_re(Rchild(e)); break; case OPOPT: free_re(Child(e)); break; case OPALT: free_re(Lchild(e)); free_re(Rchild(e)); break; case LITERAL: if (lit_type(Lit(e)) == C_SET) free_cset(lit_cset(Lit(e))); free(Lit(e)); break; default: fprintf(stderr, "free_re: unknown node type %d\n", Op(e)); } free(e); return; } /* mk_followpos() takes a syntax tree for a regular expression and traverses it once, computing the followpos function at each node and returns a pointer to an array whose ith element is a pointer to a list of position nodes, representing the positions in followpos(i). */ void mk_followpos_1(e, fpos) Re_node e; Pset_array fpos; { Pset pos; int i; switch (Op(e)) { case EOS: break; case OPSTAR: pos = Lastpos(e); while (pos != NULL) { i = pos->posnum; (*fpos)[i] = pset_union(Firstpos(e), (*fpos)[i], 1); pos = pos->nextpos; } mk_followpos_1(Child(e), fpos); break; case OPCAT: pos = Lastpos(Lchild(e)); while (pos != NULL) { i = pos->posnum; (*fpos)[i] = pset_union(Firstpos(Rchild(e)), (*fpos)[i], 1); pos = pos->nextpos; } mk_followpos_1(Lchild(e), fpos); mk_followpos_1(Rchild(e), fpos); break; case OPOPT: mk_followpos_1(Child(e), fpos); break; case OPALT: mk_followpos_1(Lchild(e), fpos); mk_followpos_1(Rchild(e), fpos); break; case LITERAL: break; default: fprintf(stderr, "mk_followpos: unknown node type %d\n", Op(e)); } return; } Pset_array mk_followpos(tree, npos) Re_node tree; int npos; { int i; Pset_array fpos; if (tree == NULL || npos < 0) return NULL; fpos = (Pset_array) malloc((unsigned) (npos+1)*sizeof(Pset)); if (fpos == NULL) return NULL; for (i = 0; i <= npos; i++) (*fpos)[i] = NULL; mk_followpos_1(tree, fpos); return fpos; } /* mk_poslist() sets a static array whose i_th element is a pointer to the RE-literal at position i. It returns 1 if everything is OK, 0 otherwise. 
*/ /* init performs initialization actions; it returns -1 in case of error, 0 if everything goes OK. */ int init(s, table) char *s; int table[32][32]; { Pset_array fpos; Re_node e; Pset l; int i, j; char *s1; if ((s1 = extend_re(s)) == NULL) return -1; if ((e = parse(s1)) == NULL) { free(s1); return -1; } free(s1); if ((fpos = mk_followpos(e, pos_cnt)) == NULL) { free_re(e); return -1; } for (i = 0; i <= pos_cnt; i += 1) { #ifdef Debug printf("followpos[%d] = ", i); #endif l = (*fpos)[i]; j = 0; for ( ; l != NULL; l = l->nextpos) { #ifdef Debug printf("%d ", l->posnum); #endif table[i][j] = l->posnum; j++; } #ifdef Debug printf("\n"); #endif } #ifdef Debug for (i=0; i <= pos_cnt; i += 1) { j = 0; while (table[i][j] != 0) { printf(" %d ", table[i][j]); j++; } printf("\n"); } #endif free_pos(fpos, pos_cnt); free_re(e); return (pos_cnt); } agrep-4.17/io.c0100644001123100001460000000304307571015524011500 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ #include "agrep.h" /* AGREP_POINTER must be defined to be 1 always */ /* #define AGREP_POINTER 1 */ /* Removed since we now have a -DAGREP_POINTER=1 option in the Makefile */ fill_buf(fd, buf, record_size) int fd, record_size; unsigned char *buf; { int num_read=1; int total_read=0; if (fd >= 0) { --record_size; while(total_read < record_size && num_read > 0) { num_read = read(fd, buf+total_read, record_size - total_read); total_read = total_read + num_read; } if ((0 == num_read) && (0 < total_read)) { /* Add newline terminator */ buf [total_read] = '\n'; ++total_read; } } #if AGREP_POINTER else return 0; /* should not call this function if buf is a pointer to a user-specified region! */ #else /*AGREP_POINTER*/ else { /* simulate a file */ total_read = (record_size > (agrep_inlen - agrep_inpointer)) ? 
(agrep_inlen - agrep_inpointer) : record_size; memcpy(buf, agrep_inbuffer + agrep_inpointer, total_read); agrep_inpointer += total_read; } #endif /*AGREP_POINTER*/ return(total_read); } /* * In these functions no allocs/copying is done when * fd == -1, i.e., agrep is called to search within memory. */ alloc_buf(fd, buf, size) int fd; char **buf; int size; { #if AGREP_POINTER if (fd != -1) #endif /*AGREP_POINTER*/ *buf = (char *)malloc(size); } free_buf(fd, buf) int fd; char *buf; { #if AGREP_POINTER if (fd != -1) #endif /*AGREP_POINTER*/ free(buf); } agrep-4.17/io.c.orig0100644001123100001460000000252407010116362012430 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ #include "agrep.h" /* AGREP_POINTER must be defined to be 1 always */ /* #define AGREP_POINTER 1 */ /* Removed since we now have a -DAGREP_POINTER=1 option in the Makefile */ fill_buf(fd, buf, record_size) int fd, record_size; unsigned char *buf; { int num_read=1; int total_read=0; if (fd >= 0) { while(total_read < record_size && num_read > 0) { num_read = read(fd, buf+total_read, record_size - total_read); total_read = total_read + num_read; } } #if AGREP_POINTER else return 0; /* should not call this function if buf is a pointer to a user-specified region! */ #else /*AGREP_POINTER*/ else { /* simulate a file */ total_read = (record_size > (agrep_inlen - agrep_inpointer)) ? (agrep_len - agrep_inpointer) : record_size; memcpy(buf, agrep_inbuffer + agrep_inpointer, total_read); agrep_inpointer += total_read; } #endif /*AGREP_POINTER*/ return(total_read); } /* * In these functions no allocs/copying is done when * fd == -1, i.e., agrep is called to search within memory. 
*/ alloc_buf(fd, buf, size) int fd; char **buf; int size; { #if AGREP_POINTER if (fd != -1) #endif /*AGREP_POINTER*/ *buf = (char *)malloc(size); } free_buf(fd, buf) int fd; char *buf; { #if AGREP_POINTER if (fd != -1) #endif /*AGREP_POINTER*/ free(buf); } agrep-4.17/main.c0100644001123100001460000000172107010116362012004 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ #include #include "agrep.h" #if ISO_CHAR_SET #include /* support for 8bit character set: ew@senate.be */ #endif #if MEASURE_TIMES extern int INFILTER_ms, OUTFILTER_ms, FILTERALGO_ms; #endif /*MEASURE_TIMES*/ extern char Pattern[MAXPAT]; extern int EXITONERROR; #include "dummysyscalls.c" int main(argc, argv) int argc; char *argv[]; { int ret; #if ISO_CHAR_SET setlocale(LC_ALL,""); /* support for 8bit character set: ew@senate.be, Henrik.Martin@eua.ericsson.se, "O.Bartunov" , S.Nazin (leng@sai.msu.su) */ #endif EXITONERROR = 1; /* the only place where it is set to 1 */ ret = fileagrep(argc, argv, 0, stdout); #if MEASURE_TIMES fprintf(stderr, "ret = %d infilter = %d ms\toutfilter = %d ms\tfilteralgo = %d ms\n", ret, INFILTER_ms, OUTFILTER_ms, FILTERALGO_ms); #endif /*MEASURE_TIMES*/ if(ret<0) exit(2); if(ret==0) exit(1); exit(0); } agrep-4.17/maskgen.c0100644001123100001460000001434207545142305012521 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. 
*/ #include "agrep.h" #include extern unsigned D_endpos, endposition, Init1, wildmask; extern Mask[], Bit[], Init[], NO_ERR_MASK; extern int AND, REGEX, NOUPPER, D_length; extern unsigned char Progname[]; extern int agrep_initialfd; extern int EXITONERROR; extern int errno; int maskgen(Pattern, D) unsigned char *Pattern; int D; { struct term { int flag; unsigned char class[WORD]; } position[WORD+10]; unsigned char c; int i, j, k, l, M, OR=0, EVEN = 0, base, No_error; #ifdef DEBUG fprintf(stderr, "maskgen: len=%d, pat=%s, D=%d\n", strlen(Pattern), Pattern, D); #endif for(i=0; i' (use \\<, \\> to search for <, >)\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } break; case LRANGE : if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j]; i=i+1; if (Pattern[i] == NOTSYM) { position[j].flag = Compl; i++; } k=0; while (Pattern[i] != RRANGE && i < M) { if(Pattern[i] == HYPHEN) { position[j].class[k-1] = Pattern[i+1]; i=i+2; } else { position[j].class[k] = position[j].class[k+1] = Pattern[i]; k = k+2; i++; } } if(i == M) { fprintf(stderr, "%s: unmatched '[', ']' (use \\[, \\] to search for [, ])\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } position[j].class[k] = '\0'; j++; break; case RRANGE : fprintf(stderr, "%s: unmatched '[', ']' (use \\[, \\] to search for [, ])\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); break; case ORPAT : if(REGEX == ON || AND == ON) { fprintf(stderr, "illegal pattern: cannot handle OR (',') and AND (';')/regular-expressions simultaneously\n"); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } OR = ON; position[j].flag = 2; position[j].class[0] = '\0'; endposition = endposition | Bit[j++]; break; case ANDPAT : position[j].flag = 2; position[j].class[0] = '\0'; if(j > D_length) AND = ON; if(OR || (REGEX == ON && j>D_length)) { fprintf(stderr, "illegal pattern: cannot handle AND (';') and OR (',')/regular-expressions 
simultaneously\n"); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } endposition = endposition | Bit[j++]; break; /* case ' ' : if (Pattern[i-1] == ORPAT || Pattern[i-1] == ANDPAT) break; if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j]; position[j].flag = 0; position[j].class[0] = position[j].class[1] = Pattern[i]; position[j++].class[2] = '\0'; break; */ case '\n' : NO_ERR_MASK = NO_ERR_MASK | Bit[j]; position[j].class[0] = position[j].class[1] = '\n'; position[j++].class[2] = '\0'; break; case WORDB : NO_ERR_MASK = NO_ERR_MASK | Bit[j]; position[j].class[0] = 1; position[j].class[1] = 47; position[j].class[2] = 58; position[j].class[3] = 64; position[j].class[4] = 91; position[j].class[5] = 96; position[j].class[6] = 123; position[j].class[7] = 127; position[j++].class[8] = '\0'; break; case NNLINE : NO_ERR_MASK |= Bit[j]; position[j].class[0] = position[j].class[1] = '\n'; position[j].class[2] = position[j].class[3] = NNLINE; position[j++].class[4] = '\0'; break; default : if(No_error == ON) NO_ERR_MASK = NO_ERR_MASK | Bit[j]; position[j].flag = 0; position[j].class[0] = position[j].class[1] = Pattern[i]; position[j++].class[2] = '\0'; } if(j > WORD) { fprintf(stderr, "%s: pattern too long (has > %d chars)\n", Progname, WORD); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } } if (EVEN != 0) { fprintf(stderr, "%s: unmatched '<', '>' (use \\<, \\> to search for <, >)\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } M = j - 1; base = WORD - M; wildmask = (wildmask >> base); endposition = (endposition >> base); NO_ERR_MASK = (NO_ERR_MASK >> 1) & (~Bit[1]); NO_ERR_MASK = ~NO_ERR_MASK >> (base-1); for (i=1; i<= WORD - M ; i++) Init[0] = Init[0] | Bit[i]; Init[0] = Init[0] | endposition; /* not necessary for INit[i], i>0, */ /* but at every begining of the matching process append one no-match character to initialize the error vectors */ endposition = ( endposition << 1 ) + 1; Init1 = 
(Init[0] | wildmask | endposition) ; D_endpos = ( endposition >> ( M - D_length ) ) << ( M - D_length); endposition = endposition ^ D_endpos; #ifdef DEBUG printf("endposition: %o\n", endposition); printf("no_err_mask: %o\n", NO_ERR_MASK); #endif for(c=0, i=0; i < MAXSYM; c++, i++) { for (k=1, l=0; k<=M ; k++, l=0) { while (position[k].class[l] != '\0') { if (position[k].class[l] == NOCARE && (c != '\n' || REGEX) ) { Mask[c] = Mask[c] | Bit[base + k]; break; } if (c >= position[k].class[l] && c <= position[k].class[l+1]) { Mask[c] = Mask[c] | Bit[base + k]; break; } l = l + 2; } if (position[k].flag == Compl) Mask[c] = Mask[c] ^ Bit[base+k]; } } if(NOUPPER) for(i=0; i #include #include #ifdef ultrix #include #endif #include #include "agrep.h" #include #define ddebug #define uchar unsigned char #undef MAXPAT #define MAXPAT 256 #undef MAXLINE #define MAXLINE 1024 #undef MAXSYM #define MAXSYM 256 #define MAXMEMBER1 32768 /* #define MAXMEMBER1 262144 */ /*2^18 */ #define MAXPATFILE 600000 #define BLOCKSIZE 16384 #define MAXHASH 32768 /* #define MAXHASH 262144 */ #define mask5 32767 #define max_num MAX_DASHF_FILES #if ISO_CHAR_SET #define W_DELIM 256 #else #define W_DELIM 128 #endif #define L_DELIM 10 #define Hbits 5 /* how much to shift to perform the hash */ extern char aduplicates[MAXNUM_PAT][MAXNUM_PAT]; /* tells what other patterns are exactly equal to the i-th one */ extern char tc_aduplicates[MAXNUM_PAT][MAXNUM_PAT]; /* tells what other patterns are exactly equal to the i-th one */ extern ParseTree aterminals[MAXNUM_PAT]; extern int AComplexBoolean; extern int LIMITOUTPUT, LIMITPERFILE; extern int BYTECOUNT, PRINTOFFSET, PRINTRECORD, CurrentByteOffset; extern int MULTI_OUTPUT; /* used by glimpse only if OR, never for AND */ extern int DELIMITER; extern CHAR D_pattern[MaxDelimit*2]; extern int D_length; extern CHAR tc_D_pattern[MaxDelimit*2]; extern int tc_D_length; extern COUNT, FNAME, SILENT, FILENAMEONLY, prev_num_of_matched, num_of_matched, PRINTFILETIME; 
extern INVERSE, OUTTAIL; extern WORDBOUND, WHOLELINE, NOUPPER; extern ParseTree *AParse; extern int AComplexPattern; extern unsigned char CurrentFileName[], Progname[]; extern long CurrentFileTime; extern total_line; extern agrep_initialfd; extern int EXITONERROR; extern int PRINTPATTERN; extern int agrep_inlen; extern CHAR *agrep_inbuffer; extern FILE *agrep_finalfp; extern int agrep_outpointer; extern int agrep_outlen; extern CHAR * agrep_outbuffer; extern int errno; extern int NEW_FILE, POST_FILTER; extern int tuncompressible(); extern int quick_tcompress(); extern int quick_tuncompress(); extern int TCOMPRESSED; extern int EASYSEARCH; extern char FREQ_FILE[MAX_LINE_LEN], HASH_FILE[MAX_LINE_LEN], STRING_FILE[MAX_LINE_LEN]; extern char PAT_FILE_NAME[MAX_LINE_LEN]; uchar SHIFT1[MAXMEMBER1]; int LONG = 0; int SHORT = 0; int p_size= 0; uchar tr[MAXSYM]; uchar tr1[MAXSYM]; int HASH[MAXHASH]; int Hash2[max_num]; uchar *PatPtr[max_num]; uchar *pat_spool = NULL; /* [MAXPATFILE+2*max_num+MAXPAT]; */ uchar *patt[max_num]; int pat_len[max_num]; int pat_indices[max_num]; /* pat_indices[p] gives the actual index in matched_teriminals: used only with AParse != 0 */ int num_pat; extern char amatched_terminals[MAXNUM_PAT]; /* which patterns have been matched in the current line? 
Used only with AParse != 0, so max_num is not needed */ extern int anum_terminals; extern int AComplexBoolean; static void countline(); void acompute_duplicates(); #if DOTCOMPRESSED /* Equivalent variables for compression search */ uchar tc_SHIFT1[MAXMEMBER1]; int tc_LONG = 0; int tc_SHORT = 0; int tc_p_size= 0; uchar tc_tr[MAXSYM]; uchar tc_tr1[MAXSYM]; int tc_HASH[MAXHASH]; int tc_Hash2[max_num]; uchar *tc_PatPtr[max_num]; uchar *tc_pat_spool = NULL; /* [MAXPATFILE+2*max_num+MAXPAT]; */ uchar *tc_patt[max_num]; int tc_pat_len[max_num]; int tc_pat_indices[max_num]; /* pat_indices[p] gives the actual index in matched_teriminals: used only with AParse != 0 */ int tc_num_pat; /* must be the same as num_pat */ #endif /*DOTCOMPRESSED*/ static void f_prep(); static void f_prep1(); static void accumulate(); #if DOTCOMPRESSED static void tc_f_prep(); static void tc_f_prep1(); static void tc_accumulate(); #endif #ifdef perf_check int cshift=0, cshift0=0, chash=0; #endif /* * General idea behind output processing with delimiters, inverse, compression, etc. * CAUTION: In compressed files, we can search ONLY for simple patterns or their ;,. * Attempts to search for complex patterns / with errors might lead to spurious matches. * 1. Once we find the match, go back and forward to get the delimiters that surround * the matched region. * 2. If it is a compressed file, verify that the match is "real" (compressed files * can have pseudo matches hence this filtering step is required). * 3. Increment num_of_matched. * 4. Process some output options which print stuff before the matched region is * printed. * 5. If there is compression, decomress and output the matched region. Otherwise * just output it as is. Remember, from step (1) we know the matched region. * 6. If inverse is set, then we must keep track of the end of the last matched region * in the variable lastout. 
When there is a match, we must print everything from * lastout to the beginning of the current matched region (curtextbegin) and then * update lastout to point to the end of the current matched region (curtextend). * ALSO: if we exit from the main loops, we must output everything from the end * of the last matched region to the end of the input buffer. * 7. Delimiter handling in complex patterns is different: there the search is done * for a boolean and of the delimiter pattern and the actual pattern. * 8. For convenience and speed, the multipattern matching routines to handle * compressed files have been separated from their (normal) counterparts. * 9. One special note on handling complicated boolean patterns: the parse * tree will be the same for both compressed and uncomrpessed patterns and the * same amatched_terminals array will be used in both. BUT, the pat_spool and * pat_index, etc., will be different as they refer to the individual terminals. */ int prepf(mfp, mbuf, mlen) int mfp, mlen; unsigned char *mbuf; { int length=0, i, p=1; uchar *pat_ptr; unsigned Mask = 31; int num_read; unsigned char *buf; struct stat stbuf; int j, k; /* to implement \\ */ if ((mfp == -1) && ((mbuf == NULL) || (mlen <= 0))) return -1; if (mfp != -1) { if (fstat(mfp, &stbuf) == -1) { fprintf(stderr, "%s: cannot stat file: %s\n", Progname, PAT_FILE_NAME); return -1; } if (!S_ISREG(stbuf.st_mode)) { fprintf(stderr, "%s: pattern file not regular file: %s\n", Progname, PAT_FILE_NAME); return -1; } if (stbuf.st_size*2 > MAXPATFILE + 2*max_num) { fprintf(stderr, "%s: pattern file too large (> %d B): %s\n", Progname, (MAXPATFILE+2*max_num)/2, PAT_FILE_NAME); return -1; } if (pat_spool != NULL) free(pat_spool); pat_ptr = pat_spool = (unsigned char *)malloc(stbuf.st_size*2 + MAXPAT); alloc_buf(mfp, &buf, MAXPATFILE+2*BlockSize); while((num_read = fill_buf(mfp, buf+length, 2*BlockSize)) > 0) { length = length + num_read; if(length > MAXPATFILE) { fprintf(stderr, "%s: maximum pattern file 
size is %d\n", Progname, MAXPATFILE); if (!EXITONERROR) { errno = AGREP_ERROR; free_buf(mfp, buf); return -1; } else exit(2); } } } else { buf = mbuf; length = mlen; if (mlen*2 > MAXPATFILE + 2*max_num) { fprintf(stderr, "%s: pattern buffer too large (> %d B)\n", Progname, (MAXPATFILE+2*max_num)/2); return -1; } if (pat_spool != NULL) free(pat_spool); pat_ptr = pat_spool = (unsigned char *)malloc(mlen*2 + MAXPAT); } /* Now all the patterns are in buf */ buf[length] = '\n'; i=0; p=1; /* removed by Udi 11/8/94 - we now do WORDBOUND "by hand" if(WORDBOUND) { while(imax_num) { fprintf(stderr, "%s: maximum number of patterns is %d\n", Progname, max_num); if (!EXITONERROR) { errno = AGREP_ERROR; free_buf(mfp, buf); return -1; } else exit(2); } for(i=1; i<20; i++) *pat_ptr = i; /* boundary safety zone */ /* I might have to keep changing tr s.t. mgrep won't get confused with W_DELIM */ for(i=0; i< MAXSYM; i++) tr[i] = i; if(NOUPPER) { for (i=0; i 1) && ((patt[i][p-1] == '^') || (patt[i][p-1] == '$')) && (patt[i][p-2] != '\\')) patt[i][p-1] = '\n'; /* Added by bg, Dec 2nd 1994 */ for (k=0; k0?p-2:1):p); changed by Udi 11/8/94 */ #ifdef debug printf("prepf(): patt[%d]=%s, pat_len[%d]=%d\n", i, patt[i], i, pat_len[i]); #endif if(p!=0 && p < p_size) p_size = p; /* MIN */ } if(p_size == 0) { fprintf(stderr, "%s: the pattern file is empty\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; free_buf(mfp, buf); return -1; } else exit(2); } if(length > 400 && p_size > 2) LONG = 1; if(p_size == 1) SHORT = 1; for(i=0; i MAXPATFILE + 2*max_num) { fprintf(stderr, "%s: pattern buffer too large (> %d B)\n", Progname, (MAXPATFILE+2*max_num)/2); return -1; } if (tc_pat_spool != NULL) free(tc_pat_spool); pat_ptr = tc_pat_spool = (unsigned char *)malloc(length*2 + MAXPAT); #if MEASURE_TIMES gettimeofday(&initt, NULL); #endif /*MEASURE_TIMES*/ i=0; p=1; while(i < length) { tc_patt[p] = pat_ptr; while((*pat_ptr = buf[i++]) != '\n') pat_ptr++; *pat_ptr++ = 0; if ((tc_length = 
quick_tcompress(FREQ_FILE, HASH_FILE, tc_patt[p], strlen(tc_patt[p]), tc_buf, MAXPAT * 2 - 8, TC_EASYSEARCH)) > 0) { memcpy(tc_patt[p], tc_buf, tc_length); tc_patt[p][tc_length] = '\0'; pat_ptr = tc_patt[p] + tc_length + 1; /* character after '\0' */ } p++; } for(i=1; i<20; i++) *pat_ptr = i; /* boundary safety zone */ /* Ignore all other options: it is automatically W_DELIM */ for(i=0; i< MAXSYM; i++) tc_tr[i] = i; for(i=0; i< MAXSYM; i++) tc_tr1[i] = tc_tr[i]&Mask; tc_num_pat = p-1; tc_p_size = MAXPAT; for(i=1; i<=num_pat; i++) { p = strlen(tc_patt[i]); tc_pat_len[i] = p; #ifdef debug printf("prepf(): tc_patt[%d]=%s, tc_pat_len[%d]=%d\n", i, tc_patt[i], i, tc_pat_len[i]); #endif if(p!=0 && p < tc_p_size) tc_p_size = p; /* MIN */ } if(tc_p_size == 0) { /* cannot happen NOW */ fprintf(stderr, "%s: the pattern file is empty\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } if(length > 400 && tc_p_size > 2) tc_LONG = 1; if(tc_p_size == 1) tc_SHORT = 1; for(i=0; i 0) { buf_end = end = Max_record + num_read -1 ; oldCurrentByteOffset = CurrentByteOffset; if (first_time) { if ((TCOMPRESSED == ON) && tuncompressible(text+Max_record, num_read)) { EASYSEARCH = text[Max_record+SIGNATURE_LEN-1]; start += SIGNATURE_LEN; CurrentByteOffset += SIGNATURE_LEN; if (!EASYSEARCH) { fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName); } } else TCOMPRESSED = OFF; first_time = 0; } if (!DELIMITER) { while(text[end] != r_newline && end > Max_record) end--; text[start-1] = r_newline; } else { unsigned char *newbuf = text + end + 1; newbuf = backward_delimiter(newbuf, text+Max_record, D_pattern, D_length, OUTTAIL); /* see agrep.c/'d' */ if (newbuf < text+Max_record+D_length) newbuf = text + end + 1; end = newbuf - text - 1; memcpy(text+start-D_length, D_pattern, D_length); } residue = buf_end - end + 1 ; if(INVERSE && COUNT) countline(text+Max_record, num_read); /* MGREP_PROCESS */ if (TCOMPRESSED) { /* 
separate functions since separate globals => too many if-statements within a single function makes it slow */ #if DOTCOMPRESSED if(tc_SHORT) { if (-1 == tc_m_short(text, start, end)) {free_buf(fd, text); return -1;}} else { if (-1 == tc_monkey1(text, start, end)) {free_buf(fd, text); return -1;}} #endif /*DOTCOMPRESSED*/ } else { if(SHORT) { if (-1 == m_short(text, start, end)) {free_buf(fd, text); return -1;}} else { if (-1 == monkey1(text, start, end)) {free_buf(fd, text); return -1;}} } if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, text); NEW_FILE = OFF; return 0; } CurrentByteOffset = oldCurrentByteOffset + end - start + 1; start = Max_record - residue; if(start < 0) { start = 1; } strncpy(text+start, text+end, residue); if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(fd, text); return 0; /* done */ } } /* end of while(num_read = ... 
*/ if (!DELIMITER) { text[start-1] = '\n'; text[start+residue] = '\n'; } else { if (start > D_length) memcpy(text+start-D_length, D_pattern, D_length); memcpy(text+start+residue, D_pattern, D_length); } end = start + residue; if(residue > 1) { if (TCOMPRESSED) { #if DOTCOMPRESSED if(tc_SHORT) tc_m_short(text, start, end); else tc_monkey1(text, start, end); #endif /*DOTCOMPRESSED*/ } else { if(SHORT) m_short(text, start, end); else monkey1(text, start, end); } if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, text); NEW_FILE = OFF; return 0; } } free_buf(fd, text); return (0); #if AGREP_POINTER } else { text = (unsigned char *)agrep_inbuffer; num_read = agrep_inlen; start = 0; buf_end = end = num_read - 1; oldCurrentByteOffset = CurrentByteOffset; if (first_time) { if ((TCOMPRESSED == ON) && tuncompressible(text+Max_record, num_read)) { EASYSEARCH = text[Max_record+SIGNATURE_LEN-1]; start += SIGNATURE_LEN; CurrentByteOffset += SIGNATURE_LEN; if (!EASYSEARCH) { fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName); } } else TCOMPRESSED = OFF; first_time = 0; } if (!DELIMITER) while(text[end] != r_newline && end > 1) end--; else { unsigned char *newbuf 
= text + end + 1; newbuf = backward_delimiter(newbuf, text, D_pattern, D_length, OUTTAIL); /* see agrep.c/'d' */ if (newbuf < text+D_length) newbuf = text + end + 1; end = newbuf - text - 1; } /* text[0] = text[end] = r_newline; : the user must ensure that the delimiter is there at text[0] and occurs somewhere before text[end] */ if (INVERSE && COUNT) countline(text, num_read); /* An exact copy of the above MGREP_PROCESS */ if (TCOMPRESSED) { /* separate functions since separate globals => too many if-statements within a single function makes it slow */ #if DOTCOMPRESSED if(tc_SHORT) { if (-1 == tc_m_short(text, start, end)) {free_buf(fd, text); return -1;}} else { if (-1 == tc_monkey1(text, start, end)) {free_buf(fd, text); return -1;}} #endif /*DOTCOMPRESSED*/ } else { if(SHORT) { if (-1 == m_short(text, start, end)) {free_buf(fd, text); return -1;}} else { if (-1 == monkey1(text, start, end)) {free_buf(fd, text); return -1;}} } if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, text); NEW_FILE = OFF; return 0; } return 0; } #endif /*AGREP_POINTER*/ #ifdef perf_check fprintf(stderr,"Shifted %d times; shift=0 %d times; hash was = %d times\n",cshift, cshift0, chash); return 0; #endif } /* end 
mgrep */ static void countline(text, len) unsigned char *text; int len; { int i; for (i=0; i= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else agrep_outbuffer[agrep_outpointer ++] = prevstring[0]; } for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, ":%c", nextchar); else { if (agrep_outpointer+2>= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else { agrep_outbuffer[agrep_outpointer++] = ':'; agrep_outbuffer[agrep_outpointer++] = nextchar; } } NEW_FILE = OFF; PRINTED = 1; } if (PRINTPATTERN) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%d- ", pat_index); else { char s[32]; int outindex; sprintf(s, "%d- ", pat_index); for(outindex=0; (outindex+agrep_outpointer textend) return 0; if (WORDBOUND) { if (isalnum(*(unsigned char *)qx)) goto skip_output; if (isalnum(*(unsigned char *)(text-m1-1))) goto skip_output; } if (!DOWITHMASK) { /* Don't update CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text-m1*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin /*text-m1*/, textend, D_pattern, D_length, OUTTAIL); } if (!OUTTAIL || INVERSE) textbegin = curtextend; else if (DELIMITER) textbegin = curtextend - D_length; else textbegin = curtextend - 1; } DOWITHMASK = 1; if (pat_index <= anum_terminals) { int 
iii; amatched_terminals[pat_index - 1] = 1; for (iii=0; iii= agrep_outlen) {\ OUTPUT_OVERFLOW;\ return -1;\ }\ else {\ memcpy(agrep_outbuffer + agrep_outpointer, curtextbegin, curtextend-curtextbegin);\ agrep_outpointer += curtextend - curtextbegin;\ }\ }\ }\ else if (PRINTED) {\ if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\ else agrep_outbuffer[agrep_outpointer ++] = '\n';\ PRINTED = 0;\ }\ if ((change_text) && MULTI_OUTPUT) { /* next match starting from end of current */\ CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);\ text = oldtext + pat_len[pat_index] - 1;\ MATCHED = 0;\ }\ else if (change_text) {\ CurrentByteOffset += textbegin - text;\ text = textbegin;\ }\ }\ else { /* INVERSE */\ /* if(lastout < curtextbegin) OUT=1; */\ if (!SILENT) {\ if (agrep_finalfp != NULL)\ fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);\ else {\ if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {\ OUTPUT_OVERFLOW;\ return -1;\ }\ memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);\ agrep_outpointer += (curtextbegin-lastout);\ }\ }\ lastout=textbegin;\ if (change_text) {\ CurrentByteOffset += textbegin - text;\ text = textbegin;\ }\ }\ }\ else if (change_text) { /* COUNT */\ CurrentByteOffset += textbegin - text;\ text = textbegin;\ }\ if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\ ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */\ DO_OUTPUT(1) } skip_output: if (MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break; /* else look for more possible matches since we never know how many will match */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } } /* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the 
amatched_terminals */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } if(!MATCHED) shift = 1; /* || MULTI_OUTPUT is implicit */ else { MATCHED = 0; shift = m1 - 1 > 0 ? m1 - 1 : 1; } } /* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } text += shift; CurrentByteOffset += shift; } /* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } if(INVERSE && !COUNT && (lastout <= textend)) { if (!SILENT) { if (agrep_finalfp != NULL) { while(lastout <= textend) fputc(*lastout++, agrep_finalfp); } else { if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout+1); agrep_outpointer += (textend-lastout+1); lastout = textend; } } } return 0; } #if DOTCOMPRESSED int tc_monkey1( text, start, end ) int start, end; register unsigned char *text; { int PRINTED = 0; unsigned char *oldtext; int pat_index; register uchar *textend; unsigned char *textbegin; unsigned char *curtextend; unsigned char *curtextbegin; register unsigned hash; register uchar shift; register int m1, Long=LONG; int MATCHED=0; register uchar *qx; register uchar *px; register int p, p_end; 
uchar *lastout; /* int OUT=0; */ int hash2; int j; int DOWITHMASK; struct timeval initt, finalt; int newlen; DOWITHMASK = 0; if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); textbegin = text + start; textend = text + end; m1 = tc_p_size-1; lastout = text+start; text = text + start + m1 -1; /* -1 to allow match to the first \n in case the pattern has ^ in front of it */ /* WORDBOUND adjustment not required */ while (text <= textend) { hash=tc_tr1[*text]; hash=(hash< textend) return 0; if (!DOWITHMASK) { /* Don't update CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text-m1*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin /*text-m1*/, textend, tc_D_pattern, tc_D_length, OUTTAIL); } } /* else prev curtextbegin is OK: if full AND isn't found, DOWITHMASK is 0-ed so that we search at most 1 line below */ #if MEASURE_TIMES gettimeofday(&initt, NULL); #endif /*MEASURE_TIMES*/ /* Was it really a match in the compressed line from prev line in text to text + strlen(tc_pat_len[pat_index]? 
*/ if (-1==exists_tcompressed_word(tc_PatPtr[p], tc_pat_len[pat_index], curtextbegin, text - curtextbegin + tc_pat_len[pat_index], EASYSEARCH)) goto skip_output; #if MEASURE_TIMES gettimeofday(&finalt, NULL); FILTERALGO_ms += (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ if (!DOWITHMASK) { if (!OUTTAIL || INVERSE) textbegin = curtextend; else if (DELIMITER) textbegin = curtextend - D_length; else textbegin = curtextend - 1; } DOWITHMASK = 1; if (pat_index <= anum_terminals) { int iii; amatched_terminals[pat_index - 1] = 1; for (iii=0; iii 0) {\ if (newlen + agrep_outpointer >= agrep_outlen) {\ OUTPUT_OVERFLOW;\ return -1;\ }\ agrep_outpointer += newlen;\ }\ }\ /* #if MEASURE_TIMES\ gettimeofday(&finalt, NULL);\ OUTFILTER_ms += (finalt.tv_sec* 1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);\ */ /*#endif MEASURE_TIMES */\ }\ else if (PRINTED) {\ if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\ else agrep_outbuffer[agrep_outpointer ++] = '\n';\ PRINTED = 0;\ }\ if ((change_text) && MULTI_OUTPUT) { /* next match starting from end of current */\ CurrentByteOffset += (oldtext + tc_pat_len[pat_index] - 1 - text);\ text = oldtext + tc_pat_len[pat_index] - 1;\ MATCHED = 0;\ }\ else if (change_text) {\ CurrentByteOffset += textbegin - text;\ text = textbegin;\ }\ }\ else { /* INVERSE: Don't care about filtering time */\ /* if(lastout < curtextbegin) OUT=1; */\ if (!SILENT) {\ if (agrep_finalfp != NULL)\ newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);\ else {\ if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {\ if (newlen + agrep_outpointer >= agrep_outlen) {\ OUTPUT_OVERFLOW;\ return -1;\ }\ agrep_outpointer += newlen;\ }\ }\ }\ lastout=textbegin;\ if (change_text) {\ CurrentByteOffset += textbegin - 
text;\ text = textbegin;\ }\ }\ }\ else if (change_text) {\ CurrentByteOffset += textbegin - text;\ text = textbegin;\ }\ if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\ ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */\ DO_OUTPUT(1) } skip_output: if (MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break; /* else look for more possible matches since we never know how many will match */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } } /* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } if(!MATCHED) shift = 1; /* || MULTI_OUTPUT is implicit */ else { MATCHED = 0; shift = m1 - 1 > 0 ? 
m1 - 1 : 1; } } /* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } text += shift; CurrentByteOffset += shift; } /* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } if (INVERSE && !COUNT && (lastout <= textend)) { if (!SILENT) { if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } } } return 0; } #endif /*DOTCOMPRESSED*/ /* shift is always 1: slight change in MATCHED semantics: it is set to 1 even if COUNT is set: previously, it wasn't set. Will it effect m_short? 
*/ int m_short(text, start, end) int start, end; register uchar *text; { int m1=1; int PRINTED = 0; int pat_index; unsigned char *oldtext; register uchar *textend; unsigned char *textbegin; unsigned char *curtextend; unsigned char *curtextbegin; register int p, p_end; int MATCHED=0; /* int OUT=0; */ uchar *lastout; uchar *qx; uchar *px; int j; int DOWITHMASK; DOWITHMASK = 0; if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); textend = text + end; lastout = text + start; textbegin = text + start; text = text + start - 1 ; /* if (WORDBOUND || WHOLELINE) text = text-1; */ if (WHOLELINE) text = text-1; /* to accomodate the extra 2 W_delim */ while (++text <= textend) { CurrentByteOffset ++; p = HASH[tr[*text]]; p_end = HASH[tr[*text]+1]; while(p++ < p_end) { if (((pat_index = pat_indices[p]) <= 0) || (pat_len[pat_index] <= 0)) continue; #ifdef debug printf("m_short(): p=%d pat_index=%d off=%d\n", p, pat_index, textend - text); #endif px = PatPtr[p]; qx = text; while((*px!=0)&&(tr[*px] == tr[*qx])) { px++; qx++; } if (*px == 0) { if(text >= textend) return 0; if (WORDBOUND) { if (isalnum(*(unsigned char *)qx)) goto skip_output; if (isalnum(*(unsigned char *)(text-1))) goto skip_output; } if (!DOWITHMASK) { /* Don't update CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text-m1*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin /*text-m1*/, textend, D_pattern, D_length, OUTTAIL); } if (!OUTTAIL || INVERSE) textbegin = curtextend; else if (DELIMITER) textbegin = curtextend - D_length; else textbegin = curtextend - 1; } /* else prev curtextbegin is OK: if full AND isn't found, 
DOWITHMASK is 0-ed so that we search at most 1 line below */ DOWITHMASK = 1; if (pat_index <= anum_terminals) { int iii; amatched_terminals[pat_index - 1] = 1; for (iii=0; iii= agrep_outlen) {\ OUTPUT_OVERFLOW;\ return -1;\ }\ else {\ memcpy(agrep_outbuffer + agrep_outpointer, curtextbegin, curtextend-curtextbegin);\ agrep_outpointer += curtextend - curtextbegin;\ }\ }\ }\ else if (PRINTED) {\ if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\ else agrep_outbuffer[agrep_outpointer ++] = '\n';\ PRINTED = 0;\ }\ if ((change_text) && MULTI_OUTPUT) { /* next match starting from end of current */\ CurrentByteOffset += (oldtext + pat_len[pat_index] - 1 - text);\ text = oldtext + pat_len[pat_index] - 1;\ MATCHED = 0;\ }\ else if (change_text) {\ CurrentByteOffset += textbegin - text;\ text = textbegin;\ }\ }\ else {\ /* if(lastout < curtextbegin) OUT=1; */\ if (!SILENT) {\ if (agrep_finalfp != NULL)\ fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp);\ else {\ if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) {\ OUTPUT_OVERFLOW;\ return -1;\ }\ memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout);\ agrep_outpointer += (curtextbegin-lastout);\ }\ }\ lastout=textbegin;\ if (change_text) {\ CurrentByteOffset += textbegin - text;\ text = textbegin;\ }\ }\ }\ else if (change_text) {\ CurrentByteOffset += textbegin - text;\ text = textbegin;\ }\ if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\ ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */\ DO_OUTPUT(1) } skip_output: if(MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break; /* else look for more possible matches */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } } /* If I found some match and I am about to cross over a delimiter, 
then set DOWITHMASK to 0 and zero out the amatched_terminals */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } if (MATCHED) text --; MATCHED = 0; } /* while */ CurrentByteOffset ++; /* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } if(INVERSE && !COUNT && (lastout <= textend)) { if (!SILENT) { if (agrep_finalfp != NULL) { while(lastout <= textend) fputc(*lastout++, agrep_finalfp); } else { if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, text-lastout+1); agrep_outpointer += (text-lastout+1); lastout = textend; } } } return 0; } #if DOTCOMPRESSED /* shift is always 1: slight change in MATCHED semantics: it is set to 1 even if COUNT is set: previously, it wasn't set. Will it effect m_short? 
*/ int tc_m_short(text, start, end) int start, end; register uchar *text; { int m1=1; int PRINTED = 0; int pat_index; unsigned char *oldtext; register uchar *textend; unsigned char *textbegin; unsigned char *curtextend; unsigned char *curtextbegin; register int p, p_end; int MATCHED=0; /* int OUT=0; */ uchar *lastout; uchar *qx; uchar *px; int j; int DOWITHMASK; struct timeval initt, finalt; int newlen; DOWITHMASK = 0; if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); textend = text + end; lastout = text + start; text = text + start - 1 ; textbegin = text + start; /* WORDBOUND adjustment not required */ while (++text <= textend) { CurrentByteOffset ++; p = tc_HASH[tc_tr[*text]]; p_end = tc_HASH[tc_tr[*text]+1]; while(p++ < p_end) { if (((pat_index = tc_pat_indices[p]) <= 0) || (tc_pat_len[pat_index] <= 0)) continue; #ifdef debug printf("m_short(): p=%d pat_index=%d off=%d\n", p, pat_index, textend - text); #endif px = tc_PatPtr[p]; qx = text; while((*px!=0)&&(tc_tr[*px] == tc_tr[*qx])) { px++; qx++; } if (*px == 0) { if(text >= textend) return 0; if (!DOWITHMASK) { /* Don't update CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text-m1*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin /*text-m1*/, textend, tc_D_pattern, tc_D_length, OUTTAIL); } } /* else prev curtextbegin is OK: if full AND isn't found, DOWITHMASK is 0-ed so that we search at most 1 line below */ #if MEASURE_TIMES gettimeofday(&initt, NULL); #endif /*MEASURE_TIMES*/ /* Was it really a match in the compressed line from prev line in text to text + strlen(tc_pat_len[pat_index]? 
*/ if (-1 == exists_tcompressed_word(tc_PatPtr[p], tc_pat_len[pat_index], curtextbegin, text - curtextbegin + tc_pat_len[pat_index], EASYSEARCH)) goto skip_output; #if MEASURE_TIMES gettimeofday(&finalt, NULL); FILTERALGO_ms += (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ if (!DOWITHMASK) { if (!OUTTAIL || INVERSE) textbegin = curtextend; else if (DELIMITER) textbegin = curtextend - D_length; else textbegin = curtextend - 1; } DOWITHMASK = 1; if (pat_index <= anum_terminals) { int iii; amatched_terminals[pat_index - 1] = 1; for (iii=0; iii 0) {\ if (newlen + agrep_outpointer >= agrep_outlen) {\ OUTPUT_OVERFLOW;\ return -1;\ }\ agrep_outpointer += newlen;\ }\ }\ /*#if MEASURE_TIMES\ gettimeofday(&finalt, NULL);\ OUTFILTER_ms += (finalt.tv_sec* 1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000);\ */ /*#endif MEASURE_TIMES*/\ }\ else if (PRINTED) {\ if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp);\ else agrep_outbuffer[agrep_outpointer ++] = '\n';\ PRINTED = 0;\ }\ if ((change_text) && MULTI_OUTPUT) { /* next match starting from end of current */\ CurrentByteOffset += (oldtext + tc_pat_len[pat_index] - 1 - text);\ text = oldtext + tc_pat_len[pat_index] - 1;\ MATCHED = 0;\ }\ else if (change_text) {\ CurrentByteOffset += textbegin - text;\ text = textbegin;\ }\ }\ else { /* INVERSE: Don't care about filtering time */\ /* if(lastout < curtextbegin) OUT=1; */\ if (!SILENT) {\ if (agrep_finalfp != NULL)\ newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH);\ else {\ if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) {\ if (newlen + agrep_outpointer >= agrep_outlen) {\ OUTPUT_OVERFLOW;\ return -1;\ }\ agrep_outpointer += newlen;\ }\ }\ }\ lastout=textbegin;\ if (change_text) {\ CurrentByteOffset += textbegin - 
text;\ text = textbegin;\ }\ }\ }\ else if (change_text) {\ CurrentByteOffset += textbegin - text;\ text = textbegin;\ }\ if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||\ ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */\ DO_OUTPUT(1) } skip_output: if(MATCHED && !MULTI_OUTPUT && !AComplexBoolean) break; /* else look for more possible matches */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } } /* If I found some match and I am about to cross over a delimiter, then set DOWITHMASK to 0 and zero out the amatched_terminals */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } if (MATCHED) text--; MATCHED = 0; } /* while */ CurrentByteOffset ++; /* Do residual stuff: check if there was a match at the end of the line | check if rest of the buffer needs to be output due to inverse */ if (DOWITHMASK && (text >= curtextend - 1)) { DOWITHMASK = 0; if (AComplexBoolean && dd(curtextbegin, curtextend) && eval_tree(AParse, amatched_terminals)) { DO_OUTPUT(0) } if (AParse != 0) memset(amatched_terminals, '\0', anum_terminals); } if (INVERSE && !COUNT && (lastout <= textend)) { if (!SILENT) { if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } } } return 0; } #endif 
/*DOTCOMPRESSED*/ static void f_prep(pat_index, Pattern) uchar *Pattern; int pat_index; { int i, m; register unsigned hash=0; #ifdef debug puts(Pattern); #endif m = p_size; for (i=m-1; i>=(1+LONG); i--) { hash = (tr1[Pattern[i]]); hash = (hash << Hbits) + (tr1[Pattern[i-1]]); if(LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]] ); if(SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i; } i=m-1; hash = (tr1[Pattern[i]]); hash = (hash << Hbits) + (tr1[Pattern[i-1]]); if(LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]] ); if(SHORT) hash=tr[Pattern[0]]; #ifdef debug printf("hash = %d\n", hash); #endif HASH[hash]++; return; } #if DOTCOMPRESSED static void tc_f_prep(pat_index, Pattern) uchar *Pattern; int pat_index; { int i, m; register unsigned hash=0; #ifdef debug puts(Pattern); #endif m = tc_p_size; for (i=m-1; i>=(1+tc_LONG); i--) { hash = (tc_tr1[Pattern[i]]); hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]); if(tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]] ); if(tc_SHIFT1[hash] >= m-1-i) tc_SHIFT1[hash] = m-1-i; } i=m-1; hash = (tc_tr1[Pattern[i]]); hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]); if(tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]] ); if(tc_SHORT) hash=tc_tr[Pattern[0]]; #ifdef debug printf("hash = %d\n", hash); #endif tc_HASH[hash]++; return; } #endif /*DOTCOMPRESSED*/ static void f_prep1(pat_index, Pattern) uchar *Pattern; int pat_index; { int i, m; int hash2; register unsigned hash; m = p_size; #ifdef debug puts(Pattern); #endif for (i=m-1; i>=(1+LONG); i--) { hash = (tr1[Pattern[i]]); hash = (hash << Hbits) + (tr1[Pattern[i-1]]); if(LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]] ); if(SHIFT1[hash] >= m-1-i) SHIFT1[hash] = m-1-i; } i=m-1; hash = (tr1[Pattern[i]]); hash = (hash << Hbits) + (tr1[Pattern[i-1]]); if(LONG) hash = (hash << Hbits) + (tr1[Pattern[i-2]] ); if(SHORT) hash=tr[Pattern[0]]; hash2 = (tr[Pattern[0]] << 8) + tr[Pattern[1]]; #ifdef debug printf("hash = %d, HASH[hash] = %d\n", hash, HASH[hash]); #endif 
PatPtr[HASH[hash]] = Pattern; pat_indices[HASH[hash]] = pat_index; Hash2[HASH[hash]] = hash2; HASH[hash]--; return; } #if DOTCOMPRESSED static void tc_f_prep1(pat_index, Pattern) uchar *Pattern; int pat_index; { int i, m; int hash2; register unsigned hash; m = tc_p_size; #ifdef debug puts(Pattern); #endif for (i=m-1; i>=(1+tc_LONG); i--) { hash = (tc_tr1[Pattern[i]]); hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]); if(tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]] ); if(tc_SHIFT1[hash] >= m-1-i) tc_SHIFT1[hash] = m-1-i; } i=m-1; hash = (tc_tr1[Pattern[i]]); hash = (hash << Hbits) + (tc_tr1[Pattern[i-1]]); if(tc_LONG) hash = (hash << Hbits) + (tc_tr1[Pattern[i-2]] ); if(tc_SHORT) hash=tc_tr[Pattern[0]]; hash2 = (tc_tr[Pattern[0]] << 8) + tc_tr[Pattern[1]]; #ifdef debug printf("hash = %d, tc_HASH[hash] = %d\n", hash, tc_HASH[hash]); #endif tc_PatPtr[tc_HASH[hash]] = Pattern; tc_pat_indices[tc_HASH[hash]] = pat_index; tc_Hash2[tc_HASH[hash]] = hash2; tc_HASH[hash]--; return; } #endif /*DOTCOMPRESSED*/ static void accumulate() { int i; for(i=1; i #include "re.h" #define FALSE 0 #define TRUE 1 #define NextChar(s) *(*s)++ #define Unexpected(s, c) (**s == NUL || **s == c) #define Invalid_range(x, y) (x == NUL || x == '-' || x == ']' || x < y) extern Stack Push(); extern Re_node Pop(); extern Re_node Top(); extern int Size(); extern Pset pset_union(); extern Pset create_pos(); extern void free_re(); int final_pos, pos_cnt = 0; /* retract_token() moves the string pointer back, effectively "unseeing" the last character seen. It is used only to retract a right paren -- the idea is that the incarnation of parse_re() that saw the corresponding left paren is supposed to take care of matching the right paren. This is necessary to prevent recursive calls from mistakenly eating up someone else's right parens. */ #define retract_token(s) --(*s) /* mk_leaf() creates a leaf node that is (usually) a literal node. 
*/ Re_node mk_leaf(opval, type, ch, cset) short opval, type; char ch; Ch_Set cset; { Re_node node; Re_Lit l; new_node(Re_Lit, l, l); new_node(Re_node, node, node); if (l == NULL || node == NULL) { if (l != NULL) free(l); if (node != NULL) free(node); return NULL; } lit_type(l) = type; lit_pos(l) = pos_cnt++; if (type == C_SET) lit_cset(l) = cset; else lit_char(l) = ch; /* type == C_LIT */ Op(node) = opval; Lit(node) = l; Nullable(node) = FALSE; Firstpos(node) = create_pos(lit_pos(l)); Lastpos(node) = Firstpos(node); return node; } /* parse_cset() takes a pointer to a pointer to a string and parses a prefix of it denoting a character set literal. It returns a pointer to a Re_node node, NULL if there is an error. */ Re_node parse_cset(s) char **s; { Ch_Set cs_ptr, curr_ptr, prev_ptr; char ch; Ch_Range range = NULL; if (Unexpected(s, ']')) return NULL; new_node(Ch_Set, curr_ptr, curr_ptr); cs_ptr = curr_ptr; while (!Unexpected(s, ']')) { new_node(Ch_Range, range, range); curr_ptr->elt = range; ch = NextChar(s); if (ch == '-') { free(range); free(curr_ptr); return NULL; /* invalid range */ } range->low_bd = ch; if (**s == NUL) { free(range); free(curr_ptr); return NULL; } else if (**s == '-') { /* character range */ (*s)++; if (Invalid_range(**s, ch)) { free(range); free(curr_ptr); return NULL; } else range->hi_bd = NextChar(s); } else range->hi_bd = ch; prev_ptr = curr_ptr; new_node(Ch_Set, curr_ptr, curr_ptr); prev_ptr->rest = curr_ptr; }; if (**s == ']') { prev_ptr->rest = NULL; return mk_leaf(LITERAL, C_SET, NUL, cs_ptr); } else { if (range != NULL) free(range); free(curr_ptr); return NULL; } } /* parse_cset */ /* parse_wildcard() "parses" a wildcard -- a wildcard is treated as a character range whose values span all ASCII values. parse_wildcard() creates a node representing such a range. 
*/ Re_node parse_wildcard() { Ch_Set s; Ch_Range r; new_node(Ch_Range, r, r); r->low_bd = ASCII_MIN; /* smallest ASCII value */ r->hi_bd = ASCII_MAX; /* greatest ASCII value */ new_node(Ch_Set, s, s); s->elt = r; s->rest = NULL; return mk_leaf(LITERAL, C_SET, NUL, s); } /* parse_chlit() parses a character literal. It is assumed that the character in question does not have any special meaning. It returns a pointer to a node for that literal. */ Re_node parse_chlit(ch) char ch; { if (ch == NUL) return NULL; else return mk_leaf(LITERAL, C_LIT, ch, NULL); } /* routine to free the malloced token */ void free_tok(next_token) Tok_node next_token; { if (next_token == NULL) return; switch (tok_type(next_token)) { case LITERAL: free_re(tok_val(next_token)); case EOS: case RPAREN: case LPAREN: case OPSTAR: case OPALT: case OPOPT: default: free(next_token); break; } } /* get_token() returns the next token -- this may be a character literal, a character set, an escaped character, a punctuation (i.e. parenthesis), or an operator. It traverses the character string representing the RE, given by a pointer s; leaves s positioned immediately after the unit it parsed, and returns a pointer to a token node for that unit. */ Tok_node get_token(s) char **s; { Tok_node rn = NULL; if (s == NULL || *s == NULL) return NULL; /* error */ new_node(Tok_node, rn, rn); if (**s == NUL) tok_type(rn) = EOS; /* end of string */ else { switch (**s) { case '.': /* wildcard */ tok_type(rn) = LITERAL; tok_val(rn) = parse_wildcard(); if (tok_val(rn) == NULL) { free_tok(rn); return NULL; } break; case '[': /* character set literal */ (*s)++; tok_type(rn) = LITERAL; tok_val(rn) = parse_cset(s); if (tok_val(rn) == NULL) { free_tok(rn); return NULL; } break; case '(': tok_type(rn) = LPAREN; break; case ')' : tok_type(rn) = RPAREN; break; case '*' : tok_type(rn) = OPSTAR; break; case '|' : tok_type(rn) = OPALT; break; case '?' 
: tok_type(rn) = OPOPT; break; case '\\': /* escaped character */ (*s)++; default : /* must be ordinary character */ tok_type(rn) = LITERAL; tok_val(rn) = parse_chlit(**s); if (tok_val(rn) == NULL) { free_tok(rn); return NULL; } break; } /* switch (**s) */ (*s)++; } /* else */ return rn; } /* cat2() takes a stack of RE-nodes and, if the stack contains more than one node, returns the stack obtained by condensing the top two nodes of the stack into a single CAT-node. If there is only one node on the stack, nothing is done. */ Stack cat2(stk) Stack *stk; { Re_node r; if (stk == NULL) return NULL; if (*stk == NULL || (*stk)->next == NULL) return *stk; new_node(Re_node, r, r); if (r == NULL) return NULL; /* can't allocate memory */ Op(r) = OPCAT; Rchild(r) = Pop(stk); Lchild(r) = Pop(stk); if (Push(stk, r) == NULL) { free_re(Rchild(r)); free_re(Lchild(r)); free(r); return NULL; } Nullable(r) = Nullable(Lchild(r)) && Nullable(Rchild(r)); if (Nullable(Lchild(r))) Firstpos(r) = pset_union(Firstpos(Lchild(r)), Firstpos(Rchild(r)), 0); else Firstpos(r) = pset_union(Firstpos(Lchild(r)), NULL, 0); /* added pset_union with NULL 26/Aug/1996 */ if (Nullable(Rchild(r))) Lastpos(r) = pset_union(Lastpos(Lchild(r)), Lastpos(Rchild(r)), 0); else Lastpos(r) = pset_union(Lastpos(Rchild(r)), NULL, 0); /* added pset_union with NULL 26/Aug/1996 */ return *stk; } /* wrap() takes a stack and an operator, takes the top element of the stack and "wraps" that operator around it, then puts this back on the stack and returns the resulting stack. 
*/ Stack wrap(s, opv) Stack *s; short opv; { Re_node r; if (s == NULL || *s == NULL) return NULL; new_node(Re_node, r, r); if (r == NULL) return NULL; Op(r) = opv; Child(r) = Pop(s); if (Push(s, r) == NULL) { free_re(Child(r)); free(r); return NULL; } Nullable(r) = TRUE; Firstpos(r) = pset_union(Firstpos(Child(r)), NULL, 0); /* added pset_union with NULL 26/Aug/1996 */ Lastpos(r) = pset_union(Lastpos(Child(r)), NULL, 0); /* added pset_union with NULL 26/Aug/1996 */ return *s; } /* mk_alt() takes a stack and a regular expression, creates an ALT-node from the top of the stack and the given RE, and replaces the top-of-stack by the resulting ALT-node. */ Stack mk_alt(s, r) Stack *s; Re_node r; { Re_node node; if (s == NULL || *s == NULL || r == NULL) return NULL; new_node(Re_node, node, node); if (node == NULL) return NULL; Op(node) = OPALT; Lchild(node) = Pop(s); Rchild(node) = r; if (Push(s, node) == NULL) return NULL; Nullable(node) = Nullable(Lchild(node)) || Nullable(Rchild(node)); Firstpos(node) = pset_union(Firstpos(Lchild(node)), Firstpos(Rchild(node)), 0); Lastpos(node) = pset_union(Lastpos(Lchild(node)), Lastpos(Rchild(node)), 0); return *s; } /* parse_re() takes a pointer to a string and traverses that string, returning a pointer to a syntax tree for the regular expression represented by that string, NULL if there is an error. 
*/
/*
 * parse_re(s, end)
 *   s    cursor into the regular expression text; advanced as tokens are read
 *   end  token type that terminates this invocation: parse() passes NUL
 *        (equal to EOS), the LPAREN case below passes RPAREN, and the
 *        OPALT case passes its own end on to the recursive call
 *
 * Tokens are read one at a time and operands are collected on a local
 * stack; whenever the stack grows past two entries the entries below the
 * top are condensed with cat2().
 */
Re_node parse_re(s, end)
char **s;
short end;
{
	Stack stk = NULL, ret = NULL, top, temp;
	Tok_node next_token, t1;
	Re_node re = NULL, val;

	if (s == NULL || *s == NULL) return NULL;
	while (TRUE) {
		ret = NULL;
		if ((next_token = get_token(s)) == NULL) return NULL;
		switch (tok_type(next_token)) {
		case RPAREN:
			/* un-read the paren so the invocation that saw the matching
			   left paren can consume it, then share the EOS handling */
			retract_token(s);
		case EOS:
			if (end == tok_type(next_token)) {
				/* expected terminator: fold what is left and hand back
				   the node on top of the stack */
				free_tok(next_token);
				top = cat2(&stk);
				val = Top(top);
				free(top);
				return val;
			}
			else {
				/* terminator of the wrong kind for this invocation */
				free_tok(next_token);
				return NULL;
			}
		case LPAREN:
			free_tok(next_token);
			re = parse_re(s, RPAREN);
			/* note: re is pushed before it is checked for NULL; the
			   check happens together with the RPAREN test below */
			if ((ret = Push(&stk, re)) == NULL) {
				free_re(re); /* ZZZZZZZZZZZZZZZZZZ */
				return NULL;
			}
			if ((t1 = get_token(s)) == NULL) {
				free_re(re); /* ZZZZZZZZZZZZZZZZZZ */
				free(ret);
				return NULL;
			}
			if ((tok_type(t1) != RPAREN) || (re == NULL)) {
				free_re(re); /* ZZZZZZZZZZZZZZZZZZ */
				free(ret);
				free_tok(t1);
				return NULL;
			}
			free_tok(t1);
			if (Size(stk) > 2) {
				temp = stk->next;
				stk->next = cat2(&temp);	/* condense CAT nodes */
				if (stk->next == NULL) {
					free_re(re); /* ZZZZZZZZZZZZZZZZZZ */
					free(ret);
					return NULL;
				}
				else stk->size = stk->next->size + 1;
			}
			break;
		case OPSTAR:
			/* postfix operator: applies to the node on top of the stack */
			if ((ret = wrap(&stk, OPSTAR)) == NULL) {
				free_tok(next_token);
				return NULL;
			}
			free_tok(next_token); /* ZZZZZZZZZZZZZZZZZZ */
			break;
		case OPOPT:
			if ((ret = wrap(&stk, OPOPT)) == NULL) {
				free_tok(next_token);
				return NULL;
			}
			free_tok(next_token); /* ZZZZZZZZZZZZZZZZZZ */
			break;
		case OPALT:
			/* left operand: fold the stack first; right operand: the
			   recursive call, made with the same end marker */
			if ((ret = cat2(&stk)) == NULL) {
				free_tok(next_token);
				return NULL;
			}
			re = parse_re(s, end);
			if (re == NULL) {
				free(ret);
				free_tok(next_token);
				return NULL;
			}
			if (mk_alt(&stk, re) == NULL) {
				free(ret);
				free_tok(next_token);
				return NULL;
			}
			free_tok(next_token); /* ZZZZZZZZZZZZZZZZZZ */
			break;
		case LITERAL:
			if ((ret = Push(&stk, tok_val(next_token))) == NULL) {
				free_tok(next_token);
				return NULL;
			}
			/* plain free(), not free_tok(): the literal's node now
			   lives on the stack and must survive the token */
			free(next_token);
			if (Size(stk) > 2) {
				temp = stk->next;
				stk->next = cat2(&temp);	/* condense CAT nodes */
				if (stk->next == NULL) {
					free(ret);
					return NULL;
				}
				else stk->size = stk->next->size + 1;
			}
			break;
		default:
			/* reported on stdout; parsing then continues with the next token */
			printf("parse_re: unknown token type %d\n", tok_type(next_token));
			free_tok(next_token); /* ZZZZZZZZZZZZZZZZZZ */
			break;
		}
		/* free_tok(next_token); */
	}
}

/* parse() essentially just calls parse_re().  Its purpose is to stick an
   end-of-string token at the end of the syntax tree returned by
   parse_re().  It should really be done in parse_re, but the recursion
   there makes it more desirable to have it here.

   The position number given to the EOS leaf is recorded in final_pos,
   and pos_cnt is stepped back by one so that it equals that number.
   Returns the finished tree, or NULL if parsing or a push fails. */
Re_node parse(s)
char *s;
{
	Re_node val, tree, temp;
	Stack top, stk = NULL;

	if ((tree = parse_re(&s, NUL)) == NULL) return NULL;
	if (Push(&stk, tree) == NULL) return NULL;
	temp = mk_leaf(EOS, C_LIT, NUL, NULL);
	if (temp == NULL || Push(&stk, temp) == NULL) return NULL;
	final_pos = --pos_cnt;
	top = cat2(&stk);	/* tree followed by the EOS leaf */
	val = Top(top);
	free(top);
	return val;
}
agrep-4.17/preprocess.c0100644001123100001460000002211507545142323013256 0ustar friurz
/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal.  All Rights Reserved. */
/* substitute metachar with special symbol                               */
/* if regular expression, then set flag REGEX                            */
/* if REGEX and MULTIPAT then report error message,                      */
/* -w only for single word pattern. If WORDBOUND & MULTIWORD error       */
/* process start of line, end of line symbol,                            */
/* process -w WORDBOUND option, append special symbol at begin&end of    */
/* process -d option before this routine                                 */
/* the delimiter pattern is in D_pattern (need to end with '; ')         */
/* if '-t' (suggestion: how about -B) the pattern is passed to sgrep     */
/* and doesn't go here                                                   */
/* in that case, -d is ignored?
or not necessary */ /* upon return, Pattern contains the pattern to be processed by maskgen */ /* D_pattern contains transformed D_pattern */ #include "agrep.h" #include extern int PAT_FILE, PAT_BUFFER; extern ParseTree *AParse; extern int WHOLELINE, REGEX, FASTREGEX, RE_ERR, DELIMITER, TAIL, WORDBOUND; extern int HEAD; extern CHAR Progname[]; extern int D_length, tc_D_length; extern CHAR tc_D_pattern[MaxDelimit * 2]; extern int table[WORD][WORD]; extern int agrep_initialfd; extern int EXITONERROR; extern int errno; extern int multifd; extern char *multibuf; extern int multilen; extern int anum_terminals; extern ParseTree aterminals[MAXNUM_PAT]; extern char FREQ_FILE[MAX_LINE_LEN], HASH_FILE[MAX_LINE_LEN], STRING_FILE[MAX_LINE_LEN]; /* interfacing with tcompress */ extern int AComplexBoolean; int preprocess(D_pattern, Pattern) /* need two parameters */ CHAR D_pattern[], Pattern[]; { CHAR temp[Maxline], *r_pat, *old_pat; /* r_pat for r.e. */ CHAR old_D_pat[MaxDelimit*2]; int i, j=0, rp=0, m, t=0, num_pos, ANDON = 0; int d_end ; int IN_RANGE=0; int ret1, ret2; #if DEBUG fprintf(stderr, "preprocess: m=%d, pat=%s, PAT_FILE=%d, PAT_BUFFER=%d\n", strlen(Pattern), Pattern, PAT_FILE, PAT_BUFFER); #endif if ((m = strlen(Pattern)) <= 0) return 0; if (PAT_FILE || PAT_BUFFER) return 0; REGEX = OFF; FASTREGEX = OFF; old_pat = Pattern; /* to remember the starting position */ /* Check if pattern is a concatenation of ands OR ors of simple patterns */ multibuf = (char *)malloc(m * 2 + 2); /* worst case: a,a,a,a,a,a */ if (multibuf == NULL) goto normal_processing; /* if (WORDBOUND) goto normal_processing; */ multilen = 0; AParse = 0; ret1 = ret2 = 0; if (((ret1 = asplit_pattern(Pattern, m, aterminals, &anum_terminals, &AParse)) <= 0) || /* can change the pattern if simple boolean with {} */ ((ret2 = asplit_terminal(0, anum_terminals, multibuf, &multilen)) <= 0) || ((ret2 == 1) && !(aterminals[0].op & NOTPAT))) { /* must do normal processing */ if (AComplexBoolean && (AParse != 
NULL)) destroy_tree(AParse); /* so that direct exec invocations don't use AParse by mistake! */ #if DEBUG fprintf(stderr, "preprocess: split_pat = %d, split_term = %d, #terms = %d\n", ret1, ret2, anum_terminals); #endif /*DEBUG*/ /* if (ret2 == 1) { strcpy(Pattern, aterminals[0].data.leaf.value); m = strlen(Pattern); } */ m = strlen(Pattern); AParse = 0; free(multibuf); multibuf = NULL; multilen = 0; goto normal_processing; } /* This is quick processing */ if (AParse != 0) { /* successfully converted to ANDPAT/ORPAT */ PAT_BUFFER = 1; /* printf("preprocess(): converted= %d, patterns= %s", AParse, multibuf); */ /* Now I have to process the delimiter if any */ if (DELIMITER) { /* D_pattern is "; ", D_length is 1 + length of string PAT: see agrep.c/'d' */ preprocess_delimiter(D_pattern+1, D_length - 1, D_pattern, &D_length); /* D_pattern is the exact stuff we want to match, D_length is its strlen */ if ((tc_D_length = quick_tcompress(FREQ_FILE, HASH_FILE, D_pattern, D_length, tc_D_pattern, MaxDelimit*2, TC_EASYSEARCH)) <= 0) { strcpy(tc_D_pattern, D_pattern); tc_D_length = D_length; } /* printf("mgrep's delim=%s,%d tc_delim=%s,%d\n", D_pattern, D_length, tc_D_pattern, tc_D_length); */ } return 0; } /* else either unknown character, one simple pattern or none at all */ normal_processing: for(i=0; i< m; i++) { if(Pattern[i] == '\\') i++; else if(Pattern[i] == '|' || Pattern[i] == '*') REGEX = ON; } r_pat = (CHAR *) malloc(strlen(Pattern)+2*strlen(D_pattern) + 8); /* bug-report, From: Chris Dalton */ strcpy(temp, D_pattern); d_end = t = strlen(temp); /* size of D_pattern, including '; ' */ if (WHOLELINE) { temp[t++] = LANGLE; temp[t++] = NNLINE; temp[t++] = RANGLE; temp[t] = '\0'; strcat(temp, Pattern); m = strlen(temp); temp[m++] = LANGLE; temp[m++] = '\n'; temp[m++] = RANGLE; temp[m] = '\0'; } else { if (WORDBOUND) { temp[t++] = LANGLE; temp[t++] = WORDB; temp[t++] = RANGLE; temp[t] = '\0'; } strcat(temp, Pattern); m = strlen(temp); if (WORDBOUND) { temp[m++] = LANGLE; 
temp[m++] = WORDB; temp[m++] = RANGLE; } temp[m] = '\0'; } /* now temp contains augmented pattern , m it's size */ D_length = 0; for (i=0, j=0; i< d_end-2; i++) { switch(temp[i]) { case '\\' : i++; Pattern[j++] = temp[i]; old_D_pat[D_length++] = temp[i]; break; case '<' : Pattern[j++] = LANGLE; break; case '>' : Pattern[j++] = RANGLE; break; case '^' : Pattern[j++] = '\n'; old_D_pat[D_length++] = temp[i]; break; case '$' : Pattern[j++] = '\n'; old_D_pat[D_length++] = temp[i]; break; default : Pattern[j++] = temp[i]; old_D_pat[D_length++] = temp[i]; break; } } if(D_length > MAXDELIM) { fprintf(stderr, "%s: delimiter pattern too long (has > %d chars)\n", Progname, MAXDELIM); free(r_pat); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } Pattern[j++] = ANDPAT; old_D_pat[D_length] = '\0'; strcpy(D_pattern, old_D_pat); D_length++; /* Pattern[j++] = ' '; */ Pattern[j] = '\0'; rp = 0; if(REGEX) { r_pat[rp++] = '.'; /* if REGEX: always append '.' in front */ r_pat[rp++] = '('; Pattern[j++] = NOCARE; HEAD = ON; } for (i=d_end; i < m ; i++) { switch(temp[i]) { case '\\': i++; Pattern[j++] = temp[i]; r_pat[rp++] = 'o'; /* the symbol doesn't matter */ break; case '#': FASTREGEX = ON; if(REGEX) { Pattern[j++] = NOCARE; r_pat[rp++] = '.'; r_pat[rp++] = '*'; break; } Pattern[j++] = WILDCD; break; case '(': Pattern[j++] = LPARENT; r_pat[rp++] = '('; break; case ')': Pattern[j++] = RPARENT; r_pat[rp++] = ')'; break; case '[': Pattern[j++] = LRANGE; r_pat[rp++] = '['; IN_RANGE = ON; break; case ']': Pattern[j++] = RRANGE; r_pat[rp++] = ']'; IN_RANGE = OFF; break; case '<': Pattern[j++] = LANGLE; break; case '>': Pattern[j++] = RANGLE; break; case '^': if (temp[i-1] == '[') Pattern[j++] = NOTSYM; else Pattern[j++] = '\n'; r_pat[rp++] = '^'; break; case '$': Pattern[j++] = '\n'; r_pat[rp++] = '$'; break; case '.': Pattern[j++] = NOCARE; r_pat[rp++] = '.'; break; case '*': Pattern[j++] = STAR; r_pat[rp++] = '*'; break; case '|': Pattern[j++] = ORSYM; r_pat[rp++] = 
'|'; break; case ',': Pattern[j++] = ORPAT; RE_ERR = ON; break; case ';': if(ANDON) RE_ERR = ON; Pattern[j++] = ANDPAT; ANDON = ON; break; case '-': if(IN_RANGE) { Pattern[j++] = HYPHEN; r_pat[rp++] = '-'; } else { Pattern[j++] = temp[i]; r_pat[rp++] = temp[i]; } break; case NNLINE : Pattern[j++] = temp[i]; r_pat[rp++] = 'N'; break; default: Pattern[j++] = temp[i]; r_pat[rp++] = temp[i]; break; } } if(REGEX) { /* append ').' at end of regular expression */ r_pat[rp++] = ')'; r_pat[rp++] = '.'; Pattern[j++] = NOCARE; TAIL = ON; } Pattern[j] = '\0'; m = j; r_pat[rp] = '\0'; if(REGEX) { if(DELIMITER || WORDBOUND) { fprintf(stderr, "%s: -d or -w option is not supported for this pattern\n", Progname); free(r_pat); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } if(RE_ERR) { fprintf(stderr, "%s: illegal regular expression\n", Progname); free(r_pat); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } while(*Pattern != NOCARE && m-- > 0) Pattern++; /* poit to . */ num_pos = init(r_pat, table); if(num_pos <= 0) { fprintf(stderr, "%s: illegal regular expression\n", Progname); free(r_pat); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } if(num_pos > 30) { fprintf(stderr, "%s: regular expression too long\n", Progname); free(r_pat); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } strcpy(old_pat, Pattern); /* do real change to the Pattern to be returned */ free(r_pat); return 0; } /* if regex */ free(r_pat); return 0; } agrep-4.17/putils.c0100644001123100001460000000560507010116362012405 0ustar friurz/* Copyright (c) 1994 Burra Gopal, Udi Manber. All Rights Reserved. 
*/ #include "agrep.h" int is_complex_boolean(buffer, len) CHAR *buffer; int len; { int i = 0; CHAR cur = '\0'; while (i < len) { if (buffer[i] == '\\') i+=2; else if (buffer[i] == ',') { if ((cur == ';') || (cur == '~')) return 1; else cur = ','; i++; } else if (buffer[i] == ';') { if ((cur == ',') || (cur == '~')) return 1; else cur = ';'; i++; } /* else if ((buffer[i] == '~') || (buffer[i] == '{') || (buffer[i] == '}')) { */ else if (buffer[i] == '~') { /* even if pattern has just ~s... user must use -v option for single NOT */ return 1; } else i++; } return 0; } /* The possible tokens are: ; , a e ~ { } */ int get_token_bool(buffer, len, ptr, tokenbuf, tokenlen) CHAR *buffer, *tokenbuf; int len, *ptr, *tokenlen; { if ((*ptr>=len) || (buffer[*ptr] == '\n') || (buffer[*ptr] == '\0')) return 'e'; while ((*ptr=len) || (buffer[*ptr] == '\n') || (buffer[*ptr] == '\0')) return 'e'; if ((buffer[*ptr] == ',') || (buffer[*ptr] == ';') || (buffer[*ptr] == '~') || (buffer[*ptr] == '{') || (buffer[*ptr] == '}')) { tokenbuf[0] = buffer[*ptr]; *tokenlen = 1; return buffer[(*ptr)++]; } *tokenlen = 0; if (buffer[*ptr] == '\\') { tokenbuf[(*tokenlen)++] = buffer[(*ptr)++]; tokenbuf[(*tokenlen)++] = buffer[(*ptr)++]; } else tokenbuf[(*tokenlen)++] = buffer[(*ptr)++]; while ( (*ptr= len) */ } void print_tree(t, level) ParseTree *t; { int i; if (t == NULL) printf("NULL"); else if (t->type == LEAF) { for (i=0; iop, t->terminalindex, t->data.leaf.value); } else if (t->type == INTERNAL) { if (t->data.internal.left != NULL) print_tree(t->data.internal.left, level + 1); for (i=0; iop); if (t->data.internal.right != NULL) print_tree(t->data.internal.right, level + 1); } } void destroy_tree(t) ParseTree *t; { if (t == NULL) return; if (t->type == LEAF) { free(t->data.leaf.value); /* t itself should not be freed: static allocation */ } else if (t->type == INTERNAL) { if (t->data.internal.left != NULL) destroy_tree(t->data.internal.left); if (t->data.internal.right != NULL) 
destroy_tree(t->data.internal.right); free(t); } } agrep-4.17/re.h0100644001123100001460000000704407010116362011477 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ /************************************************************* * * * Macros defining special characters. * * * *************************************************************/ #define NUL '\0' #define ASCII_MIN '\001' #define ASCII_MAX '\177' /************************************************************* * * * Macros defining lexical categories. * * * *************************************************************/ #define C_LIT 0 /* individual character literal */ #define C_SET 1 /* character set literal */ #define EOS 0 /* end-of-string */ #define LITERAL 1 #define OPSTAR 2 #define OPALT 3 #define OPOPT 4 #define OPCAT 5 #define LPAREN 6 #define RPAREN 7 /************************************************************* * * * Macros for manipulating syntax tree nodes. * * * *************************************************************/ #define lit_type(x) (x->l_type) #define lit_pos(x) (x->pos) #define lit_char(x) ((x->val).c) #define lit_cset(x) ((x->val).cset) #define tok_type(x) (x->type) #define tok_val(x) (x->val) #define tok_op(x) (x->val->op) #define tok_lit(x) ((x->val->refs).lit) #define Op(x) (x->op) #define Lit(x) ((x->refs).lit) #define Child(x) ((x->refs).child) #define Lchild(x) ((x->refs).children.l_child) #define Rchild(x) ((x->refs).children.r_child) #define Nullable(x) (x->nullable) #define Firstpos(x) (x->firstposn) #define Lastpos(x) (x->lastposn) /************************************************************* * * * Macros for manipulating DFA states and sets of states. 
* * * *************************************************************/ #define Positions(x) (x->posns) #define Final_St(x) (x->final) #define Goto(x, c) ((x->trans)[c]) #define Next_State(x) ((x)->next_state) /*************************************************************/ #define new_node(type, l, x) \ {\ extern void *malloc();\ \ (l) = (type) malloc(sizeof(*(x)));\ if ((l) == NULL) {\ fprintf(stderr, "malloc failure in new_node\n");\ exit(2);\ }\ memset((l), '\0', sizeof(*(x)));\ } typedef struct { /* character range literals */ char low_bd, hi_bd; } *Ch_Range; typedef struct ch_set { /* character set literals */ Ch_Range elt; /* rep. as list of ranges */ struct ch_set *rest; } *Ch_Set; typedef struct { /* regular expression literal */ int pos; /* position in syntax tree */ short l_type; /* type of literal */ union { char c; /* for character literals */ Ch_Set cset; /* for character sets */ } val; } *Re_Lit, *(*Re_lit_array)[]; typedef struct pnode { int posnum; struct pnode *nextpos; } *Pset, *(*Pset_array)[]; typedef struct rnode { /* regular expression node */ short op; /* operator at that node */ union { Re_Lit lit; /* child is a leaf node */ struct rnode *child; /* child of unary op */ struct { struct rnode *l_child; struct rnode *r_child; } children; /* children of binary op */ } refs; short nullable; Pset firstposn, lastposn; } *Re_node; typedef struct { /* token node */ short type; Re_node val; } *Tok_node; typedef struct snode { Re_node val; int size; struct snode *next; } *Stack; typedef struct dfa_st { Pset posns; int final; /* 1 if the state is a final state, 0 o/w */ struct dfa_st *trans[128]; } *Dfa_state; typedef struct dfa_stset { Dfa_state st; struct dfa_stset *next_state; } *Dfa_state_set; agrep-4.17/recursive.c0100644001123100001460000000637307010116362013077 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ /* The function of the program is to traverse the direcctory tree and collect paath names. 
This program is derived from the C-programming language book Originally, the program open a directory file as a regular file. But it won't work. We have to open a directory file using opendir system call, and use readdir() to read each entry of the directory. */ #include "autoconf.h" /* ../libtemplate/include */ #include #include #if ISO_CHAR_SET #include #endif #if HAVE_DIRENT_H # include # define NAMLEN(dirent) strlen((dirent)->d_name) #else # define dirent direct # define NAMLEN(dirent) (dirent)->d_namlen # if HAVE_SYS_NDIR_H # include # endif # if HAVE_SYS_DIR_H # include # endif # if HAVE_NDIR_H # include # endif #endif #include #include #define BUFSIZE 256 #define DIRSIZE 14 #define max_list 10 #ifndef S_ISREG #define S_ISREG(mode) (0100000&(mode)) #endif #ifndef S_ISDIR #define S_ISDIR(mode) (0040000&(mode)) #endif char *file_list[max_list*2]; int fdx=0; /* index of file_List */ extern int Numfiles; char name_buf[BUFSIZE]; void directory(); static void treewalk(); /* returns -1 if error, num of matches >= 0 otherwise */ int recursive(argc, argv) int argc; char **argv; { int i,j; int num = 0, ret; for(i=0; i< argc; i++) { strcpy(name_buf, argv[i]); treewalk(name_buf); if(fdx > 0) { Numfiles = fdx; if ((ret = exec(3, file_list)) == -1) return -1; num += ret; for(j=0; j 0) { strcpy(buf, *++argv); treewalk(buf); } } */ static void treewalk(name) char *name; { struct stat stbuf; int i; extern void *malloc(); /* printf(" In treewalk\n"); */ if(my_lstat(name, &stbuf) == -1) { fprintf(stderr, "permission denied or non-existent: %s\n", name); return; } if ((stbuf.st_mode & S_IFMT) == S_IFLNK) { return; } if (( stbuf.st_mode & S_IFMT) == S_IFDIR) directory(name); else { file_list[fdx] = (char *)malloc(BUFSIZE); strcpy(file_list[fdx++], name); /* printf(" %s\n", name); */ if(fdx >= max_list) { Numfiles = fdx; exec(3, file_list); for(i=0; i= name+BUFSIZE ) /* name too long */ { fprintf(stderr, "name too long: %.32s...\n", name); return; } if((dirp = opendir(name)) == 
NULL) { fprintf(stderr, "permission denied: %s\n", name); return; } *nbp++ = '/'; *nbp = '\0'; for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) { if (dp->d_name[0] == '\0' || strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..")==0) goto CONT; /* printf("dp->d_name = %s\n", dp->d_name); */ strcpy(nbp, dp->d_name); treewalk(name); CONT: ; } closedir (dirp); *--nbp = '\0'; /* restore name */ } agrep-4.17/sgrep.c0100644001123100001460000023557707007374372012237 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ #include #include #include "agrep.h" #include #undef MAXSYM #define MAXSYM 256 #define MAXMEMBER 8192 #define CHARTYPE unsigned char #undef MaxError /* don't use agrep.h definition */ #define MaxError 20 #define MAXPATT 256 #undef MAXLINE #define MAXLINE 1024 #undef MAXNAME #define MAXNAME 256 #undef MaxCan /* don't use agrep.h definition */ #define MaxCan 2048 #define BLOCKSIZE 16384 #define MAX_SHIFT_2 4096 #undef ON #define ON 1 #undef OFF #define OFF 0 #define LOG_ASCII 8 #define LOG_DNA 3 #define MAXMEMBER_1 65536 #define LONG_EXAC 20 #define LONG_APPX 24 #if ISO_CHAR_SET #define W_DELIM 256 #else #define W_DELIM 128 #endif #include extern int tuncompressible(); extern int quick_tcompress(); extern int quick_tuncompress(); extern int DELIMITER, OUTTAIL; extern int D_length, tc_D_length; extern unsigned char D_pattern[MaxDelimit *2], tc_D_pattern[MaxDelimit *2]; extern int LIMITOUTPUT, LIMITPERFILE, INVERSE; extern int CurrentByteOffset; extern int BYTECOUNT; extern int PRINTOFFSET; extern int PRINTRECORD; extern int CONSTANT, COUNT, FNAME, SILENT, FILENAMEONLY, prev_num_of_matched, num_of_matched, PRINTFILETIME; extern int DNA ; /* DNA flag is set in checksg when pattern is DNA pattern and p_size > 16 */ extern WORDBOUND, WHOLELINE, NOUPPER; extern unsigned char CurrentFileName[], Progname[]; extern long CurrentFileTime; extern unsigned Mask[]; extern unsigned endposition; extern int agrep_inlen; extern 
CHARTYPE *agrep_inbuffer; extern int agrep_initialfd; extern FILE *agrep_finalfp; extern int agrep_outpointer; extern int agrep_outlen; extern CHARTYPE * agrep_outbuffer; extern int NEW_FILE, POST_FILTER; extern int EXITONERROR; extern int errno; extern int TCOMPRESSED; extern int EASYSEARCH; extern char FREQ_FILE[MAX_LINE_LEN], HASH_FILE[MAX_LINE_LEN], STRING_FILE[MAX_LINE_LEN]; #if MEASURE_TIMES /* timing variables */ extern int OUTFILTER_ms; extern int FILTERALGO_ms; extern int INFILTER_ms; #endif /*MEASURE_TIMES*/ unsigned char BSize; /* log_c m */ unsigned char char_map[MAXSYM]; /* data area */ int shift_1; CHARTYPE SHIFT[MAXSYM]; CHARTYPE MEMBER[MAXMEMBER]; CHARTYPE pat[MAXPATT]; unsigned Hashmask; char MEMBER_1[MAXMEMBER_1]; CHARTYPE TR[MAXSYM]; static void initmask(); static void am_preprocess(); static void m_preprocess(); static void prep(); static void prep4(); static void prep_bm(); /* * General idea behind output processing with delimiters, inverse, compression, etc. * CAUTION: In compressed files, we can search ONLY for simple patterns or their ;,. * Attempts to search for complex patterns / with errors might lead to spurious matches. * 1. Once we find the match, go back and forward to get the delimiters that surround * the matched region. * 2. If it is a compressed file, verify that the match is "real" (compressed files * can have pseudo matches hence this filtering step is required). * 3. Increment num_of_matched. * 4. Process some output options which print stuff before the matched region is * printed. * 5. If there is compression, decomress and output the matched region. Otherwise * just output it as is. Remember, from step (1) we know the matched region. * 6. If inverse is set, then we must keep track of the end of the last matched region * in the variable lastout. 
When there is a match, we must print everything from * lastout to the beginning of the current matched region (curtextbegin) and then * update lastout to point to the end of the current matched region (curtextend). * ALSO: if we exit from the main loops, we must output everything from the end * of the last matched region to the end of the input buffer. * 7. Delimiter handling in complex patterns is different: there the search is done * for a boolean and of the delimiter pattern and the actual pattern. */ /* skips over escaped characters */ unsigned char * mystrchr(s, c) unsigned char *s; int c; { unsigned char *t = s; while (*t) { if (*t == '\\') t++; else if (c == *t) return t; t ++; } return NULL; } void char_tr(pat, m) unsigned char *pat; int *m; { int i; unsigned char temp[MAXPATT]; for(i=0; i1) && (pat[m-2] != '\\') && ((pat[m-1] == '^') || (pat[m-1] == '$'))) pat[m-1] = '\n';\ }\ /* whether constant or not, interpret the escape character */\ for (k=0; k= MAXPATT) {\ fprintf(stderr, "%s: pattern too long (has > %d chars)\n", Progname, MAXPATT);\ if (!EXITONERROR) {\ errno = AGREP_ERROR;\ return -1;\ }\ else exit(2);\ }\ if(D == 0) {\ if(m > LONG_EXAC) m_preprocess(pat);\ else prep_bm(pat, m);\ }\ else if (DNA) prep4(pat, m);\ else if(m >= LONG_APPX) am_preprocess(pat);\ else {\ prep(pat, m, D);\ initmask(pat, Mask, m, 0, &endposition);\ } #if AGREP_POINTER if (fd != -1) { #endif /*AGREP_POINTER*/ alloc_buf(fd, &text, 2*BlockSize+2*Max_record+MAXPATT); text[offset-1] = '\n'; /* initial case */ for(i=0; i < Max_record; i++) text[i] = 0; /* security zone */ start = offset; if(WHOLELINE) { start--; CurrentByteOffset --; } while( (num_read = fill_buf(fd, text+offset, 2*BlockSize)) > 0) { buf_end = end = offset + num_read -1 ; oldCurrentByteOffset = CurrentByteOffset; if (first_time) { if ((TCOMPRESSED == ON) && tuncompressible(text+offset, num_read)) { EASYSEARCH = text[offset+SIGNATURE_LEN-1]; start += SIGNATURE_LEN; CurrentByteOffset += SIGNATURE_LEN; if 
(!EASYSEARCH) { fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName); } #if MEASURE_TIMES gettimeofday(&initt, NULL); #endif /*MEASURE_TIMES*/ if (samepattern || ((newm = quick_tcompress(FREQ_FILE, HASH_FILE, pat, m, newpat, Max_record-8, EASYSEARCH)) > 0)) { oldm = m; oldpat = pat; m = newm; pat = newpat; } #if MEASURE_TIMES gettimeofday(&finalt, NULL); INFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ } else TCOMPRESSED = OFF; PROCESS_PATTERN /* must be AFTER we know that it is a compressed pattern... */ for(i=1; i<=m; i++) text[2*BlockSize+offset+i] = pat[m-1]; /* to make sure the skip loop in bm() won't go out of bound in later iterations */ first_time = 0; } if (!DELIMITER) { while ((text[end] != '\n') && (end > offset)) end--; text[start-1] = '\n'; } else { unsigned char *newbuf = text + end + 1; newbuf = backward_delimiter(newbuf, text+offset, D_pattern, D_length, OUTTAIL); /* see agrep.c/'d' */ if (newbuf < text+offset+D_length) newbuf = text + end + 1; end = newbuf - text - 1; memcpy(text+start-D_length, D_pattern, D_length); } residue = buf_end - end + 1 ; /* SGREP_PROCESS */ /* No harm in sending a few extra parameters even if they are unused: they are not accessed in monkey*()s */ if(D==0) { if(m > LONG_EXAC) { if (-1 == monkey(pat, m, text+start, text+end, oldpat, oldm)) { free_buf(fd, text); return -1; } } else { if (-1 == bm(pat, m, text+start, text+end, oldpat, oldm)) { free_buf(fd, text); return -1; } } } else { if(DNA) { if (-1 == monkey4( pat, m, text+start, text+end, D , oldpat, oldm )) { free_buf(fd, text); return -1; } } else { if(m >= LONG_APPX) { if (-1 == a_monkey(pat, m, text+start, text+end, D, oldpat, oldm)) { free_buf(fd, text); return -1; } } else { if (-1 == agrep(pat, m, text+start, text+end, D, oldpat, oldm)) { free_buf(fd, text); return -1; } } } } if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && 
(NEW_FILE || !POST_FILTER)) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, text); NEW_FILE = OFF; return 0; } CurrentByteOffset = oldCurrentByteOffset + end - start + 1; /* for a new iteration: avoid complicated calculations below */ start = offset - residue ; if(start < Max_record) { start = Max_record; } /* strncpy(text+start, text+end, residue); */ memcpy(text+start, text+end, residue); start++; if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) { free_buf(fd, text); return 0; /* done */ } } /* end of while(num_read = ...) 
*/ if (!DELIMITER) { text[start-1] = '\n'; text[start+residue] = '\n'; } else { if (start > D_length) memcpy(text+start-D_length, D_pattern, D_length); memcpy(text+start+residue, D_pattern, D_length); } end = start + residue - 2; if(residue > 1) { /* SGREP_PROCESS */ /* No harm in sending a few extra parameters even if they are unused: they are not accessed in monkey*()s */ if(D==0) { if(m > LONG_EXAC) { if (-1 == monkey(pat, m, text+start, text+end, oldpat, oldm)) { free_buf(fd, text); return -1; } } else { if (-1 == bm(pat, m, text+start, text+end, oldpat, oldm)) { free_buf(fd, text); return -1; } } } else { if(DNA) { if (-1 == monkey4( pat, m, text+start, text+end, D , oldpat, oldm )) { free_buf(fd, text); return -1; } } else { if(m >= LONG_APPX) { if (-1 == a_monkey(pat, m, text+start, text+end, D, oldpat, oldm)) { free_buf(fd, text); return -1; } } else { if (-1 == agrep(pat, m, text+start, text+end, D, oldpat, oldm)) { free_buf(fd, text); return -1; } } } } if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, text); NEW_FILE = OFF; return 0; } } free_buf(fd, text); return 0; #if AGREP_POINTER } else { /* as if only one iteration of the while-loop and offset = 0 */ tempbuf = 
(CHARTYPE*)malloc(m); text = (CHARTYPE *)agrep_inbuffer; num_read = agrep_inlen; start = 0; buf_end = end = num_read - 1; #if 0 if (WHOLELINE) { start --; CurrentByteOffset --; } #endif if ((TCOMPRESSED == ON) && tuncompressible(text+1, num_read)) { EASYSEARCH = text[offset+SIGNATURE_LEN-1]; start += SIGNATURE_LEN; CurrentByteOffset += SIGNATURE_LEN; if (!EASYSEARCH) { fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName); } #if MEASURE_TIMES gettimeofday(&initt, NULL); #endif /*MEASURE_TIMES*/ if (samepattern || ((newm = quick_tcompress(FREQ_FILE, HASH_FILE, pat, m, newpat, Max_record-8, EASYSEARCH)) > 0)) { oldm = m; oldpat = pat; m = newm; pat = newpat; } #if MEASURE_TIMES gettimeofday(&finalt, NULL); INFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ } else TCOMPRESSED = OFF; PROCESS_PATTERN /* must be after we know whether it is compressed or not */ memcpy(tempbuf, text+end+1, m); /* save portion being overwritten */ for(i=1; i<=m; i++) text[end+i] = pat[m-1]; /* to make sure the skip loop in bm() won't go out of bound in later iterations */ if (!DELIMITER) while(text[end] != '\n' && end > 1) end--; else { unsigned char *newbuf = text + end + 1; newbuf = backward_delimiter(newbuf, text, D_pattern, D_length, OUTTAIL); /* see agrep.c/'d' */ if (newbuf < text+offset+D_length) newbuf = text + end + 1; end = newbuf - text - 1; } /* text[0] = text[end] = r_newline; : the user must ensure that the delimiter is there at text[0] and occurs somewhere before text[end ] */ /* An exact copy of the above SGREP_PROCESS */ /* No harm in sending a few extra parameters even if they are unused: they are not accessed in monkey*()s */ if(D==0) { if(m > LONG_EXAC) { if (-1 == monkey(pat, m, text+start, text+end, oldpat, oldm)) { free_buf(fd, text); memcpy(text+end+1, tempbuf, m); /* restore */ free(tempbuf); return -1; } } else { if (-1 == bm(pat, m, text+start, 
text+end, oldpat, oldm)) { free_buf(fd, text); memcpy(text+end+1, tempbuf, m); /* restore */ free(tempbuf); return -1; } } } else { if(DNA) { if (-1 == monkey4( pat, m, text+start, text+end, D , oldpat, oldm )) { free_buf(fd, text); memcpy(text+end+1, tempbuf, m); /* restore */ free(tempbuf); return -1; } } else { if(m >= LONG_APPX) { if (-1 == a_monkey(pat, m, text+start, text+end, D, oldpat, oldm)) { free_buf(fd, text); memcpy(text+end+1, tempbuf, m); /* restore */ free(tempbuf); return -1; } } else { if (-1 == agrep(pat, m, text+start, text+end, D, oldpat, oldm)) { free_buf(fd, text); memcpy(text+end+1, tempbuf, m); /* restore */ free(tempbuf); return -1; } } } } if(FILENAMEONLY && (num_of_matched - prev_num_of_matched) && (NEW_FILE || !POST_FILTER)) { /* externally set */ if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", CurrentFileName); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "\n"); else { if (agrep_outpointer+1>=agrep_outlen) { OUTPUT_OVERFLOW; free_buf(fd, text); return -1; } else agrep_outbuffer[agrep_outpointer++] = '\n'; } free_buf(fd, text); NEW_FILE = OFF; } memcpy(text+end+1, tempbuf, m); /* restore */ free(tempbuf); return 0; } #endif /*AGREP_POINTER*/ } /* end sgrep */ /* SUN: bm assumes that the content of text[n]...text[n+m-1] is pat[m-1] such that the skip loop is guaranteed to terminated */ int bm(pat, m, text, textend, oldpat, oldm) CHARTYPE *text, *textend, *pat, *oldpat; int m, oldm; { int PRINTED = 0; register int shift; register int m1, j, d1; CHARTYPE *textbegin = text; int 
newlen; CHARTYPE *textstart; CHARTYPE *curtextbegin; CHARTYPE *curtextend; #if MEASURE_TIMES struct timeval initt, finalt; #endif CHARTYPE *lastout = text; d1 = shift_1; /* at least 1 */ m1 = m - 1; shift = 0; while (text <= textend) { textstart = text; shift = SHIFT[*(text += shift)]; while(shift) { shift = SHIFT[*(text += shift)]; shift = SHIFT[*(text += shift)]; shift = SHIFT[*(text += shift)]; } CurrentByteOffset += text - textstart; j = 0; while(TR[pat[m1 - j]] == TR[*(text - j)]) { if(++j == m) break; /* if statement can be saved, but for safty ... */ } if (j == m ) { if(text > textend) return 0; if(WORDBOUND) { /* if(isalnum(*(unsigned char *)(text+1))) goto CONT; --> fixed by SHIOZAKI Takehiko */ if((text+1 <= textend) && isalnum(*(unsigned char *)(text+1)) && isalnum(*(unsigned char *)text)) { shift = 1; /* bg 4/27/97 */ goto WCONT; /* as if there was no match */ } /* if(isalnum(*(unsigned char *)(text-m))) goto CONT; --> fixed by SHIOZAKI Takehiko */ if((textbegin <= (text-m)) && isalnum(*(unsigned char *)(text-m)) && isalnum(*(unsigned char *)(text-m+1))) { shift = 1; /* bg 4/27/97 */ goto WCONT; /* as if there was no match */ } /* changed by Udi 11/7/94 to avoid having to set TR[] to W_delim */ } if (TCOMPRESSED == ON) { /* Don't update CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin; /*text-m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL); if (!OUTTAIL) { curtextend = forward_delimiter(curtextbegin+D_length/*text-m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL); } else { curtextend = forward_delimiter(curtextbegin/*text-m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL); } } } else { /* Don't update 
CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text-m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL); if (!OUTTAIL) { curtextend = forward_delimiter(curtextbegin+D_length/*text-m*/, textend, D_pattern, D_length, OUTTAIL); } else { curtextend = forward_delimiter(curtextbegin/*text-m*/, textend, D_pattern, D_length, OUTTAIL); } } } if (TCOMPRESSED == ON) { #if MEASURE_TIMES gettimeofday(&initt, NULL); #endif /*MEASURE_TIMES*/ if (-1 == exists_tcompressed_word(pat, m, curtextbegin, text - curtextbegin + m, EASYSEARCH)) goto CONT; /* as if there was no match */ #if MEASURE_TIMES gettimeofday(&finalt, NULL); FILTERALGO_ms += (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ } textbegin = curtextend; /* (curtextend - 1 > textbegin ? 
curtextend - 1 : curtextend); */ num_of_matched++; if(FILENAMEONLY) return 0; if(!COUNT) { if (!INVERSE) { if(FNAME && (NEW_FILE || !POST_FILTER)) { char nextchar = (POST_FILTER == ON)?'\n':' '; char *prevstring = (POST_FILTER == ON)?"\n":""; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName); else { int outindex; if (prevstring[0] != '\0') { if(agrep_outpointer + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else agrep_outbuffer[agrep_outpointer ++] = prevstring[0]; } for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, ":%c", nextchar); else { if (agrep_outpointer+2>= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else { agrep_outbuffer[agrep_outpointer++] = ':'; agrep_outbuffer[agrep_outpointer++] = nextchar; } } NEW_FILE = OFF; PRINTED = 1; } if(BYTECOUNT) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%d= ", CurrentByteOffset); else { char s[32]; int outindex; sprintf(s, "%d=", CurrentByteOffset); for(outindex=0; (outindex+agrep_outpointer 0) { if (agrep_outpointer + newlen + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } #if MEASURE_TIMES gettimeofday(&finalt, NULL); OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ } else { if (agrep_finalfp != NULL) { fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp); } else { if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin); agrep_outpointer 
+= curtextend - curtextbegin; } } } else if (PRINTED) { if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp); else agrep_outbuffer[agrep_outpointer ++] = '\n'; PRINTED = 0; } } else { /* INVERSE */ if (!SILENT) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp); else { if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout); agrep_outpointer += (curtextbegin - lastout); } lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } /* TCOMPRESSED */ } /* !SILENT */ } /* INVERSE */ } else { /* COUNT */ CurrentByteOffset += textbegin - text; text = textbegin; } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */ CONT: if (m == 1) shift = 0; else shift = 1; /* ZZZZZZZZZZZZZZZZ check it out later */ } else shift = d1; WCONT: ; } if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, 
agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp); else { if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1); agrep_outpointer += (textend - lastout + 1); } } /* TCOMPRESSED */ } return 0; } /* initmask() initializes the mask table for the pattern */ /* endposition is a mask for the endposition of the pattern */ /* endposition will contain k mask bits if the pattern contains k fragments */ static void initmask(pattern, Mask, m, D, endposition) CHARTYPE *pattern; unsigned *Mask; register int m, D; unsigned *endposition; { register unsigned Bit1, c; register int i, j, frag_num; /* Bit1 = 1 << 31;*/ /* the first bit of Bit1 is 1, others 0. */ Bit1 = (unsigned)0x80000000; frag_num = D+1; *endposition = 0; for (i = 0; i < frag_num; i++) *endposition = *endposition | (Bit1 >> i); *endposition = *endposition >> (m - frag_num); for(i = 0; i < m; i++) if (pattern[i] == '^' || pattern[i] == '$') { pattern[i] = '\n'; } for(i = 0; i < MAXSYM; i++) Mask[i] = ~0; for(i = 0; i < m; i++) /* initialize the mask table */ { c = pattern[i]; for ( j = 0; j < m; j++) if( c == pattern[j] ) Mask[c] = Mask[c] & ~( Bit1 >> j ) ; } } static void prep(Pattern, M, D) /* preprocessing for partitioning_bm */ CHARTYPE *Pattern; /* can be fine-tuned to choose a better partition */ register int M, D; { register int i, j, k, p, shift; register unsigned m; unsigned hash, b_size = 3; m = M/(D+1); p = M - m*(D+1); for (i = 0; i < MAXSYM; i++) SHIFT[i] = m; for (i = M-1; i>=p ; i--) { shift = (M-1-i)%m; hash = Pattern[i]; if((int)(SHIFT[hash]) > (int)(shift)) SHIFT[hash] = shift; } #ifdef DEBUG for(i=0; i Candidate[cdx][1]) { Candidate[++cdx][0] = i-M-D-2; 
Candidate[cdx][1] = i+M+D; } else Candidate[cdx][1] = i+M+D; shift = d1; } else shift = d1; } CurrentByteOffset += (textbegin - text); text = textbegin; n = textend - textbegin; r_newline = '\n'; /* for those candidate areas, find the D-error matches */ if(Candidate[1][0] < 0) Candidate[1][0] = 0; endpos = endposition; /* the mask table and the endposition */ /* Bit1 = (1 << 31); */ Bit1 = (unsigned)0x80000000; oldbyteoffset = CurrentByteOffset; for(round = 0; round <= cdx; round++) { i = Candidate[round][0] ; if(Candidate[round][1] > n) Candidate[round][1] = n; if(i < 0) i = 0; CurrentByteOffset = oldbyteoffset+i; R1[0] = R2[0] = ~0; R1[1] = R2[1] = ~Bit1; for(k = 1; k <= D; k++) R1[k] = R2[k] = (R1[k-1] >> 1) & R1[k-1]; while (i < Candidate[round][1]) { c = text[i++]; CurrentByteOffset ++; if(c == r_newline) { for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 ); } r1 = Mask[c]; R1[0] = (R2[0] >> 1) | r1; for(k=1; k<=D; k++) R1[k] = ((R2[k] >> 1) | r1) & R2[k-1] & ((R1[k-1] & R2[k-1]) >> 1); if((R1[D] & endpos) == 0) { num_of_matched++; if(FILENAMEONLY) return 0; currentpos = i; if(i <= lastend) { CurrentByteOffset += lastend - i; i = lastend; } else { int oldcurrentpos = currentpos; if (-1 == s_output(text, ¤tpos, textbegin, textend, &lastout, pat, M, oldpat, oldM)) return -1; CurrentByteOffset += currentpos - oldcurrentpos; i = currentpos; } lastend = i; for(k=0; k<=D; k++) R1[k] = R2[k] = ~0; if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */ } /* copying the code to save a few instructions. you need to understand the shift-or algorithm to figure this one... 
*/ c = text[i++]; CurrentByteOffset ++; if(c == r_newline) { for(k = 0 ; k <= D; k++) R1[k] = R2[k] = (~0 ); } r1 = Mask[c]; R2[0] = (R1[0] >> 1) | r1; for(k = 1; k <= D; k++) R2[k] = ((R1[k] >> 1) | r1) & R1[k-1] & ((R1[k-1] & R2[k-1]) >> 1); if((R2[D] & endpos) == 0) { currentpos = i; num_of_matched++; if(FILENAMEONLY) return 0; if(i <= lastend) { CurrentByteOffset += lastend - i; i = lastend; } else { int oldcurrentpos = currentpos; if (-1 == s_output(text, ¤tpos, textbegin, textend, &lastout, pat, M, oldpat, oldM)) return -1; CurrentByteOffset += currentpos - oldcurrentpos; i = currentpos; } lastend = i; for(k=0; k<=D; k++) R1[k] = R2[k] = ~0; if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */ } } } if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp); else { if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1); agrep_outpointer += (textend - lastout + 1); } } /* TCOMPRESSED */ } return 0; } /* Don't update CurrentByteOffset here: done by caller */ int s_output(text, i, textbegin, textend, lastout, pat, m, oldpat, oldm) int *i; /* in, out */ int m, oldm; CHARTYPE *text, *textbegin, *textend, *pat, *oldpat; CHARTYPE **lastout; /* in, out */ { 
int PRINTED = 0; int newlen; int oldi; CHARTYPE *curtextbegin; CHARTYPE *curtextend; #if MEASURE_TIMES struct timeval initt, finalt; #endif if(SILENT) return 0; if (TCOMPRESSED == ON) { if (!DELIMITER) { curtextbegin = text + *i; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text + *i, text, tc_D_pattern, tc_D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */, textend, tc_D_pattern, tc_D_length, OUTTAIL); } } else { if (!DELIMITER) { curtextbegin = text + *i; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text + *i, text, D_pattern, D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin /*text -m + *i*/ /* + 1 agrep() has i++ */, textend, D_pattern, D_length, OUTTAIL); } } if (TCOMPRESSED == ON) { #if MEASURE_TIMES gettimeofday(&initt, NULL); #endif /*MEASURE_TIMES*/ if (-1 == exists_tcompressed_word(pat, m, curtextbegin, text + *i - curtextbegin + m, EASYSEARCH)) { num_of_matched --; return 0; } #if MEASURE_TIMES gettimeofday(&finalt, NULL); FILTERALGO_ms += (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ } textbegin = curtextend; /*(curtextend - 1 > textbegin ? 
curtextend - 1 : curtextend); */ oldi = *i; *i += textbegin - (text + *i); if(COUNT) return 0; if (INVERSE) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, *lastout, curtextbegin - *lastout, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, *lastout, curtextbegin - *lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } *lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(*lastout, 1, curtextbegin-*lastout, agrep_finalfp); else { if (curtextbegin - *lastout + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, *lastout, curtextbegin-*lastout); agrep_outpointer += (curtextbegin - *lastout); } *lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } /* TCOMPRESSED */ return 0; } if(FNAME && (NEW_FILE || !POST_FILTER)) { char nextchar = (POST_FILTER == ON)?'\n':' '; char *prevstring = (POST_FILTER == ON)?"\n":""; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName); else { int outindex; if (prevstring[0] != '\0') { if(agrep_outpointer + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else agrep_outbuffer[agrep_outpointer ++] = prevstring[0]; } for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, ":%c", 
nextchar); else { if (agrep_outpointer+2>= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else { agrep_outbuffer[agrep_outpointer++] = ':'; agrep_outbuffer[agrep_outpointer++] = nextchar; } } NEW_FILE = OFF; PRINTED = 1; } if(BYTECOUNT) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%d= ", CurrentByteOffset); else { char s[32]; int outindex; sprintf(s, "%d= ", CurrentByteOffset); for(outindex=0; (outindex+agrep_outpointer 0) { if (agrep_outpointer + newlen + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } #if MEASURE_TIMES gettimeofday(&finalt, NULL); OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ } else { if (agrep_finalfp != NULL) { fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp); } else { if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer + agrep_outpointer, curtextbegin, curtextend - curtextbegin); agrep_outpointer += curtextend - curtextbegin; } } } else if (PRINTED) { if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp); else agrep_outbuffer[agrep_outpointer ++] = '\n'; PRINTED = 0; } return 0; } static void prep_bm(Pattern, m) unsigned char *Pattern; register m; { int i; unsigned hash; unsigned char lastc; for (i = 0; i < MAXSYM; i++) SHIFT[i] = m; for (i = m-1; i>=0; i--) { hash = TR[Pattern[i]]; if((int)(SHIFT[hash]) >= (int)(m - 1)) SHIFT[hash] = m-1-i; } shift_1 = m-1; /* shift_1 records the previous occurrence of the last character of the pattern. When we match this last character but do not have a match, we can shift until we reach the next occurrence from the right. 
*/ lastc = TR[Pattern[m-1]]; for (i= m-2; i>=0; i--) { if(TR[Pattern[i]] == lastc ) { shift_1 = m-1 - i; i = -1; } } if(shift_1 == 0) shift_1 = 1; /* can never happen - Udi 11/7/94 */ if(NOUPPER) for(i=0; i textend) return 0; /* Udi: used to be >= for some reason */ /* added by Udi 11/7/94 */ if(WORDBOUND) { /* if(isalnum(*(unsigned char *)(text+1))) goto CONT; --> fixed by SHIOZAKI Takehiko */ if((text+1 <= textend) && isalnum(*(unsigned char *)(text+1)) && isalnum(*(unsigned char *)text)) { goto CONT; /* as if there was no match */ } /* if(isalnum(*(unsigned char *)(text-m))) goto CONT; --> fixed by SHIOZAKI Takehiko */ if((textbegin <= (text-m)) && isalnum(*(unsigned char *)(text-m)) && isalnum(*(unsigned char *)(text-m+1))) { goto CONT; /* as if there was no match */ } /* changed by Udi 11/7/94 to avoid having to set TR[] to W_delim */ } if (TCOMPRESSED == ON) { /* Don't update CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text-m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin /*text -m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL); } } else { /* Don't update CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text-m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin/*text -m*/, textend, 
D_pattern, D_length, OUTTAIL); } } if (TCOMPRESSED == ON) { #if MEASURE_TIMES gettimeofday(&initt, NULL); #endif /*MEASURE_TIMES*/ if (-1 == exists_tcompressed_word(pat, m, curtextbegin, text - curtextbegin + m, EASYSEARCH)) goto CONT; /* as if there was no match */ #if MEASURE_TIMES gettimeofday(&finalt, NULL); FILTERALGO_ms += (finalt.tv_sec *1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ } textbegin = curtextend; /*(curtextend - 1 > textbegin ? curtextend - 1 : curtextend); */ num_of_matched++; if(FILENAMEONLY) return 0; if (!COUNT) { if (!INVERSE) { if(FNAME && (NEW_FILE || !POST_FILTER)) { char nextchar = (POST_FILTER == ON)?'\n':' '; char *prevstring = (POST_FILTER == ON)?"\n":""; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName); else { int outindex; if (prevstring[0] != '\0') { if(agrep_outpointer + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else agrep_outbuffer[agrep_outpointer ++] = prevstring[0]; } for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, ":%c", nextchar); else { if (agrep_outpointer+2>= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else { agrep_outbuffer[agrep_outpointer++] = ':'; agrep_outbuffer[agrep_outpointer++] = nextchar; } } NEW_FILE = OFF; PRINTED = 1; } if(BYTECOUNT) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%d= ", CurrentByteOffset); else { char s[32]; int outindex; sprintf(s, "%d= ", CurrentByteOffset); for(outindex=0; (outindex+agrep_outpointer 0) { if (agrep_outpointer + newlen + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } 
agrep_outpointer += newlen; } } #if MEASURE_TIMES gettimeofday(&finalt, NULL); OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ } else { if (agrep_finalfp != NULL) { fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp); } else { if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin); agrep_outpointer += curtextend - curtextbegin; } } } else if (PRINTED) { if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp); else agrep_outbuffer[agrep_outpointer ++] = '\n'; PRINTED = 0; } } else { /* INVERSE */ if (!SILENT) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp); else { if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout); agrep_outpointer += (curtextbegin - lastout); } lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } /* TCOMPRESSED */ } /* !SILENT */ } /* INVERSE */ } else { /* COUNT */ CurrentByteOffset += textbegin - text; text = textbegin; } /* Counteract the ++ below */ text --; CurrentByteOffset --; if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && 
(LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */ } CONT: text++; CurrentByteOffset ++; } if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp); else { if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1); agrep_outpointer += (textend - lastout + 1); } } /* TCOMPRESSED */ } return 0; } /* a_monkey() the approximate monkey move */ int a_monkey( pat, m, text, textend, D ) register int m, D ; register CHARTYPE *text, *textend, *pat; { int PRINTED = 0; register CHARTYPE *oldtext; CHARTYPE *curtextbegin; CHARTYPE *curtextend; register unsigned hash, hashmask, suffix_error; register int m1 = m-1-D, pos; CHARTYPE *textbegin = text; CHARTYPE *textstart; CHARTYPE *lastout = text; int newlen; hashmask = Hashmask; oldtext = text; while (text < textend) { textstart = text; text = text+m1; suffix_error = 0; while(suffix_error <= D) { hash = *text--; while(MEMBER_1[hash]) { hash = ((hash << LOG_ASCII) + *(text--)) & hashmask; } suffix_error++; } CurrentByteOffset += text - textstart; if(text <= oldtext) { if((pos = verify(m, 2*m+D, D, pat, oldtext)) > 0) { CurrentByteOffset += (oldtext+pos - text); text = oldtext+pos; if(text > textend) return 0; /* Don't update CurrentByteOffset here: only before outputting properly */ if (TCOMPRESSED == ON) { 
if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text -m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin /*text -m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL); } } else { if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin/*text -m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin/*text -m*/, textend, D_pattern, D_length, OUTTAIL); } } textbegin = curtextend; /* (curtextend - 1 > textbegin ? 
curtextend - 1 : curtextend); */ num_of_matched++; if(FILENAMEONLY) return 0; if(!COUNT) { if (!INVERSE) { if(FNAME && (NEW_FILE || !POST_FILTER)) { char nextchar = (POST_FILTER == ON)?'\n':' '; char *prevstring = (POST_FILTER == ON)?"\n":""; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName); else { int outindex; if (prevstring[0] != '\0') { if(agrep_outpointer + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else agrep_outbuffer[agrep_outpointer ++] = prevstring[0]; } for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, ":%c", nextchar); else { if (agrep_outpointer+2>= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else { agrep_outbuffer[agrep_outpointer++] = ':'; agrep_outbuffer[agrep_outpointer++] = nextchar; } } NEW_FILE = OFF; PRINTED = 1; } if(BYTECOUNT) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%d= ", CurrentByteOffset); else { char s[32]; int outindex; sprintf(s, "%d= ", CurrentByteOffset); for(outindex=0; (outindex+agrep_outpointer 0) { if (agrep_outpointer + newlen + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } #if MEASURE_TIMES gettimeofday(&finalt, NULL); OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ } else { if (agrep_finalfp != NULL) { fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp); } else { if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin); 
agrep_outpointer += curtextend - curtextbegin; } } } else if (PRINTED) { if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp); else agrep_outbuffer[agrep_outpointer ++] = '\n'; PRINTED = 0; } } else { /* INVERSE */ if (!SILENT) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp); else { if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout); agrep_outpointer += (curtextbegin - lastout); } lastout=textbegin; CurrentByteOffset += textbegin - text; text = textbegin; } /* TCOMPRESSED */ } /* !SILENT */ } /* INVERSE */ } else { /* COUNT */ CurrentByteOffset += textbegin - text; text = textbegin; } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */ } else { CurrentByteOffset += (oldtext + m - text); text = oldtext + m; } } oldtext = text; } if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, 
agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp); else { if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1); agrep_outpointer += (textend - lastout + 1); } } /* TCOMPRESSED */ } return 0; } static void am_preprocess(Pattern) CHARTYPE *Pattern; { int i, m; m = strlen(Pattern); for (i = 1, Hashmask = 1 ; i<16 ; i++) Hashmask = (Hashmask << 1) + 1 ; for (i = 0; i < MAXMEMBER_1; i++) MEMBER_1[i] = 0; for (i = m-1; i>=0; i--) { MEMBER_1[Pattern[i]] = 1; } for (i = m-1; i > 0; i--) { MEMBER_1[(Pattern[i] << LOG_ASCII) + Pattern[i-1]] = 1; } } int verify(m, n, D, pat, text) register int m, n, D; CHARTYPE *pat, *text; { int A[MAXPATT], B[MAXPATT]; register int last = D; register int cost = 0; register int k, i, c; register int m1 = m+1; CHARTYPE *textend = text+n; CHARTYPE *textbegin = text; for (i = 0; i <= m1; i++) A[i] = B[i] = i; while (text < textend) { for (k = 1; k <= last; k++) { cost = B[k-1]+1; if (pat[k-1] != *text) { if (B[k]+1 < cost) cost = B[k]+1; if (A[k-1]+1 < cost) cost = A[k-1]+1; } else cost = cost -1; A[k] = cost; } if(pat[last] == *text++) { A[last+1] = B[last]; last++; } if(A[last] < D) A[last+1] = A[last++]+1; while (A[last] > D) last = last - 1; if(last >= m) return(text - textbegin - 1); if(*text == '\n') { last = D; for(c = 0; c<=m1; c++) A[c] = B[c] = c; } for (k = 1; k <= last; k++) { cost = A[k-1]+1; if (pat[k-1] != *text) { if (A[k]+1 < cost) cost = A[k]+1; if (B[k-1]+1 < cost) cost = B[k-1]+1; } else cost = cost -1; B[k] = cost; } if(pat[last] == *text++) { B[last+1] = A[last]; last++; } if(B[last] < D) B[last+1] = B[last++]+1; while (B[last] > D) last = last -1; if(last >= m) return(text - textbegin - 1); 
if(*text == '\n') { last = D; for(c = 0; c<=m1; c++) A[c] = B[c] = c; } } return(0); } /* preprocessing for monkey() */ static void m_preprocess(Pattern) CHARTYPE *Pattern; { int i, j, m; unsigned hash; m = strlen(Pattern); for (i = 0; i < MAX_SHIFT_2; i++) SHIFT_2[i] = m; for (i = m-1; i>=1; i--) { hash = TR[Pattern[i]]; hash = hash << 3; for (j = 0; j< MAXSYM; j++) { if(SHIFT_2[hash+j] == m) SHIFT_2[hash+j] = m-1; } hash = hash + TR[Pattern[i-1]]; if((int)(SHIFT_2[hash]) >= (int)(m - 1)) SHIFT_2[hash] = m-1-i; } shift_1 = m-1; for (i= m-2; i>=0; i--) { if(TR[Pattern[i]] == TR[Pattern[m-1]] ) { shift_1 = m-1 - i; i = -1; } } if(shift_1 == 0) shift_1 = 1; SHIFT_2[0] = 0; } /* monkey4() the approximate monkey move */ char *MEMBER_D = NULL; int monkey4( pat, m, text, textend, D ) register int m, D ; register unsigned char *text, *pat, *textend; { int PRINTED = 0; register unsigned char *oldtext; register unsigned hash, hashmask, suffix_error; register int m1=m-1-D, pos; CHARTYPE *textbegin = text; CHARTYPE *textstart; CHARTYPE *curtextbegin; CHARTYPE *curtextend; CHARTYPE *lastout = text; int newlen; hashmask = Hashmask; oldtext = text ; while (text < textend) { textstart = text; text = text + m1; suffix_error = 0; while(suffix_error <= D) { hash = char_map[*text--]; hash = ((hash << LOG_DNA) + char_map[*(text--)]) & hashmask; while(MEMBER_D[hash]) { hash = ((hash << LOG_DNA) + char_map[*(text--)]) & hashmask; } suffix_error++; } CurrentByteOffset += text - textstart; if(text <= oldtext) { if((pos = verify(m, 2*m+D, D, pat, oldtext)) > 0) { CurrentByteOffset += (oldtext+pos - text); text = oldtext+pos; if(text > textend) return 0; if (TCOMPRESSED == ON) { /* Don't update CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text -m*/; while((curtextend < textend) && (*curtextend != '\n')) 
curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, tc_D_pattern, tc_D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin/*text -m*/, textend, tc_D_pattern, tc_D_length, OUTTAIL); } } else { /* Don't update CurrentByteOffset here: only before outputting properly */ if (!DELIMITER) { curtextbegin = text; while((curtextbegin > textbegin) && (*(--curtextbegin) != '\n')); if (*curtextbegin == '\n') curtextbegin ++; curtextend = curtextbegin /*text -m*/; while((curtextend < textend) && (*curtextend != '\n')) curtextend ++; if (*curtextend == '\n') curtextend ++; } else { curtextbegin = backward_delimiter(text, textbegin, D_pattern, D_length, OUTTAIL); curtextend = forward_delimiter(curtextbegin/*text -m*/, textend, D_pattern, D_length, OUTTAIL); } } textbegin = curtextend; /*(curtextend - 1 > textbegin ? curtextend - 1 : curtextend); */ num_of_matched++; if(FILENAMEONLY) return 0; if(!COUNT) { if (!INVERSE) { if(FNAME && (NEW_FILE || !POST_FILTER)) { char nextchar = (POST_FILTER == ON)?'\n':' '; char *prevstring = (POST_FILTER == ON)?"\n":""; if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s%s", prevstring, CurrentFileName); else { int outindex; if (prevstring[0] != '\0') { if(agrep_outpointer + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else agrep_outbuffer[agrep_outpointer ++] = prevstring[0]; } for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } if (PRINTFILETIME) { char *s = aprint_file_time(CurrentFileTime); if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%s", s); else { int outindex; for(outindex=0; (outindex+agrep_outpointer=agrep_outlen)) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += outindex; } } if (agrep_finalfp != NULL) fprintf(agrep_finalfp, ":%c", nextchar); else { if (agrep_outpointer+2>= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } else { agrep_outbuffer[agrep_outpointer++] = ':'; 
agrep_outbuffer[agrep_outpointer++] = nextchar; } } NEW_FILE = OFF; PRINTED = 1; } if(BYTECOUNT) { if (agrep_finalfp != NULL) fprintf(agrep_finalfp, "%d= ", CurrentByteOffset); else { char s[32]; int outindex; sprintf(s, "%d= ", CurrentByteOffset); for(outindex=0; (outindex+agrep_outpointer 0) { if (agrep_outpointer + newlen + 1 >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } #if MEASURE_TIMES gettimeofday(&finalt, NULL); OUTFILTER_ms += (finalt.tv_sec*1000 + finalt.tv_usec/1000) - (initt.tv_sec*1000 + initt.tv_usec/1000); #endif /*MEASURE_TIMES*/ } else { if (agrep_finalfp != NULL) { fwrite(curtextbegin, 1, curtextend - curtextbegin, agrep_finalfp); } else { if (agrep_outpointer + curtextend - curtextbegin >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, curtextbegin, curtextend-curtextbegin); agrep_outpointer += curtextend - curtextbegin; } } } else if (PRINTED) { if (agrep_finalfp != NULL) fputc('\n', agrep_finalfp); else agrep_outbuffer[agrep_outpointer ++] = '\n'; PRINTED = 0; } } else { /* INVERSE */ if (!SILENT) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, curtextbegin - lastout, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } lastout=textbegin; CurrentByteOffset += textbegin + 1 - text; text = textbegin + 1; } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, curtextbegin-lastout, agrep_finalfp); else { if (curtextbegin - lastout + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, curtextbegin-lastout); agrep_outpointer += 
(curtextbegin - lastout); } lastout=textbegin; CurrentByteOffset += textbegin + 1 - text; text = textbegin + 1; } /* TCOMPRESSED */ } /* !SILENT */ } /* INVERSE */ } else { /* COUNT */ CurrentByteOffset += textbegin + 1 - text; text = textbegin + 1 ; } if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) || ((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) return 0; /* done */ } else { CurrentByteOffset += (oldtext + m - text); text = oldtext + m; } } oldtext = text; } if (!SILENT && INVERSE && !COUNT && (lastout <= textend)) { if (TCOMPRESSED == ON) { /* INVERSE: Don't care about filtering time */ if (agrep_finalfp != NULL) newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_finalfp, -1, EASYSEARCH); else { if ((newlen = quick_tuncompress(FREQ_FILE, STRING_FILE, lastout, textend - lastout + 1, agrep_outbuffer, agrep_outlen - agrep_outpointer, EASYSEARCH)) > 0) { if (newlen + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } agrep_outpointer += newlen; } } } else { /* NOT TCOMPRESSED */ if (agrep_finalfp != NULL) fwrite(lastout, 1, textend-lastout + 1, agrep_finalfp); else { if (textend - lastout + 1 + agrep_outpointer >= agrep_outlen) { OUTPUT_OVERFLOW; return -1; } memcpy(agrep_outbuffer+agrep_outpointer, lastout, textend-lastout + 1); agrep_outpointer += (textend - lastout + 1); } } /* TCOMPRESSED */ } return 0; } static void prep4(Pattern, m) char *Pattern; int m; { int i, j, k; unsigned hash; for(i=0; i< MAXSYM; i++) char_map[i] = 0; char_map['a'] = char_map['A'] = 4; char_map['g'] = char_map['g'] = 1; char_map['t'] = char_map['t'] = 2; char_map['c'] = char_map['c'] = 3; char_map['n'] = char_map['n'] = 5; BSize = blog(4, m); for (i = 1, Hashmask = 1 ; i<(int)(BSize*LOG_DNA); i++) Hashmask = (Hashmask << 1) + 1 ; if (MEMBER_D != NULL) free(MEMBER_D); MEMBER_D = (char *) malloc((Hashmask+1) * sizeof(char)); #ifdef DEBUG printf("BSize = %d", BSize); #endif for (i=0; i <= 
Hashmask; i++) MEMBER_D[i] = 0; for (j=0; j < (int)BSize; j++) { for(i=m-1; i >= j; i--) { hash = 0; for(k=0; k <= j; k++) hash = (hash << LOG_DNA) +char_map[Pattern[i-k]]; #ifdef DEBUG printf("< %d >, ", hash); #endif MEMBER_D[hash] = 1; } } } int blog(base, m ) int base, m; { int i, exp; exp = base; m = m + m/2; for (i = 1; exp < m; i++) exp = exp * base; return(i); } agrep-4.17/utilities.c0100644001123100001460000000742107010116362013076 0ustar friurz/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */ /* this file contains various utility functions for accessing and manipulating regular expression syntax trees. */ #include #include #include "re.h" /************************************************************************/ /* */ /* the following routines implement an abstract data type "stack". */ /* */ /************************************************************************/ Stack Push(s, v) Stack *s; Re_node v; { Stack node; new_node(Stack, node, node); if (s == NULL || node == NULL) return NULL; /* can't allocate */ node->next = *s; node->val = v; if (*s == NULL) node->size = 1; else node->size = (*s)->size + 1; *s = node; return *s; } Re_node Pop(s) Stack *s; { Re_node node; Stack temp; if (s == NULL || *s == NULL) return NULL; else { temp = *s; node = (*s)->val; *s = (*s)->next; free(temp); return node; } } Re_node Top(s) Stack s; { if (s == NULL) return NULL; else return s->val; } int Size(s) Stack s; { if (s == NULL) return 0; else return s->size; } /************************************************************************/ /* */ /* the following routines manipulate sets of positions. */ /* */ /************************************************************************/ int occurs_in(n, p) int n; Pset p; { while (p != NULL) if (n == p->posnum) return 1; else p = p->nextpos; return 0; } /* pset_union() takes two position-sets and returns their union. 
*/ Pset pset_union(s1, s2, dontreplicate) Pset s1, s2; int dontreplicate; { Pset hd, curr, new = NULL; Pset replicas2 = NULL, temps2 = s2; /* code added: 26/Aug/96 */ /* Code added on 26/Aug/96 */ if (dontreplicate) replicas2 = s2; else while (temps2 != NULL) { new_node(Pset, new, new); if (new == NULL) return NULL; new->posnum = temps2->posnum; if (replicas2 == NULL) replicas2 = new; else curr->nextpos = new; curr = new; temps2 = temps2->nextpos; } hd = NULL; curr = NULL; while (s1 != NULL) { if (!occurs_in(s1->posnum, s2)) { new_node(Pset, new, new); if (new == NULL) return NULL; new->posnum = s1->posnum; if (hd == NULL) hd = new; else curr->nextpos = new; } curr = new; s1 = s1->nextpos; } if (hd == NULL) hd = replicas2; /* changed from s2: 26/Aug/96 */ else curr->nextpos = replicas2; /* changed from s2: 26/Aug/96 */ return hd; } /* create_pos() creates a position node with the position value given, then returns a pointer to this node. */ Pset create_pos(n) int n; { Pset x; new_node(Pset, x, x); if (x == NULL) return NULL; x->posnum = n; x->nextpos = NULL; return x; } /* eq_pset() takes two position sets and checks to see if they are equal. It returns 1 if the sets are equal, 0 if they are not. 
*/ int subset_pset(s1, s2) Pset s1, s2; { int subs = 1; while (s1 != NULL && subs != 0) { subs = 0; while (s2 != NULL && subs != 1) if (s1->posnum == s2->posnum) subs = 1; else s2 = s2->nextpos; s1 = s1->nextpos; } return subs; } int eq_pset(s1, s2) Pset s1, s2; { return subset_pset(s1, s2) && subset_pset(s2, s1); } int word_exists(word, wordlen, line, linelen) unsigned char *word, *line; int wordlen, linelen; { unsigned char oldchar, *lineend = line+linelen; int i; i = 0; while(line #include "agrep.h" #include extern int D; extern int FILENAMEONLY, APPROX, PAT_FILE, PAT_BUFFER, MULTI_OUTPUT, COUNT, INVERSE, BESTMATCH; extern FILEOUT; extern REGEX; extern DELIMITER; extern WHOLELINE; extern LINENUM; extern I, S, DD; extern JUMP; extern char Progname[MAXNAME]; extern int agrep_initialfd; extern int EXITONERROR; extern int errno; int compat() { if(BESTMATCH) if(COUNT || FILENAMEONLY || APPROX || PAT_FILE) { BESTMATCH = 0; fprintf(stderr, "%s: -B option ignored when -c, -l, -f, or -# is on\n", Progname); } if (COUNT && LINENUM) { LINENUM = 0; fprintf(stderr, "%s: -n option ignored with -c\n", Progname); } if(PAT_FILE || PAT_BUFFER) { if(APPROX && (D > 0)) { fprintf(stderr, "%s: approximate matching is not supported with -f option\n", Progname); } /* if(INVERSE) { fprintf(stderr, "%s: -f and -v are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } */ if(LINENUM) { fprintf(stderr, "%s: -f and -n are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } /* if(DELIMITER) { fprintf(stderr, "%s: -f and -d are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } */ } if (MULTI_OUTPUT && LINENUM) { fprintf(stderr, "%s: -M and -n are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } if(JUMP) { if(REGEX) { fprintf(stderr, "%s: -D#, -I#, or -S# option is ignored for regular expression 
pattern\n", Progname); JUMP = 0; } if(I == 0 || S == 0 || DD == 0) { fprintf(stderr, "%s: the error cost cannot be 0\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } } if(DELIMITER) { if(WHOLELINE) { fprintf(stderr, "%s: -d and -x are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } } if (INVERSE && (PAT_FILE || PAT_BUFFER) && MULTI_OUTPUT) { fprintf(stderr, "%s: -v and -M are not compatible\n", Progname); if (!EXITONERROR) { errno = AGREP_ERROR; return -1; } else exit(2); } return 0; } agrep-4.17/compat.c.rej0100644001123100001460000000211607562357504013142 0ustar friurz*************** *** 20,28 **** int compat() { - if(BESTMATCH) if(COUNT || FILENAMEONLY || APPROX || PAT_FILE) { - BESTMATCH = 0; - fprintf(stderr, "%s: -B option ignored when -c, -l, -f, or -# is on\n", Progname); } if (COUNT && LINENUM) { LINENUM = 0; --- 20,36 ---- int compat() { + if (BESTMATCH) { + if (COUNT || FILENAMEONLY || APPROX || PAT_FILE) { + BESTMATCH = 0; + fprintf(stderr, "%s: -B option ignored when -c, -l, -f, or -# is on\n", Progname); + } + if (LINENUM) { + BESTMATCH = 0; + fprintf(stderr, "%s: -B option turned off when -n is on\n", Progname); + /* Currently, the BESTMATCH option disables -n but there doesn't seem to be a reason for it. + * Compat.c modified while testing continues 10-26-2002 KAM */ + } } if (COUNT && LINENUM) { LINENUM = 0; agrep-4.17/autoconf.h0100644001123100001460000001530307742743434012726 0ustar friurz/* libtemplate/include/autoconf.h. Generated automatically by configure. */ /* ** libtemplate/include/autoconf.h.in. */ /* ------------------------------------------------- SYSUH - begin update There are too many compiler DEFS. This will be a problem on some systems because the length limit of the comand line. Therefore, I moved the compiler DEFS here so that the make output is more readable. The following definitions will be automatically updated by configure. 
----------------------------------------------------------------------*/ #ifndef _AUTOCONF_H_ #define _AUTOCONF_H_ #define HAVE_DIRENT_H 1 #define HAVE_FCNTL_H 1 #define HAVE_SYS_FILE_H 1 #define HAVE_SYS_TIME_H 1 #define HAVE_UNISTD_H 1 #define HAVE_SYS_SELECT_H 1 #define HAVE_SYS_DIR_H 1 #define TIME_WITH_SYS_TIME 1 #define HAVE_UTIME_NULL 1 #define HAVE_STRDUP 1 #define HAVE_STRERROR 1 #define HAVE_LIBM 1 #define STRUCTURED_QUERIES 0 #define ISO_CHAR_SET 1 #define SFS_COMPAT 0 #define AGREP_POINTER 1 #define FILE_END_MARK '\t' #define RETSIGTYPE void /* ----------------------------------------- SYSUH - end update. ---------------------------------------------------- */ /* Define if on AIX 3. System headers sometimes define this. We just want to avoid a redefinition error message. */ #ifndef _ALL_SOURCE /* #undef _ALL_SOURCE */ #endif /* Define if using alloca.c. */ /* #undef C_ALLOCA */ /* Define to empty if the keyword does not work. */ /* #undef const */ /* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems. This function is required for alloca.c support on those systems. */ /* #undef CRAY_STACKSEG_END */ /* Define to the type of elements in the array set by `getgroups'. Usually this is either `int' or `gid_t'. */ /* #undef GETGROUPS_T */ /* Define to `int' if <sys/types.h> doesn't define. */ /* #undef gid_t */ /* Define if you have alloca.h and it should be used (not Ultrix). */ /* #undef HAVE_ALLOCA_H */ /* Define if you support file names longer than 14 characters. */ /* #undef HAVE_LONG_FILE_NAMES */ /* Define if your struct stat has st_blksize. */ /* #undef HAVE_ST_BLKSIZE */ /* Define if on MINIX. */ /* #undef _MINIX */ /* Define if you don't have dirent.h, but have ndir.h. */ /* #undef NDIR */ /* Define to `long' if <sys/types.h> doesn't define. */ /* #undef off_t */ /* Define if the system does not provide POSIX.1 features except with this defined. */ /* #undef _POSIX_1_SOURCE */ /* Define if you need to in order for stat and other things to work.
*/ /* #undef _POSIX_SOURCE */ /* Define as the return type of signal handlers (int or void). */ #define RETSIGTYPE void /* Define if the setvbuf function takes the buffering type as its second argument and the buffer pointer as the third, as on System V before release 3. */ /* #undef SETVBUF_REVERSED */ /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be automatically deduced at run-time. STACK_DIRECTION > 0 => grows toward higher addresses STACK_DIRECTION < 0 => grows toward lower addresses STACK_DIRECTION = 0 => direction of growth unknown */ /* #undef STACK_DIRECTION */ /* Define if the `S_IS*' macros in <sys/stat.h> do not work properly. */ /* #undef STAT_MACROS_BROKEN */ /* Define if you have the ANSI C header files. */ #define STDC_HEADERS 1 /* Define if you don't have dirent.h, but have sys/dir.h. */ /* #undef SYSDIR */ /* Define if you don't have dirent.h, but have sys/ndir.h. */ /* #undef SYSNDIR */ /* Define to `int' if <sys/types.h> doesn't define. */ /* #undef uid_t */ /* Define if the closedir function returns void instead of int. */ /* #undef VOID_CLOSEDIR */ /* Define if your processor stores words with the most significant byte first (like Motorola and SPARC, unlike Intel and VAX). */ /* #undef WORDS_BIGENDIAN */ /* The number of bytes in a int. */ /* #undef SIZEOF_INT */ /* The number of bytes in a long. */ /* #undef SIZEOF_LONG */ /* Define if you have bcopy. */ /* #undef HAVE_BCOPY */ /* Define if you have bzero. */ /* #undef HAVE_BZERO */ /* Define if you have flock. */ /* #undef HAVE_FLOCK */ /* Define if you have fsync. */ /* #undef HAVE_FSYNC */ /* Define if you have ftruncate. */ /* #undef HAVE_FTRUNCATE */ /* Define if you have getcwd. */ /* #undef HAVE_GETCWD */ /* Define if you have getdtablesize. */ /* #undef HAVE_GETDTABLESIZE */ /* Define if you have lrand48. */ /* #undef HAVE_LRAND48 */ /* Define if you have memmove. */ /* #undef HAVE_MEMMOVE */ /* Define if you have mktime.
*/ /* #undef HAVE_MKTIME */ /* Define if you have nice. */ /* #undef HAVE_NICE */ /* Define if you have on_exit. */ /* #undef HAVE_ON_EXIT */ /* Define if you have random. */ /* #undef HAVE_RANDOM */ /* Define if you have rename. */ /* #undef HAVE_RENAME */ /* Define if you have setlinebuf. */ /* #undef HAVE_SETLINEBUF */ /* Define if you have setrlimit. */ /* #undef HAVE_SETRLIMIT */ /* Define if you have srand48. */ /* #undef HAVE_SRAND48 */ /* Define if you have srandom. */ /* #undef HAVE_SRANDOM */ /* Define if you have sysconf. */ /* #undef HAVE_SYSCONF */ /* Define if you have timegm. */ /* #undef HAVE_TIMEGM */ /* Define if you have usleep. */ /* #undef HAVE_USLEEP */ /* Define if you have vfork. */ /* #undef HAVE_VFORK */ /* Define if you have the <arpa/inet.h> header file. */ /* #undef HAVE_ARPA_INET_H */ /* Define if you have the <config.h> header file. */ /* #undef HAVE_CONFIG_H */ /* Define if you have the <memory.h> header file. */ /* #undef HAVE_MEMORY_H */ /* Define if you have the <netinet/in.h> header file. */ /* #undef HAVE_NETINET_IN_H */ /* Define if you have the <stdlib.h> header file. */ /* #undef HAVE_STDLIB_H */ /* Define if you have the <string.h> header file. */ /* #undef HAVE_STRING_H */ /* Define if you have the <sys/syslog.h> header file. */ /* #undef HAVE_SYS_SYSLOG_H */ /* Define if you have the <sys/types.h> header file. */ /* #undef HAVE_SYS_TYPES_H */ /* Define if you have the <syslog.h> header file. */ /* #undef HAVE_SYSLOG_H */ /* Define if you have the dbm library (-ldbm). */ /* #undef HAVE_LIBDBM */ /* Define if you have the fl library (-lfl). */ /* #undef HAVE_LIBFL */ /* Define if you have the malloc library (-lmalloc). */ /* #undef HAVE_LIBMALLOC */ /* Define if you have the ndbm library (-lndbm). */ /* #undef HAVE_LIBNDBM */ /* Define if you have the nsl library (-lnsl). */ /* #undef HAVE_LIBNSL */ /* Define if you have the resolv library (-lresolv). */ /* #undef HAVE_LIBRESOLV */ /* Define if you have the seq library (-lseq). */ /* #undef HAVE_LIBSEQ */ /* Define if you have the socket library (-lsocket).
*/ /* #undef HAVE_LIBSOCKET */ #endif /* _AUTOCONF_H_ */ agrep-4.17/Makefile.lnx0100644001123100001460000001065307742743711013200 0ustar friurz# Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. # You might have to change these depending on your machine configuration. # AR and RANLIB are the library-archive programs. On Solaris, RANLIB is not # required (define it to true) and AR is in /usr/ccs/bin/ar (on our machine!). AR = ar #/usr/ccs/bin/ar #for Solaris RANLIB = ranlib #true #for Solaris # Define HAVE_DIRENT_H to be 1 when you don't have else define it to be 0 (in this case, one of the other 3 flags may need to be defined to be 1). #HAVE_DIRENT_H = 1 #HAVE_SYS_DIR_H = 0 #HAVE_SYS_NDIR_H = 0 #HAVE_NDIR_H = 0 # Define UTIME to be 1 if you have the utime() routine on your system. Else define it to be 0. UTIME = 1 # Define ISO_CHAR_SET to be 1 if you want to use the international 8bit character set. Else define it to be 0. ISO_CHAR_SET = 1 # You might have to change this depending on your machine configuration. CC = gcc -march=i486 SHELL = /bin/sh # YOU DON'T HAVE TO CHANGE ANYTHING BELOW THIS LINE # The binaries will be made in ../bin/. and the agrep library in ../lib # You normally don't have to change them. BINDIR = ../bin LIBDIR = . TCOMP = cast TCOMPDIR = ../compress AGREPDIR = . # TEMPLATEDIR = ../libtemplate # You can change the target to use the "cast" (compression) library by changing: # all: $(NOTCPROG) # to: # all: $(PROG) # You must also define DOTCOMPRESSED below to be 1 instead of 0. 
DOTCOMPRESSED = 0 # Include flags is not a part of CLFAGS and LINKFLAGS since path names from subdirs can be different OPTIMIZEFLAGS = -O2 #PROFILEFLAGS = -p #DEBUGFLAGS = -g -DBG_DEBUG=1 -DDEBUG=1 INCLUDEFLAGS = -I$(AGREPDIR) DEFINEFLAGS = -DHAVE_DIRENT_H=$(HAVE_DIRENT_H) -DHAVE_SYS_DIR_H=$(HAVE_SYS_DIR_H) -DHAVE_SYS_NDIR_H=$(HAVE_SYS_NDIR_H) -DHAVE_NDIR_H=$(HAVE_NDIR_H) \ -DUTIME=$(UTIME) -DISO_CHAR_SET=$(ISO_CHAR_SET) SUBDIRCFLAGS = -c $(DEFINEFLAGS) $(OPTIMIZEFLAGS) $(PROFILEFLAGS) $(DEBUGFLAGS) MYDEFINEFLAGS = -DMEASURE_TIMES=0 -DAGREP_POINTER=1 -DDOTCOMPRESSED=$(DOTCOMPRESSED) CFLAGS = $(MYDEFINEFLAGS) $(INCLUDEFLAGS) $(SUBDIRCFLAGS) SUBDIRLINKFLAGS = $(PROFILEFLAGS) LINKFLAGS = $(INCLUDEFLAGS) $(SUBDIRLINKFLAGS) OTHERLIBS = PROG = agrep NOTCPROG = notc$(PROG) all: $(NOTCPROG) # cp $(PROG) $(BINDIR)/. LIB = $(LIBDIR)/lib$(PROG).a HDRS = agrep.h checkfile.h re.h defs.h config.h TCOMPLIBOBJ = \ $(TCOMPDIR)/hash.o \ $(TCOMPDIR)/string.o \ $(TCOMPDIR)/misc.o \ $(TCOMPDIR)/quick.o \ $(TCOMPDIR)/cast.o \ $(TCOMPDIR)/uncast.o \ $(TCOMPDIR)/tsimpletest.o \ $(TCOMPDIR)/tbuild.o\ $(TCOMPDIR)/tmemlook.o OBJS = \ follow.o \ asearch.o \ asearch1.o \ agrep.o \ bitap.o \ checkfile.o \ compat.o \ maskgen.o \ parse.o \ checksg.o \ preprocess.o \ delim.o \ asplit.o \ recursive.o \ sgrep.o \ newmgrep.o \ utilities.o $(PROG): $(OBJS) main.o $(LIBDIR)/lib$(TCOMP).a $(CC) -L$(LIBDIR) $(LINKFLAGS) -o $@ $(OBJS) main.o -l$(TCOMP) $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) $(TCOMPLIBOBJ) $(RANLIB) $(LIB) $(LIBDIR)/lib$(TCOMP).a: cd $(TCOMPDIR) ; $(MAKE) -f Makefile.linux CC="$(CC)" SUBDIRCFLAGS="$(SUBDIRCFLAGS)" SUBDIRLINKFLAGS="$(SUBDIRLINKFLAGS)" SHELL="$(SHELL)" HAVE_DIRENT_H="$(HAVE_DIRENT_H)" HAVE_SYS_DIR_H="$(HAVE_SYS_DIR_H)" HAVE_SYS_NDIR_H="$(HAVE_SYS_NDIR_H)" HAVE_NDIR_H="$(HAVE_NDIR_H)" UTIME="$(UTIME)" STRUCTURED_QUERIES="$(STRUCTURED_QUERIES)" ISO_CHAR_SET="$(ISO_CHAR_SET)" SFS_COMPAT="$(SFS_COMPAT)" $(NOTCPROG): $(OBJS) dummyfilters.o main.o $(CC) $(LINKFLAGS) -o $(PROG) 
$(OBJS) dummyfilters.o main.o $(OTHERLIBS) $(AR) rcv $(LIB) $(OBJS) dummyfilters.o $(RANLIB) $(LIB) clean: -rm -f $(LIB) $(OBJS) dummyfilters.o main.o core a.out $(PROG) compat.o: agrep.h defs.h config.h asearch.o: agrep.h defs.h config.h asearch1.o: agrep.h defs.h config.h bitap.o: agrep.h defs.h config.h checkfile.o: agrep.h checkfile.h defs.h config.h follow.o: re.h agrep.h defs.h config.h main.o: agrep.h checkfile.h defs.h config.h dummysyscalls.c agrep.o: agrep.h checkfile.h defs.h config.h newmgrep.o: agrep.h defs.h config.h maskgen.o: agrep.h defs.h config.h next.o: agrep.h defs.h config.h parse.o: re.h agrep.h defs.h config.h preprocess.o: agrep.h defs.h config.h checksg.o: agrep.h checkfile.h defs.h config.h delim.o: agrep.h defs.h config.h asplit.o: agrep.h defs.h config.h sgrep.o: agrep.h defs.h config.h abm.o: agrep.h defs.h config.h utilities.o: re.h agrep.h defs.h config.h dummyfilters.o: dummyfilters.c agrep-4.17/Makefile0100644001123100001460000001042007742744255012374 0ustar friurz# Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. # You might have to change these depending on your machine configuration. # AR and RANLIB are the library-archive programs. On Solaris, RANLIB is not # required (define it to true) and AR is in /usr/ccs/bin/ar (on our machine!). AR = ar #/usr/ccs/bin/ar #for Solaris RANLIB = ranlib #true #for Solaris # Define HAVE_DIRENT_H to be 1 when you don't have else define it to be 0 (in this case, one of the other 3 flags may need to be defined to be 1). #HAVE_DIRENT_H = 1 #HAVE_SYS_DIR_H = 0 #HAVE_SYS_NDIR_H = 0 #HAVE_NDIR_H = 0 # Define UTIME to be 1 if you have the utime() routine on your system. Else define it to be 0. UTIME = 1 # Define ISO_CHAR_SET to be 1 if you want to use the international 8bit character set. Else define it to be 0. ISO_CHAR_SET = 1 # You might have to change this depending on your machine configuration. 
# C compiler and the shell used to run recipes.
# NOTE(review): -march=i486 pins code generation to 32-bit x86; override on
# other targets with e.g. `make CC=gcc`.
CC              = gcc -march=i486
SHELL           = /bin/sh

# YOU DON'T HAVE TO CHANGE ANYTHING BELOW THIS LINE

# Directory layout.  With the settings below the agrep library is archived in
# the current directory (LIBDIR); the executable is linked in the current
# directory as well and is only copied to BINDIR if the cp line under `all'
# is re-enabled.  You normally don't have to change them.
BINDIR          = ../bin
LIBDIR          = .
TCOMP           = cast
TCOMPDIR        = ../compress
AGREPDIR        = .
# TEMPLATEDIR   = ../libtemplate

# You can change the target to use the "cast" (compression) library by changing:
#	all: $(NOTCPROG)
# to:
#	all: $(PROG)
# You must also define DOTCOMPRESSED below to be 1 instead of 0.
DOTCOMPRESSED   = 0

# Include flags are kept out of SUBDIRCFLAGS and SUBDIRLINKFLAGS because path
# names are different when seen from a subdirectory.
OPTIMIZEFLAGS   = -O2
#PROFILEFLAGS   = -p
#DEBUGFLAGS     = -g -DBG_DEBUG=1 -DDEBUG=1
INCLUDEFLAGS    = -I$(AGREPDIR)
# Feature switches shared with the sub-make in TCOMPDIR via SUBDIRCFLAGS.
# The HAVE_*_H switches are not listed because this Makefile leaves those
# variables unset.
DEFINEFLAGS     = -DUTIME=$(UTIME) -DISO_CHAR_SET=$(ISO_CHAR_SET)
SUBDIRCFLAGS    = -c $(DEFINEFLAGS) $(OPTIMIZEFLAGS) $(PROFILEFLAGS) $(DEBUGFLAGS)
MYDEFINEFLAGS   = -DMEASURE_TIMES=0 -DAGREP_POINTER=1 -DDOTCOMPRESSED=$(DOTCOMPRESSED)
CFLAGS          = $(MYDEFINEFLAGS) $(INCLUDEFLAGS) $(SUBDIRCFLAGS)
SUBDIRLINKFLAGS = $(PROFILEFLAGS)
LINKFLAGS       = $(INCLUDEFLAGS) $(SUBDIRLINKFLAGS)
OTHERLIBS       =

# PROG is the executable that gets linked; NOTCPROG is only the name of the
# rule that links it without the compression library.
PROG            = agrep
NOTCPROG        = notc$(PROG)

# `all' names a command, not a file.
.PHONY: all

# Default goal: build agrep without the "cast" library.
all: $(NOTCPROG)
# Installation into BINDIR is disabled in this Makefile:
#	cp $(PROG) $(BINDIR)/.

# Static library that bundles the agrep objects.
LIB             = $(LIBDIR)/lib$(PROG).a

# Project headers (kept for reference; the per-object dependency lists at the
# bottom of this file spell their headers out explicitly).
HDRS            = agrep.h checkfile.h re.h defs.h config.h

# Objects produced by the sub-make in TCOMPDIR; they are added to LIB when
# agrep is built with the compression library.
TCOMPLIBOBJ     = \
		$(TCOMPDIR)/hash.o \
		$(TCOMPDIR)/string.o \
		$(TCOMPDIR)/misc.o \
		$(TCOMPDIR)/quick.o \
		$(TCOMPDIR)/cast.o \
		$(TCOMPDIR)/uncast.o \
		$(TCOMPDIR)/tsimpletest.o \
		$(TCOMPDIR)/tbuild.o \
		$(TCOMPDIR)/tmemlook.o

# Objects that make up agrep itself (main.o is linked separately so that it
# stays out of the library).
OBJS            = \
		follow.o \
		asearch.o \
		asearch1.o \
		agrep.o \
		bitap.o \
		checkfile.o \
		compat.o \
		maskgen.o \
		parse.o \
		checksg.o \
		preprocess.o \
		delim.o \
		asplit.o \
		recursive.o \
		sgrep.o \
		newmgrep.o \
		utilities.o

# These targets never create a file of their own name: `clean' is a command
# and the $(NOTCPROG) rule writes $(PROG) and $(LIB) instead.
.PHONY: clean $(NOTCPROG)

# agrep linked against the compression library, plus the combined archive.
$(PROG): $(OBJS) main.o $(LIBDIR)/lib$(TCOMP).a
	$(CC) -L$(LIBDIR) $(LINKFLAGS) -o $@ $(OBJS) main.o -l$(TCOMP) $(OTHERLIBS)
	$(AR) rcv $(LIB) $(OBJS) $(TCOMPLIBOBJ)
	$(RANLIB) $(LIB)

# Build the compression library through its own Makefile.  `&&' makes sure the
# sub-make is not started in the wrong directory when the cd fails.
$(LIBDIR)/lib$(TCOMP).a:
	cd $(TCOMPDIR) && $(MAKE) -f Makefile.linux \
		CC="$(CC)" \
		SUBDIRCFLAGS="$(SUBDIRCFLAGS)" \
		SUBDIRLINKFLAGS="$(SUBDIRLINKFLAGS)" \
		SHELL="$(SHELL)" \
		HAVE_DIRENT_H="$(HAVE_DIRENT_H)" \
		HAVE_SYS_DIR_H="$(HAVE_SYS_DIR_H)" \
		HAVE_SYS_NDIR_H="$(HAVE_SYS_NDIR_H)" \
		HAVE_NDIR_H="$(HAVE_NDIR_H)" \
		UTIME="$(UTIME)" \
		STRUCTURED_QUERIES="$(STRUCTURED_QUERIES)" \
		ISO_CHAR_SET="$(ISO_CHAR_SET)" \
		SFS_COMPAT="$(SFS_COMPAT)"

# agrep without the compression library: dummyfilters.o is linked in place of
# -l$(TCOMP).  The executable is written to $(PROG), not to $(NOTCPROG).
$(NOTCPROG): $(OBJS) dummyfilters.o main.o
	$(CC) $(LINKFLAGS) -o $(PROG) $(OBJS) dummyfilters.o main.o $(OTHERLIBS)
	$(AR) rcv $(LIB) $(OBJS) dummyfilters.o
	$(RANLIB) $(LIB)

# Remove everything the rules above create in this directory.
clean:
	-rm -f $(LIB) $(OBJS) dummyfilters.o main.o core a.out $(PROG)

# Header dependencies.  next.o and abm.o are not part of OBJS; their entries
# only matter if those objects are built by hand.
compat.o:       agrep.h defs.h config.h
asearch.o:      agrep.h defs.h config.h
asearch1.o:     agrep.h defs.h config.h
bitap.o:        agrep.h defs.h config.h
checkfile.o:    agrep.h checkfile.h defs.h config.h
follow.o:       re.h agrep.h defs.h config.h
main.o:         agrep.h checkfile.h defs.h config.h dummysyscalls.c
agrep.o:        agrep.h checkfile.h defs.h config.h
newmgrep.o:     agrep.h defs.h config.h
maskgen.o:      agrep.h defs.h config.h
next.o:         agrep.h defs.h config.h
parse.o:        re.h agrep.h defs.h config.h
preprocess.o:   agrep.h defs.h config.h
checksg.o:      agrep.h checkfile.h defs.h config.h
delim.o:        agrep.h defs.h config.h
asplit.o:       agrep.h defs.h config.h
sgrep.o:        agrep.h defs.h config.h
abm.o:          agrep.h defs.h config.h
utilities.o:    re.h agrep.h defs.h config.h
dummyfilters.o: dummyfilters.c