spp/0000755000176200001440000000000013473754204011065 5ustar liggesusersspp/configure.ac0000755000176200001440000000211113471426636013354 0ustar liggesusersAC_INIT([SPP], [1.7], [Peter_Kharchenko@hms.harvard.edu]) AC_CHECK_LIB(bz2, BZ2_bzDecompressInit) AC_SUBST(HAVE_LIBBZ2) # find R and set CC/CFLAGS : ${R_HOME=`R RHOME`} if test -z "${R_HOME}"; then echo "could not determine R_HOME" exit 1 fi CC=`"${R_HOME}/bin/R" CMD config CC`; CXX11=`"${R_HOME}/bin/R" CMD config CXX11` CXX11STD=`"${R_HOME}/bin/R" CMD config CXX11STD` CXX="${CXX11} ${CXX11STD}" CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXX11FLAGS` AC_LANG(C++) CPPFLAGS="${CPPFLAGS} ${PKG_CPPFLAGS}" CXXFLAGS="${CXXFLAGS} ${PKG_CXXFLAGS}" CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS` CFLAGS="${CFLAGS} ${PKG_CFLAGS}" RINC=`${R_HOME}/bin/R CMD config --cppflags` RCPP_CXX=`${R_HOME}/bin/Rscript -e "Rcpp:::CxxFlags()"` CPPFLAGS="${CPPFLAGS} ${RCPP_CXX}" RCPP_LIBS=`${R_HOME}/bin/Rscript -e "Rcpp:::LdFlags()"` LIBS="${LIBS} ${PKG_LIBS} ${RCPP_LIBS}" AC_SUBST(R_HOME) AC_SUBST(RINC) AC_SUBST(RLD) AC_ARG_VAR([PKG_CPPFLAGS],[additional pre-processor flags]) AC_ARG_VAR([PKG_LIBS],[additional linker library flags]) AC_CONFIG_FILES([src/Makevars]) cp confdefs.h src/config.h AC_OUTPUT spp/src/0000755000176200001440000000000013473022026011642 5ustar liggesusersspp/src/pc.h0000644000176200001440000000015313471426636012431 0ustar liggesusers#ifndef PC_H #define PC_H 1 #include #include #include #endif spp/src/maqread.cpp0000644000176200001440000001165713471426636014007 0ustar liggesusers#include "pc.h" #include #include #include #include #include #include #include #include #include #include extern "C" { //#include "R.h" //#include "Rmath.h" #include "Rinternals.h" #include "Rdefines.h" #include "maqmap.h" } using namespace std; class lessAbsoluteValue { public: bool operator()(int a, int b) const { return abs(a) < abs(b); } }; //#define DEBUG 1 extern "C" { // read in text version of maq map SEXP read_binmaqmap(SEXP filename,SEXP read_tag_names_R) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); int read_names=*(INTEGER(read_tag_names_R)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches vector< vector > tagnames; // chromosome map unordered_map,equal_to > cind_map; vector cnames; gzFile f=gzopen(fname,"r"); maqmap_t *m = maqmap_read_header(f); maqmap1_t *m1, mm1; m1 = &mm1; if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } else { Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; while(maqmap_read1(f, m1)) { string tagname=string(m1->name); string chr=string(m->ref_name[m1->seqid]); int len=m1->size; int fpos=(m1->pos>>1) + 1; if(m1->pos&1) { fpos=-1*(fpos+len-1); } int nm=m1->info1&0xf; #ifdef DEBUG Rprintf("read in map line chr=%s tagname=%s fpos=%d, nm=%d, len=%d\n",chr.c_str(),tagname.c_str(),fpos,nm,len); #endif // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); if(read_names) { tagnames.push_back(vector()); } #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); if(read_names) { (tagnames[cind]).push_back(tagname); } #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len); if(fcount>30) { break; } #endif } gzclose(f); Rprintf("done. read %d fragments\n",fcount); } // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi; vector >::const_iterator ssi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); if(read_names) { SET_STRING_ELT(dnames_R, 2, mkChar("s")); } SEXP tv,nv,sv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; PROTECT(sv=allocVector(STRSXP,csi->size())); //add declaration after the definition to avoid [-Wmaybe-uninitialized] if(read_names) { PROTECT(sv=allocVector(STRSXP,csi->size())); np++; } int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int i=0; vector::const_iterator ini=nsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i++; } if(read_names) { int i=0; ssi=tagnames.begin()+(csi-pos.begin()); for(vector::const_iterator si=ssi->begin();si!=ssi->end();++si) { SET_STRING_ELT(sv,i,mkChar(si->c_str())); i++; } } PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); if(read_names) { SET_VECTOR_ELT(dv, 2, sv); } setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } } spp/src/maqmap.c0000644000176200001440000001072513471426636013304 0ustar liggesusers#include #include #include #include #include "const.h" #include "maqmap.h" //#include "R.h" //#include "Rmath.h" #include "Rinternals.h" #include "Rdefines.h" maqmap_t *maq_new_maqmap() { maqmap_t *mm = (maqmap_t*)calloc(1, sizeof(maqmap_t)); mm->format = MAQMAP_FORMAT_NEW; return mm; } void maq_delete_maqmap(maqmap_t *mm) { int i; if (mm == 0) return; for (i = 0; i < mm->n_ref; ++i) free(mm->ref_name[i]); free(mm->ref_name); free(mm->mapped_reads); free(mm); } void maqmap_write_header(gzFile fp, const maqmap_t *mm) { int i, len; gzwrite(fp, &mm->format, sizeof(int)); gzwrite(fp, &mm->n_ref, sizeof(int)); for (i = 0; i != mm->n_ref; ++i) { len = strlen(mm->ref_name[i]) + 1; gzwrite(fp, &len, sizeof(int)); gzwrite(fp, mm->ref_name[i], len); } gzwrite(fp, &mm->n_mapped_reads, sizeof(bit64_t)); } maqmap_t *maqmap_read_header(gzFile fp) { maqmap_t *mm; int k, len; mm = maq_new_maqmap(); gzread(fp, &mm->format, sizeof(int)); if (mm->format != MAQMAP_FORMAT_NEW) { if (mm->format > 0) { REprintf("** Obsolete map format is detected. Please use 'mapass2maq' command to convert the format.\n"); return 0; } assert(mm->format == MAQMAP_FORMAT_NEW); } gzread(fp, &mm->n_ref, sizeof(int)); mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*)); for (k = 0; k != mm->n_ref; ++k) { gzread(fp, &len, sizeof(int)); mm->ref_name[k] = (char*)malloc(len * sizeof(char)); gzread(fp, mm->ref_name[k], len); } /* read number of mapped reads */ gzread(fp, &mm->n_mapped_reads, sizeof(bit64_t)); return mm; } /* mapvalidate */ static void mapvalidate_core(gzFile fpin) { maqmap_t *m = maqmap_read_header(fpin); maqmap1_t *m1, mm1; bit64_t n = 0; int i, l; bit64_t *cnt; m1 = &mm1; cnt = (bit64_t*)calloc(m->n_ref, 8); Rprintf("[message] number of reference sequences: %d\n", m->n_ref); while ((l = maqmap_read1(fpin, m1)) != 0) { if (l != sizeof(maqmap1_t)) { REprintf("[fatal error] truncated map file.\n"); break; } ++n; if ((int)m1->seqid >= m->n_ref) { REprintf("[fatal error] maqmap1_t::seqid is invalid (%d >= %d).\n", m1->seqid, m->n_ref); break; } ++cnt[m1->seqid]; if (m1->size >= MAX_READLEN - 1) { REprintf("[faltal error] maqmap1_t::size is invalid (%d >= %d).\n", m1->size, MAX_READLEN - 1); break; } } if (m->n_mapped_reads != 0) { if (m->n_mapped_reads != n) { Rprintf("[warning] maqmap1_t::n_mapped_reads is set, but not equals the real number (%llu != %llu).\n", m->n_mapped_reads, n); } } for (i = 0; i != m->n_ref; ++i) Rprintf("[message] %s : %llu\n", m->ref_name[i], cnt[i]); free(cnt); maq_delete_maqmap(m); } /* mapview */ //static void mapview_core(FILE *fpout, gzFile fpin, int is_verbose, int is_mm) //{ // bit32_t j; // maqmap_t *m = maqmap_read_header(fpin); // maqmap1_t *m1, mm1; // m1 = &mm1; // while (maqmap_read1(fpin, m1)) { // fprintf(fpout, "%s\t%s\t%d\t%c\t%d\t%u\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d", // m1->name, m->ref_name[m1->seqid], (m1->pos>>1) + 1, // (m1->pos&1)? '-' : '+', m1->dist, m1->flag, m1->map_qual, (signed char)m1->seq[MAX_READLEN-1], // m1->alt_qual, m1->info1&0xf, m1->info2, m1->c[0], m1->c[1], m1->size); // if (is_verbose) { // fputc('\t', fpout); // for (j = 0; j != m1->size; ++j) { // if (m1->seq[j] == 0) fputc('n', fpout); // else if ((m1->seq[j]&0x3f) < 27) fputc("acgt"[m1->seq[j]>>6&3], fpout); // else fputc("ACGT"[m1->seq[j]>>6&3], fpout); // } // fputc('\t', fpout); // for (j = 0; j != m1->size; ++j) // fputc((m1->seq[j]&0x3f) + 33, fpout); // } // if (is_mm) { // bit64_t *p = (bit64_t*)(m1->seq + 55); // fprintf(fpout, "\t%llx", *p); // } // fputc('\n', fpout); // } // maq_delete_maqmap(m); //} //int ma_mapview(int argc, char *argv[]) //{ // int c, is_verbose = 1, is_mm = 0; // while ((c = getopt(argc, argv, "bN")) >= 0) { // switch (c) { // case 'b': is_verbose = 0; break; // case 'N': is_mm = 1; break; // } // } // if (argc == optind) { // REprintf("Usage: maq mapview [-bN] \n"); // return 1; // } // gzFile fp = (strcmp(argv[optind], "-") == 0)? gzdopen(STDIN_FILENO, "r") : gzopen(argv[optind], "r"); // mapview_core(stdout, fp, is_verbose, is_mm); // gzclose(fp); // return 0; //} int ma_mapvalidate(int argc, char *argv[]) { gzFile fp; if (argc < 2) { REprintf("Usage: maq mapvalidate \n"); return 1; } fp = (strcmp(argv[optind], "-") == 0)? gzdopen(STDIN_FILENO, "r") : gzopen(argv[1], "r"); mapvalidate_core(fp); gzclose(fp); return 0; } spp/src/peaks.cpp0000644000176200001440000004572213471603613013470 0ustar liggesusers#include #include #include #include #include #include #include extern "C" { //#include "R.h" //#include "Rmath.h" #include "Rinternals.h" #include "Rdefines.h" } using namespace std; /** * Calculate all local peaks */ //#define DEBUG 1 extern "C" { SEXP find_peaks(SEXP x_R,SEXP thr_R,SEXP max_span_R) { #ifdef DEBUG Rprintf("start\n"); #endif double* x=REAL(x_R); int nx=LENGTH(x_R); int max_span=*INTEGER(max_span_R); double thr=REAL(thr_R)[0]; #ifdef DEBUG Rprintf("n=%d; thr=%f; max_span=%d\n",nx,thr,max_span); #endif vector pos; double pv=x[0]; double ppv=0; // previous peak value int ppp=-max_span-1; // previous peak position for(int i=1;i<(nx-1);i++) { if(x[i]>pv && x[i]>=thr && x[i]>x[i+1]) { if(max_span>2) { //Rprintf("i=%d; ppp=%d\n",i,ppp); if(i-ppp > max_span) { if(ppp>=0) { pos.push_back(ppp); } //Rprintf("recorded %d; now %d\n",ppp,i); ppp=i; ppv=x[i]; } else { if(x[i]>ppv) { //Rprintf("reset from %d to %d\n",ppp,i); ppp=i; ppv=x[i]; } } } else { pos.push_back(i); } } if(x[i]!=x[i+1]) { pv=x[i]; } } // add remaining peak if(max_span>2 && ppp>=0) { pos.push_back(ppp); } SEXP nv; PROTECT(nv=allocVector(INTSXP,pos.size())); int* i_nv=INTEGER(nv); int i=0; for(vector ::const_iterator pi=pos.begin();pi!=pos.end();++pi) { i_nv[i++]=1+(*pi); } UNPROTECT(1); return(nv); } /************************************************************************/ // given a data vector d (positive values) and a set of signed center coordinates pos, // returns coordinates of data points relative to the centers // size is the size of the region around the centers // return: vector of relative coordinates (x) and indecies of centers relative the coordinate // was calculated (i). SEXP get_relative_coordinates(SEXP d_R, SEXP pos_R, SEXP size_R) { int *d, *pos; int npos,nd,size; d = INTEGER(d_R); pos = INTEGER(pos_R); npos=LENGTH(pos_R); nd=LENGTH(d_R); size = INTEGER(size_R)[0]; #ifdef DEBUG Rprintf("|d|=%d, |c|=%d, size=%d\n",nd,npos,size); #endif vector x; vector xi; int k=0; // current pos index for(int i=0;i=d[i] while((std::abs(pos[k])+size) < d[i]) { k++; if(k==npos) { break; }; #ifdef DEBUG Rprintf("advancing k to %d\n",k); #endif } if(k==npos) { break; }; // increment i until d[i]>=pos[k]-size while((std::abs(pos[k])-size) > d[i]) { i++; if(i==nd) { break; } #ifdef DEBUG Rprintf("advancing i to %d\n",i); #endif } if(i==nd) { break; } int l=k; while((l0) { x.push_back(pd); } else { x.push_back(-1*pd); } xi.push_back(j); #ifdef DEBUG Rprintf("recorded i=%d, j=%d\n",i,j); #endif } else { break; } } } SEXP xv_R,xiv_R; PROTECT(xv_R=allocVector(INTSXP,x.size())); PROTECT(xiv_R=allocVector(INTSXP,x.size())); int* xv=INTEGER(xv_R); int* xiv=INTEGER(xiv_R); int i=0; for(vector ::const_iterator pi=x.begin();pi!=x.end();++pi) { xv[i++]=*pi; } i=0; for(vector ::const_iterator pi=xi.begin();pi!=xi.end();++pi) { xiv[i++]=1+(*pi); } SEXP ans_R, names_R; PROTECT(names_R = allocVector(STRSXP, 2)); SET_STRING_ELT(names_R, 0, mkChar("x")); SET_STRING_ELT(names_R, 1, mkChar("i")); PROTECT(ans_R = allocVector(VECSXP, 2)); SET_VECTOR_ELT(ans_R, 0, xv_R); SET_VECTOR_ELT(ans_R, 1, xiv_R); setAttrib(ans_R, R_NamesSymbol, names_R); UNPROTECT(4); return(ans_R); } // determines a set of points within a set of fragments // note: all vectors sorted in ascending order // note: all vectors are integers // x_R - vector of point positions // se_R - vector of start and end positions // fi_R - vector of signed fragment indecies // return_list_R - whether a list of fragments should be returned for each point // return_unique_R - whether points in multiple fragments should be omitted SEXP points_withinC(SEXP x_R,SEXP se_R,SEXP fi_R,SEXP return_list_R,SEXP return_unique_R,SEXP return_point_counts_R) { #ifdef DEBUG Rprintf("start\n"); #endif int* x=INTEGER(x_R); int nx=LENGTH(x_R); int* se=INTEGER(se_R); int* fi=INTEGER(fi_R); int nf=LENGTH(se_R); int return_list=*(INTEGER(return_list_R)); int return_unique=*(INTEGER(return_unique_R)); int return_point_counts=*(INTEGER(return_point_counts_R)); #ifdef DEBUG Rprintf("nf=%d; nx=%d, return_list=%d, return_unique=%d, return_point_counts=%d\n",nf/2,nx,return_list,return_unique,return_point_counts); #endif set fset; SEXP nv; int *i_nv; int np=0; if(return_point_counts) { PROTECT(nv = allocVector(INTSXP, nf/2)); np++; i_nv=INTEGER(nv); for(int i=0;i0) { // insert fset.insert(frag); #ifdef DEBUG Rprintf("inserted frag %d, size=%d\n",frag,fset.size()); #endif } else { // remove fset.erase(-frag); #ifdef DEBUG Rprintf("removed frag %d, size=%d\n",-frag,fset.size()); #endif } j++; } #ifdef DEBUG Rprintf("i=%d j=%d\n",i,j); #endif if(return_list) { if(fset.empty() || (return_unique && fset.size()>1)) { // assign null list? } else { SEXP fil_R; PROTECT(fil_R=allocVector(INTSXP,fset.size())); np++; int* fil=INTEGER(fil_R); int k=0; for(set::const_iterator ki=fset.begin();ki!=fset.end();++ki) { fil[k]=*ki; k++; } SET_VECTOR_ELT(nv, i, fil_R); UNPROTECT(1); np--; } } else { if(return_point_counts) { for(set::const_iterator ki=fset.begin();ki!=fset.end();++ki) { i_nv[*ki-1]++; } } else { if(fset.empty() || (return_unique && fset.size()>1)) { i_nv[i]=-1; } else { i_nv[i]=*fset.begin(); } } } } UNPROTECT(np); return(nv); } SEXP expuni_lr(SEXP x_R, // positions and their number (assumed sorted in ascending order) SEXP mdist_R, // max distance at which points should be considered SEXP lambda_R, // lambda value SEXP spos_R, // starting position SEXP epos_R, // ending position SEXP step_R, // step size SEXP return_peaks_R, // whether peak positions should be returned, or entire score vector SEXP min_peak_lr_R // min peak height (lr) ) { #ifdef DEBUG Rprintf("start\n"); #endif int* x=INTEGER(x_R); int nx=LENGTH(x_R); int mdist=INTEGER(mdist_R)[0]; double lambda=*(REAL(lambda_R)); int return_peaks=*(INTEGER(return_peaks_R)); //double min_peak=*(REAL(min_peak_lr_R)); int spos=*(INTEGER(spos_R)); int epos=*(INTEGER(epos_R)); int step=*(INTEGER(step_R)); int nsteps=(int) (epos-spos)/step; #ifdef DEBUG Rprintf("n=%d; lambda=%f; mdist=%d; spos=%d; epos=%d; step=%d; nsteps=%d\n",nx,lambda,mdist,spos,epos,step,nsteps); #endif SEXP nv; double *d_nv; if(!return_peaks) { PROTECT(nv=allocVector(REALSXP,nsteps+1)); d_nv=REAL(nv); } int i=0; // current index of the first point being used in the calculations int j=0; // current index of the last point being used in the calculations int sx=0; // current sum of all positions int n=0; for(int k=0; k<=nsteps; k++) { int cpos=spos+k*step; // increase i until x[i]>=cpos-mdist; remove x from sx; decrement n; while(ij) { j=i; } // increase j until x[j]>cpos while(j dist; for(int i=0;i ::const_iterator pi=dist.begin();pi!=dist.end();++pi) { i_nv[i++]=*pi; } UNPROTECT(1); return(nv); } // same as above, but for two different sets SEXP allxpdist(SEXP x_R,SEXP y_R, SEXP max_dist_R) { #ifdef DEBUG Rprintf("start\n"); #endif double* x=REAL(x_R); double* y=REAL(y_R); int nx=LENGTH(x_R); int ny=LENGTH(y_R); double max_dist=*REAL(max_dist_R); #ifdef DEBUG Rprintf("nx=%d; ny=%d; max_dist=%d\n",nx,ny,max_dist); #endif vector dist; int yi=0; // latest y start index for(int i=0;i=x[i]-max_dist_R while(y[yi]<(x[i]-max_dist) && yi ::const_iterator pi=dist.begin();pi!=dist.end();++pi) { i_nv[i++]=*pi; } UNPROTECT(1); return(nv); } // returns a vector giving for each point, // number of points within a given max_dist SEXP nwithindist(SEXP x_R,SEXP max_dist_R) { #ifdef DEBUG Rprintf("start\n"); #endif double* x=REAL(x_R); int nx=LENGTH(x_R); double max_dist=*REAL(max_dist_R); SEXP nv; PROTECT(nv=allocVector(REALSXP,nx)); double* i_nv=REAL(nv); for(int i=0;i > contigs; // running indecies (start and end) int si=0; int ei=0; // current window coordinate double ws=pos[0]; // current window tag counts int cc[2]={0,0}; if(nt>0) { cc[flag[si]]++; // increment window end while(ei<(nt-1) && (pos[ei+1]-ws) <= wsize) { ei++; cc[flag[ei]]++; } // cluster start,end positions double cs,ce; int inclust=0; while(si (pos[ei+1] - ws - wsize) && ei!=(nt-1)) { // move end boudnary ei++; ws=pos[ei]-wsize; cc[flag[ei]]++; while(ei<(nt-1) && pos[ei+1]==ws+wsize) { ei++; cc[flag[ei]]++; } // increment window start while(si<(nt-1) && pos[si] < ws) { cc[flag[si]]--; si++; } } else { // move up start boundary ws=pos[si+1]; cc[flag[si]]--; si++; while(si<(nt-1) && pos[si+1]==ws) { cc[flag[si]]--; si++; } // increment window end while(ei<(nt-1) && (pos[ei+1] - ws) <= wsize) { ei++; cc[flag[ei]]++; } } // calculate z score double dc0=((double)cc[0])+0.5; double dc1=((double)cc[1])+0.5; double rte=dc0+dc1-0.25*thr*thr; double lb; if(rte<=0) { lb=0; } else { lb=(sqrt(dc1*dc0) - 0.5*thr*sqrt(rte))/(dc0 - 0.25*thr*thr); if(lb<0) { lb=0; } lb*=lb; } //Rprintf("%f=f(%f,%f,%f); %f=f(%f,%f,%f)\n",lb,1.0-thr,2.0*dc1,2.0*dc0,ub,thr,2.0*dc1,2.0*dc0); #ifdef DEBUG //double ub=gsl_cdf_fdist_Qinv(thr,2.0*dc1,2.0*dc0)*dc1/dc0; double ub=(sqrt(dc1*dc0) + 0.5*thr*sqrt(rte))/(dc0 - 0.25*thr*thr); ub*=ub; Rprintf("s=%d (%f); e=%d (%f); window: %f-%f; cc=[%d,%d]; lb=%f; ub=%f\n",si,pos[si],ei,pos[ei],ws,ws+wsize,cc[0],cc[1],lb,ub); #endif int bc=lb>=bgm && cc[1]>=mintag; if(either) { bc=lb>=bgm || cc[1]>=mintag; } if(bc) { if(inclust) { double nce=ws+wsize/2.0; if(nce-ce > wsize/2.0) { // next point is too far removed, end cluster if(ce-cs >= mcs) { contigs.push_back(pair(cs,ce)); #ifdef DEBUG Rprintf("recorded cluster %f-%f\n",cs,ce); #endif } inclust=0; } else { ce=nce; } } else { inclust=1; cs=ws+wsize/2.0; ce=cs; } } else { if(inclust) { if(ce-cs >= mcs) { contigs.push_back(pair(cs,ce)); #ifdef DEBUG Rprintf("recorded cluster %f-%f\n",cs,ce); #endif } inclust=0; } } } if(inclust) { if(ce-cs >= mcs) { contigs.push_back(pair(cs,ce)); #ifdef DEBUG Rprintf("recorded cluster %f-%f\n",cs,ce); #endif } inclust=0; } } SEXP cs_R,ce_R; PROTECT(cs_R=allocVector(REALSXP,contigs.size())); PROTECT(ce_R=allocVector(REALSXP,contigs.size())); double* csa=REAL(cs_R); double* cea=REAL(ce_R); int i=0; for(vector< pair >::const_iterator ci=contigs.begin(); ci!=contigs.end();++ci) { csa[i]=ci->first; cea[i]=ci->second; i++; } SEXP ans_R, names_R; PROTECT(names_R = allocVector(STRSXP, 2)); SET_STRING_ELT(names_R, 0, mkChar("s")); SET_STRING_ELT(names_R, 1, mkChar("e")); PROTECT(ans_R = allocVector(VECSXP, 2)); SET_VECTOR_ELT(ans_R, 0, cs_R); SET_VECTOR_ELT(ans_R, 1, ce_R); setAttrib(ans_R, R_NamesSymbol, names_R); UNPROTECT(4); return(ans_R); } // finds intersection between a list of regions // the flag has +n/-n value, corresponding to the start/end of a segment in n-th regionset // max_val: 1 - report max overlapping value, -1: report min, 0 - don't look at values // returns: $s, $e, ($v) lists SEXP region_intersection(SEXP n_R,SEXP pos_R,SEXP flags_R,SEXP vals_R,SEXP max_val_R,SEXP union_R) { const int max_val=*INTEGER(max_val_R); const int unionr=*INTEGER(union_R); const int n=*INTEGER(n_R); double* pos=REAL(pos_R); int* flags=INTEGER(flags_R); double* val=REAL(vals_R); #ifdef DEBUG Rprintf("n=%d; npos=%d; max_val=%d\n",n,LENGTH(pos_R),max_val); #endif //int s[n]; // flag status for each set int *s=new int[n]; //double mv[n]; // max/min value of current clusters for(int i=0;i starts; vector ends; vector values; int start=-1; double mval=0; for(int i=0;i0) { s[std::abs(f)-1]++; } else { s[std::abs(f)-1]--; } if(max_val!=0 && val[i]*max_val > mval*max_val) { mval=val[i]; } // joined status int all; if(unionr) { all=0; for(int j=0;j0) { all=1; break;} } } else { all=1; for(int j=0;j0); } } //Rprintf("i=%d; s=[",i); //for(int j=0;j=0) { // in fragment if(!all) { // end fragment starts.push_back(pos[start]); ends.push_back(pos[i]); start=-1; if(max_val!=0) { values.push_back(mval); } #ifdef DEBUG Rprintf("recorded new fragment (s=%f,e=%f,v=%f);\n",pos[start],pos[i],mval); #endif } } else { // should a fragment be started? if(all) { start=i; if(max_val!=0) { mval=val[i]; } #ifdef DEBUG Rprintf("starting new fragment (s=%f,i=%d);\n",pos[start],i); #endif } } } SEXP cs_R,ce_R,cv_R; PROTECT(cs_R=allocVector(REALSXP,starts.size())); PROTECT(ce_R=allocVector(REALSXP,ends.size())); double* csa=REAL(cs_R); int i=0; for(vector::const_iterator ci=starts.begin(); ci!=starts.end(); ++ci) { csa[i]=*ci; i++; } csa=REAL(ce_R); i=0; for(vector::const_iterator ci=ends.begin(); ci!=ends.end(); ++ci) { csa[i]=*ci; i++; } if(max_val!=0) { PROTECT(cv_R=allocVector(REALSXP,values.size())); csa=REAL(cv_R); i=0; for(vector::const_iterator ci=values.begin(); ci!=values.end(); ++ci) { csa[i]=*ci; i++; } } SEXP ans_R, names_R; if(max_val!=0) { PROTECT(names_R = allocVector(STRSXP, 3)); SET_STRING_ELT(names_R, 0, mkChar("s")); SET_STRING_ELT(names_R, 1, mkChar("e")); SET_STRING_ELT(names_R, 2, mkChar("v")); PROTECT(ans_R = allocVector(VECSXP, 3)); SET_VECTOR_ELT(ans_R, 0, cs_R); SET_VECTOR_ELT(ans_R, 1, ce_R); SET_VECTOR_ELT(ans_R, 2, cv_R); } else { PROTECT(names_R = allocVector(STRSXP, 2)); SET_STRING_ELT(names_R, 0, mkChar("s")); SET_STRING_ELT(names_R, 1, mkChar("e")); PROTECT(ans_R = allocVector(VECSXP, 2)); SET_VECTOR_ELT(ans_R, 0, cs_R); SET_VECTOR_ELT(ans_R, 1, ce_R); } setAttrib(ans_R, R_NamesSymbol, names_R); if(max_val!=0) { UNPROTECT(5); } else { UNPROTECT(4); } //free-ing allocated space for array delete[] s; return(ans_R); } } spp/src/bed2vector.cpp0000644000176200001440000020503413473022026014411 0ustar liggesusers#include "pc.h" #include "config.h" #include #include #include #include #include #include #include #include #include #include #ifdef HAVE_LIBBZ2 #include #endif extern "C" { //#include "R.h" //#include "Rmath.h" #include "Rinternals.h" #include "Rdefines.h" // for getline #include /* flockfile, getc_unlocked, funlockfile */ #include /* malloc, realloc */ #include /* errno */ #include /* ssize_t */ ssize_t getline_local(char **lineptr, size_t *n, FILE *stream); } using namespace std; class lessAbsoluteValue { public: bool operator()(int a, int b) const { return abs(a) < abs(b); } }; #ifdef HAVE_LIBBZ2 int get_bzline(BZFILE* b,string& line) { //char c; char c='a'; //int nBuf; int bzerror=BZ_OK; while(bzerror == BZ_OK) { //nBuf=BZ2_bzRead(&bzerror, b, &c, 1); if(bzerror==BZ_OK) { if(c=='\n') { return bzerror; } else { line+=c; } } } return bzerror; } int get_a_line(FILE *f,BZFILE *b,int bz2file,string& line) { line=""; if(bz2file) { int bzerror=get_bzline(b,line); if(bzerror==BZ_OK) { return(1); } else { if(bzerror!=BZ_STREAM_END) { REprintf("encountered BZERROR=",bzerror); //Rcpp::Rcerr<<"encountered BZERROR="< > pos; // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep(" \t"); ifstream bed_file(fname); #ifdef DEBUG Rprintf("opened %s\n",fname); #endif Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; while(getline(bed_file,line)) { #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { string chr=*sit++; //chr=chr.substr(3,strlen(chr.c_str())); string str_start=*sit++; int fstart=atoi(str_start.c_str()); string str_end=*sit++; int fend=atoi(str_end.c_str()); int fpos=fstart; if(sit!=tok.end()) { string u0=*sit++; string nfield=*sit++; string strand=*sit++; if(strand=="-") { fpos=-1*fend; } } // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d\n",chr.c_str(),cind,fpos); if(fcount>30) { break; } #endif } } bed_file.close(); #ifdef DEBUG Rprintf("done. read %d fragments\n",fcount); #endif Rprintf("done. read %d fragments\n",fcount); // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { sort(csi->begin(), csi->end(), lessAbsoluteValue()); } SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { SEXP nv; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; int* i_nv=INTEGER(nv); int i=0; for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_nv[i++]=*pi; } SET_VECTOR_ELT(ans, csi-pos.begin(), nv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } SEXP read_meland_old(SEXP filename) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches vector< vector > poslen; // length // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep(" \t"); ifstream bed_file(fname); Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; while(getline(bed_file,line)) { #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { sit++; sit++; string str_nm=*sit++; int nm=0; if(str_nm[0]=='U') { nm=atoi((str_nm.c_str()+1)); } else { continue; } sit++; sit++; sit++; string str_len=*sit++; int len=atoi(str_len.c_str()); string chr=*sit++; chr=chr.substr(3,strlen(chr.c_str())); string str_pos=*sit++; int fpos=atoi(str_pos.c_str()); // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); poslen.push_back(vector()); #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); (poslen[cind]).push_back(len); #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len); if(fcount>30) { break; } #endif } } bed_file.close(); #ifdef DEBUG Rprintf("done. read %d fragments\n",fcount); #endif Rprintf("done. read %d fragments\n",fcount); // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi,lsi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); lsi=poslen.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 3)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); SET_STRING_ELT(dnames_R, 2, mkChar("l")); SEXP tv,nv,lv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; PROTECT(lv=allocVector(INTSXP,csi->size())); np++; int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int* i_lv=INTEGER(lv); int i=0; vector::const_iterator ini=nsi->begin(); vector::const_iterator ili=lsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i_lv[i]=*ili++; i++; } PROTECT(dv = allocVector(VECSXP, 3)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); SET_VECTOR_ELT(dv, 2, lv); setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } int get_a_line(FILE *f,string& line) { line=""; char cline[1024]; if(fgets(cline,1024,f)) { line+=cline; return(1); } else { return(0); } } SEXP read_meland(SEXP filename,SEXP read_tag_names_R) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); int read_names=*(INTEGER(read_tag_names_R)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches vector< vector > poslen; // length vector< vector > tagnames; // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep(" \t"); FILE *f=fopen(fname,"rb"); if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; while(get_a_line(f,line)) { #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { string tagname=*sit++; sit++; string str_nm=*sit++; int nm=0; if(str_nm[0]=='U') { nm=atoi((str_nm.c_str()+1)); } else { continue; } sit++; sit++; sit++; string str_len=*sit++; int len=atoi(str_len.c_str()); string chr=*sit++; chr=chr.substr(3,strlen(chr.c_str())); string str_pos=*sit++; int fpos=atoi(str_pos.c_str()); // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); poslen.push_back(vector()); if(read_names) { tagnames.push_back(vector()); } #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); (poslen[cind]).push_back(len); if(read_names) { (tagnames[cind]).push_back(tagname); } #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len); if(fcount>30) { break; } #endif } } fclose(f); #ifdef DEBUG Rprintf("done. read %d fragments\n",fcount); #endif Rprintf("done. read %d fragments\n",fcount); // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi,lsi; vector >::const_iterator ssi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); lsi=poslen.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 3+read_names)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); SET_STRING_ELT(dnames_R, 2, mkChar("l")); if(read_names) { SET_STRING_ELT(dnames_R, 3, mkChar("s")); } SEXP tv,nv,lv,sv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; PROTECT(lv=allocVector(INTSXP,csi->size())); np++; PROTECT(sv=allocVector(STRSXP,csi->size())); //add this here to avoid [-Wmaybe-uninitialized] if(read_names) { PROTECT(sv=allocVector(STRSXP,csi->size())); np++; } int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int* i_lv=INTEGER(lv); int i=0; vector::const_iterator ini=nsi->begin(); vector::const_iterator ili=lsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i_lv[i]=*ili++; i++; } if(read_names) { int i=0; ssi=tagnames.begin()+(csi-pos.begin()); for(vector::const_iterator si=ssi->begin();si!=ssi->end();++si) { SET_STRING_ELT(sv,i,mkChar(si->c_str())); i++; } } PROTECT(dv = allocVector(VECSXP, 3+read_names)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); SET_VECTOR_ELT(dv, 2, lv); if(read_names) { SET_VECTOR_ELT(dv, 3, sv); } setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } // reads regular eland files, recording mismatch positions SEXP read_eland_mismatches(SEXP filename) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > mm1; // position of the first mismatch (or 0 for none) vector< vector > mm2; // position of the second mismatch // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep("\t","",boost::keep_empty_tokens); FILE *f=fopen(fname,"rb"); if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; while(get_a_line(f,line)) { #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { sit++; string seq=*sit++; string str_nm=*sit++; //int nm=0; if(str_nm[0]=='U') { int nm=0; //insert variable declaration here nm=atoi((str_nm.c_str()+1)); } else { continue; } sit++; sit++; sit++; string chr=*sit++; // extract chromosome name from this int chrp=chr.find("chr"); int pp=chr.find('.'); chr=chr.substr(chrp+3,pp-chrp-3); string str_pos=*sit++; int fpos=atoi(str_pos.c_str()); string strand=*sit++; int nstrand=0; if(strand=="R") { fpos=-1*(fpos+seq.size()-1); nstrand=1; } sit++; int nm1=0; int nm2=0; if(sit!=tok.end()) { string nms=*sit++; nm1=atoi(nms.substr(0,nms.size()-1).c_str()); if(nstrand) { nm1=seq.size()-nm1+1; } } if(sit!=tok.end()) { string nms=*sit++; nm2=atoi(nms.substr(0,nms.size()-1).c_str()); if(nstrand) { nm2=seq.size()-nm2+1; } } // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); mm1.push_back(vector()); mm2.push_back(vector()); #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (mm1[cind]).push_back(nm1); (mm2[cind]).push_back(nm2); #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d, nm1=%d, nm2=%d\n",chr.c_str(),cind,fpos,nm1,nm2); if(fcount>30) { break; } #endif } } fclose(f); #ifdef DEBUG Rprintf("done. read %d fragments\n",fcount); #endif Rprintf("done. read %d fragments\n",fcount); // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi,lsi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=mm1.begin()+(csi-pos.begin()); lsi=mm2.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 3)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("f")); SET_STRING_ELT(dnames_R, 2, mkChar("s")); SEXP tv,nv,lv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; PROTECT(lv=allocVector(INTSXP,csi->size())); np++; int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int* i_lv=INTEGER(lv); int i=0; vector::const_iterator ini=nsi->begin(); vector::const_iterator ili=lsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i_lv[i]=*ili++; i++; } PROTECT(dv = allocVector(VECSXP, 3)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); SET_VECTOR_ELT(dv, 2, lv); setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } // read in regular eland files, adjusting the negative strand coordinate by sequence length SEXP read_eland(SEXP filename,SEXP read_tag_names_R,SEXP eland_tag_length_R) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); int read_names=*(INTEGER(read_tag_names_R)); int eland_tag_length=*(INTEGER(eland_tag_length_R)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches vector< vector > tagnames; // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep("\t","",boost::keep_empty_tokens); FILE *f=fopen(fname,"rb"); if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } else { Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; while(get_a_line(f,line)) { #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { string tagname=*sit++; string sequence=*sit++; int len=sequence.size(); // adjust probe length if eland length limit was specified if(eland_tag_length>0 && len>eland_tag_length) { len=eland_tag_length; } string str_nm=*sit++; int nm=0; if(str_nm[0]=='U') { nm=atoi((str_nm.c_str()+1)); } else { continue; } sit++; sit++; sit++; string chr=*sit++; string str_pos=*sit++; int fpos=atoi(str_pos.c_str()); string str_strand=*sit++; if(str_strand[0]=='R') { fpos=-1*(fpos+len-1); } // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); if(read_names) { tagnames.push_back(vector()); } #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); if(read_names) { (tagnames[cind]).push_back(tagname); } #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len); if(fcount>30) { break; } #endif } } fclose(f); Rprintf("done. read %d fragments\n",fcount); } // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi; vector >::const_iterator ssi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); if(read_names) { SET_STRING_ELT(dnames_R, 2, mkChar("s")); } SEXP tv,nv,sv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; PROTECT(sv=allocVector(STRSXP,csi->size())); //putting declaration after definition to avoid[-Wmaybe-uninitialized] if(read_names) { PROTECT(sv=allocVector(STRSXP,csi->size())); np++; } int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int i=0; vector::const_iterator ini=nsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i++; } if(read_names) { int i=0; ssi=tagnames.begin()+(csi-pos.begin()); for(vector::const_iterator si=ssi->begin();si!=ssi->end();++si) { SET_STRING_ELT(sv,i,mkChar(si->c_str())); i++; } } PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); if(read_names) { SET_VECTOR_ELT(dv, 2, sv); } setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } // read in extended eland files, adjusting the negative strand coordinate by sequence length SEXP read_eland_extended(SEXP filename,SEXP read_tag_names_R,SEXP eland_tag_length_R) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); int read_names=*(INTEGER(read_tag_names_R)); int eland_tag_length=*(INTEGER(eland_tag_length_R)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches vector< vector > tagnames; // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep("\t","",boost::keep_empty_tokens); FILE *f=fopen(fname,"rb"); if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } else { Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; while(get_a_line(f,line)) { #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { string machinename=*sit++; string runnumber=*sit++; string lanenumber=*sit++; *sit++; string str_x=*sit++; string str_y=*sit++; string tagname=machinename+"."+runnumber+"."+lanenumber+"."+str_x+"."+str_y; *sit++; *sit++; string sequence=*sit++; *sit++; string chr=*sit++; string contig=*sit++; chr=chr+contig; int len=sequence.size(); // adjust probe length if eland length limit was specified if(eland_tag_length>0 && len>eland_tag_length) { len=eland_tag_length; } string str_pos=*sit++; if(str_pos.size()<1) { continue; } int fpos=atoi(str_pos.c_str()); string str_strand=*sit++; if(str_strand[0]=='R') { fpos=-1*(fpos+len-1); } string str_nm=*sit++; // count non-digit characters int nm=0; int iloop=str_nm.size();//creating dummy variable for the loop below to avoid [-Wsign-compare] for(int i=0;i,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); if(read_names) { tagnames.push_back(vector()); } #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); if(read_names) { (tagnames[cind]).push_back(tagname); } #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len); if(fcount>30) { break; } #endif } } fclose(f); Rprintf("done. read %d fragments\n",fcount); } // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi; vector >::const_iterator ssi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); if(read_names) { SET_STRING_ELT(dnames_R, 2, mkChar("s")); } SEXP tv,nv,sv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; if(read_names) { PROTECT(sv=allocVector(STRSXP,csi->size())); np++; } int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int i=0; vector::const_iterator ini=nsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i++; } if(read_names) { int i=0; ssi=tagnames.begin()+(csi-pos.begin()); for(vector::const_iterator si=ssi->begin();si!=ssi->end();++si) { SET_STRING_ELT(sv,i,mkChar(si->c_str())); i++; } } PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); if(read_names) { SET_VECTOR_ELT(dv, 2, sv); } setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } // read in eland multi files, adjusting the negative strand coordinate by sequence length SEXP read_eland_multi(SEXP filename,SEXP read_tag_names_R,SEXP eland_tag_length_R) { #ifdef DEBUG Rprintf("read_eland_muti() : start\n"); #endif const char* fname=CHAR(asChar(filename)); int read_names=*(INTEGER(read_tag_names_R)); int eland_tag_length=*(INTEGER(eland_tag_length_R)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches vector< vector > tagnames; // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep(" \t",""); boost::char_separator comsep(",","",boost::keep_empty_tokens); boost::char_separator colsep(":","",boost::keep_empty_tokens); FILE *f=fopen(fname,"rb"); if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } else { Rprintf("opened %s\n",fname); // read in bed line string line; int nline=0; int fcount=0; while(get_a_line(f,line)) { nline++; // chomp size_t elpos = line.find_last_not_of("\n"); if(elpos != string::npos) { line = line.substr(0, elpos+1); } #ifdef DEBUG Rprintf("line %d: %s\n",nline,line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { string tagname=*sit++; string sequence=*sit++; string mspec=*sit++; // parse out match spec if(mspec=="NM" || mspec=="QC") { continue; } #ifdef DEBUG Rprintf("parsing out spec \"%s\" : ",mspec.c_str()); #endif tokType stok(mspec, colsep); tokType::iterator ssit=stok.begin(); string str_nm0=*ssit++; int nm=0; int nm0=atoi(str_nm0.c_str()); if(nm0>1) { #ifdef DEBUG Rprintf("rejected for nm0\n"); #endif continue; } if(nm0==0) { string str_nm1=*ssit++; int nm1=atoi(str_nm1.c_str()); if(nm1>1) { #ifdef DEBUG Rprintf("rejected for nm1\n"); #endif continue; } if(nm1==0) { string str_nm2=*ssit++; int nm2=atoi(str_nm2.c_str()); if(nm2>1) { #ifdef DEBUG Rprintf("rejected for nm2\n"); #endif continue; } nm=2; } else { nm=1; } } #ifdef DEBUG Rprintf("accepted (nm=%d)\n",nm); #endif int npos=0; string mpos=*sit++; vector mposc; vector mposp; tokType ptok(mpos, comsep); string prevchr; for(tokType::iterator psit=ptok.begin();psit!=ptok.end();psit++) { string cpos=*psit; npos++; int strand=1; if(cpos.size()<5) { Rprintf("ERROR: line=%d, match %d is too short: \"%s\"; ",nline,npos,cpos.c_str()); } char lc=cpos.at(cpos.size()-1); if(atoi(&lc)==nm) { switch(cpos.at(cpos.size()-2)) { case 'R': strand=-1; break; case 'F': strand=1; break; default: Rprintf("ERROR: line=%d, match %d specifies an invalid strand %c\n",nline,npos,cpos.at(cpos.size()-2)); break; continue; } string chr,str_pos; size_t colpos=cpos.find(":"); if(colpos==string::npos) { if(npos>1) { chr=prevchr; str_pos=cpos.substr(0,cpos.size()-2); } else { Rprintf("ERROR: line=%d, match %d does not contain chromosome separator: \"%s\"\n",nline,npos,cpos.c_str()); continue; } } else { chr=cpos.substr(0,colpos); str_pos=cpos.substr(colpos+1,cpos.size()-3-colpos); } #ifdef DEBUG Rprintf("\"%s\" : chr=%s, pos=%s, strand=%d\n",cpos.c_str(),chr.c_str(),str_pos.c_str(),strand); #endif int pos=strand*atoi(str_pos.c_str()); mposc.push_back(chr); mposp.push_back(pos); } } string chr; int fpos; if(mposc.size()!=1) { if(mposc.size()==0) { Rprintf("ERROR: line=%d: no %d-mismatch matches were found in \"%s\"\n",nline,nm,mpos.c_str()); } else { Rprintf("ERROR: line=%d: more than one (%d) %d-mismatch matches were found in \"%s\"\n",nline,mposc.size(),nm,mpos.c_str()); } continue; } else { chr=*mposc.begin(); fpos=*mposp.begin(); } int len=sequence.size(); // adjust probe length if eland length limit was specified if(eland_tag_length>0 && len>eland_tag_length) { len=eland_tag_length; } if(fpos<0) { fpos=-1*(-1*fpos+len-1); } // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); if(read_names) { tagnames.push_back(vector()); } #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); if(read_names) { (tagnames[cind]).push_back(tagname); } #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len); if(fcount>30) { break; } #endif } } fclose(f); Rprintf("done. read %d fragments\n",fcount); } // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi; vector >::const_iterator ssi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); if(read_names) { SET_STRING_ELT(dnames_R, 2, mkChar("s")); } SEXP tv,nv,sv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; if(read_names) { PROTECT(sv=allocVector(STRSXP,csi->size())); np++; } int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int i=0; vector::const_iterator ini=nsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i++; } if(read_names) { int i=0; ssi=tagnames.begin()+(csi-pos.begin()); for(vector::const_iterator si=ssi->begin();si!=ssi->end();++si) { SET_STRING_ELT(sv,i,mkChar(si->c_str())); i++; } } PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); if(read_names) { SET_VECTOR_ELT(dv, 2, sv); } setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } // read in regular eland files, adjusting the negative strand coordinate by sequence length SEXP read_bowtie(SEXP filename,SEXP read_tag_names_R) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); int read_names=*(INTEGER(read_tag_names_R)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches vector< vector > tagnames; // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep("\t","",boost::keep_empty_tokens); boost::char_separator sep2(","); FILE *f=fopen(fname,"rb"); if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } else { #ifdef HAVE_LIBBZ2 BZFILE* b=0; int bzerror; int bz2file=0; if(strstr(fname,".bz2")) { bz2file=1; b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0); if (bzerror != BZ_OK) { Rprintf("bzerror=",bzerror); } } #endif Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; #ifdef HAVE_LIBBZ2 while(get_a_line(f,b,bz2file,line)) { #else while(get_a_line(f,line)) { #endif #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { string tagname=*sit++; string str_strand=*sit++; string chr=*sit++; string str_pos=*sit++; int fpos=atoi(str_pos.c_str()); string sequence=*sit++; sit++; sit++; string mm=*sit++; int len=sequence.size(); if(str_strand[0]=='-') { fpos=-1*(fpos+len-1); } // determine number of mismatches int nm=0; if(mm.size()>0) { nm++; string::size_type tp(0); while(tp!=string::npos) { tp = mm.find(",",tp); if(tp!=string::npos) { tp++; ++nm; } } } // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); if(read_names) { tagnames.push_back(vector()); } #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); if(read_names) { (tagnames[cind]).push_back(tagname); } #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len); if(fcount>30) { break; } #endif } } #ifdef HAVE_LIBBZ2 BZ2_bzReadClose( &bzerror, b); #endif fclose(f); Rprintf("done. read %d fragments\n",fcount); } // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi; vector >::const_iterator ssi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); if(read_names) { SET_STRING_ELT(dnames_R, 2, mkChar("s")); } SEXP tv,nv,sv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; if(read_names) { PROTECT(sv=allocVector(STRSXP,csi->size())); np++; } int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int i=0; vector::const_iterator ini=nsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i++; } if(read_names) { int i=0; ssi=tagnames.begin()+(csi-pos.begin()); for(vector::const_iterator si=ssi->begin();si!=ssi->end();++si) { SET_STRING_ELT(sv,i,mkChar(si->c_str())); i++; } } PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); if(read_names) { SET_VECTOR_ELT(dv, 2, sv); } setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } // read in helicos tab-separated alignment output (regular or bz2) SEXP read_helicostabf(SEXP filename,SEXP read_tag_names_R) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); int read_names=*(INTEGER(read_tag_names_R)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches vector< vector > poslen; // length of the match vector< vector > tagnames; // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep("\t","",boost::keep_empty_tokens); boost::char_separator sep2(","); FILE *f=fopen(fname,"rb"); if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } else { #ifdef HAVE_LIBBZ2 BZFILE* b=0; int bzerror; int bz2file=0; if(strstr(fname,".bz2")) { bz2file=1; b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0); if (bzerror != BZ_OK) { Rprintf("bzerror=",bzerror); } } #endif Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; int nlines=0; #ifdef HAVE_LIBBZ2 while(get_a_line(f,b,bz2file,line)) { #else while(get_a_line(f,line)) { #endif #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif nlines++; // skip comments if(line[0]=='#') { continue; } if(line.compare(0,12,"Reference_ID")==0) { #ifdef DEBUG Rprintf("matched header on line %d\n",nlines); #endif continue; } tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { string chr=*sit++; string tagname=*sit++; string str_startpos=*sit++; string str_endpos=*sit++; string str_tstart=*sit++; string str_tend=*sit++; int len=atoi(str_tend.c_str())-atoi(str_tstart.c_str()); sit++; sit++; string str_ndel=*sit++; string str_nins=*sit++; string str_nsub=*sit++; string str_strand=*sit++; int fpos; if(str_strand[0]=='-') { fpos=-1*atoi(str_endpos.c_str()); } else { fpos=atoi(str_startpos.c_str()); } // determine number of mismatches int nm=atoi(str_ndel.c_str())+atoi(str_nins.c_str())+atoi(str_nsub.c_str()); // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); poslen.push_back(vector()); if(read_names) { tagnames.push_back(vector()); } #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); (poslen[cind]).push_back(len); if(read_names) { (tagnames[cind]).push_back(tagname); } #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d\n",chr.c_str(),cind,fpos,nm); if(fcount>30) { break; } #endif } } #ifdef HAVE_LIBBZ2 BZ2_bzReadClose( &bzerror, b); #endif fclose(f); Rprintf("done. read %d fragments\n",fcount); } // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi; vector >::const_iterator lsi; vector >::const_iterator ssi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); lsi=poslen.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 3+read_names)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); SET_STRING_ELT(dnames_R, 2, mkChar("l")); if(read_names) { SET_STRING_ELT(dnames_R, 3, mkChar("s")); } SEXP tv,nv,lv,sv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; PROTECT(lv=allocVector(INTSXP,csi->size())); np++; if(read_names) { PROTECT(sv=allocVector(STRSXP,csi->size())); np++; } int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int* i_lv=INTEGER(lv); int i=0; vector::const_iterator ini=nsi->begin(); vector::const_iterator lni=lsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i_lv[i]=*lni++; i++; } if(read_names) { int i=0; ssi=tagnames.begin()+(csi-pos.begin()); for(vector::const_iterator si=ssi->begin();si!=ssi->end();++si) { SET_STRING_ELT(sv,i,mkChar(si->c_str())); i++; } } PROTECT(dv = allocVector(VECSXP, 3+read_names)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); SET_VECTOR_ELT(dv, 2, lv); if(read_names) { SET_VECTOR_ELT(dv, 3, sv); } setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } // read in text version of maq map SEXP read_maqmap(SEXP filename,SEXP read_tag_names_R) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); int read_names=*(INTEGER(read_tag_names_R)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches vector< vector > tagnames; // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep("\t","",boost::keep_empty_tokens); FILE *f=fopen(fname,"rb"); if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } else { Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; while(get_a_line(f,line)) { #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { string tagname=*sit++; string chr=*sit++; string str_pos=*sit++; int fpos=atoi(str_pos.c_str()); string str_strand=*sit++; sit++; sit++; sit++; sit++; sit++; string str_nm=*sit++; sit++; sit++; sit++; string str_len=*sit++; int nm=atoi(str_nm.c_str()); int len=atoi(str_len.c_str()); if(str_strand[0]=='-') { fpos=-1*(fpos+len-1); } // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); if(read_names) { tagnames.push_back(vector()); } #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); if(read_names) { (tagnames[cind]).push_back(tagname); } #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d, nm=%d, len=%d\n",chr.c_str(),cind,fpos,nm,len); if(fcount>30) { break; } #endif } } fclose(f); Rprintf("done. read %d fragments\n",fcount); } // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi; vector >::const_iterator ssi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 2+read_names)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); if(read_names) { SET_STRING_ELT(dnames_R, 2, mkChar("s")); } SEXP tv,nv,sv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; if(read_names) { PROTECT(sv=allocVector(STRSXP,csi->size())); np++; } int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int i=0; vector::const_iterator ini=nsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i++; } if(read_names) { int i=0; ssi=tagnames.begin()+(csi-pos.begin()); for(vector::const_iterator si=ssi->begin();si!=ssi->end();++si) { SET_STRING_ELT(sv,i,mkChar(si->c_str())); i++; } } PROTECT(dv = allocVector(VECSXP, 2+read_names)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); if(read_names) { SET_VECTOR_ELT(dv, 2, sv); } setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } // read in tagalign file SEXP read_tagalign(SEXP filename) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep(" \t"); FILE *f=fopen(fname,"rb"); if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } else { Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; while(get_a_line(f,line)) { #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { string chr=*sit++; string str_spos=*sit++; string str_epos=*sit++; sit++; string str_qual=*sit++; string str_strand=*sit; int fpos; if(str_strand[0]=='+') { fpos=atoi(str_spos.c_str()); } else { fpos=-1*atoi(str_epos.c_str()); } int nm=atoi(str_qual.c_str()); // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d nm=%d\n",chr.c_str(),cind,fpos,nm); if(fcount>30) { break; } #endif } } fclose(f); Rprintf("done. read %d fragments\n",fcount); } // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 2)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); SEXP tv,nv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int i=0; vector::const_iterator ini=nsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i++; } PROTECT(dv = allocVector(VECSXP, 2)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } // arachne madness SEXP read_arachne(SEXP filename) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep(" \t"); FILE *f=fopen(fname,"rb"); if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } else { #ifdef HAVE_LIBBZ2 BZFILE* b=0; int bzerror; int bz2file=0; if(strstr(fname,".bz2")) { bz2file=1; b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0); if (bzerror != BZ_OK) { Rprintf("bzerror=",bzerror); } } #endif Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; #ifdef HAVE_LIBBZ2 while(get_a_line(f,b,bz2file,line)) { #else while(get_a_line(f,line)) { #endif #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { string chr=*sit++; string str_spos=*sit++; int nm=0; if(sit!=tok.end()) { string str_mm=*sit; nm=atoi(str_mm.c_str()); } int fpos=atoi(str_spos.c_str());; // determine the chromosome index unordered_map,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d nm=%d\n",chr.c_str(),cind,fpos,nm); if(fcount>30) { break; } #endif } } #ifdef HAVE_LIBBZ2 BZ2_bzReadClose( &bzerror, b); #endif fclose(f); Rprintf("done. read %d fragments\n",fcount); } // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 2)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); SEXP tv,nv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int i=0; vector::const_iterator ini=nsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i++; } PROTECT(dv = allocVector(VECSXP, 2)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } // arachne madness SEXP read_arachne_long(SEXP filename) { #ifdef DEBUG Rprintf("start\n"); #endif const char* fname=CHAR(asChar(filename)); #ifdef DEBUG Rprintf("fname=%s\n",fname); #endif // main data vector // chr - pos vector< vector > pos; vector< vector > posnm; // number of mismatches vector< vector > poslen; // length of the match // chromosome map unordered_map,equal_to > cind_map; vector cnames; typedef boost::tokenizer > tokType; boost::char_separator sep(" \t"); FILE *f=fopen(fname,"rb"); if (!f) { Rprintf("can't open input file \"",fname,"\"\n"); } else { #ifdef HAVE_LIBBZ2 BZFILE* b=0; int bzerror; int bz2file=0; if(strstr(fname,".bz2")) { bz2file=1; b=BZ2_bzReadOpen (&bzerror, f, 0, 0, NULL, 0); if (bzerror != BZ_OK) { Rprintf("bzerror=",bzerror); } } #endif Rprintf("opened %s\n",fname); // read in bed line string line; int fcount=0; #ifdef HAVE_LIBBZ2 while(get_a_line(f,b,bz2file,line)) { #else while(get_a_line(f,line)) { #endif #ifdef DEBUG Rprintf("line: %s\n",line.c_str()); #endif tokType tok(line, sep); tokType::iterator sit=tok.begin(); if(sit!=tok.end()) { string query=*sit++; if(query!="QUERY") { continue; } *sit++; *sit++; *sit++; *sit++; string str_strand=*sit++; string chr=*sit++; string str_startpos=*sit++; string str_endpos=*sit++; int fpos; if(str_strand[0]=='1') { fpos=-1*atoi(str_endpos.c_str()); } else { fpos=atoi(str_startpos.c_str()); } #ifdef DEBUG Rprintf("chr=%s, fpos=%d\n",chr.c_str(),fpos); #endif *sit++; string str_nblocks=*sit++; int nblocks=atoi(str_nblocks.c_str()); #ifdef DEBUG Rprintf("nblocks=%d\n",nblocks); #endif // tally up the read length and the number of mismatches for all blocks int len=0; int nm=0; for(int i=0;i,equal_to >::const_iterator li=cind_map.find(chr); int cind=-1; if(li==cind_map.end()) { // register new chromosome cind=cnames.size(); cnames.push_back(chr); cind_map[chr]=cind; // allocate new pos vector pos.push_back(vector()); posnm.push_back(vector()); poslen.push_back(vector()); #ifdef DEBUG Rprintf("registered new chromosome %s with cind=%d, pos.size=%d\n",chr.c_str(),cind,pos.size()); #endif } else { cind=li->second; } fcount++; (pos[cind]).push_back(fpos); (posnm[cind]).push_back(nm); (poslen[cind]).push_back(len); #ifdef DEBUG Rprintf("read in position chr=%s cind=%d fpos=%d nm=%d len=%d\n",chr.c_str(),cind,fpos,nm,len); if(fcount>30) { break; } #endif } } #ifdef HAVE_LIBBZ2 BZ2_bzReadClose( &bzerror, b); #endif fclose(f); Rprintf("done. read %d fragments\n",fcount); } // construct output structures SEXP chnames; int np=0; // number of protections PROTECT(chnames = allocVector(STRSXP, cnames.size())); for(vector::const_iterator csi=cnames.begin();csi!=cnames.end();++csi) { SET_STRING_ELT(chnames, csi-cnames.begin(), mkChar(csi->c_str())); } np++; // sort //for(vector >::iterator csi=pos.begin();csi!=pos.end();++csi) { // sort(csi->begin(), csi->end(), lessAbsoluteValue()); //} SEXP ans; PROTECT(ans = allocVector(VECSXP, cnames.size())); np++; vector >::const_iterator nsi; vector >::const_iterator lsi; for(vector >::const_iterator csi=pos.begin();csi!=pos.end();++csi) { nsi=posnm.begin()+(csi-pos.begin()); lsi=poslen.begin()+(csi-pos.begin()); SEXP dv,dnames_R; PROTECT(dnames_R = allocVector(STRSXP, 3)); np++; SET_STRING_ELT(dnames_R, 0, mkChar("t")); SET_STRING_ELT(dnames_R, 1, mkChar("n")); SET_STRING_ELT(dnames_R, 2, mkChar("l")); SEXP tv,nv,lv; PROTECT(tv=allocVector(INTSXP,csi->size())); np++; PROTECT(nv=allocVector(INTSXP,csi->size())); np++; PROTECT(lv=allocVector(INTSXP,csi->size())); np++; int* i_tv=INTEGER(tv); int* i_nv=INTEGER(nv); int* i_lv=INTEGER(lv); int i=0; vector::const_iterator ini=nsi->begin(); vector::const_iterator lni=lsi->begin(); for(vector ::const_iterator pi=csi->begin();pi!=csi->end();++pi) { i_tv[i]=*pi; i_nv[i]=*ini++; i_lv[i]=*lni++; i++; } PROTECT(dv = allocVector(VECSXP, 3)); np++; SET_VECTOR_ELT(dv, 0, tv); SET_VECTOR_ELT(dv, 1, nv); SET_VECTOR_ELT(dv, 2, lv); setAttrib(dv, R_NamesSymbol, dnames_R); SET_VECTOR_ELT(ans, csi-pos.begin(), dv); } setAttrib(ans,R_NamesSymbol,chnames); #ifdef DEBUG Rprintf("unprotecting %d elements\n",np); #endif UNPROTECT(np); return(ans); } } /* PASTE REMAINDER AT BOTTOM OF FILE */ ssize_t getline_local(char **linep, size_t *np, FILE *stream) { char *p = NULL; size_t i = 0; if (!linep || !np) { errno = EINVAL; return -1; } if (!(*linep) || !(*np)) { *np = 120; *linep = (char *)malloc(*np); if (!(*linep)) { return -1; } } flockfile(stream); p = *linep; for (int ch = 0; (ch = getc_unlocked(stream)) != EOF;) { if (i >= *np) { /* Grow *linep. */ size_t m = *np * 2; char *s = (char *)realloc(*linep, m); if (!s) { int error = errno; funlockfile(stream); errno = error; return -1; } *linep = s; *np = m; p = *linep; } p[i] = ch; if ('\n' == ch) break; i += 1; } funlockfile(stream); /* Null-terminate the string. */ if (i >= *np) { /* Grow *linep. */ size_t m = *np * 2; char *s = (char *)realloc(*linep, m); if (!s) { return -1; } *linep = s; *np = m; p = *linep; } p[i + 1] = '\0'; return ((i > 0)? i : -1); } spp/src/api_global.h0000644000176200001440000000140613471426636014122 0ustar liggesusers// *************************************************************************** // api_global.h (c) 2010 Derek Barnett // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- // Last modified: 19 November 2010 (DB) // --------------------------------------------------------------------------- // Provides macros for exporting & importing BamTools API library symbols // *************************************************************************** #ifndef API_GLOBAL_H #define API_GLOBAL_H #include "bamtools_global.h" #ifdef BAMTOOLS_API_LIBRARY # define API_EXPORT BAMTOOLS_LIBRARY_EXPORT #else # define API_EXPORT BAMTOOLS_LIBRARY_IMPORT #endif #endif // API_GLOBAL_H spp/src/wdl.cpp0000644000176200001440000004011113471602305013133 0ustar liggesusers#include #include #include #include #include #include #include extern "C" { //#include "R.h" //#include "Rmath.h" #include "Rinternals.h" #include "Rdefines.h" } using namespace std; //#define DEBUG 1 extern "C" { /************************************************************************/ /* * lwcc - calculate local window cross-correlation */ SEXP spp_lwcc(SEXP x_R, // positive strand hist SEXP y_R, // negative strand hist of the same length SEXP osize_R, // outer boundary distance SEXP isize_R, // inner boundary distance SEXP return_peaks_R, // whether all correlation values, or just peaks should be returned SEXP min_peak_dist_R, // distance between closest peaks SEXP min_peak_val_R, // min peak threshold SEXP tag_weight_R, // tag weight SEXP bg_subtract_R, // a flag whether do background subtractio SEXP bgp_R, // optional background hist for positive strand SEXP bgn_R, // optional background hist for negative strand SEXP bg_wsize_R, // window size for the background counts SEXP bg_weight_R, // optional weighting for the background tags, must compensate for window size difference (including is cutout) SEXP round_up_R // whether to round up fractional signal tag counts ) { #ifdef DEBUG Rprintf("start\n"); #endif int is=INTEGER(isize_R)[0]; int os=INTEGER(osize_R)[0]; double rs=((double)(2*os+1)); int* x=INTEGER(x_R); int* y=INTEGER(y_R); int n_x=LENGTH(x_R); // background-related int* bgp=INTEGER(bgp_R); int* bgn=INTEGER(bgn_R); int bg_whs=INTEGER(bg_wsize_R)[0]; int return_peaks=*(INTEGER(return_peaks_R)); double min_peak_val=*(REAL(min_peak_val_R)); int min_peak_dist=*(INTEGER(min_peak_dist_R)); double tag_weight=*(REAL(tag_weight_R)); const int round_up=*(INTEGER(round_up_R)); const int bg_subtract=*(INTEGER(bg_subtract_R)); const double bg_weight=*(REAL(bg_weight_R)); int i; // point at which the value is being calculated int start=os; int end=n_x-os-1; // bg tag counts within bg window int bg_pn1=0; int bg_nn1=0; int bg_pn2=0; int bg_nn2=0; // illustration for counting: // // 012345678901234567890123456789012 // ==========------|------========== // // osize=16; isize=6; SEXP nv; //UNPROTECT(nv=allocVector(REALSXP,n_x)); //putting declaration also here to avoid[-Wmaybe-uninitialized] nv=allocVector(REALSXP,n_x); //putting declaration also here to avoid[-Wmaybe-uninitialized] double *d_nv; vector ppos; vector pval; if(!return_peaks) { //UNPROTECT(nv=allocVector(REALSXP,n_x)); nv=allocVector(REALSXP,n_x); d_nv=REAL(nv); for(int i=0;i=0) { bg_pn1-=bgp[nl]; bg_nn1-=bgn[nl]; } bg_pn1+=bgp[i]; bg_nn1+=bgn[i]; if(i>0) { bg_pn2-=bgp[i-1]; bg_nn2-=bgn[i-1]; } int nr=i+bg_whs; if(nr= start) { // update counts, taking into account masked out regions pn1=pn2=nn1=nn2=0; for(int k=0;k<=(os-is);k++) { int xp1=x[i-os+k]; int xp2=x[i+os-k]; int xn1=y[i+os-k]; int xn2=y[i-os+k]; if(xp1!=-1 && xn1!=-1) { pn1+=xp1; nn1+=xn1; } if(xp2!=-1 && xn2!=-1) { pn2+=xp2; nn2+=xn2; } } // calculate the means double mp=((double)(pn1+pn2))/rs; double mn=((double)(nn1+nn2))/rs; #ifdef DEBUG Rprintf("mp=%f mn=%f\n",mp,mn); #endif // calculate correlation double varp=0; double varn=0; double num=0; double val=-1e3; if((mp>0) & (mn>0)) { for(int k=0;k<=(os-is);k++) { int xp1=x[i-os+k]; int xp2=x[i+os-k]; int xn1=y[i+os-k]; int xn2=y[i-os+k]; if(xp1!=-1 && xn1!=-1) { double nnp1=((double) xp1)-mp; double nnn1=((double) xn1)-mn; num+=nnp1*nnn1; varp+=nnp1*nnp1; varn+=nnn1*nnn1; } if(xp2!=-1 && xn2!=-1) { double nnp2=((double) xp2)-mp; double nnn2=((double) xn2)-mn; num+=nnp2*nnn2; varp+=nnp2*nnp2; varn+=nnn2*nnn2; } } double tagw; double spn1=((double)pn1)*tag_weight; double snn1=((double)nn1)*tag_weight; double spn2=((double)pn2)*tag_weight; double snn2=((double)nn2)*tag_weight; if(round_up) { if(pn1>0 && spn1<1) { spn1=1.0; } //if(pn2>0 && spn2<1) { spn2=1.0; } if(nn1>0 && snn1<1) { snn1=1.0; } //if(nn2>0 && snn2<1) { snn2=1.0; } } if(bg_subtract) { spn1-=((double)bg_pn1)*bg_weight; snn1-=((double)bg_nn2)*bg_weight; spn2-=((double)bg_pn2)*bg_weight; snn2-=((double)bg_nn1)*bg_weight; if(spn2<0) spn2=0; if(snn2<0) snn2=0; if(spn1>0 && snn1>0) { tagw=(2.0*sqrt(spn1*snn1)-(spn2+snn2+1.0)); } else { tagw=-(spn2+snn2+1.0); } //cout<<"bg_pn1="<min_peak_val && ppv>val && ppv>pppv) { if(lpp>0 && (i-lpp+1)>min_peak_dist) { // record previous peak position ppos.push_back(lpp); pval.push_back(lpv); #ifdef DEBUG Rprintf("recording peak x=%d y=%f d=%d\n",lpp,lpv,(i-lpp)); #endif lpp=i-1; lpv=ppv; #ifdef DEBUG Rprintf("updated peak to x=%d y=%f\n",lpp,lpv); #endif } else { if(ppv>lpv) { // update last peak positions #ifdef DEBUG Rprintf("skipping peak x=%d y=%f d=%d in favor of x=%d y=%f\n",lpp,lpv,(i-lpp),i-1,ppv); #endif lpp=i-1; lpv=ppv; } } } // update previous values if(val!=ppv) { pppv=ppv; ppv=val; } } else { d_nv[i]=val; } } } if(return_peaks) { // record last position if(lpp>0) { #ifdef DEBUG Rprintf("recording last peak x=%d y=%f\n",lpp,lpv); #endif ppos.push_back(lpp); pval.push_back(lpv); } SEXP rpp_R,rpv_R; //UNPROTECT(rpp_R=allocVector(INTSXP,ppos.size())); //UNPROTECT(rpv_R=allocVector(REALSXP,ppos.size())); rpp_R=allocVector(INTSXP,ppos.size()); rpv_R=allocVector(REALSXP,ppos.size()); int* rpp=INTEGER(rpp_R); double* rpv=REAL(rpv_R); int iloop=ppos.size(); //creating dummy variable for the loop below to avoid [-Wsign-compare] for(int i=0;i ppos; vector pval; if(!return_peaks) { //UNPROTECT(nv=allocVector(REALSXP,n_x)); nv=allocVector(REALSXP,n_x); d_nv=REAL(nv); for(int i=0;i=0) { bg_pn1-=bgp[nl]; bg_nn1-=bgn[nl]; } bg_pn1+=bgp[i]; bg_nn1+=bgn[i]; if(i>0) { bg_pn2-=bgp[i-1]; bg_nn2-=bgn[i-1]; } int nr=i+bg_whs; if(nr=0) { bg_pn1-=bgp[nl]; bg_nn1-=bgn[nl]; } bg_pn1+=bgp[i]; bg_nn1+=bgn[i]; if(i>0) { bg_pn2-=bgp[i-1]; bg_nn2-=bgn[i-1]; } int nr=i+bg_whs; if(nr0 && spn1<1) { spn1=1.0; } //if(pn2>0 && spn2<1) { spn2=1.0; } //if(nn1>0 && snn1<1) { snn1=1.0; } if(nn2>0 && snn2<1) { snn2=1.0; } } if(direct_count) { val=spn1+snn2; if(round_up && val<1) { val=1.0; } if(bg_subtract) { val-=((double) (bg_pn1+bg_nn2))*bg_weight; } } else { if(bg_subtract) { spn1-=((double)bg_pn1)*bg_weight; snn1-=((double)bg_nn1)*bg_weight; spn2-=((double)bg_pn2)*bg_weight; snn2-=((double)bg_nn2)*bg_weight; if(spn2<0) spn2=0; if(snn1<0) snn1=0; if(spn1>0 && snn2>0) { val=(2.0*sqrt(spn1*snn2)-(spn2+snn1+1.0)); } else { val=-(spn2+snn1+1.0); } } else { val=2.0*sqrt(spn1*snn2)-(spn2+snn1+tag_weight); } } //double val=sqrt(pn1*nn2); //if(pn2>nn1) { val-=pn2; } else { val-=pn1; } #ifdef DEBUG Rprintf("update: i=%d pn1=%d pn2=%d nn1=%d nn2=%d val=%f\n",i,pn1,pn2,nn1,nn2,val); #endif if(return_peaks) { // determine if previous position was a peak if(ppv>min_peak_val && ppv>val && ppv>pppv) { if(lpp>0 && (i-lpp+1)>min_peak_dist) { // record previous peak position ppos.push_back(lpp); pval.push_back(lpv); #ifdef DEBUG Rprintf("recording peak x=%d y=%f d=%d\n",lpp,lpv,(i-lpp)); #endif if(ppl!=-1 && ppl!=i-1) { lpp=(int) round((ppl+i-1)/2); } else { lpp=i-1; } lpv=ppv; #ifdef DEBUG Rprintf("updated peak to x=%d y=%f\n",lpp,lpv); #endif } else { if(ppv>lpv) { // update last peak positions #ifdef DEBUG Rprintf("skipping peak x=%d y=%f d=%d in favor of x=%d y=%f\n",lpp,lpv,(i-lpp),i-1,ppv); #endif if(ppl!=-1 && ppl!=i-1) { lpp=(int) round((ppl+i-1)/2); } else { lpp=i-1; } lpv=ppv; } } } // update previous values if(val!=ppv) { pppv=ppv; ppv=val; ppl=i; } } else { d_nv[i]=val; } } if(return_peaks) { // record last position if(lpp>0) { #ifdef DEBUG Rprintf("recording last peak x=%d y=%f\n",lpp,lpv); #endif ppos.push_back(lpp); pval.push_back(lpv); } SEXP rpp_R,rpv_R; //UNPROTECT(rpp_R=allocVector(INTSXP,ppos.size())); //UNPROTECT(rpv_R=allocVector(REALSXP,ppos.size())); rpp_R=allocVector(INTSXP,ppos.size()); rpv_R=allocVector(REALSXP,ppos.size()); int* rpp=INTEGER(rpp_R); double* rpv=REAL(rpv_R); int iloop=ppos.size(); //creating dummy variable for the loop below to avoid [-Wsign-compare] for(int i=0;i #include "R.h" #include "Rmath.h" #include "Rinternals.h" #undef DEBUG // dout is npos-length output array. // n - number of positions in pos (and length of tc count array) // spos - starting position void cdensum(int *n, double *pos, double *tc, double *spos, int *bw,int *dw, int *npos, int *step,double *dout) { int i,j; //double epos= *spos + ((double) *npos); double dbw=(double) *bw; for(i = 0; i< *n; i++) { // size of the window to which the contributions should be added int in=(int) (pos[i]- *spos); int ic=tc[i]; int whs=(*dw)*(*bw)*ic; int ws=(int) floor((in-whs)/(*step)); int we=(int) ceil((in+whs)/(*step)); if(ws<0) { ws=0; } if(we>= *npos) { we= *npos -1; } for(j=ws;j= npos) { we= npos -1; } for(j=ws;j(nw-2)) { Rprintf("-i=%d; cs=%d, ce=%d; ctc=%d\n",i,cs,ce,ctc); } // advance end if needed double ep=wpos[i]+whs; while(ce(nw-2)) { Rprintf("+i=%d; cs=%d, ce=%d; ctc=%d\n",i,cs,ce,ctc); } } UNPROTECT(1); return(tc_R); } spp/src/maqmap.h0000644000176200001440000000326013471426636013305 0ustar liggesusers#ifndef MAQMAP_H_ #define MAQMAP_H_ #ifdef MAQ_LONGREADS # define MAX_READLEN 128 #else # define MAX_READLEN 64 #endif #define MAX_NAMELEN 36 #define MAQMAP_FORMAT_OLD 0 #define MAQMAP_FORMAT_NEW -1 #define PAIRFLAG_FF 0x01 #define PAIRFLAG_FR 0x02 #define PAIRFLAG_RF 0x04 #define PAIRFLAG_RR 0x08 #define PAIRFLAG_PAIRED 0x10 #define PAIRFLAG_DIFFCHR 0x20 #define PAIRFLAG_NOMATCH 0x40 #define PAIRFLAG_SW 0x80 #include #include #include "const.h" /* name: read name size: the length of the read seq: read sequence (see also below) seq[MAX_READLEN-1]: single end mapping quality (equals to map_qual if not paired) map_qual: the final mapping quality alt_qual: the lower quality of the two ends (equals to map_qual if not paired) flag: status of the pair dist: offset of the mate (zero if not paired) info1: mismatches in the 24bp (higher 4 bits) and mismatches (lower 4 bits) info2: sum of errors of the best hit c[2]: count of all 0- and 1-mismatch hits on the reference */ typedef struct { bit8_t seq[MAX_READLEN]; /* the last base is the single-end mapping quality. */ bit8_t size, map_qual, info1, info2, c[2], flag, alt_qual; bit32_t seqid, pos; int dist; char name[MAX_NAMELEN]; } maqmap1_t; typedef struct { int format, n_ref; char **ref_name; bit64_t n_mapped_reads; maqmap1_t *mapped_reads; } maqmap_t; #define maqmap_read1(fp, m1) gzread((fp), (m1), sizeof(maqmap1_t)) #ifdef __cplusplus extern "C" { #endif maqmap_t *maq_new_maqmap(); void maq_delete_maqmap(maqmap_t *mm); void maqmap_write_header(gzFile fp, const maqmap_t *mm); maqmap_t *maqmap_read_header(gzFile fp); #ifdef __cplusplus } #endif #endif spp/src/spp_init.c0000644000176200001440000000633413471603636013653 0ustar liggesusers#include #include #include // for NULL #include /* The following symbols/expressions for .NAME have been omitted callfunction Most likely possible values need to be added below. */ /* FIXME: Check these declarations against the C/Fortran source code. */ /* .C calls */ extern void cdensum(void *, void *, void *, void *, void *, void *, void *, void *, void *); /* .Call calls */ extern SEXP ccdensum(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP cwindow_n_tags(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP cwindow_n_tags_around(SEXP, SEXP, SEXP, SEXP); extern SEXP find_poisson_enrichment_clusters(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP get_relative_coordinates(SEXP, SEXP, SEXP); extern SEXP spp_lwcc(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP points_withinC(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP read_arachne(SEXP); extern SEXP read_arachne_long(SEXP); extern SEXP read_binmaqmap(SEXP, SEXP); extern SEXP read_bowtie(SEXP, SEXP); extern SEXP read_helicostabf(SEXP, SEXP); extern SEXP read_maqmap(SEXP, SEXP); extern SEXP read_meland(SEXP, SEXP); extern SEXP read_tagalign(SEXP); extern SEXP region_intersection(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP spp_wtd(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); static const R_CMethodDef CEntries[] = { {"cdensum", (DL_FUNC) &cdensum, 9}, {NULL, NULL, 0} }; static const R_CallMethodDef CallEntries[] = { {"ccdensum", (DL_FUNC) &ccdensum, 7}, {"cwindow_n_tags", (DL_FUNC) &cwindow_n_tags, 5}, {"cwindow_n_tags_around", (DL_FUNC) &cwindow_n_tags_around, 4}, {"find_poisson_enrichment_clusters", (DL_FUNC) &find_poisson_enrichment_clusters, 8}, {"get_relative_coordinates", (DL_FUNC) &get_relative_coordinates, 3}, {"spp_lwcc", (DL_FUNC) &spp_lwcc, 14}, {"points_withinC", (DL_FUNC) &points_withinC, 6}, {"read_arachne", (DL_FUNC) &read_arachne, 1}, {"read_arachne_long", (DL_FUNC) &read_arachne_long, 1}, {"read_binmaqmap", (DL_FUNC) &read_binmaqmap, 2}, {"read_bowtie", (DL_FUNC) &read_bowtie, 2}, {"read_helicostabf", (DL_FUNC) &read_helicostabf, 2}, {"read_maqmap", (DL_FUNC) &read_maqmap, 2}, {"read_meland", (DL_FUNC) &read_meland, 2}, {"read_tagalign", (DL_FUNC) &read_tagalign, 1}, {"region_intersection", (DL_FUNC) ®ion_intersection, 6}, {"spp_wtd", (DL_FUNC) &spp_wtd, 15}, {NULL, NULL, 0} }; void R_init_spp(DllInfo *dll) { R_registerRoutines(dll, CEntries, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } spp/NAMESPACE0000644000176200001440000000442613471602450012303 0ustar liggesusersimport(caTools, parallel, graphics, stats, Rcpp) importFrom("utils", "installed.packages", "write.table") #export(t.find.min.saturated.enr) #export(points_withinFunction) export(points_within) #export(t.plotavcc) #export(t.plotavccl) #export(t.plotcc) #export(t.plotchrcc) #export(t.plotchrccl) #export(t.precalculate.ref.peak.agreement) #export(window.chr.call.mirror.binding) #export(window.call.mirror.binding) #export(window.tag.count) #export(window.tag.count.around) #export(lwcc) #export(wtd) export(read.eland.tags) export(read.tagalign.tags) export(read.short.arachne.tags) export(read.arachne.tags) export(read.bowtie.tags) export(read.bam.tags) export(read.helicos.tags) export(read.maqmap.tags) export(read.bin.maqmap.tags) export(read.meland.tags) # export(remove.tag.anomalies) export(remove.local.tag.anomalies) export(get.binding.characteristics) export(select.informative.tags) export(find.binding.positions) export(get.smoothed.tag.density) export(get.smoothed.enrichment.mle) export(get.smoothed.enrichment.mle2) export(get.conservative.fold.enrichment.profile) export(get.conservative.fold.enrichment.profile2) export(writewig) export(get.mser) export(get.mser.interpolation) export(output.binding.results) # export(tag.scc) # export(show.scc) # export(find.significantly.enriched.regions) # export(tag.enrichment.clusters) # export(lwcc.prediction) # export(tag.wtd) # export(tag.block.shuffle) # export(tag.lwcc) # export(generate.randomized.data) # export(determine.lwcc.threshold) # export(get.relative.coordinates) # export(get.eval.fdr.vectors) # export(filter.binding.sites) # export(get.subsample.chain.calls) # export(mser.chain.interpolation) # export(chain.to.reference.comparison) # export(calculate.enrichment.estimates) # export(dataset.density.ratio) # export(dataset.density.size) # export(old.dataset.density.ratio) export(densum) # export(filter.singular.positions.by.local.density) # export(mbs.enrichment.bounds) # export(binomial.proportion.ratio.bounds) # export(write.probe.wig) # export(regionset.intersection.c) export(add.broad.peak.regions) export(write.narrowpeak.binding) export(get.broad.enrichment.clusters) export(write.broadpeak.info) # export(get.clusters2) useDynLib(spp,.registration = TRUE) spp/R/0000755000176200001440000000000013471426636011271 5ustar liggesusersspp/R/zroutines.R0000644000176200001440000030643213471604077013463 0ustar liggesusers#library(caTools) #dyn.load("src/bed2vector.so"); #dyn.load("src/wdl.so"); #dyn.load("src/peaks.so"); #dyn.load("src/cdensum.so"); # -------- ROUTINES FOR READING IN THE DATA FILES ------------ # fix.chromosome.names : remove ".fa" suffix from match sequence names read.eland.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T,max.eland.tag.length=-1,extended=F,multi=F) { if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); }; storage.mode(max.eland.tag.length) <- "integer"; # callfunction <- "read_eland"; # if(extended) { callfunction <- "read_eland_extended"; }; # if(multi) { callfunction <- "read_eland_multi"; }; # tl <- lapply(.Call(callfunction,path.expand(filename),rtn,max.eland.tag.length),function(d) { # xo <- order(abs(d$t)); # d$t <- d$t[xo]; # d$n <- d$n[xo]; # if(read.tag.names) { # d$s <- d$s[xo]; # } # return(d); # }); ##substitute of the commented code-junk above, to address a warning during compilation if (multi) { tl <- lapply(.Call("read_eland_multi",path.expand(filename),rtn,max.eland.tag.length),function(d) { xo <- order(abs(d$t)); d$t <- d$t[xo]; d$n <- d$n[xo]; if(read.tag.names) { d$s <- d$s[xo]; } return(d); }); }else if (extended) { tl <- lapply(.Call("read_eland_extended",path.expand(filename),rtn,max.eland.tag.length),function(d) { xo <- order(abs(d$t)); d$t <- d$t[xo]; d$n <- d$n[xo]; if(read.tag.names) { d$s <- d$s[xo]; } return(d); }); }else{ tl <- lapply(.Call("read_eland",path.expand(filename),rtn,max.eland.tag.length),function(d) { xo <- order(abs(d$t)); d$t <- d$t[xo]; d$n <- d$n[xo]; if(read.tag.names) { d$s <- d$s[xo]; } return(d); }); }; if(fix.chromosome.names) { # remove ".fa" names(tl) <- gsub("\\.fa","",names(tl)) } # separate tags and quality if(read.tag.names) { return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),names=lapply(tl,function(d) d$s))); } else { return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n))); } } read.tagalign.tags <- function(filename,fix.chromosome.names=T,fix.quality=T) { tl <- lapply(.Call("read_tagalign",path.expand(filename)),function(d) { xo <- order(abs(d$t)); d$t <- d$t[xo]; d$n <- d$n[xo]; if(fix.quality) { if (min(d$n)<0.5){ d$n = ceiling(1000/4^d$n); } break.vals <- unique(sort(c(0,unique(d$n)))); d$n <- length(break.vals)-1-cut(d$n,breaks=break.vals,labels=F); } return(d); }); if(fix.chromosome.names) { # remove ".fa" names(tl) <- gsub("\\.fa","",names(tl)) } # separate tags and quality return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n))); } read.short.arachne.tags <- function(filename,fix.chromosome.names=F) { tl <- lapply(.Call("read_arachne",path.expand(filename)),function(d) { xo <- order(abs(d$t)); d$t <- d$t[xo]; d$n <- d$n[xo]; return(d); }); if(fix.chromosome.names) { # remove ".fa" names(tl) <- gsub("\\.fa","",names(tl)) } # separate tags and quality return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n))); } read.arachne.tags <- function(filename,fix.chromosome.names=F) { tl <- lapply(.Call("read_arachne_long",path.expand(filename)),function(d) { xo <- order(abs(d$t)); d$t <- d$t[xo]; d$n <- d$n[xo]; d$l <- d$l[xo]; return(d); }); if(fix.chromosome.names) { # remove ".fa" names(tl) <- gsub("\\.fa","",names(tl)) } # separate tags and quality return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),length=lapply(tl,function(d) d$l))); } read.bowtie.tags <- function(filename,read.tag.names=F,fix.chromosome.names=F) { if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); }; tl <- lapply(.Call("read_bowtie",path.expand(filename),rtn),function(d) { xo <- order(abs(d$t)); d$t <- d$t[xo]; d$n <- d$n[xo]; if(read.tag.names) { d$s <- d$s[xo]; } return(d); }); if(fix.chromosome.names) { # remove ".fa" names(tl) <- gsub("\\.fa","",names(tl)) } # separate tags and quality if(read.tag.names) { return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),names=lapply(tl,function(d) d$s))); } else { return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n))); } } read.bam.tags <- function(filename,read.tag.names=F,fix.chromosome.names=F) { #require(Rsamtools) if(!is.element("Rsamtools", installed.packages()[, 1])) { stop("Rsamtools Bioconductor package is now required for BAM file support. Please install") } ww <- c("flag","rname","pos","isize","strand","mapq","qwidth"); if(read.tag.names) { ww <- c(ww,"qname") }; bam <- Rsamtools::scanBam(filename,param=Rsamtools::ScanBamParam(what=ww,flag=Rsamtools::scanBamFlag(isUnmappedQuery=FALSE)))[[1]]; if(is.null(bam$pos) || length(bam$pos)==0) { return(list(tags=c(),quality=c())) } strm <- as.integer(bam$strand=="+") if(any(bitwAnd(bam$flag,0x1))) { # paired-end data # use only positive strand mappings posvi <- which(strm==1); rl <- list(tags=tapply(posvi,bam$rname[posvi],function(ii) as.numeric(bam$pos[ii])), flen=tapply(posvi,bam$rname[posvi],function(ii) as.numeric(abs(bam$isize[ii])))) # alternatively, handle reads with NA isize (unpaired?) just like single-ended reads #pos <- tapply(1:length(bam$pos),bam$rname,function(ii) ifelse(is.na(bam$isize[ii]), bam$pos[ii]*strm[ii] - (1-strm[ii])*(bam$pos[ii]+bam$qwidth[ii]), strm[ii]*bam$pos[ii] - (1-strm[ii])*(bam$pos[ii]+bam$isize[ii]))) } else { rl <- list(tags=tapply(1:length(bam$pos),bam$rname,function(ii) bam$pos[ii]*strm[ii] - (1-strm[ii])*(bam$pos[ii]+bam$qwidth[ii]))) } rl <- c(rl,list(quality=tapply(1:length(bam$pos),bam$rname,function(ii) bam$mapq[ii]))) if(read.tag.names) { rl <- c(rl,list(names=tapply(1:length(bam$pos),bam$rname,function(ii) bam$qname[ii]))) } if(fix.chromosome.names) { # remove ".fa" names(rl) <- gsub("\\.fa","",names(rl)) } return(rl) } read.helicos.tags <- function(filename,read.tag.names=F,fix.chromosome.names=F,include.length.info=T) { if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); }; tl <- lapply(.Call("read_helicostabf",path.expand(filename),rtn),function(d) { xo <- order(abs(d$t)); d$t <- d$t[xo]; d$n <- d$n[xo]; d$l <- d$l[xo]; if(read.tag.names) { d$s <- d$s[xo]; } return(d); }); if(fix.chromosome.names) { # remove ".fa" names(tl) <- gsub("\\.fa","",names(tl)) } # separate tags and quality if(read.tag.names) { return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),length=lapply(tl,function(d) d$l),names=lapply(tl,function(d) d$s))); } else { return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),length=lapply(tl,function(d) d$l))); } } read.maqmap.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T) { if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); }; tl <- lapply(.Call("read_maqmap",path.expand(filename),rtn),function(d) { xo <- order(abs(d$t)); d$t <- d$t[xo]; d$n <- d$n[xo]; if(read.tag.names) { d$s <- d$s[xo]; } return(d); }); if(fix.chromosome.names) { # remove ".fa" names(tl) <- gsub("\\.fa","",names(tl)) } # separate tags and quality if(read.tag.names) { return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),names=lapply(tl,function(d) d$s))); } else { return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n))); } } read.bin.maqmap.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T) { if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); }; tl <- lapply(.Call("read_binmaqmap",path.expand(filename),rtn),function(d) { xo <- order(abs(d$t)); d$t <- d$t[xo]; d$n <- d$n[xo]; if(read.tag.names) { d$s <- d$s[xo]; } return(d); }); if(fix.chromosome.names) { # remove ".fa" names(tl) <- gsub("\\.fa","",names(tl)) } # separate tags and quality if(read.tag.names) { return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n),names=lapply(tl,function(d) d$s))); } else { return(list(tags=lapply(tl,function(d) d$t),quality=lapply(tl,function(d) d$n))); } } # read in tags from an extended eland format with match length information read.meland.tags <- function(filename,read.tag.names=F,fix.chromosome.names=T) { if(read.tag.names) { rtn <- as.integer(1); } else { rtn <- as.integer(0); }; tl <- lapply(.Call("read_meland",path.expand(filename),rtn),function(d) { xo <- order(abs(d$t)); d$t <- d$t[xo]; d$n <- d$n[xo]; d$l <- d$l[xo]; if(read.tag.names) { d$s <- d$s[xo]; } return(d); }); if(fix.chromosome.names) { # remove ".fa" names(tl) <- gsub("\\.fa","",names(tl)) } # separate tags and quality chrl <- names(tl); names(chrl) <- chrl; # reformulate quality scores into monotonic integers ml <- max(unlist(lapply(tl,function(d) max(d$l)))); qual <- lapply(chrl,function(chr) (ml-tl[[chr]]$l)+tl[[chr]]$n/10); if(read.tag.names) { return(list(tags=lapply(tl,function(d) d$t),quality=qual,names=lapply(tl,function(d) d$s))); } else { return(list(tags=lapply(tl,function(d) d$t),quality=qual)); } } # -------- ROUTINES FOR ASSESSING BINDING PATTERN AND SELECTING INFORMATIVE TAGS ------------ # removes tag positions that have anomalously high counts on both strands # z - z-score used to determine anomalous bins # zo - z used to filter out one-strand matches # trim.fraction - fraction of top bins to discard when calculating overall background density # var.base - minimal base variability of tag counts (for processing of flattened datasets with close to 0 variance) remove.tag.anomalies <- function(data, bin=1,trim.fraction=1e-3,z=5,zo=3*z,var.base=0.1) { t.remove.tag.anomalies <- function(tv,bin=1,trim.fraction=1e-3,z=5,zo=3*z,return.indecies=F) { tt <- table(floor(tv/bin)); # trim value stt <- sort(as.numeric(tt)); stt <- stt[1:(length(stt)*(1-trim.fraction))]; mtc <- mean(stt); tcd <- sqrt(var(stt)+var.base); thr <- max(1,ceiling(mtc+z*tcd)); thr.o <- max(1,ceiling(mtc+zo*tcd)); # filter tt tt <- tt[tt>thr] # get + and - tags tp <- as.numeric(names(tt)); pti <- tp>0; it <- intersect(tp[pti],(-1)*tp[!pti]); # add one-strand matches it <- unique(c(it,tp[tt>thr.o])); sit <- c(it,(-1)*it); if(bin>1) { sit <- sit*bin; sit <- c(sit,unlist(lapply(1:bin,function(i) sit+i))) } if(return.indecies) { return(!tv %in% sit); } else { return(tv[!tv %in% sit]); } } vil <- lapply(data$tags,t.remove.tag.anomalies,return.indecies=T,bin=bin,trim.fraction=trim.fraction,z=z,zo=zo); chrl <- names(data$tags); names(chrl) <- chrl; data$tags <- lapply(chrl,function(chr) data$tags[[chr]][vil[[chr]]]); # count tags to remove empty chromosomes nt <- unlist(lapply(data$tags,length)); if(any(nt==0)) { data$tags <- data$tags[nt!=0] } if(!is.null(data$quality)) { data$quality <- lapply(chrl,function(chr) data$quality[[chr]][vil[[chr]]]); data$quality <- data$quality[nt!=0]; } if(!is.null(data$names)) { data$names <- lapply(chrl,function(chr) data$names[[chr]][vil[[chr]]]); data$names <- data$names[nt!=0]; } return(data); } # caps or removes tag positions that are significantly higher than local background remove.local.tag.anomalies <- function(tags,window.size=200,eliminate.fold=10,cap.fold=4,z.threshold=3) { lapply(tags,filter.singular.positions.by.local.density,window.size=2e2,eliminate.fold=10,cap.fold=4,z.threshold=3); } # assess strand cross-correlation, determine peak position, determine appropriate window size # for binding detection. get.binding.characteristics <- function(data,srange=c(50,500),bin=5,cluster=NULL,debug=F,min.tag.count=1e3,acceptance.z.score=3,remove.tag.anomalies=T,anomalies.z=5,accept.all.tags=F) { if(remove.tag.anomalies) { data <- remove.tag.anomalies(data,z=anomalies.z); } # take highest quality tag bin if(!is.null(data$quality) && !accept.all.tags) { min.bin <- min(unlist(lapply(data$quality,min))) chrl <- names(data$tags); names(chrl) <- chrl; otl <- lapply(chrl,function(chr) data$tags[[chr]][data$quality[[chr]]==min.bin]); } else { otl <- data$tags; } # remove empty chromosomes otl <- otl[unlist(lapply(otl,length))!=0]; # calculate strand scc if(!is.null(cluster)) { cc <- clusterApplyLB(cluster,otl,tag.scc,srange=srange,bin=bin); names(cc) <- names(otl); } else { cc <- lapply(otl,tag.scc,srange=srange,bin=bin); } ccl<-list(sample=cc); ccl.av <- lapply(names(ccl),t.plotavcc,type='l',ccl=ccl,return.ac=T,ttl=list(sample=otl),plot=F)[[1]] ccl.av <- data.frame(x=as.numeric(names(ccl.av)),y=as.numeric(ccl.av)); # find peak pi <- which.max(ccl.av$y); # determine width at third-height th <- (ccl.av$y[pi]-ccl.av$y[length(ccl.av$y)])/3+ccl.av$y[length(ccl.av$y)] whs <- max(ccl.av$x[ccl.av$y>=th]); # if (! is.integer(whs)) { # Anshul: added this to avoid situations where whs ends up being -Inf if (!is.finite(whs)) { # fixed to avoid a TRUE with numeric values whs <- ccl.av$x[ min(c(2*pi,length(ccl.av$y))) ] } # determine acceptance of different quality bins # calculates tag scc for the best tags, and combinations of best tag category with every other category # for subsequent selection of acceptable categories scc.acceptance.calc <- function() { qr <- range(unlist(lapply(data$quality,range))) # start with best tags # determine half-width for scc calculations pi <- which.max(ccl.av$y); # determine width at half-height th <- (ccl.av$y[pi]-ccl.av$y[length(ccl.av$y)])/2+ccl.av$y[length(ccl.av$y)] lwhs <- max(ccl.av$x[ccl.av$y>=th])-ccl.av$x[pi]; lwhs <- max(c(20,bin*10,lwhs)); srange <- ccl.av$x[pi]+c(-lwhs,lwhs) # calculate chromosome-average scc t.scc <- function(tags) { if(is.null(cluster)) { cc <- lapply(tags,tag.scc,srange=srange,bin=bin); } else { cc <- clusterApplyLB(cluster,tags,tag.scc,srange=srange,bin=bin); names(cc) <- names(tags); } return(t.plotavcc(1,type='l',ccl=list(cc),ttl=list(tags),plot=F,return.ac=T)) } # returns info list for a given tag length (lv), mismatch count (nv) t.cat <- function(qual) { # construct tag set if(qual==qr[1]) { ts <- otl; } else { nts <- names(otl); names(nts) <- nts; # select tags at <- lapply(nts,function(chr) data$tags[[chr]][data$quality[[chr]]==qual]); ntags <- sum(unlist(lapply(at,length))); if(ntags1) { mle.columns <- grep("enr.mle",colnames(prd$npl[[1]])); if(length(mle.columns)>1) { prd$npl <- lapply(prd$npl,function(d) d[apply(d[,mle.columns],1,function(x) all(x>min.mle.threshold)),]) } } } } prd$whs <- whs; return(prd); } # -------- ROUTINES FOR WRITING OUT TAG DENSITY AND ENRICHMENT PROFILES ------------ # calculate smoothed tag density, optionally subtracting the background get.smoothed.tag.density <- function(signal.tags,control.tags=NULL,bandwidth=150,bg.weight=NULL,tag.shift=146/2,step=round(bandwidth/3),background.density.scaling=T,rngl=NULL,scale.by.dataset.size=F) { chrl <- names(signal.tags); if(!is.null(rngl)) { chrl <- names(rngl); } names(chrl) <- chrl; if(!is.null(control.tags) && is.null(bg.weight)) { bg.weight <- dataset.density.ratio(signal.tags,control.tags,background.density.scaling=background.density.scaling); } if(scale.by.dataset.size) { den.scaling <- 1/(dataset.density.size(signal.tags,background.density.scaling=background.density.scaling)/1e6); } else { den.scaling <- 1; } lapply(chrl,function(chr) { ad <- abs(signal.tags[[chr]]+tag.shift); rng <- NULL; if(!is.null(rngl)) { rng <- rngl[[chr]]; } if(is.null(rng)) { rng <- range(ad); } ds <- densum(ad,bw=bandwidth,from=rng[1],to=rng[2],return.x=T,step=step); if(!is.null(control.tags)) { if(!is.null(control.tags[[chr]])) { bsd <- densum(abs(control.tags[[chr]]+tag.shift),bw=bandwidth,from=rng[1],to=rng[2],return.x=F,step=step); ds$y <- ds$y-bsd*bg.weight; } } return(data.frame(x=seq(ds$x[1],ds$x[2],by=step),y=den.scaling*ds$y)) }) } # get smoothed maximum likelihood estimate of the log2 signal to control enrichment ratio get.smoothed.enrichment.mle <- function(signal.tags, control.tags, tag.shift=146/2, background.density.scaling=F, pseudocount=1,bg.weight=NULL, rngl=NULL, chrl=NULL, ... ) { # determine common range if(is.null(chrl)) { chrl <- intersect(names(signal.tags),names(control.tags)); names(chrl) <- chrl; } if(is.null(rngl)) { rngl <- lapply(chrl,function(chr) range(c(range(abs(signal.tags[[chr]]+tag.shift)),range(abs(control.tags[[chr]]+tag.shift))))) } else { chrl <- names(rngl); names(chrl) <- chrl; } ssd <- get.smoothed.tag.density(signal.tags, rngl=rngl, ..., scale.by.dataset.size=F) csd <- get.smoothed.tag.density(control.tags, rngl=rngl, ..., scale.by.dataset.size=F) if(is.null(bg.weight)) { bg.weight <- dataset.density.ratio(signal.tags,control.tags,background.density.scaling=background.density.scaling); } cmle <- lapply(chrl,function(chr) { d <- ssd[[chr]]; d$y <- log2(d$y+pseudocount*bg.weight) - log2(csd[[chr]]$y+pseudocount) - log2(bg.weight); return(d); }) } # same as get.smoothed.enrichment.mle, but correcting for the backgroudnd (input) for each of the experiment # returns the ratio of enrichment1 to enrichment2 # params: # signal.tags1,control.tags1 - per-chromosome tag lists for IP and input in experiment 1 # signal.tags2,control.tags2 - per-chromosome tag lists for IP and input in experiment 2 # bg.weight1,bg.weight2 - optional explicit signal/control (IP/input) normalization factors for experiments 1 and 2 get.smoothed.enrichment.mle2 <- function(signal.tags1, control.tags1, signal.tags2,control.tags2, tag.shift=146/2, background.density.scaling=F, pseudocount=1,bg.weight1=NULL,bg.weight2=NULL, rngl=NULL, chrl=NULL, ... ) { # determine common range if(is.null(chrl)) { chrl <- intersect(names(signal.tags1),names(signal.tags2)); names(chrl) <- chrl; } if(is.null(rngl)) { rngl <- lapply(chrl,function(chr) range(c(range(abs(signal.tags1[[chr]]+tag.shift)),range(abs(signal.tags2[[chr]]+tag.shift))))) } else { chrl <- names(rngl); names(chrl) <- chrl; } ssd1 <- get.smoothed.tag.density(signal.tags1, rngl=rngl, ..., scale.by.dataset.size=F) ssd2 <- get.smoothed.tag.density(signal.tags2, rngl=rngl, ..., scale.by.dataset.size=F) csd1 <- get.smoothed.tag.density(control.tags1, rngl=rngl, ..., scale.by.dataset.size=F) csd2 <- get.smoothed.tag.density(control.tags2, rngl=rngl, ..., scale.by.dataset.size=F) if(is.null(bg.weight1)) { bg.weight1 <- dataset.density.ratio(signal.tags1,control.tags1,background.density.scaling=background.density.scaling); } if(is.null(bg.weight2)) { bg.weight2 <- dataset.density.ratio(signal.tags2,control.tags2,background.density.scaling=background.density.scaling); } cmle <- lapply(chrl,function(chr) { d <- ssd1[[chr]]; d$y <- log2(ssd1[[chr]]$y+pseudocount*bg.weight1) - log2(csd1[[chr]]$y+pseudocount) - log2(bg.weight1) - log2(ssd2[[chr]]$y+pseudocount*bg.weight2) + log2(csd2[[chr]]$y+pseudocount) + log2(bg.weight2); return(d); }) } # returns a conservative upper/lower bound profile (log2) given signal tag list, background tag list and window scales get.conservative.fold.enrichment.profile <- function(ftl,btl,fws,bwsl=c(1,5,25,50)*fws,step=50,tag.shift=146/2,alpha=0.05,use.most.informative.scale=F,quick.calculation=T,background.density.scaling=T,bg.weight=NULL,posl=NULL,return.mle=F) { # include only chromosomes with more than 2 reads ftl <- ftl[unlist(lapply(ftl,length))>2] chrl <- names(ftl); names(chrl) <- chrl; if(!is.null(posl)) { chrl <- chrl[chrl %in% names(posl)]; } # calculate background tag ratio if(is.null(bg.weight)) { bg.weight <- dataset.density.ratio(ftl,btl,background.density.scaling=background.density.scaling); } lapply(chrl,function(chr) { if(is.null(btl[[chr]])) { bt <- c(); } else { bt <- abs(btl[[chr]]+tag.shift); } if(is.null(posl)) { x <- mbs.enrichment.bounds(abs(ftl[[chr]]+tag.shift),bt,fws=fws,bwsl=bwsl,step=step,calculate.upper.bound=T,bg.weight=bg.weight,use.most.informative.scale=use.most.informative.scale,quick.calculation=quick.calculation,alpha=alpha); } else { x <- mbs.enrichment.bounds(abs(ftl[[chr]]+tag.shift),bt,fws=fws,bwsl=bwsl,step=step,calculate.upper.bound=T,bg.weight=bg.weight,use.most.informative.scale=use.most.informative.scale,quick.calculation=quick.calculation,alpha=alpha,pos=posl[[chr]]); } # compose profile showing lower bound for enriched, upper bound for depleted regions ps <- rep(1,length(x$mle)); vi <- which(!is.na(x$lb) & x$lb>1); ps[vi] <- x$lb[vi]; vi <- which(!is.na(x$ub) & x$ub<1); ps[vi] <- x$ub[vi]; ps <- log2(ps); if(is.null(posl)) { if(return.mle) { return(data.frame(x=seq(x$x$s,x$x$e,by=x$x$step),y=ps,mle=log2(x$mle),lb=log2(x$lb),ub=log2(x$ub))); } else { return(data.frame(x=seq(x$x$s,x$x$e,by=x$x$step),y=ps)); } } else { if(return.mle) { return(data.frame(x=posl[[chr]],y=ps,mle=log2(x$mle),lb=log2(x$lb),ub=log2(x$ub))); } else { return(data.frame(x=posl[[chr]],y=ps)); } } }) } # same as above, controlling for input, and supporting only a single background scale get.conservative.fold.enrichment.profile2 <- function(ftl1,ftl2,btl1,btl2,fws,bws=1*fws,step=50,tag.shift=146/2,alpha=0.05,background.density.scaling=T,bg.weight1=NULL,bg.weight2=NULL,posl=NULL,return.mle=F) { # include only chromosomes with more than 2 reads ftl1 <- ftl1[unlist(lapply(ftl1,length))>2] chrl <- names(ftl1); names(chrl) <- chrl; if(!is.null(posl)) { chrl <- chrl[chrl %in% names(posl)]; } # calculate background tag ratio if(is.null(bg.weight1)) { bg.weight1 <- dataset.density.ratio(ftl1,btl1,background.density.scaling=background.density.scaling); } if(is.null(bg.weight2)) { bg.weight2 <- dataset.density.ratio(ftl2,btl2,background.density.scaling=background.density.scaling); } lapply(chrl,function(chr) { x <- binomial.proportion.ratio.bounds(abs(ftl1[[chr]]+tag.shift),abs(btl1[[chr]]+tag.shift),abs(ftl2[[chr]]+tag.shift),abs(btl2[[chr]]+tag.shift),fws=fws,bws=bws,step=step,bg.weight1=bg.weight1,bg.weight2=bg.weight2,alpha=alpha,pos=if(is.null(posl)) { NULL; } else { posl[[chr]] }); # compose profile showing lower bound for enriched, upper bound for depleted regions ps <- rep(0,length(x$mle)); vi <- which(!is.na(x$lb) & x$lb>0); ps[vi] <- x$lb[vi]; vi <- which(!is.na(x$ub) & x$ub<0); ps[vi] <- x$ub[vi]; if(is.null(posl)) { if(return.mle) { return(data.frame(x=x$x,y=ps,mle=x$mle,lb=x$lb,ub=x$ub)); } else { return(data.frame(x=x$x,y=ps)); } } else { if(return.mle) { return(data.frame(x=posl[[chr]],y=ps,mle=x$mle,lb=x$lb,ub=x$ub)); } else { return(data.frame(x=posl[[chr]],y=ps)); } } }) } # write a per-chromosome $x/$y data structure into a wig file writewig <- function(dat,fname,feature,threshold=5,zip=F) { chrl <- names(dat); names(chrl) <- chrl; invisible(lapply(chrl,function(chr) { bdiff <- dat[[chr]]; ind <- seq(1,length(bdiff$x)); ind <- ind[!is.na(bdiff$y[ind])]; header <- chr==chrl[1]; write.probe.wig(chr,bdiff$x[ind],bdiff$y[ind],fname,append=!header,feature=feature,header=header); })) if(zip) { zf <- paste(fname,"zip",sep="."); system(paste("zip \"",zf,"\" \"",fname,"\"",sep="")); system(paste("rm \"",fname,"\"",sep="")); return(zf); } else { return(fname); } } # -------- ROUTINES FOR ANALYZING SATURATION PROPERTIES ------------ # PUBLIC # calculate minimal saturation enrichment ratios (MSER) get.mser <- function(signal.data,control.data,n.chains=5,step.size=1e5, chains=NULL, cluster=NULL, test.agreement=0.99, return.chains=F, enrichment.background.scales=c(1), n.steps=1, ...) { if(is.null(chains)) { ci <- c(1:n.chains); names(ci) <- ci; if(is.null(cluster)) { chains <- lapply(ci,get.subsample.chain.calls,signal.data=signal.data,control.data=control.data,n.steps=n.steps,step.size=step.size,subsample.control=F, enrichment.background.scales=enrichment.background.scales, ...); } else { chains <- clusterApplyLB(cluster,ci,get.subsample.chain.calls,signal.data=signal.data,control.data=control.data,n.steps=n.steps,step.size=step.size,subsample.control=F, enrichment.background.scales=enrichment.background.scales, ...); names(chains) <- ci; } } cvl <- mser.chain.interpolation(chains=chains,enrichment.background.scales=enrichment.background.scales,test.agreement=test.agreement,return.lists=F); if(n.steps>1) { msers <- cvl; } else { msers <- unlist(lapply(cvl,function(d) d$me)) } if(return.chains) { return(list(mser=msers,chains=chains)); } else { return(msers); } } # PUBLIC # interpolate MSER dependency on tag counts get.mser.interpolation <- function(signal.data,control.data,target.fold.enrichment=5,n.chains=10,n.steps=6,step.size=1e5, chains=NULL, test.agreement=0.99, return.chains=F, enrichment.background.scales=c(1), excluded.steps=c(seq(2,n.steps-2)), ...) { msers <- get.mser(signal.data,control.data,n.chains=n.chains,n.steps=n.steps,step.size=step.size,chains=chains,test.agrement=test.agreement,return.chains=T,enrichment.background.scales=enrichment.background.scales,excluded.steps=excluded.steps, ...); # adjust sizes in case a subset of chromosomes was used mser <- mser.chain.interpolation(chains=msers$chains,enrichment.background.scales=enrichment.background.scales,test.agreement=test.agreement,return.lists=T); sr <- sum(unlist(lapply(signal.data,length)))/mser[[1]][[1]]$n[1]; # Subsampling each chain requires removing a fraction of each chromosome's # tag list. To get the exact step.size, this often leaves chromosomes with # a non-integer number of tags. The non-integer values are floored, so each # chr can contribute at most 0.999.. <= 1 error to the step.size. floor.error <- length(msers$chains[[1]][[1]]$npl) intpn <- lapply(mser,function(ms) { lmvo <- do.call(rbind,ms) lmvo$n <- lmvo$n*sr; # Don't select rows corresponding to excluded.steps # Keep in mind that nd values are negative. lmvo <- lmvo[lmvo$nd <= (lmvo$nd[1] + floor.error) & lmvo$nd >= (lmvo$nd[1] - floor.error),]; lmvo <- na.omit(lmvo); if(any(lmvo$me==1)) { return(list(prediction=NA)); } lmvo$n <- log10(lmvo$n); lmvo$me <- log10(lmvo$me-1) # remove non-standard steps emvf <- lm(me ~ n,data=lmvo); tfe <- (log10(target.fold.enrichment-1)-coef(emvf)[[1]])/coef(emvf)[[2]]; tfen <- 10^tfe; return(list(prediction=tfen,log10.fit=emvf)); }) if(return.chains) { return(list(interpolation=intpn,chains=msers$chains)) } else { return(intpn); } return(msers); } # output binding detection results to a text file # the file will contain a table with each row corresponding # to a detected position, with the following columns: # chr - chromosome or target sequence # pos - position of detected binding site on the chromosome/sequence # score - a score reflecting magnitude of the binding # Evalue - E-value corresponding to the peak magnitude # FDR - FDR corresponding to the peak magnitude # enrichment.lb - lower bound of the fold-enrichment ratio # enrichment.mle - maximum likelihood estimate of the fold-enrichment ratio output.binding.results <- function(results,filename) { write(file=filename,"chr\tpos\tscore\tEvalue\tFDR\tenrichment.lb\tenrichment.mle",append=F); chrl <- names(results$npl); names(chrl) <- chrl; x <- lapply(chrl,function(chr) { d <- results$npl[[chr]]; if(dim(d)[1]>0) { if(results$thr$type=="topN") { od <- cbind(rep(chr,dim(d)[1]),subset(d,select=c("x","y","enr","enr.mle"))) } else { od <- cbind(rep(chr,dim(d)[1]),subset(d,select=c("x","y","evalue","fdr","enr","enr.mle"))) } write.table(od,file=filename,col.names=F,row.names=F,sep="\t",append=T,quote=F) } }) } # -------- LOW-LEVEL ROUTINES ------------ # calculates tag strand cross-correlation for a range of shifts (on positive strand) tag.scc <- function(tags,srange=c(50,250),bin=1,tt=NULL,llim=10) { if(is.null(tt)) { tt <- table(sign(tags)*as.integer(floor(abs(tags)/bin+0.5))); } if(!is.null(llim)) { l <- mean(tt); tt <- tt[tt0]<-0; pti <- which(tc>0) nti <- which(tc<0); ptc <- tc[pti]; ntc <- (-1)*tc[nti]; ptv <- tt[pti]; ntv <- tt[nti]; trng <- range(c(range(ptc),range(ntc))) l <- diff(trng)+1; rm(tc,tt); mp <- sum(ptv)*bin/l; mn <- sum(ntv)*bin/l; ptv <- ptv-mp; ntv <- ntv-mn; ss <- sqrt((sum(ptv*ptv)+(l-length(ptv))*mp^2) * (sum(ntv*ntv)+(l-length(ntv))*mn^2)); t.cor <- function(s) { smi <- match(ptc+s,ntc); return((sum(ptv[!is.na(smi)]*ntv[na.omit(smi)]) - mn*sum(ptv[is.na(smi)]) - mp*sum(ntv[-na.omit(smi)]) + mp*mn*(l-length(ptv)-length(ntv)+length(which(!is.na(smi)))))/ss); } shifts <- floor(seq(srange[1],srange[2],by=bin)/bin+0.5); scc <- unlist(lapply(shifts,t.cor)); names(scc) <- shifts*bin; return(scc); } # plot tag cross-correlation t.plotcc <- function(ac, lab=c(10,5,7), ylab="correlation", xlab="lag", pch=19, grid.i=c(-5:5), grid.s=10, type='b', plot.grid=F, cols=c(1,2,4,"orange",8,"pink"), min.peak.x=NULL, xlim=NULL, plot.147=F, plot.max=T, rmw=1, rescale=F, legendx="right", ltys=rep(1,length(ac)), ...) { if(is.list(ac)) { cols <- cols[1:length(ac)]; if(!is.null(xlim)) { vx <- as.numeric(names(ac[[1]])); vx <- which(vx>=xlim[1] & vx<=xlim[2]); ac[[1]] <- (ac[[1]])[vx]; } else { xlim <- range(as.numeric(names(ac[[1]]))); } plot(as.numeric(names(ac[[1]])),runmean(ac[[1]],rmw),type=type,pch=pch,xlab=xlab,ylab=ylab,lab=lab, col=cols[1], xlim=xlim, lty=ltys[1], ...); if(length(ac)>1) { for(i in seq(2,length(ac))) { irng <- range(ac[[i]]); vx <- as.numeric(names(ac[[i]])); vx <- which(vx>=xlim[1] & vx<=xlim[2]); if(rescale) { lines(as.numeric(names(ac[[i]])[vx]),runmean((ac[[i]][vx]-irng[1])/diff(irng)*diff(range(ac[[1]]))+min(ac[[1]]),rmw),col=cols[i],lty=ltys[i]); } else { lines(as.numeric(names(ac[[i]]))[vx],runmean(ac[[i]][vx],rmw),col=cols[i],lty=ltys[i]); } } } if(is.null(min.peak.x)) { m <- as.numeric(names(ac[[1]])[which.max(ac[[1]])]); } else { sac <- (ac[[1]])[which(as.numeric(names(ac[[1]]))>min.peak.x)] m <- as.numeric(names(sac)[which.max(sac)]); } legend(x="topright",bty="n",legend=c(names(ac)),col=cols,lty=ltys) } else { if(!is.null(xlim)) { vx <- as.numeric(names(ac)); vx <- which(vx>=xlim[1] & vx<=xlim[2]); ac <- ac[vx]; } else { xlim <- range(as.numeric(names(ac))); } plot(names(ac),runmean(ac,rmw),type=type,pch=pch,xlab=xlab,ylab=ylab,lab=lab, xlim=xlim, ...); if(is.null(min.peak.x)) { m <- as.numeric(names(ac)[which.max(ac)]); } else { sac <- ac[which(names(ac)>min.peak.x)] m <- as.numeric(names(sac)[which.max(sac)]); } } if(plot.147) { abline(v=147,lty=2,col=8); } if(plot.grid) { abline(v=m+grid.i*grid.s,lty=3,col="pink"); } if(plot.max) { abline(v=m,lty=2,col=2); legend(x=legendx,bty="n",legend=c(paste("max at ",m,"bp",sep=""))); return(m); } } # plot chromosome-acerage cross-correlation t.plotavcc <- function(ci, main=paste(ci,"chromosome average"), ccl, return.ac=F, ttl, plot=T, ... ) { cc <- ccl[[ci]]; if(length(cc)==1) { return(cc[[1]]) }; if(length(cc)==0) { return(c()) }; ac <- do.call(rbind,cc); # omit NA chromosomes ina <- apply(ac,1,function(d) any(is.na(d))); tags <- ttl[[ci]]; avw <- unlist(lapply(tags,length)); avw <- avw/sum(avw); ac <- ac[!ina,]; avw <- avw[!ina]; ac <- apply(ac,2,function(x) sum(x*avw)); if(plot) { m <- t.plotcc(ac, main=main, ...); if(!return.ac) { return(m) } } if(return.ac) { return(ac) } } t.plotchrcc <- function(ci,ncol=4, ccl, ... ) { cc <- ccl[[ci]]; ac <- do.call(rbind,cc); par(mfrow = c(length(cc)/ncol,ncol), mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8) lapply(names(cc),function(ch) { t.plotcc(cc[[ch]],main=paste(ci,": chr",ch,sep=""), ...) }) } t.plotavccl <- function(ci, ccl, main=paste(ci,"chromosome average"), rtl, ... ) { #cc <- lapply(ccl[[ci]],function(x) { if(!is.null(x$M)) { x$M <- NULL;}; return(x); }); cc <- ccl[[ci]]; chrs <- names(cc[[1]]); names(chrs) <- chrs; acl <- lapply(cc,function(x) do.call(rbind,x)); tags <- rtl[[ci]][chrs]; avw <- unlist(lapply(tags,length)); avw <- avw/sum(avw); acl <- lapply(acl,function(ac) apply(ac,2,function(x) sum(x*avw))) t.plotcc(acl, main=main, ...); } t.plotchrccl <- function(ci,ccl,ncol=4, ... ) { cc <- ccl[[ci]]; par(mfrow = c(length(cc[[1]])/ncol,ncol), mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8) lapply(names(cc[[1]]),function(ch) { t.plotcc(lapply(cc,function(x) x[[ch]]),main=paste(ci,": chr",ch,sep=""), ...) }) } show.scc <- function(tl,srange,cluster=NULL) { if(!is.null(cluster)) { cc <- clusterApplyLB(cluster,tl,tag.scc,srange=srange); names(cc) <- names(tl); } else { cc <- lapply(tl,tag.scc,srange=srange); } par(mfrow = c(1,1), mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8); ccl<-list(sample=cc); ccl.av <- lapply(names(ccl),t.plotavcc,type='l',ccl=ccl,xlim=srange,return.ac=F,ttl=list(sample=tl),main="")[[1]] } # find regions of significant tag enrichment find.significantly.enriched.regions <- function(signal.data,control.data,window.size=500,multiplier=1,z.thr=3,mcs=0,debug=F,background.density.scaling=T,masking.window.size=window.size,poisson.z=0,poisson.ratio=4,either=F,tag.shift=146/2,bg.weight=NULL) { if(is.null(bg.weight)) { bg.weight <- dataset.density.ratio(signal.data,control.data,background.density.scaling=background.density.scaling); } if(debug) { cat("bg.weight=",bg.weight,"\n"); } chrl <- names(signal.data); names(chrl) <- chrl; tec <- lapply(chrl,function(chr) { d <- tag.enrichment.clusters(signal.data[[chr]],control.data[[chr]],bg.weight=bg.weight*multiplier,thr=z.thr,wsize=window.size,mcs=mcs,min.tag.count.z=poisson.z,min.tag.count.ratio=poisson.ratio,either=either,tag.shift=tag.shift); d$s <- d$s-masking.window.size/2; d$e <- d$e+masking.window.size/2; return(d); }) } # given tag position vectors, find contigs of significant enrichment of signal over background # thr - z score threshold # mcs - minimal cluster size # bg.weight - fraction by which background counts should be multipled # min.tag.count.z will impose a poisson constraint based on randomized signal in parallel of background constaint (0 - no constraint) tag.enrichment.clusters <- function(signal,background,wsize=200,thr=3,mcs=1,bg.weight=1,min.tag.count.z=0,tag.av.den=NULL,min.tag.count.thr=0,min.tag.count.ratio=4,either=F,tag.shift=146/2) { if(is.null(tag.av.den)) { tag.av.den <- length(signal)/diff(range(abs(signal))); } if(min.tag.count.z>0) { min.tag.count.thr <- qpois(pnorm(min.tag.count.z,lower.tail=F),min.tag.count.ratio*tag.av.den*wsize,lower.tail=F) } else { min.tag.count.thr <- 0; } #if(bg.weight!=1) { # background <- sample(background,length(background)*(bg.weight),replace=T); #} # make up combined position, flag vectors pv <- abs(c(signal,background)+tag.shift); fv <- c(rep(1,length(signal)),rep(0,length(background))); po <- order(pv); pv <- pv[po]; fv <- fv[po]; #thr <- pnorm(thr,lower.tail=F); storage.mode(wsize) <- storage.mode(mcs) <- storage.mode(fv) <- "integer"; storage.mode(thr) <- storage.mode(pv) <- "double"; storage.mode(bg.weight) <- "double"; storage.mode(min.tag.count.thr) <- "double"; either <- as.integer(either); storage.mode(either) <- "integer"; z <- .Call("find_poisson_enrichment_clusters",pv,fv,wsize,thr,mcs,bg.weight,min.tag.count.thr,either) return(z); } # estimates threshold, calculates predictions on complete data and randomized data # input: tvl # control - a list of control tag datasets # no randomization is done if control is supplied # return.rtp - return randomized tag peaks - do not fit thresholds or do actual predictions # topN - use min threshold to do a run, return topN peaks from entire genome # threshold - specify a user-defined threshold lwcc.prediction <- function(tvl,e.value=NULL, fdr=0.01, chrl=names(tvl), min.thr=0, n.randomizations=1, shuffle.window=1, debug=T, predict.on.random=F, shuffle.both.strands=T,strand.shuffle.only=F, return.rtp=F, control=NULL, print.level=0, threshold=NULL, topN=NULL, bg.tl=NULL, tec.filter=T, tec.window.size=1e3,tec.z=3, tec.masking.window.size=tec.window.size, tec.poisson.z=3,tec.poisson.ratio=4, bg.reverse=T, return.control.predictions=F, return.core.data=F, background.density.scaling=T, ... ) { control.predictions <- NULL; core.data <- list(); if(!is.null(bg.tl) && tec.filter) { if(debug) { cat("finding background exclusion regions ... "); } tec <- find.significantly.enriched.regions(bg.tl,tvl,window.size=tec.window.size,z.thr=tec.z,masking.window.size=tec.masking.window.size,poisson.z=tec.poisson.z,poisson.ratio=tec.poisson.ratio,background.density.scaling=background.density.scaling,either=T); if(return.core.data) { core.data <- c(core.data,list(tec=tec)); } if(debug) { cat("done\n"); } } if(is.null(threshold) && is.null(topN)) { # threshold determination is needed # generate control predictions if(!is.null(control)) { if(debug) { cat("determining peaks on provided",length(control),"control datasets:\n"); } if(!is.null(bg.tl)) { if(bg.reverse) { if(debug) { cat("using reversed signal for FDR calculations\n"); } rbg.tl <- tvl; } else { if(debug) { cat("generating randomized (within chromosome) background ... "); } rbg.tl <- lapply(bg.tl,function(d) { if(length(d)<2) { return(d); } rng <- range(abs(d)); rd <- round(runif(length(d),rng[1],rng[2])); nrd <- sample(1:length(rd),length(which(d<0))); rd[nrd] <- rd[nrd]*(-1); return(rd); }) if(debug) { cat("done\n"); } } } else { rbg.tl <- NULL; } n.randomizations <- length(control); #signal.size <- sum(unlist(lapply(tvl,length))); rtp <- lapply(control,function(d) { # calculate tag.weight #tag.weight <- sum(unlist(lapply(tvl,length)))/sum(unlist(lapply(d,length))); tag.weight <- dataset.density.ratio(tvl,d,background.density.scaling=background.density.scaling); #cat("tag.weight=",tag.weight," "); return(window.call.mirror.binding(d,min.thr=min.thr, tag.weight=tag.weight,bg.tl=rbg.tl, debug=debug, round.up=T,background.density.scaling=background.density.scaling, ...)); #return(window.call.mirror.binding(d,min.thr=min.thr, method=tag.wtd,wsize=200,bg.tl=control.data,window.size=window.size,debug=T,min.dist=min.dist,cluster=cluster)) }); if(return.core.data) { core.data <- c(core.data,list(rtp.unfiltered=rtp)); } if(tec.filter) { if(debug) { cat("excluding systematic background anomalies ... "); } rtp <- lapply(rtp,filter.binding.sites,tec,exclude=T); if(debug) { cat("done\n"); } } } else { if(debug) { cat("determining peaks on ",n.randomizations,"randomized datasets:\n"); } rtp <- lapply(1:n.randomizations,function(i) { rd <- generate.randomized.data(tvl,shuffle.window=shuffle.window,shuffle.both.strands=shuffle.both.strands,strand.shuffle.only=strand.shuffle.only); return(window.call.mirror.binding(rd,min.thr=min.thr,bg.tl=bg.tl, debug=debug, ...)); #return(window.call.mirror.binding(rd,min.thr=min.thr, method=tag.wtd,wsize=200,bg.tl=control.data,window.size=window.size,debug=T,min.dist=min.dist)) }); } if(return.control.predictions) { control.predictions <- rtp; } rtp <- do.call(rbind,lapply(rtp,function(d) do.call(rbind,d))); # merge tables # generate real data predictions if(debug) { cat("determining peaks on real data:\n"); } npl <- window.call.mirror.binding(tvl,min.thr=min.thr,bg.tl=bg.tl, debug=debug, background.density.scaling=background.density.scaling, ...); #npl <- window.call.mirror.binding(tvl,min.thr=min.thr, method=tag.wtd,wsize=200,bg.tl=control.data,window.size=window.size,debug=T,min.dist=min.dist,cluster=cluster); if(return.core.data) { core.data <- c(core.data,list(npl.unfiltered=npl)); } if(!is.null(bg.tl) && tec.filter) { if(debug) { cat("excluding systematic background anomalies ... "); } npl <- filter.binding.sites(npl,tec,exclude=T); if(debug) { cat("done\n"); } } # calculate E-value and FDRs for all of the peaks if(debug) { cat("calculating statistical thresholds\n"); } chrl <- names(npl); names(chrl) <- chrl; npld <- do.call(rbind,lapply(names(npl),function(chr) { k <- npl[[chr]]; if(!is.null(k) && dim(k)[1]>0) { k$chr <- rep(chr,dim(k)[1]) }; return(k) })) npld <- cbind(npld,get.eval.fdr.vectors(npld$y,rtp$y)); # correct for n.randomizations npld$fdr <- npld$fdr/n.randomizations; npld$evalue <- npld$evalue/n.randomizations; if(return.core.data) { core.data <- c(core.data,list(npld=npld)); } # determine actual thresholds if(is.null(e.value)) { if(is.null(fdr)) { fdr <- 0.01; } thr <- list(root=min(npld$y[npld$fdr<=fdr]),type="FDR",fdr=fdr) if(debug) { cat("FDR",fdr,"threshold=",thr$root,"\n"); } } else { # determine threshold based on e-value thr <- list(root=min(npld$y[npld$evalue<=e.value]),type="Evalue",e.value=e.value) if(debug) { cat("E-value",e.value,"threshold=",thr$root,"\n"); } } npld <- npld[npld$y>=thr$root,]; if(dim(npld)[1]>0) { npl <- tapply(c(1:dim(npld)[1]),as.factor(npld$chr),function(ii) {df <- npld[ii,]; df$chr <- NULL; return(df) }); } else { npl <- list(); } } else { if(is.null(threshold)) { thr <- list(root=min.thr,type="minimal"); } else { thr <- list(root=threshold,type="user specified"); } cat("calling binding positions using",thr$type,"threshold (",thr$root,") :\n"); npl <- window.call.mirror.binding(tvl=tvl,min.thr=thr$root,bg.tl=bg.tl, debug=debug, ...); if(!is.null(bg.tl) && tec.filter) { if(debug) { cat("excluding systematic background anomalies ... "); } npl <- filter.binding.sites(npl,tec,exclude=T); if(debug) { cat("done\n"); } } if(!is.null(topN)) { # determine threshold based on topN peaks ay <- unlist(lapply(npl,function(d) d$y)); if(length(ay)>topN) { thr <- list(root=sort(ay,decreasing=T)[topN],type="topN",topN=topN); cat(paste("determined topN threshold :",thr$root,"\n")); npl <- lapply(npl,function(d) d[d$y>thr$root,]); } } } if(return.core.data) { return(c(list(npl=npl,thr=thr),core.data)); } if(return.control.predictions && !is.null(control.predictions)) { return(list(npl=npl,thr=thr,control.predictions=control.predictions)); } return(list(npl=npl,thr=thr)); } # window tag difference method wtd <- function(x,y,s,e,whs=200,return.peaks=T,min.thr=5,min.dist=200,step=1,direct.count=F,tag.weight=1,bg.x=NULL,bg.y=NULL,bg.weight=1,mask.x=NULL,mask.y=NULL,ignore.masking=F, bg.whs=whs, round.up=F, ...) { ignore.masking <- ignore.masking | (is.null(mask.x) & is.null(mask.y)); if(step>1) { x <- floor(x/step+0.5); y <- floor(y/step+0.5) if(!is.null(bg.x)) { bg.x <- floor(bg.x/step+0.5); bg.y <- floor(bg.y/step+0.5) } if(!is.null(mask.x)) { mask.x <- floor(mask.x/step+0.5); mask.y <- floor(mask.y/step+0.5) } whs <- floor(whs/step+0.5); bg.whs <- floor(bg.whs/step+0.5); min.dist <- floor(min.dist/step +0.5); s <- floor(s/step+0.5) e <- floor(e/step+0.5) } # scale bg.weight, since within calculation they are considered independent bg.weight <- bg.weight*tag.weight; rx <- c(s-whs,e+whs); # compile tag vectors xt <- table(x); xh <- integer(diff(rx)+1); xh[as.integer(names(xt))-rx[1]+1] <- as.integer(xt); yt <- table(y); yh <- integer(diff(rx)+1); yh[as.integer(names(yt))-rx[1]+1] <- as.integer(yt); # compile background vectors if(!is.null(bg.x) && length(bg.x)>0) { bg.subtract <- 1; bg.xt <- table(bg.x); bg.xh <- integer(diff(rx)+1); bg.xh[as.integer(names(bg.xt))-rx[1]+1] <- as.integer(bg.xt); rm(bg.xt); bg.yt <- table(bg.y); bg.yh <- integer(diff(rx)+1); bg.yh[as.integer(names(bg.yt))-rx[1]+1] <- as.integer(bg.yt); rm(bg.yt); # adjust bg.weight according to bg.whs if(bg.whs!=whs) { bg.weight <- bg.weight*whs/bg.whs; } } else { bg.subtract <- 0; bg.xh <- bg.yh <- c(); } # record masked positions if(!ignore.masking) { if(!is.null(mask.x) && length(mask.x)>0) { mvx <- unique(mask.x); mvx <- setdiff(mvx,as.numeric(names(xt))); mvx <- mvx[mvx>=rx[1] & mvx<=rx[2]]; xh[mvx-rx[1]+1] <- -1; } if(!is.null(mask.y) && length(mask.y)>0) { mvy <- unique(mask.y); mvy <- setdiff(mvy,as.numeric(names(yt))); mvy <- mvy[mvy>=rx[1] & mvy<=rx[2]]; yh[mvy-rx[1]+1] <- -1; } } rm(xt,yt); if(round.up) { round.up <- 1; } else { round.up <- 0; } storage.mode(xh) <- storage.mode(yh) <- "integer"; storage.mode(bg.xh) <- storage.mode(bg.yh) <- "integer"; nx <- length(xh); storage.mode(nx) <- storage.mode(whs) <- storage.mode(bg.whs) <- "integer"; rp <- as.integer(return.peaks); dcon <- as.integer(direct.count); storage.mode(rp) <- storage.mode(min.dist) <- "integer"; storage.mode(min.thr) <- "double"; storage.mode(dcon) <- "integer"; storage.mode(tag.weight) <- "double"; storage.mode(bg.weight) <- "double"; storage.mode(bg.subtract) <- "integer"; storage.mode(round.up) <- "integer"; im <- as.integer(ignore.masking); storage.mode(im) <- "integer"; z <- .Call("spp_wtd",xh,yh,whs,rp,min.dist,min.thr,dcon,tag.weight,im,bg.subtract,bg.xh,bg.yh,bg.whs,bg.weight,round.up,PACKAGE="spp"); if(return.peaks) { return(data.frame(x=(z$x+rx[1])*step,y=z$v)); } else { return(list(x=rx*step,y=z)); } } tag.wtd <- function(ctv,s,e,return.peaks=T, bg.ctv=NULL, mask.ctv=NULL, ...) { x <- ctv[ctv>=s & ctv<=e]; y <- (-1)*ctv[ctv<=-s & ctv>=-e]; if(!is.null(bg.ctv)) { bg.x <- bg.ctv[bg.ctv>=s & bg.ctv<=e]; bg.y <- (-1)*bg.ctv[bg.ctv<=-s & bg.ctv>=-e]; } else { bg.x <- bg.y <- NULL; } if(!is.null(mask.ctv)) { mask.x <- mask.ctv[mask.ctv>=s & mask.ctv<=e]; mask.y <- (-1)*mask.ctv[mask.ctv<=-s & mask.ctv>=-e]; } else { mask.x <- mask.y <- NULL; } if(length(x)==0 | length(y) ==0) { if(return.peaks) { return(data.frame(x=c(),y=c())); } else { rx <- range(c(x,y)); return(list(x=rx,y=numeric(diff(rx)+1))); } } else { return(wtd(x,y,s,e,return.peaks=return.peaks, bg.x=bg.x,bg.y=bg.y, mask.x=mask.x,mask.y=mask.y, ...)) } } # shuffles tags in chromosome blocks of a specified size # note: all coordinates should be positive tag.block.shuffle <- function(tags,window.size=100) { if(length(tags)<3) { warning("too few tags for shuffling"); return(tags); } rng <- range(tags); #if(rng[1]<0) { stop("negative tag coordinates found") } if(diff(rng)<=window.size) { warning(paste("tag range (",diff(rng),") is smaller than shuffle window size")); return(tags); } if(window.size==0) { return(as.integer(runif(length(tags),min=rng[1],max=rng[2]))) } else if(window.size==1) { tt <- table(tags); return(rep(runif(length(tt),min=rng[1],max=rng[2]),as.integer(tt))) } else { # block positions bp <- tags %/% window.size; # block-relative tag positions rp <- tags %% window.size; # shuffle block positions bpu <- unique(bp); rbp <- range(bpu); bps <- as.integer(runif(length(bpu),min=rbp[1],max=rbp[2])); bpi <- match(bp,bpu); sbp <- bps[bpi]; #sbp <- rbp[1]+match(bp,sample(rbp[1]:rbp[2])) return(sbp*window.size+rp); } } # calculate window cross-correlation lwcc <- function(x,y,s,e,whs=100,isize=20,return.peaks=T,min.thr=1,min.dist=100,step=1,tag.weight=1,bg.x=NULL,bg.y=NULL,bg.weight=NULL,mask.x=NULL,mask.y=NULL,bg.whs=whs,round.up=F) { if(step>1) { x <- floor(x/step+0.5); y <- floor(y/step+0.5) if(!is.null(bg.x)) { bg.x <- floor(bg.x/step+0.5); bg.y <- floor(bg.y/step+0.5) } if(!is.null(mask.x)) { mask.x <- floor(mask.x/step+0.5); mask.y <- floor(mask.y/step+0.5) } whs <- floor(whs/step+0.5); bg.whs <- floor(bg.whs/step+0.5); isize <- floor(isize/step+0.5); min.dist <- floor(min.dist/step +0.5); s <- floor(s/step+0.5) e <- floor(e/step+0.5) } # scale bg.weight, since within calculation they are considered independent bg.weight <- bg.weight*tag.weight; rx <- c(s-whs,e+whs); xt <- table(x); xh <- integer(diff(rx)+1); xh[as.integer(names(xt))-rx[1]+1] <- as.integer(xt); yt <- table(y); yh <- integer(diff(rx)+1); yh[as.integer(names(yt))-rx[1]+1] <- as.integer(yt); # compile background vectors if(!is.null(bg.x) && length(bg.x)>0) { bg.subtract <- 1; bg.xt <- table(bg.x); bg.xh <- integer(diff(rx)+1); bg.xh[as.integer(names(bg.xt))-rx[1]+1] <- as.integer(bg.xt); rm(bg.xt); bg.yt <- table(bg.y); bg.yh <- integer(diff(rx)+1); bg.yh[as.integer(names(bg.yt))-rx[1]+1] <- as.integer(bg.yt); rm(bg.yt); # adjust bg.weight according to bg.whs bg.weight <- bg.weight*(whs-isize)/bg.whs; } else { bg.subtract <- 0; bg.xh <- bg.yh <- c(); } # record masked positions if(!is.null(mask.x) && length(mask.x)>0) { mvx <- unique(mask.x); mvx <- setdiff(mvx,as.numeric(names(xt))); mvx <- mvx[mvx>=rx[1] & mvx<=rx[2]]; xh[mvx-rx[1]+1] <- -1; } if(!is.null(mask.y) && length(mask.y)>0) { mvy <- unique(mask.y); mvy <- setdiff(mvy,as.numeric(names(yt))); mvy <- mvy[mvy>=rx[1] & mvy<=rx[2]]; yh[mvy-rx[1]+1] <- -1; } rm(xt,yt); if(round.up) { round.up <- 1; } else { round.up <- 0; } storage.mode(xh) <- storage.mode(yh) <- "integer"; storage.mode(bg.xh) <- storage.mode(bg.yh) <- "integer"; nx <- length(xh); storage.mode(nx) <- storage.mode(whs) <- storage.mode(isize) <- storage.mode(bg.whs) <- "integer"; rp <- as.integer(return.peaks); storage.mode(rp) <- storage.mode(min.dist) <- "integer"; storage.mode(min.thr) <- "double"; storage.mode(tag.weight) <- "double"; storage.mode(bg.weight) <- "double"; storage.mode(bg.subtract) <- "integer"; storage.mode(round.up) <- "integer"; # allocate return arrays #cc <- numeric(nx); storage.mode(cc) <- "double"; z <- .Call("spp_lwcc",xh,yh,whs,isize,rp,min.dist,min.thr,tag.weight,bg.subtract,bg.xh,bg.yh,bg.whs,bg.weight,round.up,PACKAGE="spp"); if(return.peaks) { return(data.frame(x=(z$x+rx[1])*step,y=z$v)); } else { return(list(x=rx*step,y=z)); } } tag.lwcc <- function(ctv,s,e,return.peaks=T, bg.ctv=NULL, mask.ctv=NULL, ...) { x <- ctv[ctv>=s & ctv<=e]; y <- (-1)*ctv[ctv<=-s & ctv>=-e]; if(!is.null(bg.ctv)) { bg.x <- bg.ctv[bg.ctv>=s & bg.ctv<=e]; bg.y <- (-1)*bg.ctv[bg.ctv<=-s & bg.ctv>=-e]; } else { bg.x <- bg.y <- NULL; } if(!is.null(mask.ctv)) { mask.x <- mask.ctv[mask.ctv>=s & mask.ctv<=e]; mask.y <- (-1)*mask.ctv[mask.ctv<=-s & mask.ctv>=-e]; } else { mask.x <- mask.y <- NULL; } if(length(x)==0 | length(y) ==0) { if(return.peaks) { return(data.frame(x=c(),y=c())); } else { rx <- range(c(x,y)); return(list(x=rx,y=numeric(diff(rx)+1))); } } else { return(lwcc(x,y, s,e,return.peaks=return.peaks, bg.x=bg.x,bg.y=bg.y, mask.x=mask.x,mask.y=mask.y, ...)) } } # determine mirror-based binding positions using sliding window along each chromosome # extra parameters are passed on to call.nucleosomes() window.call.mirror.binding <- function(tvl,window.size=4e7, debug=T, cluster=NULL, bg.tl=NULL, mask.tl=NULL, background.density.scaling=T, ...) { chrl <- names(tvl); # determine bg.weight if(!is.null(bg.tl)) { bg.weight <- dataset.density.ratio(tvl,bg.tl,background.density.scaling=background.density.scaling); } else { bg.weight <- NULL; } if(debug) { cat("bg.weight=",bg.weight," "); } names(chrl) <- chrl; if(is.null(cluster)) { return(lapply(chrl,function(chr) { bg.ctv <- NULL; if(!is.null(bg.tl)) { bg.ctv <- bg.tl[[chr]]; }; mask.ctv <- NULL; if(!is.null(mask.tl)) { mask.ctv <- mask.tl[[chr]]; }; window.chr.call.mirror.binding(list(ctv=tvl[[chr]],bg.ctv=bg.ctv,mask.ctv=mask.ctv),window.size=window.size,chr=chr,debug=debug, bg.weight=bg.weight, bg.ctv=bg.ctv, mask.ctv=mask.ctv, ...); })); } else { # add bg.ctv and mask.ctv to parallel call tvll <- lapply(chrl,function(chr) { bg.ctv <- NULL; if(!is.null(bg.tl)) { bg.ctv <- bg.tl[[chr]]; }; mask.ctv <- NULL; if(!is.null(mask.tl)) { mask.ctv <- mask.tl[[chr]]; }; return(list(ctv=tvl[[chr]],bg.ctv=bg.ctv,mask.ctv=mask.ctv)) }); bl <- clusterApplyLB(cluster,tvll,window.chr.call.mirror.binding,window.size=window.size,debug=debug, bg.weight=bg.weight, ...); names(bl) <- chrl; return(bl); } } window.chr.call.mirror.binding <- function(ctvl,window.size,debug=T, chr="NA", cluster=NULL, method=tag.wtd, bg.ctv=NULL, mask.ctv=NULL, ...) { ctv <- ctvl$ctv; bg.ctv <- ctvl$bg.ctv; mask.ctv <- ctvl$mask.ctv; if(is.null(ctv)) { return(data.frame(x=c(),y=c())) } if(length(ctv)<2) { return(data.frame(x=c(),y=c())) } dr <- range(unlist(lapply(ctv,function(x) range(abs(x))))) n.windows <- ceiling(diff(dr)/window.size); pinfo <- c(); if(debug) { cat(paste("processing ",chr," in ",n.windows," steps [",sep="")); } for(i in 1:n.windows) { s <- dr[1]+(i-1)*window.size; npn <- method(s=s, e=s+window.size,ctv=ctv, return.peaks=T, bg.ctv=bg.ctv, mask.ctv=mask.ctv, ... ); if(length(npn) > 0) { pinfo <- rbind(pinfo,npn) } if(debug) { cat("."); } } if(debug) { cat(paste("] done (",dim(pinfo)[1],"positions)\n")); } else { cat("."); } return(data.frame(x=pinfo[,1],y=pinfo[,2])); } generate.randomized.data <- function(data,shuffle.window=1,shuffle.both.strands=T,strand.shuffle.only=F,chrl=names(data)) { names(chrl) <- unlist(chrl); if(strand.shuffle.only) { # shuffle just strand assignment, not tag positions rt <- lapply(data[unlist(chrl)],function(tv) tv*sample(c(-1,1),length(tv),replace=T)); } else { if(shuffle.both.strands) { rt <- lapply(data[unlist(chrl)],function(tv) { pti <- which(tv>0); return(c(tag.block.shuffle(tv[pti],window.size=shuffle.window),tag.block.shuffle(tv[-pti],window.size=shuffle.window))) }); } else { rt <- lapply(data[unlist(chrl)],function(tv) { pti <- which(tv>0); return(c(tag.block.shuffle(tv[pti],window.size=shuffle.window),tv[-pti]))}); } } } # determine threshold based on E value # for efficiency chrl should include just one or two small chromosomes # optional parameters are passed to call.nucleosomes() determine.lwcc.threshold <- function(tvl,chrl=names(tvl),e.value=100, n.randomizations=1, min.thr=1, debug=F, tol=1e-2, shuffle.window=1, shuffle.both.strands=T, return.rtp=F, control=NULL, strand.shuffle=F, ...) { names(chrl) <- unlist(chrl); # determine fraction of total tags contained in the specified nucleosomes ntags <- sum(unlist(lapply(tvl,function(cv) length(cv)))); nctags <- sum(unlist(lapply(chrl, function(cn) length(tvl[[cn]])))); # calculate actual target E value if(!is.null(control)) { n.randomizations <- length(control); } eval <- e.value*n.randomizations*nctags/ntags if(eval<1) { warning("specified e.value and set of chromosomes results in target e.value of less than 1"); eval <- 1; } if(debug) { cat(paste("randomizations =",n.randomizations," chromosomes =",length(chrl),"\n")) cat(paste("adjusted target eval =",eval,"\ngenerating randomized tag peaks ...")); } # get peaks on randomized tags if(is.null(control)) { rtp <- data.frame(do.call(rbind,lapply(1:n.randomizations,function(i) { if(strand.shuffle) { # shuffle just strand assignment, not tag positions rt <- lapply(tvl[unlist(chrl)],function(tv) tv*sample(c(-1,1),length(tv),replace=T)); } else { if(shuffle.both.strands) { rt <- lapply(tvl[unlist(chrl)],function(tv) { pti <- which(tv>0); return(c(tag.block.shuffle(tv[pti],window.size=shuffle.window),tag.block.shuffle(tv[-pti],window.size=shuffle.window))) }); } else { rt <- lapply(tvl[unlist(chrl)],function(tv) { pti <- which(tv>0); return(c(tag.block.shuffle(tv[pti],window.size=shuffle.window),tv[-pti]))}); } } if(debug) { cat("."); } rl <- window.call.mirror.binding(rt,min.thr=min.thr, debug=F, ...); return(do.call(rbind,rl)) #return(do.call(rbind,window.call.mirror.binding(rt,min.thr=min.thr, debug=F, whs=100,isize=10,window.size=3e7,min.dist=200))) }))); } else { if(debug) { cat(" using provided controls "); } rtp <- data.frame(do.call(rbind,lapply(control,function(rt) do.call(rbind,window.call.mirror.binding(rt,min.thr=min.thr, debug=F, ...))))) } if(return.rtp) { return(rtp) } if(debug) { cat(" done\nfinding threshold ."); } # determine range and starting value rng <- c(min.thr,max(na.omit(rtp$y))) # find E value threshold count.nucs.f <- function(nthr) { return(eval-length(which(rtp$y>=nthr))); } # estimate position of the root by downward bisection iterations mv <- c(eval); mvp <- c(rng[2]); ni <- 1; max.it <- 2*as.integer(log2(rng[2]/rng[1])+0.5); while((ni<=max.it) & (mv[1]>=0)) { np <- mvp[1]/2; npv <- count.nucs.f(np); mv <- c(npv,mv); mvp <- c(np,mvp); ni <- ni+1; } if(ni>max.it) { # determine lowest value if(debug) { cat(paste("exceeded max.it (",max.it,"), returning lowest point",signif(mvp[1],4))); } return(list(root=mvp[1])) } else { rng <- mvp[1:2]; if(mv[2]==0) rng[2] <- mvp[3]; if(debug) { cat(paste("bound to (",signif(rng[1],4),signif(rng[2],4),") ")); } } # find root on the right side x <- uniroot(count.nucs.f,rng,tol=tol); #x$max <- o$par; #x$f.max <- (-1)*o$value; if(debug) { cat(paste(" done (thr=",signif(x$root,4),")\n")); } return(x); } # determine cooridnates of points x relative to signed # positions pos within size range get.relative.coordinates <- function(x,pos,size,sorted=F) { if(!sorted) { op <- order(abs(pos)); x <- sort(x); pos <- pos[op]; } #dyn.load("~/zhao/sc/peaks.so"); storage.mode(x) <- storage.mode(pos) <- storage.mode(size) <- "integer"; rf <- .Call("get_relative_coordinates",x,pos,size); if(!sorted) { rf$i <- op[rf$i]; } else { return(rf$i); } return(rf); } # given list of magnitude values for signal(x) and control (y), # return a dataframe with $e.val and $fdr get.eval.fdr.vectors <- function(x,y) { nx <- length(x); ny <- length(y); if(nx==0) { return(data.frame(evalue=c(),fdr=c())) } if(ny==0) { return(data.frame(evalue=rep(0,nx),fdr=rep(1,nx))) } ex <- ecdf(x); ey <- ecdf(y); evals <- (1-ey(x))*ny; yvals <- (1-ex(x))*nx; fdr <- (evals+0.5)/(yvals+0.5); # with pseudo-counts fdr[yvals==0] <- min(fdr); # correct for undercounts # find a min x corresponding to a minimal FDR mfdr <- min(fdr); mfdrmx <- min(x[fdr==mfdr]); # correct fdr[x>=mfdrmx] <- mfdr; return(data.frame(evalue=(evals+1),fdr=fdr)); } # filter predictions to remove calls failling into the tag enrichment clusters ( chr list of $s/$e dfs) filter.binding.sites <- function(bd,tec,exclude=F) { chrl <- names(bd); names(chrl) <- chrl; lapply(chrl,function(chr) { cbd <- bd[[chr]]; if(is.null(cbd)) { return(NULL) }; if(length(cbd)==0) { return(NULL) }; if(dim(cbd)[1]>0) { ctec <- tec[[chr]]; if(length(ctec$s)>0) { if(exclude) { pwi <- which(points_within(cbd$x,ctec$s,ctec$e)== -1); } else { pwi <- which(points_within(cbd$x,ctec$s,ctec$e)> -1); } return(cbd[pwi,]); } else { if(exclude) { return(cbd); } else { return(data.frame(x=c(),y=c())); } } } else { return(cbd); } }); } # PUBLIC # generate predictions on sequential (chained) subsamples of data # if step.size <1, it is intepreted as a fraciton and a each subsequent subsample # is of a size (1-fraction.step)*N (N - size of the signal data); # otherwise the step.size is interpreted as a number of tags, and each subsample is of the size N-step.size get.subsample.chain.calls <- function(signal.data,control.data,n.steps=NULL,step.size=1e6,subsample.control=F,debug=F,min.ntags=1e3, excluded.steps=c(), test.chromosomes=NULL, ... ) { if(!is.null(test.chromosomes)) { # adjust step size sz <- sum(unlist(lapply(signal.data,length))) signal.data <- signal.data[test.chromosomes]; control.data <- control.data[test.chromosomes]; if(step.size>1) { step.size <- step.size*sum(unlist(lapply(signal.data,length)))/sz; # cat("adjusted step.size=",step.size,"\n"); } } if(is.null(n.steps)) { if(step.size<1) { # down to 10% n.steps <- log(0.1)/log(step.size); } else { n.steps <- floor(sum(unlist(lapply(signal.data,length)))/step.size) } } if(subsample.control && !is.null(control.data)) { # normalize control to the signal size if(debug) { cat("pre-subsampling control.\n"); } bg.weight <- sum(unlist(lapply(signal.data,length)))/sum(unlist(lapply(control.data,length))) control.data <- lapply(control.data,function(d) sample(d,length(d)*bg.weight,replace=(bg.weight>1))) } calls <- list(); callnames <- c(); for(i in 0:n.steps) { if(debug) { cat("chained subsample step",i,":\n"); } if(!i %in% excluded.steps) { ans <- list(find.binding.positions(signal.data=signal.data,control.data=control.data,debug=debug, skip.control.normalization=T, ...)); names(ans) <- as.character(c(i)); calls <- c(calls,ans); callnames <- c(callnames,i); } # subsample if(step.size<1) { # fraction steps f <- 1-step.size; } else { # bin steps sz <- sum(unlist(lapply(signal.data,length))); f <- (sz-step.size)/sz; if(f<=0) break; } if(debug) { cat("chained subsampling using fraction",f,".\n"); } signal.data <- lapply(signal.data,function(d) sample(d,length(d)*f)); if(subsample.control && !is.null(control.data)) { control.data <- lapply(control.data,function(d) sample(d,length(d)*f)); } sz <- sum(unlist(lapply(signal.data,length))); if(sz1) { actual.enr.field <- paste(actual.enr.field,scale,sep="."); } cvl <- lapply(chains,function(chain) { nn <- sort(unlist(lapply(chain,function(d) d$n)),decreasing=T); nd <- diff(nn); nn <- nn[-length(nn)]; me <- lapply(c(2:length(chain)),function(i) { sla <- t.precalculate.ref.peak.agreement(chain[[i-1]],chain[i],agreement.distance=agreement.distance,enr.field=actual.enr.field) me <- t.find.min.saturated.enr(sla,thr=1-test.agreement) menr <- max(min(na.omit(unlist(lapply(chain[[i-1]]$npl,function(d) d[actual.enr.field])))),min(na.omit(unlist(lapply(chain[[i]]$npl,function(d) d[actual.enr.field])))),1) if(me<=menr) { me <- 1; }; return(me); }) data.frame(n=nn,me=unlist(me),nd=nd); }); if(return.lists) { return(cvl) } cvl <- na.omit(do.call(rbind,cvl)); if(return.median) { tv <- tapply(cvl$me,as.factor(cvl$n),median) } else { tv <- tapply(cvl$me,as.factor(cvl$n),mean,trim=mean.trim); } df <- data.frame(n=as.numeric(names(tv)),me=as.numeric(tv)); return(df[order(df$n,decreasing=T),]) }) } # returns agreement as a function of dataset size, possibly filtering peaks by min.enr threshold, and by max.fdr chain.to.reference.comparison <- function(chains,min.enr=NULL,debug=F,agreement.distance=50, return.median=F, mean.trim=0.1, enr.field="enr",max.fdr=NULL) { cvl <- lapply(chains,function(chain) { # filter chain by fdr if(!is.null(max.fdr)) { chain <- lapply(chain,function(d) { d$npl <- lapply(d$npl,function(cd) cd[cd$fdr<=max.fdr,]); return(d); }); } nn <- sort(unlist(lapply(chain,function(d) d$n)),decreasing=T); nn <- nn[-length(nn)]; me <- lapply(c(2:length(chain)),function(i) { sla <- t.precalculate.ref.peak.agreement(chain[[1]],chain[i],agreement.distance=agreement.distance,enr.field=enr.field) # calculate overlap x <- lapply(sla,function(mpd) { if(!is.null(min.enr)) { me <- mpd$re >= min.enr; me[is.na(me)] <- F; mpd <- mpd[me,]; ome <- mpd$oe < min.enr; ome[is.na(ome)] <- T; mpd$ov[ome] <- 0; } return(mean(mpd$ov)); }) }) data.frame(n=nn,me=unlist(me)); }); cvl <- na.omit(do.call(rbind,cvl)); if(return.median) { tv <- tapply(cvl$me,as.factor(cvl$n),median) } else { tv <- tapply(cvl$me,as.factor(cvl$n),mean,trim=mean.trim); } df <- data.frame(n=as.numeric(names(tv)),me=as.numeric(tv)); return(df[order(df$n,decreasing=T),]) } # estimates enrichment confidence interval based on 2*tag.count.whs window around each position, and a z-score (alpha/2) # if(multiple.background.scales=T) the enrichment is also estimated using 5- and 10-fold increased background tag window # adds $enr (lower bound), $enr.ub (upper bound) and $enr.mle fields calculate.enrichment.estimates <- function(binding.positions,signal.data=NULL,control.data=NULL,fraction=1,tag.count.whs=100,z=2,effective.genome.size=3e9,scale.down.control=F,background.scales=c(1),bg.weight=NULL) { f <- fraction; qv <- pnorm(z,lower.tail=F); cn <- names(binding.positions$npl); names(cn) <- cn; if(is.null(control.data)) { # estimate from gamma distribution fg.lambda <- f*sum(unlist(lapply(signal.data,length)))*2*tag.count.whs/effective.genome.size; binding.positions$npl <- lapply(binding.positions$npl,function(d) { d$enr <- qgamma(qv,d$nt,scale=1)/fg.lambda; d$enr.ub <- qgamma(1-qv,d$nt,scale=1)/fg.lambda; d$enr.mle <- d$nt/fg.lambda; return(d); }); } else { # estimate using beta distribution if(is.null(bg.weight)) { bg.weight <- sum(unlist(lapply(signal.data,length)))/sum(unlist(lapply(control.data,length))) } if(scale.down.control) { # sample down control to be the same size as true signal.data (bg.weight*f) control.data <- lapply(control.data,function(d) sample(d,length(d)*bg.weight*f,replace=(f*bg.weight>1))) #bg.weight <- sum(unlist(lapply(signal.data,length)))/sum(unlist(lapply(control.data,length))) bg.weight <- 1/f; } binding.positions$enrichment.bg.weight <- bg.weight; binding.positions$enrichment.whs <- tag.count.whs; binding.positions$enrichment.z <- z; binding.positions$npl <- lapply(cn,function(chr) { d <- binding.positions$npl[[chr]]; edf <- lapply(background.scales,function(background.width.multiplier) { sig.mult <- bg.weight*f/background.width.multiplier; nbg <- points_within(abs(control.data[[chr]]),d$x-tag.count.whs*background.width.multiplier,d$x+tag.count.whs*background.width.multiplier,return.point.counts=T,return.unique=F); nfg <- d$nt; # Poisson ratio Bayesian LB with non-informative prior (Clopper & Pearson 1934) nf <- ((nfg+0.5)/(nbg+0.5))*qf(1-qv,2*(nfg+0.5),2*(nbg+0.5),lower.tail=F) nf <- nf/sig.mult; ub <- ((nfg+0.5)/(nbg+0.5))*qf(qv,2*(nfg+0.5),2*(nbg+0.5),lower.tail=F) ub <- ub/sig.mult; mle <- (nfg+0.5)/(nbg+0.5); mle <- mle/sig.mult; if(is.null(nbg)) { nbg <- numeric(0) } if(is.null(nf)) { nf <- numeric(0) } if(is.null(ub)) { ub <- numeric(0) } if(is.null(mle)) { mle <- numeric(0) } return(data.frame(nbg=nbg,lb=nf,ub=ub,mle=mle)) }) adf <- do.call(cbind,lapply(c(1:length(background.scales)),function(i) { df <- edf[[i]]; cn <- c("nbgt","enr","enr.ub","enr.mle"); if(background.scales[i]>1) { cn <- paste(cn,as.character(background.scales[i]),sep="."); } names(df) <- cn; return(df); })) return(cbind(d,adf)); }); } return(binding.positions); } # precalculate peak agreement of a sampling list given a reference t.precalculate.ref.peak.agreement <- function(ref,sf,agreement.distance=50,enr.field="enr") { ref <- ref$npl; cn <- names(ref); names(cn) <- cn; # for each sampling round lapply(sf,function(sd) { # calculate overlap ov <- data.frame(do.call(rbind,lapply(cn,function(chr) { if(dim(ref[[chr]])[1]<1) { return(cbind(ov=c(),re=c(),oe=c())) }; pwi <- points_within(ref[[chr]]$x,sd$npl[[chr]]$x-agreement.distance,sd$npl[[chr]]$x+agreement.distance); pwi[pwi==-1] <- NA; renr <- ref[[chr]][,enr.field] oenr <- sd$npl[[chr]][,enr.field][pwi]; if(length(oenr)==0) { oenr <- rep(NA,length(renr)); } return(cbind(ov=as.integer(!is.na(pwi)),re=renr,oe=oenr)); }))) }) } # find minimal saturated enrichment given a list of replicate agreement matrices (for one fraction) t.find.min.saturated.enr <- function(pal,thr=0.01,plot=F,return.number.of.peaks=F,plot.individual=T,return.median=F,return.vector=F) { nr <- length(pal); # merge replicate data frames mpd <- data.frame(do.call(rbind,pal)); mpd$re[is.na(mpd$re)] <- Inf; mpd$oe[is.na(mpd$oe)] <- Inf; # round up values to avoid miscounting mpd$re <- round(mpd$re,digits=2); mpd$oe <- round(mpd$oe,digits=2); me <- pmin(mpd$re,mpd$oe); ome <- order(me,decreasing=T); df <- data.frame(me=me[ome],ov=mpd$ov[ome]); recdf <- ecdf(-mpd$re); ren <- length(mpd$re); # collapse equal peak heights xk <- tapply(df$ov,as.factor(df$me),sum); xk <- data.frame(ov=as.numeric(xk),me=as.numeric(names(xk))); xk <- xk[order(xk$me,decreasing=T),]; cso <- cumsum(xk$ov)/(recdf(-xk$me)*ren); cso[is.na(cso)] <- 0; cso[!is.finite(cso)] <- 0; mv <- max(which(cso >= 1-thr)) menr <- xk$me[mv]; ir <- lapply(pal,function(d) { d$re[is.na(d$re)] <- Inf; d$oe[is.na(d$oe)] <- Inf; me <- pmin(d$re,d$oe); ome <- order(me,decreasing=T); df <- data.frame(me=me[ome],ov=d$ov[ome]); cso <- cumsum(df$ov)/c(1:length(df$ov)); mv <- max(which(cso >= 1-thr)) menr <- df$me[mv]; return(list(df=df,menr=menr)); }); if(plot) { par(mar = c(3.5,3.5,2.0,0.5), mgp = c(2,0.65,0), cex = 0.8); plot(df$me,cumsum(df$ov)/c(1:length(df$ov)),type='l',ylab="fraction of positions overlapping with reference",xlab="minimal enrichment of binding positions",xlim=c(min(df$me),2*menr)); abline(h=1-thr,lty=2,col=4) if(plot.individual) { lapply(ir,function(d) { df <- d$df; lines(df$me,cumsum(df$ov)/c(1:length(df$ov)),col=8); abline(v=menr,col="pink",lty=3) }); lines(df$me,cumsum(df$ov)/c(1:length(df$ov)),col=1); } abline(v=menr,col=2,lty=2) legend(x="bottomright",lty=c(1,2,1,3,2),col=c(1,2,8,"pink",4),legend=c("combined samples","combined sample MSER","individual samples","individual MSERs","consistency threshold")); } if(return.number.of.peaks) { mpd <- data.frame(do.call(rbind,pal)); return(length(which(!is.na(mpd$re) & mpd$re >=menr))/nr); } else { if(return.vector) { return(unlist(lapply(ir,function(d) d$menr))); } if(return.median) { return(median(unlist(lapply(ir,function(d) d$menr)))); } else { return(menr); } } } # determine d1/d2 dataset size ratio. If background.density.scaling=F, the ratio of tag counts is returned. # if background.density.scaling=T, regions of significant tag enrichment are masked prior to ratio calculation. dataset.density.ratio <- function(d1,d2,min.tag.count.z=4.3,wsize=1e3,mcs=0,background.density.scaling=T,return.proportion=F) { if(!background.density.scaling) { return(sum(unlist(lapply(d1,length)))/sum(unlist(lapply(d2,length)))) } chrl <- intersect(names(d1),names(d2)); ntc <- do.call(rbind,lapply(chrl,function(chr) { x1 <- tag.enrichment.clusters(abs(d1[[chr]]),c(),wsize=wsize,bg.weight=0,min.tag.count.z=min.tag.count.z,mcs=mcs,either=F) x2 <- tag.enrichment.clusters(abs(d2[[chr]]),c(),wsize=wsize,bg.weight=0,min.tag.count.z=min.tag.count.z,mcs=mcs,either=F) return(c(length(which(points_within(abs(d1[[chr]]),c(x1$s,x2$s)-wsize/2,c(x1$e,x2$e)+wsize/2)==-1)),length(which(points_within(abs(d2[[chr]]),c(x1$s,x2$s)-wsize/2,c(x1$e,x2$e)+wsize/2)==-1)))) })) ntcs <- apply(ntc,2,sum); #print(ntcs/c(sum(unlist(lapply(d1,length))),sum(unlist(lapply(d2,length))))); return(ntcs[1]/ntcs[2]) } # returns effective size of the dataset based on the same logic as dataset.density.ratio dataset.density.size <- function(d1,min.tag.count.z=4.3,wsize=1e3,mcs=0,background.density.scaling=T) { if(!background.density.scaling) { return(sum(unlist(lapply(d1,length)))) } chrl <- names(d1); ntc <- lapply(chrl,function(chr) { x1 <- tag.enrichment.clusters(abs(d1[[chr]]),c(),wsize=wsize,bg.weight=0,min.tag.count.z=min.tag.count.z,mcs=mcs,either=F) return(length(which(points_within(abs(d1[[chr]]),x1$s-wsize/2,x1$e+wsize/2)==-1))) }) return(sum(unlist(ntc))) } old.dataset.density.ratio <- function(d1,d2,min.tag.count.z=4.3,wsize=1e3,mcs=0,background.density.scaling=T) { if(!background.density.scaling) { return(sum(unlist(lapply(d1,length)))/sum(unlist(lapply(d2,length)))) } t.chromosome.counts <- function(tl) { lapply(tl,function(d) { x <- tag.enrichment.clusters(abs(d),c(),wsize=wsize,bg.weight=0,min.tag.count.z=min.tag.count.z,mcs=mcs,either=F) x$s <- x$s-wsize/2; x$e <- x$e+wsize/2; x <- regionset.intersection.c(list(x),do.union=T) return(c(n=length(which(points_within(abs(d),x$s,x$e)==-1)),s=diff(range(abs(d))),m=sum(x$e-x$s))); }) } l1 <- t.chromosome.counts(d1); l2 <- t.chromosome.counts(d2); l2 <- data.frame(do.call(rbind,l2[names(l1)])); l1 <- data.frame(do.call(rbind,l1)); # genome size gs <- sum(pmax(l1$s,l2$s)) den1 <- sum(l1$n)/(gs-sum(l1$m)) den2 <- sum(l2$n)/(gs-sum(l2$m)) return(den1/den2); } # calculate cumulative density based on sum of scaled gaussian curves # (by Michael Tolstorukov) # # vin - input vector; bw -- standard deviation, dw-gaussina cutoff in stdev; dout - output "density") # output - if return.x=F vector of cumulative density values corresponding to integer positions described by range(vin) # output - if return.x=T a data structure with $x and $y corresponding to the cumulative density # optional match.wt.f is a function that will return weights for a tag vector densum <- function(vin,bw=5,dw=3,match.wt.f=NULL,return.x=T,from=min(vin),to=max(vin),step=1,new.code=T) { # construct vector of unique tags and their counts tc <- table(vin[vin>=from & vin<=to]); pos <- as.numeric(names(tc)); storage.mode(pos) <- "double"; tc <- as.numeric(tc); storage.mode(tc) <- "double"; n <- length(pos) # weight counts if(!is.null(match.wt.f)) { tc <- tc*match.wt.f(pos); } rng <- c(from,to); if(rng[1]<0) { stop("range extends into negative values") } if(range(pos)[1]<0) { stop("position vector contains negative values") } storage.mode(n) <- storage.mode(rng) <- storage.mode(bw) <- storage.mode(dw) <- storage.mode(step) <- "integer"; spos <- rng[1]; storage.mode(spos) <- "double"; dlength <- floor((rng[2] - rng[1])/step) + 1; # length of output array if(dlength<1) { stop("zero data range") } if(new.code) { storage.mode(step) <- storage.mode(dlength) <- storage.mode(bw) <- storage.mode(dw) <-"integer"; dout <- .Call("ccdensum",pos,tc,spos,bw,dw,dlength,step); } else { stop("Please set new.code=T to use the new ccdensum function. The old cdensum is deprecated") # dout <- numeric(dlength); storage.mode(dout) <- "double"; # storage.mode(dlength) <- "integer"; # #.C("cdensum",n,pos,tc,spos,bw,dw,dlength,step,dout,DUP=F); # .C("cdensum",n,pos,tc,spos,bw,dw,dlength,step,dout); } if(return.x) { return(list(x=c(rng[1],rng[1]+step*(dlength-1)),y=dout,step=step)) } else { return(dout) } } # count tags within sliding window of a specified size # vin - tag vector (postive values, pre-shifted) # window.size/window.step - window characteristics # tv - optional, pre-sorted, pre-trimmed tag vector window.tag.count <- function(vin,window.size,window.step=1,return.x=T,from=min(vin)+floor(window.size/2),to=max(vin)-floor(window.size/2),tv=NULL) { whs <- floor(window.size/2); # select tags with margins if(is.null(tv)) { tv <- sort(vin[vin>=from-whs-1 & vin<=to+whs+1]) } storage.mode(tv) <- "double"; n <- length(tv) nsteps <- ceiling((to-from)/window.step); storage.mode(n) <- storage.mode(nsteps) <- storage.mode(window.size) <- storage.mode(window.step) <- "integer"; spos <- from; storage.mode(spos) <- "double"; if(nsteps<1) { stop("zero data range") } #dout <- integer(nsteps); storage.mode(dout) <- "integer"; #.C("window_n_tags",n,tv,spos,window.size,window.step,nsteps,dout,DUP=F); dout <- .Call("cwindow_n_tags",tv,spos,window.size,window.step,nsteps); if(return.x) { return(list(x=c(from,from+(nsteps-1)*window.step),y=dout,step=window.step)) } else { return(dout) } } # count tags in windows around specified positions (pos) window.tag.count.around <- function(vin,window.size,pos,return.x=T,tc=NULL,sorted=F) { if(is.null(tc)) { tc <- table(vin); } if(!sorted) { op <- rank(pos); pos <- sort(pos); } storage.mode(pos) <- "double"; tpos <- as.integer(names(tc)); storage.mode(tpos) <- "double"; tc <- as.integer(tc); storage.mode(tc) <- "integer"; whs <- floor(window.size/2); storage.mode(whs) <- "integer"; twc <- .Call("cwindow_n_tags_around",tpos,tc,pos,whs); if(return.x) { if(sorted) { return(data.frame(x=pos,y=twc)); } else { return(data.frame(x=pos[op],y=twc[op])); } } else { if(sorted) { return(twc); } else { return(twc[op]); } } } # given a tag vector (signed), identify and clean up (either remove or cap) singular positions that exceed local tag density # vin - tag vector # cap.fold - maximal fold over enrichment over local density allowed for a single tag position, at which the tag count is capped # eliminate.fold - max fold enrichment that, when exceeded, results in exclusion of all the tags at that position (e.g. counted as anomaly) # z.threshold - Z-score used to determine max allowed counts filter.singular.positions.by.local.density <- function(tags,window.size=200,cap.fold=4,eliminate.fold=10,z.threshold=3) { # tabulate tag positions if(length(tags)<2) { return(tags); }; tc <- table(tags); pos <- as.numeric(names(tc)); storage.mode(pos) <- "double"; tc <- as.integer(tc); storage.mode(tc) <- "integer"; n <- length(pos); whs <- floor(window.size/2); storage.mode(n) <- storage.mode(whs) <- "integer"; twc <- .Call("cwindow_n_tags_around",pos,tc,pos,whs); twc <- (twc-tc+1)/window.size; # local density pv <- pnorm(z.threshold,lower.tail=F) # exclude max.counts <- qpois(pv,twc*eliminate.fold,lower.tail=F) tc[tc>max.counts] <- 0; # cap max.counts <- qpois(pv,twc*cap.fold,lower.tail=F) ivi <- which(tc>max.counts); tc[ivi] <- max.counts[ivi]+1; # reconstruct tag vector tv <- rep(pos,tc); to <- order(abs(tv)); tv <- tv[to]; return(tv); } # calculates enrichment bounds using multiple background scales # ft - foreground tags (pre-shifted, positive) # bt - background tags # fws - foreground window size # bwsl - background window size list # step - window step # rng - from/to coordinates (to will be adjusted according to step) # # returns: a list with $x ($s $e $step), $lb vector and $mle vector ($ub if calculate.upper.bound=T) mbs.enrichment.bounds <- function(ft,bt,fws,bwsl,step=1,rng=NULL,alpha=0.05,calculate.upper.bound=F,bg.weight=length(ft)/length(bt),use.most.informative.scale=F,quick.calculation=F,pos=NULL) { # determine range if(is.null(rng)) { rng <- range(range(ft)); } # foreground counts if(is.null(pos)) { fwc <- window.tag.count(ft,fws,window.step=step,from=rng[1],to=rng[2],return.x=T); } else { fwc <- window.tag.count.around(ft,fws,pos,return.x=T) } fwc$y <- fwc$y+0.5; zal <- qnorm(alpha/2,lower.tail=F); # background counts bt <- sort(bt); if(!is.null(pos)) { tc <- table(bt); } bgcm <- lapply(bwsl,function(bgws) { if(is.null(pos)) { window.tag.count(bt,bgws,window.step=step,from=rng[1],to=rng[2],return.x=F,tv=bt)+0.5; } else { window.tag.count.around(bt,bgws,pos,return.x=F,tc=tc)+0.5 } }) if(!is.null(pos)) { rm(tc); } # pick most informative scale if(use.most.informative.scale) { bgcm <- t(do.call(cbind,bgcm)) isi <- max.col(t((bgcm)/(bwsl/fws))) # add pseudo-counts to select lowest scale in case of a tie bgc <- c(bgcm)[isi+dim(bgcm)[1]*(c(1:length(isi))-1)] if(quick.calculation) { rte <- fwc$y+bgc-0.25*zal*zal; rte[rte<0] <- 0; dn <- bgc - 0.25*zal*zal; lbm=(sqrt(fwc$y*bgc) - 0.5*zal*sqrt(rte))/dn; ivi <- which(lbm<0); lbm <- lbm*lbm*bwsl[isi]/fws/bg.weight; lbm[rte<=0] <- 1; lbm[dn<=0] <- 1; lbm[ivi] <- 1; } else { lbm <- (fwc$y/bgc)*qf(1-alpha/2,2*fwc$y,2*bgc,lower.tail=F)*bwsl[isi]/fws/bg.weight; } mle <- fwc$y/bgc*bwsl[isi]/fws/bg.weight; mle[is.nan(mle)] <- Inf; mle[is.na(mle)] <- Inf; rl <- list(x=list(s=fwc$x[1],e=fwc$x[2],step=fwc$step),lb=lbm,mle=mle); if(calculate.upper.bound) { isi <- max.col(t((-bgcm)/(bwsl/fws))) # add pseudo-counts to select highest scale in case of a tie bgc <- c(bgcm)[isi+dim(bgcm)[1]*(c(1:length(isi))-1)] if(quick.calculation) { ubm=(sqrt(fwc$y*bgc) + 0.5*zal*sqrt(rte))/dn; ivi <- which(ubm<0); ubm <- ubm*ubm*bwsl[isi]/fws/bg.weight; ubm[rte<=0] <- 1; ubm[ivi] <- 1; lbm[dn<=0] <- 1; } else { ubm <- (fwc$y/bgc)*qf(alpha/2,2*fwc$y,2*bgc,lower.tail=F)*bwsl[isi]/fws/bg.weight; } rl <- c(rl,list(ub=ubm)); } return(rl); } else { # determine lower bounds lbm <- lapply(c(1:length(bgcm)),function(i) { nbg <- bgcm[[i]]; if(quick.calculation) { rte <- fwc$y+nbg-0.25*zal*zal; rte[rte<0] <- 0; dn <- (nbg - 0.25*zal*zal); lbm=(sqrt(fwc$y*nbg) - 0.5*zal*sqrt(rte))/dn; ivi <- which(lbm<0); lbm <- lbm*lbm*bwsl[i]/fws/bg.weight; lbm[rte<=0] <- 1; lbm[dn<=0] <- 1; lbm[ivi] <- 1; return(lbm); } else { return((fwc$y/nbg)*qf(1-alpha/2,2*fwc$y,2*nbg,lower.tail=F)*bwsl[i]/fws/bg.weight); } }) lbm <- do.call(pmin,lbm); # calculate mle #mle <- do.call(pmin,lapply(bgcm,function(bgc) fwc/bgc)) mle <- do.call(pmin,lapply(c(1:length(bgcm)),function(i) { bgc <- bgcm[[i]]; x <- fwc$y/bgc*bwsl[i]/fws/bg.weight; x[is.nan(x)] <- Inf; x[is.na(x)] <- Inf; return(x); })) rl <- list(x=list(s=fwc$x[1],e=fwc$x[2],step=fwc$step),lb=lbm,mle=mle); if(calculate.upper.bound) { # determine upper bound ubm <- lapply(c(1:length(bgcm)),function(i) { nbg <- bgcm[[i]]; if(quick.calculation) { rte <- fwc$y+nbg-0.25*zal*zal; rte[rte<0] <- 0; dn <- (nbg - 0.25*zal*zal); ubm=(sqrt(fwc$y*nbg) + 0.5*zal*sqrt(rte))/dn; ivi <- which(ubm<0); ubm <- ubm*ubm*bwsl[i]/fws/bg.weight; ubm[rte<=0] <- 1; ubm[dn<=0] <- 1; ubm[ivi] <- 1; return(ubm); } else { return((fwc$y/nbg)*qf(alpha/2,2*fwc$y,2*nbg,lower.tail=F)*bwsl[i]/fws/bg.weight); } }) ubm <- do.call(pmax,ubm); rl <- c(rl,list(ub=ubm)); } return(rl); } } # calculates binomail proportion ratio bounds # returns: a list with $x, $lb vector and $mle, $ub vector binomial.proportion.ratio.bounds <- function(ft1,bt1,ft2,bt2,fws,bws,step=1,rng=NULL,alpha=0.05,bg.weight1=length(ft1)/length(bt1),bg.weight2=length(ft2)/length(bt2),pos=NULL,a=1.25,b=2.50) { # determine range if(is.null(rng)) { rng <- range(range(ft1)); } # counts if(is.null(pos)) { fwc1 <- window.tag.count(ft1,fws,window.step=step,from=rng[1],to=rng[2],return.x=T); fwc2 <- window.tag.count(ft2,fws,window.step=step,from=rng[1],to=rng[2],return.x=T); bwc1 <- window.tag.count(bt1,fws,window.step=step,from=rng[1],to=rng[2],return.x=T); bwc2 <- window.tag.count(bt2,fws,window.step=step,from=rng[1],to=rng[2],return.x=T); pos <- seq(fwc1$x[1],fwc1$x[2],by=fwc1$step) } else { fwc1 <- window.tag.count.around(ft1,bws,pos,return.x=T) fwc2 <- window.tag.count.around(ft2,bws,pos,return.x=T) bwc1 <- window.tag.count.around(bt1,bws,pos,return.x=T) bwc2 <- window.tag.count.around(bt2,bws,pos,return.x=T) } bg.weight1 <- bg.weight1*fws/bws; bg.weight2 <- bg.weight2*fws/bws; ls1 <- log2(1/(1+1/bg.weight1)); ls2 <- log2(1/(1+1/bg.weight2)); ltheta <- log2((fwc1$y+a-1)/(fwc1$y+bwc1$y+a+b-2)) - log2((fwc2$y+a-1)/(fwc2$y+bwc2$y+a+b-2)) - ls1 + ls2; vltheta <- 1/((fwc1$y+a-1) + (fwc1$y+a-1)^2/(bwc1$y+b-1)) + 1/((fwc2$y+a-1) + (fwc2$y+a-1)^2/(bwc2$y+b-1)) zal <- qnorm(alpha/2,lower.tail=F); rl <- list(x=pos,mle=ltheta,lb=ltheta-zal*sqrt(vltheta),ub=ltheta+zal*sqrt(vltheta)); } write.probe.wig <- function(chr,pos,val,fname,append=F,feature="M",probe.length=35,header=T) { min.dist <- min(diff(pos)); if(probe.length>=min.dist) { probe.length <- min.dist-1; cat("warning: adjusted down wig segment length to",probe.length,"\n"); } mdat <- data.frame(chr,as.integer(pos),as.integer(pos+probe.length),val) if(header) { write(paste("track type=wiggle_0 name=\"Bed Format\" description=\"",feature,"\" visibility=dense color=200,100,0 altColor=0,100,200 priority=20",sep=""),file=fname,append=append) write.table(mdat,file=fname,col.names=F,row.names=F,quote=F,sep=" ",append=T); } else { write.table(mdat,file=fname,col.names=F,row.names=F,quote=F,sep=" ",append=append); } } # returns intersection of multiple region sets # each regionset needs to contain $s, $e and optional $v column regionset.intersection.c <- function(rsl,max.val=-1,do.union=F) { # translate into position/flag form rfl <- lapply(rsl,function(rs) { rp <- c(rs$s,rs$e); rf <- c(rep(c(1,-1),each=length(rs$s))); ro <- order(rp); rp <- rp[ro]; rf <- rf[ro]; if(!is.null(rs$v)) { rv <- c(rs$v,rs$v)[ro]; return(data.frame(p=as.numeric(rp),f=as.integer(rf),v=as.numeric(rv))); } else { return(data.frame(p=as.numeric(rp),f=as.integer(rf))); } }) rfd <- data.frame(do.call(rbind,lapply(1:length(rfl),function(i) { d <- rfl[[i]]; d$f <- d$f*i; return(d); }))) rfd <- rfd[order(rfd$p),]; if(is.null(rfd$v)) { max.val <- 0; } if(do.union) { ur <- 1; } else { ur <- 0; }; rl <- .Call("region_intersection",as.integer(length(rfl)),as.numeric(rfd$p),as.integer(rfd$f),as.numeric(rfd$v),as.integer(max.val),as.integer(ur)); return(data.frame(do.call(cbind,rl))); } # idenfity if binding peak falls within a larger region of significant tag enrichment, and if so record its booundaries add.broad.peak.regions <- function(chip.tags,input.tags,bp,window.size=500,z.thr=2) { se <- find.significantly.enriched.regions(chip.tags,input.tags,window.size=window.size,z.thr=z.thr,poisson.z=0,poisson.ratio=0,either=F) chrl <- names(bp$npl); names(chrl) <- chrl; bnpl <- lapply(chrl,function(chr) { npl <- bp$npl[[chr]]; if(is.null(npl) | dim(npl)[1]<1) { return(npl); } pi <- points_within(npl$x,se[[chr]]$s,se[[chr]]$e,return.list=T); pm <- do.call(rbind,lapply(pi,function(rl) { if(length(rl)>0) { return(range(c(se[[chr]]$s[rl],se[[chr]]$e[rl]))) } else { return(c(NA,NA)); } })) npl$rs <- pm[,1]; npl$re <- pm[,2]; return(npl); }) bp$npl <- bnpl; return(bp); } # writing out binding results in a narrowpeak format, incorporating broad region boundaries if they are present # if broad region info is not present, margin is used to determine region width. The default margin is equal # to the window half size used to call the binding peaks write.narrowpeak.binding <- function(bd,fname,margin=bd$whs,npeaks=NA) { if(is.null(margin)) { margin <- 50; } chrl <- names(bd$npl); names(chrl) <- chrl; md <- do.call(rbind,lapply(chrl,function(chr) { df <- bd$npl[[chr]]; x <- df$x; rs <- df$rs; if(is.null(rs)) { rs <- rep(NA,length(x)) } re <- df$re; if(is.null(re)) { re <- rep(NA,length(x)) } ivi <- which(is.na(rs)); if(any(ivi)) {rs[ivi] <- x[ivi]-margin;} ivi <- which(is.na(re)); if(any(ivi)) {re[ivi] <- x[ivi]+margin;} cbind(chr,rs,re,".","0",".",df$y,-1,-log10(df$fdr),x-rs) # Anshul: converted fdr to -log10 })) md <- md[order(as.numeric(md[,7]),decreasing=T),] if (!is.na(npeaks)) { # Anshul: added this option to print a limited number of peaks npeaks <- min(nrow(md),npeaks) md <- md[1:npeaks,] } write.table(md,file=fname,col.names=F,row.names=F,quote=F,sep="\t",append=F); } get.broad.enrichment.clusters <- function(signal.data,control.data,window.size=1e3,z.thr=3, tag.shift=146/2,background.density.scaling=F, ... ) { # find significantly enriched clusters bg.weight <- dataset.density.ratio(signal.data,control.data,background.density.scaling=background.density.scaling); se <- find.significantly.enriched.regions(signal.data,control.data,window.size=window.size,z.thr=z.thr,tag.shift=tag.shift, bg.weight=bg.weight, ...) chrl <- names(se); names(chrl) <- chrl; se <- lapply(chrl,function(chr) { d <- se[[chr]]; if(length(d$s>1)) { d <- regionset.intersection.c(list(d,d),do.union=T); sc <- points_within(abs(signal.data[[chr]]+tag.shift),d$s,d$e,return.point.counts=T); cc <- points_within(abs(control.data[[chr]]+tag.shift),d$s,d$e,return.point.counts=T); d$rv <- log2((sc+1)/(cc+1)/bg.weight); return(d); } else { return(d) } }) } write.broadpeak.info <- function(bp,fname) { chrl <- names(bp); names(chrl) <- chrl; chrl <- chrl[unlist(lapply(bp,function(d) length(d$s)))>0] md <- do.call(rbind,lapply(chrl,function(chr) { df <- bp[[chr]]; cbind(chr,df$s,df$e,".","0",".",df$rv,-1,-1) })) md <- md[order(as.numeric(md[,7]),decreasing=T),] write.table(md,file=fname,col.names=F,row.names=F,quote=F,sep="\t",append=F); } get.clusters2 <- function(x,CL) { temp <- which(diff(x) != 0) begin <- c(1, temp + 1) end <- c(temp, length(x)) size <- end - begin + 1 begin <- begin[size >= CL] end <- end[size >= CL] size <- size[size >= CL] size <- size[x[end] != 0] begin <- begin[x[end] != 0] end <- end[x[end] != 0] return (list(size=size,begin=begin,end=end)) } ##Deprecated function of points.within ##points.within <- function(x,fs,fe,return.list=F,return.unique=F,sorted=F,return.point.counts=F, ...) { ## .Deprecated("points_within",package="spp") #include a package argument, too ## points_within(x=x,fs=fs,fe=fe,return.list=return.list,return.unique=return.unique,sorted=sorted,return.point.counts=return.point.counts, ...) ##} ##new points_within for deprecated points.within # determine membership of points in fragments points_within <- function(x,fs,fe,return.list=F,return.unique=F,sorted=F,return.point.counts=F, ...) { if(is.null(x) | length(x) < 1) { return(c()) }; if(!sorted) { #ox <- rank(x,ties="first"); ox <- rank(x,ties.method="first"); x <- sort(x); } se <- c(fs,fe); fi <- seq(1:length(fs)); fi <- c(fi,-1*fi); fi <- fi[order(se)]; se <- sort(se); storage.mode(x) <- storage.mode(fi) <- storage.mode(se) <- "integer"; if(return.unique) { iu <- 1; } else { iu <- 0; } if(return.list) { il <- 1; } else { il <- 0; } if(return.point.counts) { rpc <- 1; } else { rpc <- 0; } storage.mode(iu) <- storage.mode(il) <- storage.mode(rpc) <- "integer"; result <- .Call("points_withinC",x,se,fi,il,iu,rpc); if(!sorted & !return.point.counts) { result <- result[ox]; } return(result); } spp/README.md0000644000176200001440000000166513473705063012353 0ustar liggesusers# ChIP-seq processing pipeline An [R](https://www.r-project.org/) package for analysis of ChIP-seq and other functional sequencing data. ## Requirements A unix-flavored OS with R (>= 3.3.0) installed. ## Installation Since version 1.15.4 spp is available on [CRAN](https://CRAN.R-project.org/package=spp) and can be installed using the command ``` install.pacakges("spp", dependencies=TRUE) ``` Alternatively you can use modtools to install spp: ``` require(devtools) devtools::install_github('hms-dbmi/spp', build_vignettes = FALSE) ``` Or download a .tar.gz containing the [latest release](https://github.com/hms-dbmi/spp/releases) and use the standard R installation command, e.g.: ``` R CMD INSTALL spp_1.13.tar.gz ``` Note: Since version 1.15.4 the Boost headers are incorporated and linked taking advantage of [BH package](https://CRAN.R-project.org/package=BH) to avoid problems due to non-standard Boost libraries installation. spp/MD50000644000176200001440000000524513473754204011403 0ustar liggesusers04e05c302dc601de573a0367a2082f81 *DESCRIPTION da12c5010f0f0daced416512bb6d4aa6 *NAMESPACE 116a5c63a386dd2cc4a36a749d666d64 *R/zroutines.R 587776fe7648e3513f23989408cc3579 *README.md 23d5bc5f564ed8481228ac5c60810d6c *configure 0c7eac5426be0a46be8ea26c167c7467 *configure.ac be2c5e8e0c3ecfb2e704ae7782f53fc8 *man/add.broad.peak.regions.Rd 4e831652f8327f701ceeb9e5877acded *man/densum.Rd 180f2ad88f4afb2dbe5b9423158bf609 *man/find.binding.positions.Rd c0310d39b0eae4edc6af6dc1581c6b12 *man/get.binding.characteristics.Rd ec053c69d5fa2610b5469aee503f11ff *man/get.broad.enrichment.clusters.Rd 120769400978a543a5330adb4c1aad8f *man/get.conservative.fold.enrichment.profile.Rd ca2da068879340379a05b404d73e9132 *man/get.conservative.fold.enrichment.profile2.Rd 3cdf7854ca636eb095af451f212829d1 *man/get.mser.Rd c85e1e627f6fd2668ed6eae6b8679a1b *man/get.mser.interpolation.Rd fe7fa4c525f2fc0422f0fa3e7fa1a824 *man/get.smoothed.enrichment.mle.Rd 0faded7b3cd93d37e85ae76322d753e9 *man/get.smoothed.enrichment.mle2.Rd f36594e834f536f09b767856c2d5f347 *man/get.smoothed.tag.density.Rd d0ef6fa196b7482412869ec7cfdc7980 *man/output.binding.results.Rd 166ab0cb2c125a13b4a75ec4de30a7d8 *man/points_within.Rd b21d19476c2291b3a0b320841f25456f *man/read.arachne.tags.Rd a714eaeb0cc803ad9fc6caff0c92f2cf *man/read.bam.tags.Rd 19ea9eb29a5ec1d3c307ef53f0c28276 *man/read.bin.maqmap.tags.Rd 21878f5fb0fd4a22b741728c60b1b2ea *man/read.bowtie.tags.Rd be89aa4c7f4bc07a7109ba9d5ca0ebbb *man/read.eland.tags.Rd c39962f0f6f4c619e1403c30545ec71f *man/read.helicos.tags.Rd 57910f9620caa82960ba92fa17597e6a *man/read.maqmap.tags.Rd 2e3d76a6a7b28adff8ce716d4801d41c *man/read.meland.tags.Rd d8c0a935295b883c7cd73ecc101631bf *man/read.short.arachne.tags.Rd 0babfb2b1e9833f1341accbffea1988b *man/read.tagalign.tags.Rd b640b6584e4d5f47c83173e1b23640c6 *man/remove.local.tag.anomalies.Rd 46a10b9b5e8019a41d72bfac5eac87a8 *man/select.informative.tags.Rd db41435b342984f3bfe76b8afb858a91 *man/spp-package.Rd d2b6c4a83c612aa2e73cfcd7c270e210 *man/write.broadpeak.info.Rd 718b280ad26d9575ba799a33df19828a *man/write.narrowpeak.binding.Rd 359ddcdb6181b853df7eb754bcec13b5 *man/writewig.Rd 3d9bac31e0bb6590ce8544003d52cf0a *src/Makevars.in 7563d9a17bfae0f4bb648d859da1e50f *src/api_global.h de1e654f079c85b8e00c3fb953c07420 *src/bed2vector.cpp 0d2294ac5a53bca2b3a538a3e9fbe6be *src/cdensum.c edff4c928e491daba8a54d5054a04090 *src/const.h 6ccf2ac5b9712bf69d59422ffb51ef06 *src/maqmap.c c9a8a87c72a0c995a34b633c78a67de5 *src/maqmap.h dd8bc59bde265c8cdf9e907c33a6fff4 *src/maqread.cpp 08bf369a220a2bf9e67ca4867c670638 *src/pc.h 88a5c7aeab4c28e3ab88281a5344ced4 *src/peaks.cpp 2ee25121f93953359214c30b75c3fbc9 *src/spp_init.c 1aa7d7f195d15ed9d58329d5843eda4d *src/wdl.cpp spp/DESCRIPTION0000644000176200001440000000117213473754204012574 0ustar liggesusersPackage: spp Type: Package Title: ChIP-Seq Processing Pipeline Version: 1.16.0 Author: Peter K Depends: R (>= 3.3.0), Rcpp Imports: Rsamtools, caTools, parallel, graphics, stats Suggests: methods LinkingTo: Rcpp, BH (>= 1.66) OS_type: unix Maintainer: Peter Kharchenko Description: Analysis of ChIP-seq and other functional sequencing data [Kharchenko PV (2008) ]. License: GPL-2 LazyLoad: yes Note: revised for compliance with CRAN by K.Pal and C.M.Livi NeedsCompilation: yes Packaged: 2019-05-30 07:46:25 UTC; clivi Repository: CRAN Date/Publication: 2019-05-30 13:20:04 UTC spp/configure0000755000176200001440000033157413473705121013003 0ustar liggesusers#! /bin/sh # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.69 for SPP 1.7. # # Report bugs to . # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. # # # This configure script is free software; the Free Software Foundation # gives unlimited permission to copy, distribute and modify it. ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # Use a proper internal environment variable to ensure we don't fall # into an infinite loop, continuously re-executing ourselves. if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then _as_can_reexec=no; export _as_can_reexec; # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. $as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 as_fn_exit 255 fi # We don't want this to propagate to other subprocesses. { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST else case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi " as_required="as_fn_return () { (exit \$1); } as_fn_success () { as_fn_return 0; } as_fn_failure () { as_fn_return 1; } as_fn_ret_success () { return 0; } as_fn_ret_failure () { return 1; } exitcode=0 as_fn_success || { exitcode=1; echo as_fn_success failed.; } as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : else exitcode=1; echo positional parameters were not saved. fi test x\$exitcode = x0 || exit 1 test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1" if (eval "$as_required") 2>/dev/null; then : as_have_required=yes else as_have_required=no fi if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. as_found=: case $as_dir in #( /*) for as_base in sh bash ksh sh5; do # Try only shells that exist, to save several forks. as_shell=$as_dir/$as_base if { test -f "$as_shell" || test -f "$as_shell.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : CONFIG_SHELL=$as_shell as_have_required=yes if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : break 2 fi fi done;; esac as_found=false done $as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : CONFIG_SHELL=$SHELL as_have_required=yes fi; } IFS=$as_save_IFS if test "x$CONFIG_SHELL" != x; then : export CONFIG_SHELL # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also # works around shells that cannot unset nonexistent variables. # Preserve -v and -x to the replacement shell. BASH_ENV=/dev/null ENV=/dev/null (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV case $- in # (((( *v*x* | *x*v* ) as_opts=-vx ;; *v* ) as_opts=-v ;; *x* ) as_opts=-x ;; * ) as_opts= ;; esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. $as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi if test x$as_have_required = xno; then : $as_echo "$0: This script requires a shell more modern than all" $as_echo "$0: the shells that I found on your system." if test x${ZSH_VERSION+set} = xset ; then $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" $as_echo "$0: be upgraded to zsh 4.3.4 or later." else $as_echo "$0: Please tell bug-autoconf@gnu.org and $0: Peter_Kharchenko@hms.harvard.edu about your system, $0: including any error possibly output before this $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." fi exit 1 fi fi fi SHELL=${CONFIG_SHELL-/bin/sh} export SHELL # Unset more variables known to interfere with behavior of common tools. CLICOLOR_FORCE= GREP_OPTIONS= unset CLICOLOR_FORCE GREP_OPTIONS ## --------------------- ## ## M4sh Shell Functions. ## ## --------------------- ## # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits as_lineno_1=$LINENO as_lineno_1a=$LINENO as_lineno_2=$LINENO as_lineno_2a=$LINENO eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" && test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || { # Blame Lee E. McMahon (1931-1989) for sed's syntax. :-) sed -n ' p /[$]LINENO/= ' <$as_myself | sed ' s/[$]LINENO.*/&-/ t lineno b :lineno N :loop s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ t loop s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } # If we had to re-execute with $CONFIG_SHELL, we're ensured to have # already done that, so ensure we don't try to do so again and fall # in an infinite loop. This has already happened in practice. _as_can_reexec=no; export _as_can_reexec # Don't try to exec as it changes $[0], causing all sort of problems # (the dirname of $[0] is not the place where we might find the # original and so on. Autoconf is especially sensitive to this). . "./$as_me.lineno" # Exit status is that of the last command. exit } ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" test -n "$DJDIR" || exec 7<&0 &1 # Name of the host. # hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status, # so uname gets run too. ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` # # Initializations. # ac_default_prefix=/usr/local ac_clean_files= ac_config_libobj_dir=. LIBOBJS= cross_compiling=no subdirs= MFLAGS= MAKEFLAGS= # Identity of this package. PACKAGE_NAME='SPP' PACKAGE_TARNAME='spp' PACKAGE_VERSION='1.7' PACKAGE_STRING='SPP 1.7' PACKAGE_BUGREPORT='Peter_Kharchenko@hms.harvard.edu' PACKAGE_URL='' ac_subst_vars='LTLIBOBJS LIBOBJS PKG_LIBS PKG_CPPFLAGS RLD RINC R_HOME HAVE_LIBBZ2 OBJEXT EXEEXT ac_ct_CC CPPFLAGS LDFLAGS CFLAGS CC target_alias host_alias build_alias LIBS ECHO_T ECHO_N ECHO_C DEFS mandir localedir libdir psdir pdfdir dvidir htmldir infodir docdir oldincludedir includedir localstatedir sharedstatedir sysconfdir datadir datarootdir libexecdir sbindir bindir program_transform_name prefix exec_prefix PACKAGE_URL PACKAGE_BUGREPORT PACKAGE_STRING PACKAGE_VERSION PACKAGE_TARNAME PACKAGE_NAME PATH_SEPARATOR SHELL' ac_subst_files='' ac_user_opts=' enable_option_checking ' ac_precious_vars='build_alias host_alias target_alias CC CFLAGS LDFLAGS LIBS CPPFLAGS PKG_CPPFLAGS PKG_LIBS' # Initialize some variables set by options. ac_init_help= ac_init_version=false ac_unrecognized_opts= ac_unrecognized_sep= # The variables have the same names as the options, with # dashes changed to underlines. cache_file=/dev/null exec_prefix=NONE no_create= no_recursion= prefix=NONE program_prefix=NONE program_suffix=NONE program_transform_name=s,x,x, silent= site= srcdir= verbose= x_includes=NONE x_libraries=NONE # Installation directory options. # These are left unexpanded so users can "make install exec_prefix=/foo" # and all the variables that are supposed to be based on exec_prefix # by default will actually change. # Use braces instead of parens because sh, perl, etc. also accept them. # (The list follows the same order as the GNU Coding Standards.) bindir='${exec_prefix}/bin' sbindir='${exec_prefix}/sbin' libexecdir='${exec_prefix}/libexec' datarootdir='${prefix}/share' datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' infodir='${datarootdir}/info' htmldir='${docdir}' dvidir='${docdir}' pdfdir='${docdir}' psdir='${docdir}' libdir='${exec_prefix}/lib' localedir='${datarootdir}/locale' mandir='${datarootdir}/man' ac_prev= ac_dashdash= for ac_option do # If the previous option needs an argument, assign it. if test -n "$ac_prev"; then eval $ac_prev=\$ac_option ac_prev= continue fi case $ac_option in *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; *=) ac_optarg= ;; *) ac_optarg=yes ;; esac # Accept the important Cygnus configure options, so we can diagnose typos. case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; -bindir | --bindir | --bindi | --bind | --bin | --bi) ac_prev=bindir ;; -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) bindir=$ac_optarg ;; -build | --build | --buil | --bui | --bu) ac_prev=build_alias ;; -build=* | --build=* | --buil=* | --bui=* | --bu=*) build_alias=$ac_optarg ;; -cache-file | --cache-file | --cache-fil | --cache-fi \ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) ac_prev=cache_file ;; -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) cache_file=$ac_optarg ;; --config-cache | -C) cache_file=config.cache ;; -datadir | --datadir | --datadi | --datad) ac_prev=datadir ;; -datadir=* | --datadir=* | --datadi=* | --datad=*) datadir=$ac_optarg ;; -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ | --dataroo | --dataro | --datar) ac_prev=datarootdir ;; -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) datarootdir=$ac_optarg ;; -disable-* | --disable-*) ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=no ;; -docdir | --docdir | --docdi | --doc | --do) ac_prev=docdir ;; -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) docdir=$ac_optarg ;; -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) ac_prev=dvidir ;; -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) dvidir=$ac_optarg ;; -enable-* | --enable-*) ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid feature name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval enable_$ac_useropt=\$ac_optarg ;; -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ | --exec | --exe | --ex) ac_prev=exec_prefix ;; -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ | --exec=* | --exe=* | --ex=*) exec_prefix=$ac_optarg ;; -gas | --gas | --ga | --g) # Obsolete; use --with-gas. with_gas=yes ;; -help | --help | --hel | --he | -h) ac_init_help=long ;; -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) ac_init_help=recursive ;; -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) ac_init_help=short ;; -host | --host | --hos | --ho) ac_prev=host_alias ;; -host=* | --host=* | --hos=* | --ho=*) host_alias=$ac_optarg ;; -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) ac_prev=htmldir ;; -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ | --ht=*) htmldir=$ac_optarg ;; -includedir | --includedir | --includedi | --included | --include \ | --includ | --inclu | --incl | --inc) ac_prev=includedir ;; -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ | --includ=* | --inclu=* | --incl=* | --inc=*) includedir=$ac_optarg ;; -infodir | --infodir | --infodi | --infod | --info | --inf) ac_prev=infodir ;; -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) infodir=$ac_optarg ;; -libdir | --libdir | --libdi | --libd) ac_prev=libdir ;; -libdir=* | --libdir=* | --libdi=* | --libd=*) libdir=$ac_optarg ;; -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ | --libexe | --libex | --libe) ac_prev=libexecdir ;; -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ | --libexe=* | --libex=* | --libe=*) libexecdir=$ac_optarg ;; -localedir | --localedir | --localedi | --localed | --locale) ac_prev=localedir ;; -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) localedir=$ac_optarg ;; -localstatedir | --localstatedir | --localstatedi | --localstated \ | --localstate | --localstat | --localsta | --localst | --locals) ac_prev=localstatedir ;; -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) localstatedir=$ac_optarg ;; -mandir | --mandir | --mandi | --mand | --man | --ma | --m) ac_prev=mandir ;; -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) mandir=$ac_optarg ;; -nfp | --nfp | --nf) # Obsolete; use --without-fp. with_fp=no ;; -no-create | --no-create | --no-creat | --no-crea | --no-cre \ | --no-cr | --no-c | -n) no_create=yes ;; -no-recursion | --no-recursion | --no-recursio | --no-recursi \ | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) no_recursion=yes ;; -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ | --oldin | --oldi | --old | --ol | --o) ac_prev=oldincludedir ;; -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) oldincludedir=$ac_optarg ;; -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) ac_prev=prefix ;; -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) prefix=$ac_optarg ;; -program-prefix | --program-prefix | --program-prefi | --program-pref \ | --program-pre | --program-pr | --program-p) ac_prev=program_prefix ;; -program-prefix=* | --program-prefix=* | --program-prefi=* \ | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) program_prefix=$ac_optarg ;; -program-suffix | --program-suffix | --program-suffi | --program-suff \ | --program-suf | --program-su | --program-s) ac_prev=program_suffix ;; -program-suffix=* | --program-suffix=* | --program-suffi=* \ | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) program_suffix=$ac_optarg ;; -program-transform-name | --program-transform-name \ | --program-transform-nam | --program-transform-na \ | --program-transform-n | --program-transform- \ | --program-transform | --program-transfor \ | --program-transfo | --program-transf \ | --program-trans | --program-tran \ | --progr-tra | --program-tr | --program-t) ac_prev=program_transform_name ;; -program-transform-name=* | --program-transform-name=* \ | --program-transform-nam=* | --program-transform-na=* \ | --program-transform-n=* | --program-transform-=* \ | --program-transform=* | --program-transfor=* \ | --program-transfo=* | --program-transf=* \ | --program-trans=* | --program-tran=* \ | --progr-tra=* | --program-tr=* | --program-t=*) program_transform_name=$ac_optarg ;; -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) ac_prev=pdfdir ;; -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) pdfdir=$ac_optarg ;; -psdir | --psdir | --psdi | --psd | --ps) ac_prev=psdir ;; -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) psdir=$ac_optarg ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) silent=yes ;; -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ | --sbi=* | --sb=*) sbindir=$ac_optarg ;; -sharedstatedir | --sharedstatedir | --sharedstatedi \ | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ | --sharedst | --shareds | --shared | --share | --shar \ | --sha | --sh) ac_prev=sharedstatedir ;; -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ | --sha=* | --sh=*) sharedstatedir=$ac_optarg ;; -site | --site | --sit) ac_prev=site ;; -site=* | --site=* | --sit=*) site=$ac_optarg ;; -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) ac_prev=srcdir ;; -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) srcdir=$ac_optarg ;; -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ | --syscon | --sysco | --sysc | --sys | --sy) ac_prev=sysconfdir ;; -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) sysconfdir=$ac_optarg ;; -target | --target | --targe | --targ | --tar | --ta | --t) ac_prev=target_alias ;; -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) target_alias=$ac_optarg ;; -v | -verbose | --verbose | --verbos | --verbo | --verb) verbose=yes ;; -version | --version | --versio | --versi | --vers | -V) ac_init_version=: ;; -with-* | --with-*) ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=\$ac_optarg ;; -without-* | --without-*) ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && as_fn_error $? "invalid package name: $ac_useropt" ac_useropt_orig=$ac_useropt ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" "*) ;; *) ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig" ac_unrecognized_sep=', ';; esac eval with_$ac_useropt=no ;; --x) # Obsolete; use --with-x. with_x=yes ;; -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ | --x-incl | --x-inc | --x-in | --x-i) ac_prev=x_includes ;; -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) x_includes=$ac_optarg ;; -x-libraries | --x-libraries | --x-librarie | --x-librari \ | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) ac_prev=x_libraries ;; -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) x_libraries=$ac_optarg ;; -*) as_fn_error $? "unrecognized option: \`$ac_option' Try \`$0 --help' for more information" ;; *=*) ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` # Reject names that are not valid shell variable names. case $ac_envvar in #( '' | [0-9]* | *[!_$as_cr_alnum]* ) as_fn_error $? "invalid variable name: \`$ac_envvar'" ;; esac eval $ac_envvar=\$ac_optarg export $ac_envvar ;; *) # FIXME: should be removed in autoconf 3.0. $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; esac done if test -n "$ac_prev"; then ac_option=--`echo $ac_prev | sed 's/_/-/g'` as_fn_error $? "missing argument to $ac_option" fi if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi # Check all directory arguments for consistency. for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ libdir localedir mandir do eval ac_val=\$$ac_var # Remove trailing slashes. case $ac_val in */ ) ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'` eval $ac_var=\$ac_val;; esac # Be sure to have absolute directory names. case $ac_val in [\\/$]* | ?:[\\/]* ) continue;; NONE | '' ) case $ac_var in *prefix ) continue;; esac;; esac as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val" done # There might be people who depend on the old broken behavior: `$host' # used to hold the argument of --host etc. # FIXME: To remove some day. build=$build_alias host=$host_alias target=$target_alias # FIXME: To remove some day. if test "x$host_alias" != x; then if test "x$build_alias" = x; then cross_compiling=maybe elif test "x$build_alias" != "x$host_alias"; then cross_compiling=yes fi fi ac_tool_prefix= test -n "$host_alias" && ac_tool_prefix=$host_alias- test "$silent" = yes && exec 6>/dev/null ac_pwd=`pwd` && test -n "$ac_pwd" && ac_ls_di=`ls -di .` && ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || as_fn_error $? "working directory cannot be determined" test "X$ac_ls_di" = "X$ac_pwd_ls_di" || as_fn_error $? "pwd does not report name of working directory" # Find the source files, if location was not specified. if test -z "$srcdir"; then ac_srcdir_defaulted=yes # Try the directory containing this script, then the parent directory. ac_confdir=`$as_dirname -- "$as_myself" || $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` srcdir=$ac_confdir if test ! -r "$srcdir/$ac_unique_file"; then srcdir=.. fi else ac_srcdir_defaulted=no fi if test ! -r "$srcdir/$ac_unique_file"; then test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir" fi ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" ac_abs_confdir=`( cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg" pwd)` # When building in place, set srcdir=. if test "$ac_abs_confdir" = "$ac_pwd"; then srcdir=. fi # Remove unnecessary trailing slashes from srcdir. # Double slashes in file names in object file debugging info # mess up M-x gdb in Emacs. case $srcdir in */) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; esac for ac_var in $ac_precious_vars; do eval ac_env_${ac_var}_set=\${${ac_var}+set} eval ac_env_${ac_var}_value=\$${ac_var} eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} eval ac_cv_env_${ac_var}_value=\$${ac_var} done # # Report the --help message. # if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF \`configure' configures SPP 1.7 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... To assign environment variables (e.g., CC, CFLAGS...), specify them as VAR=VALUE. See below for descriptions of some of the useful variables. Defaults for the options are specified in brackets. Configuration: -h, --help display this help and exit --help=short display options specific to this package --help=recursive display the short help of all the included packages -V, --version display version information and exit -q, --quiet, --silent do not print \`checking ...' messages --cache-file=FILE cache test results in FILE [disabled] -C, --config-cache alias for \`--cache-file=config.cache' -n, --no-create do not create output files --srcdir=DIR find the sources in DIR [configure dir or \`..'] Installation directories: --prefix=PREFIX install architecture-independent files in PREFIX [$ac_default_prefix] --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX [PREFIX] By default, \`make install' will install all the files in \`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify an installation prefix other than \`$ac_default_prefix' using \`--prefix', for instance \`--prefix=\$HOME'. For better control, use the options below. Fine tuning of the installation directories: --bindir=DIR user executables [EPREFIX/bin] --sbindir=DIR system admin executables [EPREFIX/sbin] --libexecdir=DIR program executables [EPREFIX/libexec] --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] --datadir=DIR read-only architecture-independent data [DATAROOTDIR] --infodir=DIR info documentation [DATAROOTDIR/info] --localedir=DIR locale-dependent data [DATAROOTDIR/locale] --mandir=DIR man documentation [DATAROOTDIR/man] --docdir=DIR documentation root [DATAROOTDIR/doc/spp] --htmldir=DIR html documentation [DOCDIR] --dvidir=DIR dvi documentation [DOCDIR] --pdfdir=DIR pdf documentation [DOCDIR] --psdir=DIR ps documentation [DOCDIR] _ACEOF cat <<\_ACEOF _ACEOF fi if test -n "$ac_init_help"; then case $ac_init_help in short | recursive ) echo "Configuration of SPP 1.7:";; esac cat <<\_ACEOF Some influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory PKG_CPPFLAGS additional pre-processor flags PKG_LIBS additional linker library flags Use these variables to override the choices made by `configure' or to help it to find libraries and programs with nonstandard names/locations. Report bugs to . _ACEOF ac_status=$? fi if test "$ac_init_help" = "recursive"; then # If there are subdirs, report their specific --help. for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue test -d "$ac_dir" || { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } || continue ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } # Check for guested configure. if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive elif test -f "$ac_srcdir/configure"; then echo && $SHELL "$ac_srcdir/configure" --help=recursive else $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF SPP configure 1.7 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF exit fi ## ------------------------ ## ## Autoconf initialization. ## ## ------------------------ ## # ac_fn_c_try_compile LINENO # -------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest.$ac_objext; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_compile # ac_fn_c_try_link LINENO # ----------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. ac_fn_c_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack rm -f conftest.$ac_objext conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then grep -v '^ *+' conftest.err >conftest.er1 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { test -z "$ac_c_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext }; then : ac_retval=0 else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 fi # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would # interfere with the next link command; also delete a directory that is # left behind by Apple's compiler. We do this before executing the actions. rm -rf conftest.dSYM conftest_ipa8_conftest.oo eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval } # ac_fn_c_try_link cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by SPP $as_me 1.7, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ _ACEOF exec 5>>config.log { cat <<_ASUNAME ## --------- ## ## Platform. ## ## --------- ## hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` uname -m = `(uname -m) 2>/dev/null || echo unknown` uname -r = `(uname -r) 2>/dev/null || echo unknown` uname -s = `(uname -s) 2>/dev/null || echo unknown` uname -v = `(uname -v) 2>/dev/null || echo unknown` /usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` /bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` /bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` /usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` /usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` /usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` /bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` /usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` /bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` _ASUNAME as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. $as_echo "PATH: $as_dir" done IFS=$as_save_IFS } >&5 cat >&5 <<_ACEOF ## ----------- ## ## Core tests. ## ## ----------- ## _ACEOF # Keep a trace of the command line. # Strip out --no-create and --no-recursion so they do not pile up. # Strip out --silent because we don't want to record it for future runs. # Also quote any args containing shell meta-characters. # Make two passes to allow for proper duplicate-argument suppression. ac_configure_args= ac_configure_args0= ac_configure_args1= ac_must_keep_next=false for ac_pass in 1 2 do for ac_arg do case $ac_arg in -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; 2) as_fn_append ac_configure_args1 " '$ac_arg'" if test $ac_must_keep_next = true; then ac_must_keep_next=false # Got value, back to normal. else case $ac_arg in *=* | --config-cache | -C | -disable-* | --disable-* \ | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ | -with-* | --with-* | -without-* | --without-* | --x) case "$ac_configure_args0 " in "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; esac ;; -* ) ac_must_keep_next=true ;; esac fi as_fn_append ac_configure_args " '$ac_arg'" ;; esac done done { ac_configure_args0=; unset ac_configure_args0;} { ac_configure_args1=; unset ac_configure_args1;} # When interrupted or exit'd, cleanup temporary files, and complete # config.log. We remove comments because anyway the quotes in there # would cause problems or look ugly. # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? # Save into config.log some information that might help in debugging. { echo $as_echo "## ---------------- ## ## Cache variables. ## ## ---------------- ##" echo # The following way of writing the cache mishandles newlines in values, ( for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( *${as_nl}ac_space=\ *) sed -n \ "s/'\''/'\''\\\\'\'''\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" ;; #( *) sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) echo $as_echo "## ----------------- ## ## Output variables. ## ## ----------------- ##" echo for ac_var in $ac_subst_vars do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then $as_echo "## ------------------- ## ## File substitutions. ## ## ------------------- ##" echo for ac_var in $ac_subst_files do eval ac_val=\$$ac_var case $ac_val in *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac $as_echo "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then $as_echo "## ----------- ## ## confdefs.h. ## ## ----------- ##" echo cat confdefs.h echo fi test "$ac_signal" != 0 && $as_echo "$as_me: caught signal $ac_signal" $as_echo "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && exit $exit_status ' 0 for ac_signal in 1 2 13 15; do trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal done ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h $as_echo "/* confdefs.h */" > confdefs.h # Predefined preprocessor variables. cat >>confdefs.h <<_ACEOF #define PACKAGE_NAME "$PACKAGE_NAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_TARNAME "$PACKAGE_TARNAME" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_VERSION "$PACKAGE_VERSION" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_STRING "$PACKAGE_STRING" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" _ACEOF cat >>confdefs.h <<_ACEOF #define PACKAGE_URL "$PACKAGE_URL" _ACEOF # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. ac_site_file1=NONE ac_site_file2=NONE if test -n "$CONFIG_SITE"; then # We do not want a PATH search for config.site. case $CONFIG_SITE in #(( -*) ac_site_file1=./$CONFIG_SITE;; */*) ac_site_file1=$CONFIG_SITE;; *) ac_site_file1=./$CONFIG_SITE;; esac elif test "x$prefix" != xNONE; then ac_site_file1=$prefix/share/config.site ac_site_file2=$prefix/etc/config.site else ac_site_file1=$ac_default_prefix/share/config.site ac_site_file2=$ac_default_prefix/etc/config.site fi for ac_site_file in "$ac_site_file1" "$ac_site_file2" do test "x$ac_site_file" = xNONE && continue if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 $as_echo "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" \ || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file See \`config.log' for more details" "$LINENO" 5; } fi done if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special files # actually), so we avoid doing that. DJGPP emulates it as a regular file. if test /dev/null != "$cache_file" && test -f "$cache_file"; then { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 $as_echo "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 $as_echo "$as_me: creating cache $cache_file" >&6;} >$cache_file fi # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false for ac_var in $ac_precious_vars; do eval ac_old_set=\$ac_cv_env_${ac_var}_set eval ac_new_set=\$ac_env_${ac_var}_set eval ac_old_val=\$ac_cv_env_${ac_var}_value eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 $as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) if test "x$ac_old_val" != "x$ac_new_val"; then # differences in whitespace do not lead to failure. ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 $as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 $as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 $as_echo "$as_me: former value: \`$ac_old_val'" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 $as_echo "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. if test "$ac_new_set" = set; then case $ac_new_val in *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. *) as_fn_append ac_configure_args " '$ac_arg'" ;; esac fi done if $ac_cache_corrupted; then { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 $as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 fi ## -------------------- ## ## Main body of script. ## ## -------------------- ## ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. set dummy ${ac_tool_prefix}gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$ac_cv_prog_CC"; then ac_ct_CC=$CC # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="gcc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi else CC="$ac_cv_prog_CC" fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. set dummy ${ac_tool_prefix}cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="${ac_tool_prefix}cc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi fi if test -z "$CC"; then # Extract the first word of "cc", so it can be a program name with args. set dummy cc; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else ac_prog_rejected=no as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then ac_prog_rejected=yes continue fi ac_cv_prog_CC="cc" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS if test $ac_prog_rejected = yes; then # We found a bogon in the path, so make sure we never use it. set dummy $ac_cv_prog_CC shift if test $# != 0; then # We chose a different compiler from the bogus one. # However, it has the same basename, so the bogon will be chosen # first if we set CC to just the basename; use the full file name. shift ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" fi fi fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi fi if test -z "$CC"; then if test -n "$ac_tool_prefix"; then for ac_prog in cl.exe do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$CC"; then ac_cv_prog_CC="$CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_CC="$ac_tool_prefix$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi CC=$ac_cv_prog_CC if test -n "$CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 $as_echo "$CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$CC" && break done fi if test -z "$CC"; then ac_ct_CC=$CC for ac_prog in cl.exe do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 $as_echo_n "checking for $ac_word... " >&6; } if ${ac_cv_prog_ac_ct_CC+:} false; then : $as_echo_n "(cached) " >&6 else if test -n "$ac_ct_CC"; then ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. for ac_exec_ext in '' $ac_executable_extensions; do if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then ac_cv_prog_ac_ct_CC="$ac_prog" $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 break 2 fi done done IFS=$as_save_IFS fi fi ac_ct_CC=$ac_cv_prog_ac_ct_CC if test -n "$ac_ct_CC"; then { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 $as_echo "$ac_ct_CC" >&6; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } fi test -n "$ac_ct_CC" && break done if test "x$ac_ct_CC" = x; then CC="" else case $cross_compiling:$ac_tool_warned in yes:) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 $as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac CC=$ac_ct_CC fi fi fi test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "no acceptable C compiler found in \$PATH See \`config.log' for more details" "$LINENO" 5; } # Provide some information about the compiler. $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do { { ac_try="$ac_compiler $ac_option >&5" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then sed '10a\ ... rest of stderr output deleted ... 10q' conftest.err >conftest.er1 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 $as_echo_n "checking whether the C compiler works... " >&6; } ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" ac_rmfiles= for ac_file in $ac_files do case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; * ) ac_rmfiles="$ac_rmfiles $ac_file";; esac done rm -f $ac_rmfiles if { { ac_try="$ac_link_default" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link_default") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, # so that the user can short-circuit this test for compilers unknown to # Autoconf. for ac_file in $ac_files '' do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; [ab].out ) # We found the default executable, but exeext='' is most # certainly right. break;; *.* ) if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi # We set ac_cv_exeext here because the later test for it is not # safe: cross compilers may not add the suffix if given an `-o' # argument, so we may need to know it at that point already. # Even if this section looks crufty: it has the advantage of # actually working. break;; * ) break;; esac done test "$ac_cv_exeext" = no && ac_cv_exeext= else ac_file='' fi if test -z "$ac_file"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error 77 "C compiler cannot create executables See \`config.log' for more details" "$LINENO" 5; } else { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 $as_echo_n "checking for C compiler default output file name... " >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 $as_echo "$ac_file" >&6; } ac_exeext=$ac_cv_exeext rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 $as_echo_n "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with # `rm'. for ac_file in conftest.exe conftest conftest.*; do test -f "$ac_file" || continue case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM | *.o | *.obj ) ;; *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` break;; * ) break;; esac done else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 $as_echo "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext ac_exeext=$EXEEXT cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int main () { FILE *f = fopen ("conftest.out", "w"); return ferror (f) || fclose (f) != 0; ; return 0; } _ACEOF ac_clean_files="$ac_clean_files conftest.out" # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 $as_echo_n "checking whether we are cross compiling... " >&6; } if test "$cross_compiling" != yes; then { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if { ac_try='./conftest$ac_cv_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run C compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details" "$LINENO" 5; } fi fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 $as_echo "$cross_compiling" >&6; } rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save { $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 $as_echo_n "checking for suffix of object files... " >&6; } if ${ac_cv_objext+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF rm -f conftest.o conftest.obj if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" $as_echo "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>&5 ac_status=$? $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; then : for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.dSYM ) ;; *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` break;; esac done else $as_echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 $as_echo "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 $as_echo "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 $as_echo_n "checking whether we are using the GNU C compiler... " >&6; } if ${ac_cv_c_compiler_gnu+:} false; then : $as_echo_n "(cached) " >&6 else cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { #ifndef __GNUC__ choke me #endif ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_compiler_gnu=yes else ac_compiler_gnu=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_cv_c_compiler_gnu=$ac_compiler_gnu fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 $as_echo "$ac_cv_c_compiler_gnu" >&6; } if test $ac_compiler_gnu = yes; then GCC=yes else GCC= fi ac_test_CFLAGS=${CFLAGS+set} ac_save_CFLAGS=$CFLAGS { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 $as_echo_n "checking whether $CC accepts -g... " >&6; } if ${ac_cv_prog_cc_g+:} false; then : $as_echo_n "(cached) " >&6 else ac_save_c_werror_flag=$ac_c_werror_flag ac_c_werror_flag=yes ac_cv_prog_cc_g=no CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes else CFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : else ac_c_werror_flag=$ac_save_c_werror_flag CFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int main () { ; return 0; } _ACEOF if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_g=yes fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ac_c_werror_flag=$ac_save_c_werror_flag fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 $as_echo "$ac_cv_prog_cc_g" >&6; } if test "$ac_test_CFLAGS" = set; then CFLAGS=$ac_save_CFLAGS elif test $ac_cv_prog_cc_g = yes; then if test "$GCC" = yes; then CFLAGS="-g -O2" else CFLAGS="-g" fi else if test "$GCC" = yes; then CFLAGS="-O2" else CFLAGS= fi fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 $as_echo_n "checking for $CC option to accept ISO C89... " >&6; } if ${ac_cv_prog_cc_c89+:} false; then : $as_echo_n "(cached) " >&6 else ac_cv_prog_cc_c89=no ac_save_CC=$CC cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include #include struct stat; /* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ struct buf { int x; }; FILE * (*rcsopen) (struct buf *, struct stat *, int); static char *e (p, i) char **p; int i; { return p[i]; } static char *f (char * (*g) (char **, int), char **p, ...) { char *s; va_list v; va_start (v,p); s = g (p, va_arg (v,int)); va_end (v); return s; } /* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has function prototypes and stuff, but not '\xHH' hex character constants. These don't provoke an error unfortunately, instead are silently treated as 'x'. The following induces an error, until -std is added to get proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an array size at least. It's necessary to write '\x00'==0 to get something that's true only with -std. */ int osf4_cc_array ['\x00' == 0 ? 1 : -1]; /* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters inside strings and character constants. */ #define FOO(x) 'x' int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; int test (int i, double x); struct s1 {int (*f) (int a);}; struct s2 {int (*f) (double a);}; int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); int argc; char **argv; int main () { return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; ; return 0; } _ACEOF for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" do CC="$ac_save_CC $ac_arg" if ac_fn_c_try_compile "$LINENO"; then : ac_cv_prog_cc_c89=$ac_arg fi rm -f core conftest.err conftest.$ac_objext test "x$ac_cv_prog_cc_c89" != "xno" && break done rm -f conftest.$ac_ext CC=$ac_save_CC fi # AC_CACHE_VAL case "x$ac_cv_prog_cc_c89" in x) { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 $as_echo "none needed" >&6; } ;; xno) { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 $as_echo "unsupported" >&6; } ;; *) CC="$CC $ac_cv_prog_cc_c89" { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 $as_echo "$ac_cv_prog_cc_c89" >&6; } ;; esac if test "x$ac_cv_prog_cc_c89" != xno; then : fi ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu { $as_echo "$as_me:${as_lineno-$LINENO}: checking for BZ2_bzDecompressInit in -lbz2" >&5 $as_echo_n "checking for BZ2_bzDecompressInit in -lbz2... " >&6; } if ${ac_cv_lib_bz2_BZ2_bzDecompressInit+:} false; then : $as_echo_n "(cached) " >&6 else ac_check_lib_save_LIBS=$LIBS LIBS="-lbz2 $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ /* Override any GCC internal prototype to avoid an error. Use char because int might match the return type of a GCC builtin and then its argument prototype would still apply. */ #ifdef __cplusplus extern "C" #endif char BZ2_bzDecompressInit (); int main () { return BZ2_bzDecompressInit (); ; return 0; } _ACEOF if ac_fn_c_try_link "$LINENO"; then : ac_cv_lib_bz2_BZ2_bzDecompressInit=yes else ac_cv_lib_bz2_BZ2_bzDecompressInit=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_bz2_BZ2_bzDecompressInit" >&5 $as_echo "$ac_cv_lib_bz2_BZ2_bzDecompressInit" >&6; } if test "x$ac_cv_lib_bz2_BZ2_bzDecompressInit" = xyes; then : cat >>confdefs.h <<_ACEOF #define HAVE_LIBBZ2 1 _ACEOF LIBS="-lbz2 $LIBS" fi # find R and set CC/CFLAGS : ${R_HOME=`R RHOME`} if test -z "${R_HOME}"; then echo "could not determine R_HOME" exit 1 fi CC=`"${R_HOME}/bin/R" CMD config CC`; CXX11=`"${R_HOME}/bin/R" CMD config CXX11` CXX11STD=`"${R_HOME}/bin/R" CMD config CXX11STD` CXX="${CXX11} ${CXX11STD}" CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXX11FLAGS` ac_ext=cpp ac_cpp='$CXXCPP $CPPFLAGS' ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_cxx_compiler_gnu CPPFLAGS="${CPPFLAGS} ${PKG_CPPFLAGS}" CXXFLAGS="${CXXFLAGS} ${PKG_CXXFLAGS}" CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS` CFLAGS="${CFLAGS} ${PKG_CFLAGS}" RINC=`${R_HOME}/bin/R CMD config --cppflags` RCPP_CXX=`${R_HOME}/bin/Rscript -e "Rcpp:::CxxFlags()"` CPPFLAGS="${CPPFLAGS} ${RCPP_CXX}" RCPP_LIBS=`${R_HOME}/bin/Rscript -e "Rcpp:::LdFlags()"` LIBS="${LIBS} ${PKG_LIBS} ${RCPP_LIBS}" ac_config_files="$ac_config_files src/Makevars" cp confdefs.h src/config.h cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure # scripts and configure runs, see configure's option --config-cache. # It is not useful on other systems. If it contains results you don't # want to keep, you may remove or edit it. # # config.status only pays attention to the cache file if you give it # the --recheck option to rerun configure. # # `ac_cv_env_foo' variables (set or unset) will be overridden when # loading this file, other *unset* `ac_cv_foo' will be assigned the # following values. _ACEOF # The following way of writing the cache mishandles newlines in values, # but we know of no workaround that is simple, portable, and efficient. # So, we kill variables containing newlines. # Ultrix sh set writes to stderr and can't be redirected directly, # and sets the high bit in the cache file unless we assign to the vars. ( for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do eval ac_val=\$$ac_var case $ac_val in #( *${as_nl}*) case $ac_var in #( *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #( *) { eval $ac_var=; unset $ac_var;} ;; esac ;; esac done (set) 2>&1 | case $as_nl`(ac_space=' '; set) 2>&1` in #( *${as_nl}ac_space=\ *) # `set' does not quote correctly, so add quotes: double-quote # substitution turns \\\\ into \\, and sed turns \\ into \. sed -n \ "s/'/'\\\\''/g; s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" ;; #( *) # `set' quotes correctly as required by POSIX, so do not add quotes. sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" ;; esac | sort ) | sed ' /^ac_cv_env_/b end t clear :clear s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then if test "x$cache_file" != "x/dev/null"; then { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 $as_echo "$as_me: updating cache $cache_file" >&6;} if test ! -f "$cache_file" || test -h "$cache_file"; then cat confcache >"$cache_file" else case $cache_file in #( */* | ?:*) mv -f confcache "$cache_file"$$ && mv -f "$cache_file"$$ "$cache_file" ;; #( *) mv -f confcache "$cache_file" ;; esac fi fi else { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 $as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache test "x$prefix" = xNONE && prefix=$ac_default_prefix # Let make expand exec_prefix. test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' # Transform confdefs.h into DEFS. # Protect against shell expansion while executing Makefile rules. # Protect against Makefile macro expansion. # # If the first sed substitution is executed (which looks for macros that # take arguments), then branch to the quote section. Otherwise, # look for a macro that doesn't take arguments. ac_script=' :mline /\\$/{ N s,\\\n,, b mline } t clear :clear s/^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*([^)]*)\)[ ]*\(.*\)/-D\1=\2/g t quote s/^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)/-D\1=\2/g t quote b any :quote s/[ `~#$^&*(){}\\|;'\''"<>?]/\\&/g s/\[/\\&/g s/\]/\\&/g s/\$/$$/g H :any ${ g s/^\n// s/\n/ /g p } ' DEFS=`sed -n "$ac_script" confdefs.h` ac_libobjs= ac_ltlibobjs= U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' ac_i=`$as_echo "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo' done LIBOBJS=$ac_libobjs LTLIBOBJS=$ac_ltlibobjs : "${CONFIG_STATUS=./config.status}" ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 $as_echo "$as_me: creating $CONFIG_STATUS" >&6;} as_write_fail=0 cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 #! $SHELL # Generated by $as_me. # Run this file to recreate the current configuration. # Compiler output produced by configure, useful for debugging # configure, is in config.log if it exists. debug=false ac_cs_recheck=false ac_cs_silent=false SHELL=\${CONFIG_SHELL-$SHELL} export SHELL _ASEOF cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 ## -------------------- ## ## M4sh Initialization. ## ## -------------------- ## # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST else case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( *) : ;; esac fi as_nl=' ' export as_nl # Printing a long string crashes Solaris 7 /usr/bin/printf. as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo # Prefer a ksh shell builtin over an external printf program on Solaris, # but without wasting forks for bash or zsh. if test -z "$BASH_VERSION$ZSH_VERSION" \ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='print -r --' as_echo_n='print -rn --' elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then as_echo='printf %s\n' as_echo_n='printf %s' else if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' as_echo_n='/usr/ucb/echo -n' else as_echo_body='eval expr "X$1" : "X\\(.*\\)"' as_echo_n_body='eval arg=$1; case $arg in #( *"$as_nl"*) expr "X$arg" : "X\\(.*\\)$as_nl"; arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; esac; expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" ' export as_echo_n_body as_echo_n='sh -c $as_echo_n_body as_echo' fi export as_echo_body as_echo='sh -c $as_echo_body as_echo' fi # The user is always right. if test "${PATH_SEPARATOR+set}" != set; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || PATH_SEPARATOR=';' } fi # IFS # We need space, tab and new line, in precisely that order. Quoting is # there to prevent editors from complaining about space-tab. # (If _AS_PATH_WALK were called with IFS unset, it would disable word # splitting by setting IFS to empty value.) IFS=" "" $as_nl" # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( *[\\/]* ) as_myself=$0 ;; *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS test -z "$as_dir" && as_dir=. test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break done IFS=$as_save_IFS ;; esac # We did not find ourselves, most probably we were run as `sh COMMAND' # in which case we are not to be found in the path. if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi # Unset variables that we do not need and which cause bugs (e.g. in # pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" # suppresses any "Segmentation fault" message there. '((' could # trigger a bug in pdksh 5.2.14. for as_var in BASH_ENV ENV MAIL MAILPATH do eval test x\${$as_var+set} = xset \ && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : done PS1='$ ' PS2='> ' PS4='+ ' # NLS nuisances. LC_ALL=C export LC_ALL LANGUAGE=C export LANGUAGE # CDPATH. (unset CDPATH) >/dev/null 2>&1 && unset CDPATH # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- # Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are # provided, also output the error to LOG_FD, referencing LINENO. Then exit the # script with STATUS, using 1 if that was 0. as_fn_error () { as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi $as_echo "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. as_fn_set_status () { return $1 } # as_fn_set_status # as_fn_exit STATUS # ----------------- # Exit the shell with STATUS, even in a "trap 0" or "set -e" context. as_fn_exit () { set +e as_fn_set_status $1 exit $1 } # as_fn_exit # as_fn_unset VAR # --------------- # Portably unset VAR. as_fn_unset () { { eval $1=; unset $1;} } as_unset=as_fn_unset # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : eval 'as_fn_append () { eval $1+=\$2 }' else as_fn_append () { eval $1=\$$1\$2 } fi # as_fn_append # as_fn_arith ARG... # ------------------ # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : eval 'as_fn_arith () { as_val=$(( $* )) }' else as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith if expr a : '\(a\)' >/dev/null 2>&1 && test "X`expr 00001 : '.*\(...\)'`" = X001; then as_expr=expr else as_expr=false fi if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then as_basename=basename else as_basename=false fi if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then as_dirname=dirname else as_dirname=false fi as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 2>/dev/null || $as_echo X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q } /^X\/\(\/\/\)$/{ s//\1/ q } /^X\/\(\/\).*/{ s//\1/ q } s/.*/./; q'` # Avoid depending upon Character Ranges. as_cr_letters='abcdefghijklmnopqrstuvwxyz' as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) case `echo 'xy\c'` in *c*) ECHO_T=' ';; # ECHO_T is single tab character. xy) ECHO_C='\c';; *) echo `echo ksh88 bug on AIX 6.1` > /dev/null ECHO_T=' ';; esac;; *) ECHO_N='-n';; esac rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file else rm -f conf$$.dir mkdir conf$$.dir 2>/dev/null fi if (echo >conf$$.file) 2>/dev/null; then if ln -s conf$$.file conf$$ 2>/dev/null; then as_ln_s='ln -s' # ... but there are two gotchas: # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. # In both cases, we have to default to `cp -pR'. ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || as_ln_s='cp -pR' elif ln conf$$.file conf$$ 2>/dev/null; then as_ln_s=ln else as_ln_s='cp -pR' fi else as_ln_s='cp -pR' fi rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file rmdir conf$$.dir 2>/dev/null # as_fn_mkdir_p # ------------- # Create "$as_dir" as a directory, including parents if necessary. as_fn_mkdir_p () { case $as_dir in #( -*) as_dir=./$as_dir;; esac test -d "$as_dir" || eval $as_mkdir_p || { as_dirs= while :; do case $as_dir in #( *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" as_dir=`$as_dirname -- "$as_dir" || $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` test -d "$as_dir" && break done test -z "$as_dirs" || eval "mkdir $as_dirs" } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir" } # as_fn_mkdir_p if mkdir -p . 2>/dev/null; then as_mkdir_p='mkdir -p "$as_dir"' else test -d ./-p && rmdir ./-p as_mkdir_p=false fi # as_fn_executable_p FILE # ----------------------- # Test if FILE is an executable regular file. as_fn_executable_p () { test -f "$1" && test -x "$1" } # as_fn_executable_p as_test_x='test -x' as_executable_p=as_fn_executable_p # Sed expression to map a string onto a valid CPP name. as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" # Sed expression to map a string onto a valid variable name. as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" exec 6>&1 ## ----------------------------------- ## ## Main body of $CONFIG_STATUS script. ## ## ----------------------------------- ## _ASEOF test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1 cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Save the log message, to keep $0 and so on meaningful, and to # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" This file was extended by SPP $as_me 1.7, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS CONFIG_LINKS = $CONFIG_LINKS CONFIG_COMMANDS = $CONFIG_COMMANDS $ $0 $@ on `(hostname || uname -n) 2>/dev/null | sed 1q` " _ACEOF case $ac_config_files in *" "*) set x $ac_config_files; shift; ac_config_files=$*;; esac cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 # Files that config.status was made for. config_files="$ac_config_files" _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 ac_cs_usage="\ \`$as_me' instantiates files and other configuration actions from templates according to the current configuration. Unless the files and actions are specified as TAGs, all are instantiated by default. Usage: $0 [OPTION]... [TAG]... -h, --help print this help, then exit -V, --version print version number and configuration settings, then exit --config print configuration, then exit -q, --quiet, --silent do not print progress messages -d, --debug don't remove temporary files --recheck update $as_me by reconfiguring in the same conditions --file=FILE[:TEMPLATE] instantiate the configuration file FILE Configuration files: $config_files Report bugs to ." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ SPP config.status 1.7 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" Copyright (C) 2012 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." ac_pwd='$ac_pwd' srcdir='$srcdir' test -n "\$AWK" || AWK=awk _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # The default lists apply if the user does not specify any file. ac_need_defaults=: while test $# != 0 do case $1 in --*=?*) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` ac_shift=: ;; --*=) ac_option=`expr "X$1" : 'X\([^=]*\)='` ac_optarg= ac_shift=: ;; *) ac_option=$1 ac_optarg=$2 ac_shift=shift ;; esac case $ac_option in # Handling of the options. -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) $as_echo "$ac_cs_version"; exit ;; --config | --confi | --conf | --con | --co | --c ) $as_echo "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; '') as_fn_error $? "missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; --he | --h | --help | --hel | -h ) $as_echo "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; # This is an error. -*) as_fn_error $? "unrecognized option: \`$1' Try \`$0 --help' for more information." ;; *) as_fn_append ac_config_targets " $1" ac_need_defaults=false ;; esac shift done ac_configure_extra_args= if $ac_cs_silent; then exec 6>/dev/null ac_configure_extra_args="$ac_configure_extra_args --silent" fi _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" fi _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 exec 5>>config.log { echo sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX $as_echo "$ac_log" } >&5 _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # Handling of arguments. for ac_config_target in $ac_config_targets do case $ac_config_target in "src/Makevars") CONFIG_FILES="$CONFIG_FILES src/Makevars" ;; *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac done # If the user did not use the arguments to specify the items to instantiate, # then the envvar interface is used. Set only those that are not. # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files fi # Have a temporary directory for convenience. Make it in the build tree # simply because there is no reason against having it here, and in addition, # creating and moving files from /tmp can sometimes cause problems. # Hook for its removal unless debugging. # Note that there is a small window in which the directory will not be cleaned: # after its creation but before its name has been assigned to `$tmp'. $debug || { tmp= ac_tmp= trap 'exit_status=$? : "${ac_tmp:=$tmp}" { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status ' 0 trap 'as_fn_exit 1' 1 2 13 15 } # Create a (secure) tmp directory for tmp files. { tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && test -d "$tmp" } || { tmp=./conf$$-$RANDOM (umask 077 && mkdir "$tmp") } || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5 ac_tmp=$tmp # Set up the scripts for CONFIG_FILES section. # No need to generate them if there are no CONFIG_FILES. # This happens for instance with `./config.status config.h'. if test -n "$CONFIG_FILES"; then ac_cr=`echo X | tr X '\015'` # On cygwin, bash can eat \r inside `` if the user requested igncr. # But we know of no other shell where ac_cr would be empty at this # point, so we can use a bashism as a fallback. if test "x$ac_cr" = x; then eval ac_cr=\$\'\\r\' fi ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' /dev/null` if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then ac_cs_awk_cr='\\r' else ac_cs_awk_cr=$ac_cr fi echo 'BEGIN {' >"$ac_tmp/subs1.awk" && _ACEOF { echo "cat >conf$$subs.awk <<_ACEOF" && echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' && echo "_ACEOF" } >conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'` ac_delim='%!_!# ' for ac_last_try in false false false false false :; do . ./conf$$subs.sh || as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X` if test $ac_delim_n = $ac_delim_num; then break elif $ac_last_try; then as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5 else ac_delim="$ac_delim!$ac_delim _$ac_delim!! " fi done rm -f conf$$subs.sh cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK && _ACEOF sed -n ' h s/^/S["/; s/!.*/"]=/ p g s/^[^!]*!// :repl t repl s/'"$ac_delim"'$// t delim :nl h s/\(.\{148\}\)..*/\1/ t more1 s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/ p n b repl :more1 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t nl :delim h s/\(.\{148\}\)..*/\1/ t more2 s/["\\]/\\&/g; s/^/"/; s/$/"/ p b :more2 s/["\\]/\\&/g; s/^/"/; s/$/"\\/ p g s/.\{148\}// t delim ' >$CONFIG_STATUS || ac_write_fail=1 rm -f conf$$subs.awk cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 _ACAWK cat >>"\$ac_tmp/subs1.awk" <<_ACAWK && for (key in S) S_is_set[key] = 1 FS = "" } { line = $ 0 nfields = split(line, field, "@") substed = 0 len = length(field[1]) for (i = 2; i < nfields; i++) { key = field[i] keylen = length(key) if (S_is_set[key]) { value = S[key] line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3) len += length(value) + length(field[++i]) substed = 1 } else len += 1 + keylen } print line } _ACAWK _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g" else cat fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \ || as_fn_error $? "could not setup config files machinery" "$LINENO" 5 _ACEOF # VPATH may cause trouble with some makes, so we remove sole $(srcdir), # ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and # trailing colons and then remove the whole line if VPATH becomes empty # (actually we leave an empty line to preserve line numbers). if test "x$srcdir" = x.; then ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{ h s/// s/^/:/ s/[ ]*$/:/ s/:\$(srcdir):/:/g s/:\${srcdir}:/:/g s/:@srcdir@:/:/g s/^:*// s/:*$// x s/\(=[ ]*\).*/\1/ G s/\n// s/^[^=]*=[ ]*$// }' fi cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 fi # test -n "$CONFIG_FILES" eval set X " :F $CONFIG_FILES " shift for ac_tag do case $ac_tag in :[FHLC]) ac_mode=$ac_tag; continue;; esac case $ac_mode$ac_tag in :[FHL]*:*);; :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;; :[FH]-) ac_tag=-:-;; :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; esac ac_save_IFS=$IFS IFS=: set x $ac_tag IFS=$ac_save_IFS shift ac_file=$1 shift case $ac_mode in :L) ac_source=$1;; :[FH]) ac_file_inputs= for ac_f do case $ac_f in -) ac_f="$ac_tmp/stdin";; *) # Look for the file first in the build tree, then in the source tree # (if the path is not absolute). The absolute path cannot be DOS-style, # because $ac_f cannot contain `:'. test -f "$ac_f" || case $ac_f in [\\/$]*) false;; *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; esac || as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done # Let's still pretend it is `configure' which instantiates (i.e., don't # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. $configure_input" { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 $as_echo "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. case $configure_input in #( *\&* | *\|* | *\\* ) ac_sed_conf_input=`$as_echo "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac case $ac_tag in *:-:* | *:-) cat >"$ac_tmp/stdin" \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; esac ;; esac ac_dir=`$as_dirname -- "$ac_file" || $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || $as_echo X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q } /^X\(\/\/\)[^/].*/{ s//\1/ q } /^X\(\/\/\)$/{ s//\1/ q } /^X\(\/\).*/{ s//\1/ q } s/.*/./; q'` as_dir="$ac_dir"; as_fn_mkdir_p ac_builddir=. case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; esac ;; esac ac_abs_top_builddir=$ac_pwd ac_abs_builddir=$ac_pwd$ac_dir_suffix # for backward compatibility: ac_top_builddir=$ac_top_build_prefix case $srcdir in .) # We are building in place. ac_srcdir=. ac_top_srcdir=$ac_top_builddir_sub ac_abs_top_srcdir=$ac_pwd ;; [\\/]* | ?:[\\/]* ) # Absolute name. ac_srcdir=$srcdir$ac_dir_suffix; ac_top_srcdir=$srcdir ac_abs_top_srcdir=$srcdir ;; *) # Relative name. ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix ac_top_srcdir=$ac_top_build_prefix$srcdir ac_abs_top_srcdir=$ac_pwd/$srcdir ;; esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix case $ac_mode in :F) # # CONFIG_FILE # _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # If the template does not know about datarootdir, expand it. # FIXME: This hack should be removed a few years after 2.60. ac_datarootdir_hack=; ac_datarootdir_seen= ac_sed_dataroot=' /datarootdir/ { p q } /@datadir@/p /@docdir@/p /@infodir@/p /@localedir@/p /@mandir@/p' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 $as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' s&@datadir@&$datadir&g s&@docdir@&$docdir&g s&@infodir@&$infodir&g s&@localedir@&$localedir&g s&@mandir@&$mandir&g s&\\\${datarootdir}&$datarootdir&g' ;; esac _ACEOF # Neutralize VPATH when `$srcdir' = `.'. # Shell code in configure.ac might set extrasub. # FIXME: do we really want to maintain this feature? cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_sed_extra="$ac_vpsub $extrasub _ACEOF cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 :t /@[a-zA-Z_][a-zA-Z_0-9]*@/!b s|@configure_input@|$ac_sed_conf_input|;t t s&@top_builddir@&$ac_top_builddir_sub&;t t s&@top_build_prefix@&$ac_top_build_prefix&;t t s&@srcdir@&$ac_srcdir&;t t s&@abs_srcdir@&$ac_abs_srcdir&;t t s&@top_srcdir@&$ac_top_srcdir&;t t s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t s&@builddir@&$ac_builddir&;t t s&@abs_builddir@&$ac_abs_builddir&;t t s&@abs_top_builddir@&$ac_abs_top_builddir&;t t $ac_datarootdir_hack " eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \ >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5 test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&5 $as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" case $ac_file in -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";; *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";; esac \ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;; esac done # for ac_tag as_fn_exit 0 _ACEOF ac_clean_files=$ac_clean_files_save test $ac_write_fail = 0 || as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5 # configure is writing to config.log, and then calls config.status. # config.status does its own redirection, appending to config.log. # Unfortunately, on DOS this fails, as config.log is still kept open # by configure, so config.status won't be able to write to it; its # output is simply discarded. So we exec the FD to /dev/null, # effectively closing config.log, so it can be properly (re)opened and # appended to by config.status. When coming back to configure, we # need to make the FD available again. if test "$no_create" != yes; then ac_cs_success=: ac_config_status_args= test "$silent" = yes && ac_config_status_args="$ac_config_status_args --quiet" exec 5>/dev/null $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false exec 5>>config.log # Use ||, not &&, to avoid exiting from the if with $? = 1, which # would make configure fail if this is the last instruction. $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi spp/man/0000755000176200001440000000000013472222525011633 5ustar liggesusersspp/man/get.conservative.fold.enrichment.profile.Rd0000644000176200001440000000720513471426636022141 0ustar liggesusers\name{get.conservative.fold.enrichment.profile} \alias{get.conservative.fold.enrichment.profile} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Estimate minimal fold enrichment/depletion along the chromosomes } \description{ The method provides a statistical assessment of enrichment/depletion along the chromosomes. To assess tag density enrichment/depletion, a sliding window of a specified size (\code{fws}) is used to calculate the density of the foreground tags (\code{ftl}). Multiple, typically larger windows are used to estimate background tag (\code{btl}) density around the same location. The densities are compared as ratios of two Poisson processes to estimate lower bound of foreground enrichment, or upper bound of foreground depletion. If multiple window sizes were used to estimate the background tag density, the most conservative one is chosen for each point. } \usage{ get.conservative.fold.enrichment.profile(ftl, btl, fws, bwsl = c(1, 5, 25, 50) * fws, step = 50, tag.shift = 146/2, alpha = 0.05, use.most.informative.scale = F, quick.calculation = T, background.density.scaling = T, bg.weight = NULL, posl= NULL, return.mle = F) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ftl}{ foreground tag vector list } \item{btl}{ background tag vector list } \item{fws}{ foreground window size } \item{bwsl}{ background window scales. The size(s) of background windows will be \code{fws*bwsl}. } \item{step}{ spacing between positions at which the enrichment/depletion is evaluated } \item{tag.shift}{ number of basepairs by which positive and negative tag coordinates should be shifted towards eachother (half of binding peak separation distance)} \item{alpha}{ desired level of statistical significance } \item{use.most.informative.scale}{ for each position, instead of evaluating enrichment ratio bounds for all background window scales, choose the one with the highest observed density to speed up the calculations} \item{quick.calculation}{ Use square root transformation method instead of a Bayesian method. This speeds up the caclulation considerably and is turned on by default. } \item{background.density.scaling}{ If TRUE, regions of significant tag enrichment will be masked out when calculating size ratio of the signal to control datasets (to estimate ratio of the background tag density). If FALSE, the dataset ratio will be equal to the ratio of the number of tags in each dataset.} \item{bg.weight}{ optional weight by which the background density should be multipled for scaling. If not supplied, the weight is calculated based on the ratio of the reduced ChIP to input dataset sizes. } \item{posl}{posl} \item{return.mle}{return.mle} } \value{ A list of elements corresponding to chromosomes, with each element being an $x/$y data.frame giving the position and the log2 conservative estimate of enrichment/depletion fold ratios around that position. Use \code{\link{writewig}} to output the structure to a WIG file. } \references{ R.M.Price, D.G. Bonett "Estimating the ratio fo two Poisson rates", Comp. Stat & Data Anal. 32(2000) 345} \seealso{ \code{\link{get.smoothed.tag.density}} } \examples{ \dontrun{ enrichment.estimates <- get.conservative.fold.enrichment.profile(chip.data, input.data, fws=2*binding.characteristics$whs, step=100, alpha=0.01); writewig(enrichment.estimates, "example.enrichment.estimates.wig", "Example conservative fold-enrichment/depletion estimates shown on log2 scale"); } } spp/man/add.broad.peak.regions.Rd0000644000176200001440000000224513471426636016337 0ustar liggesusers\name{add.broad.peak.regions} \alias{add.broad.peak.regions} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Calculate chromosome-wide profiles of smoothed tag density } \description{ Looks for broader regions of enrichment associated with the determined peak positions, adds them to the $npl data as $rs, $re columns. } \usage{ add.broad.peak.regions(chip.tags,input.tags,bp,window.size=500,z.thr=2) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{chip.tags}{ signal chromosome tag coordinate vectors (e.g. output of \code{\link{select.informative.tags}} } \item{input.tags}{ optionall control (input) tags } \item{bp}{ output of find.binding.positions call } \item{window.size}{ window size to be used in calculating enrichment } \item{z.thr}{ Z-score corresponding to the Poisson ratio threshold used to flag significantly enriched windows} } \value{ A structure identical to bp (binding.postions) with two additional columns added (rs and re) corresponding to start and end of the associated significantly enriched region. If no region was associated with a particular peak, NAs values are reported. } spp/man/output.binding.results.Rd0000644000176200001440000000211513471426636016602 0ustar liggesusers\name{output.binding.results} \alias{output.binding.results} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Write out determined binding peaks into a text file table } \description{ Writes out determined binding positions into a text file. The file will contain a table with each row corresponding to a detected position, with the following columns \itemize{ \item{chr: }{ chromosome or target sequence} \item{pos: }{ position of detected binding site on the chromosome/sequence} \item{score: }{ a score reflecting magnitude of the binding} \item{Evalue: }{ E-value corresponding to the peak magnitude} \item{FDR: }{ FDR corresponding to the peak magnitude} \item{enrichment.lb: }{ lower bound of the fold-enrichment ratio} \item{enrichment.mle: }{ maximum likelihood estimate of the fold-enrichment ratio} } } \usage{ output.binding.results(results, filename) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{results}{ output of the \code{\link{find.binding.positions}} } \item{filename}{ file name } } spp/man/points_within.Rd0000644000176200001440000000354013472222350015016 0ustar liggesusers\name{points_within} \alias{points_within} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Find points within } \description{ points_within substitutes the deprecated function "points.within" } \usage{ points_within(x, fs, fe, return.list = F, return.unique = F, sorted = F, return.point.counts = F, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{x}{ Parameter } \item{fs}{ Parameter } \item{fe}{ Parameter } \item{return.list}{ Parameter } \item{return.unique}{ Parameter } \item{sorted}{ Parameter } \item{return.point.counts}{ Parameter } \item{\dots}{ Parameter } } \value{ Parameter } %% ~Make other sections like Warning with \section{Warning }{....} ~ \examples{ \dontrun{ ##---- Should be DIRECTLY executable !! ---- ##-- ==> Define data, use random, ##-- or do help(data=index) for the standard data sets. ## The function is currently defined as function (x, fs, fe, return.list = F, return.unique = F, sorted = F, return.point.counts = F, ...) { if (is.null(x) | length(x) < 1) { return(c()) } if (!sorted) { ox <- rank(x, ties.method = "first") x <- sort(x) } se <- c(fs, fe) fi <- seq(1:length(fs)) fi <- c(fi, -1 * fi) fi <- fi[order(se)] se <- sort(se) storage.mode(x) <- storage.mode(fi) <- storage.mode(se) <- "integer" if (return.unique) { iu <- 1 } else { iu <- 0 } if (return.list) { il <- 1 } else { il <- 0 } if (return.point.counts) { rpc <- 1 } else { rpc <- 0 } storage.mode(iu) <- storage.mode(il) <- storage.mode(rpc) <- "integer" result <- .Call("points_withinC", x, se, fi, il, iu, rpc) if (!sorted & !return.point.counts) { result <- result[ox] } return(result) } } } spp/man/get.broad.enrichment.clusters.Rd0000644000176200001440000000326513471426636020003 0ustar liggesusers\name{get.broad.enrichment.clusters} \alias{get.broad.enrichment.clusters} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Determine broad clusters of enrichment } \description{ Scan chromosomes with a pre-defined window size, comparing scaled ChIP and input tag coutns to see if their ratio exceeds that expected from a Poisson process (normalized for dataset size). } \usage{ get.broad.enrichment.clusters(signal.data, control.data, window.size=1e3, z.thr=3, tag.shift=146/2, background.density.scaling = F, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{signal.data}{chip.tags, foreground tag vector list } \item{control.data}{input.tags, background tag vector list } \item{window.size}{ window size to be used for tag counting } \item{z.thr}{ Z-score to be used as a significance threshold } \item{tag.shift}{ number of base pairs by which positive and negative tag coordinates should be shifted towards eachother (half of binding peak separation distance)} \item{background.density.scaling}{ If TRUE, regions of significant tag enrichment will be masked out when calculating size ratio of the signal to control datasets (to estimate ratio of the background tag density). If FALSE, the dataset ratio will be equal to the ratio of the number of tags in each dataset.} \item{\dots}{ additional parameters should be the same as those passed to the \code{find.significantly.enriched.regions}} } \value{ A list of elements corresponding to chromosomes, with each element being an $s/$e/$rv data.frame giving the starting, ending positions and the log2 enrichment estimate for that region. } spp/man/read.short.arachne.tags.Rd0000644000176200001440000000246413472222413016532 0ustar liggesusers\name{read.short.arachne.tags} \alias{read.short.arachne.tags} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Read in ARACHNE short tags } \description{ Read in short arachne reads } \usage{ read.short.arachne.tags(filename, fix.chromosome.names = F) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{filename}{ filename } \item{fix.chromosome.names}{ Fix chromosome names } } \details{ Not necessary } \value{ A list like structure } \references{ spp by Peter Kharchenko } \author{ Peter Kharchenko } \note{ No Notes } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ Nothing to see here } \examples{ \dontrun{ ##---- Should be DIRECTLY executable !! ---- ##-- ==> Define data, use random, ##-- or do help(data=index) for the standard data sets. ## The function is currently defined as function (filename, fix.chromosome.names = F) { tl <- lapply(.Call("read_arachne", path.expand(filename)), function(d) { xo <- order(abs(d$t)) d$t <- d$t[xo] d$n <- d$n[xo] return(d) }) if (fix.chromosome.names) { names(tl) <- gsub("\\.fa", "", names(tl)) } return(list(tags = lapply(tl, function(d) d$t), quality = lapply(tl, function(d) d$n))) } } } spp/man/get.mser.Rd0000644000176200001440000000451613471426636013664 0ustar liggesusers\name{get.mser} \alias{get.mser} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Calculate minimal saturated enrichment fold ratio } \description{ Determine if the dataset has reached absolute saturation, or otherwise find minimal fold enrichment ratio above which the detection of peaks has stabilized enough to meet the saturation criteria. } \usage{ get.mser(signal.data, control.data, n.chains = 5, step.size = 1e+05, chains = NULL, cluster = NULL, test.agreement = 0.99, return.chains = F, enrichment.background.scales = c(1), n.steps = 1, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{signal.data}{ signal tag vector list } \item{control.data}{ control tag vector list } \item{n.chains}{ number of dataset subsamples to use } \item{step.size}{ subsampling step describing the saturation criteria. The criteria requires the set of detected binding sites to be stable (as described by the \code{test.agreement} param) when the number of tags in the dataset is reduced by \code{step.size}. The value can either be an integer above one, in which case it specifies a fixed number of tags, or a real value below one, in which case it specifies the fraction of tags that should be removed (e.g. 0.1 will remove 10% of tags). } \item{test.agreement}{ Fraction of the detected peaks that should agree between the full and subsampled datasets. } \item{chains}{ optional parameter, giving pre-calculated chains } \item{cluster}{ optional \code{snow} cluster to parallelize processing } \item{return.chains}{ whether subsampled dataset results should be returned as well } \item{enrichment.background.scales}{ one or multiple window scales at which the background tag density should be assessed. See \code{enrichment.background.scales} in \code{\link{find.binding.positions}}. If multiple scales are provided, multiple MSER estimates will be returned.} \item{\dots}{ additional parameters should be the same as those passed to the \code{\link{find.binding.positions}}} \item{n.steps}{n.steps} } \value{ A single, or multple (if multiple \code{enrichment.background.scales} were provided) MSER value. A value of 1 or very close to it implies that the dataset has reached absolute saturation based on the given criteria. } spp/man/get.smoothed.enrichment.mle.Rd0000644000176200001440000000440313471426636017442 0ustar liggesusers\name{get.smoothed.enrichment.mle} \alias{get.smoothed.enrichment.mle} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Calculate chromosome-wide profiles of smoothed enrichment estimate } \description{ Given signal and control tag positions, the method calculates log2 signal to control enrichment esimates (maximum likelihood) for each chromosome, based on the smoothed tag density profile (see \link{get.smoothed.tag.density}). } \usage{ get.smoothed.enrichment.mle(signal.tags, control.tags, tag.shift = 146/2, background.density.scaling = F, pseudocount = 1, bg.weight = NULL, rngl = NULL, chrl = NULL, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{signal.tags}{ signal chromosome tag coordinate vectors (e.g. output of \code{\link{select.informative.tags}} } \item{control.tags}{ control (input) tags } \item{tag.shift}{ number of base pairs by which positive and negative tag coordinates should be shifted towards eachother (half of binding peak separation distance)} \item{background.density.scaling}{background.density.scaling} \item{pseudocount}{ pseudocount value to be added to tag density - defaults to 1 } \item{bg.weight}{ optional weight by which the background density should be multipled for scaling. If not supplied, the weight is calculated based on the ratio of the reduced ChIP to input dataset sizes. } \item{rngl}{rngl} \item{chrl}{chrl} \item{\dots}{ additional parameters should be the same as those passed to the \code{\link{get.smoothed.tag.density}}, such as for example bandwidth (default value 150) and step (default value 50). see appropriate reference for \code{\link{get.smoothed.tag.density}} for details.} } \value{ A list of elements corresponding to chromosomes, with each element being an $x/$y data.frame giving the position and associated log2 signal/control enrichment estimate. } \seealso{ \code{\link{writewig}} } \examples{ \dontrun{ # get smoothed enrichment estimate profile using 500bp bandwidth at # 50bp steps smoothed.M <- get.smoothed.enrichment.mle(chip.data,bandwidth=500,step=50); writewig(smoothed.M,"example.smoothedM.wig","Example smoothed log2 intensity ratio estimate"); } } spp/man/remove.local.tag.anomalies.Rd0000644000176200001440000000464013471426636017245 0ustar liggesusers\name{remove.local.tag.anomalies} \alias{remove.local.tag.anomalies} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Restrict or remove positions with too many tags relative to local background. } \description{ In Solexa ChIP-seq experiments some anomalous positions contain extremely high number of tags at the exact coordinates. The function scans the chromosomes, determining local tag density based on a provided \code{window.size}, doing two types of corrections: 1. removing all tags from positions that exceed local density by \code{eliminate.fold}; 2. reducing the tag count at positions exceeding \code{cap.fold} to the maximal allowed count. The statistical significance of counts exceeding either of these two threshold densities is calculated based on Poisson model, with confidence interval determined by the \code{z.threshold} Z-score parameter. } \usage{ remove.local.tag.anomalies(tags, window.size = 200, eliminate.fold = 10, cap.fold = 4, z.threshold = 3) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{tags}{ Chromosome-list of tag vectors } \item{window.size}{ Size of the window used to assess local density. Increasing the window size considerably beyond the size of the binding features will result in flattened profiles, with bound positions exhibiting a difference of just 1 tag beyond the background. } \item{eliminate.fold}{ Threshold definining fold-over background density above which the position is considered anomalous and removed completely.} \item{cap.fold}{ Threshold fold-over background density above which the position is capped to the maximum statistically likely given local tag density } \item{z.threshold}{ Z-score used to assess significance of a given position exceeding either of the two density thresholds. } %\item{var.base}{ Minimal level of tag count variance. This is used to % guard against cases where variance of provided tags is near 0, as in %the case of datasets filtered to unclude only unique reads. Defaults % at 0.1} } \value{ A modified chromosome-wise tag vector list. } \references{ ~put references to the literature/web site here ~ } \note{ ~~further notes~~ Increasing window.size to very large values will result in flat profiles similar to those described by Zhang et al. "Model-based Analysis of ChIP-Seq (MACS)." Genome Biol. 2008 Sep 17;9(9):R137. } spp/man/read.bam.tags.Rd0000644000176200001440000000153313471426636014542 0ustar liggesusers\name{read.bam.tags} \alias{read.bam.tags} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Read BAM alignment file } \description{ Reads in aligned reads from BAM file. Note: no split (non-unique) alignemnts should be reported in the BAM file. } \usage{ read.bam.tags(filename, read.tag.names = F, fix.chromosome.names = F) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{filename}{ BAM file } \item{read.tag.names}{ Whether the tag names should be read in } \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of the sequence names } } \value{ \item{tags }{ A vector of 5' tag coordinates, with negative values corresponding to tags mapped to the negative strand. } \item{quality }{ Number of mismatches } \item{names }{ Tag names, if \code{read.tag.names} was set } }spp/man/read.helicos.tags.Rd0000644000176200001440000000353413472222400015414 0ustar liggesusers\name{read.helicos.tags} \alias{read.helicos.tags} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Read in helicos tags } \description{ Read in Helicos tags } \usage{ read.helicos.tags(filename, read.tag.names = F, fix.chromosome.names = F, include.length.info = T) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{filename}{ filename } \item{read.tag.names}{ Read in tag names } \item{fix.chromosome.names}{ Do we fix chromosome names } \item{include.length.info}{ include length information } } \value{ A list like structure } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ Fill in later } \examples{ \dontrun{ ##---- Should be DIRECTLY executable !! ---- ##-- ==> Define data, use random, ##-- or do help(data=index) for the standard data sets. ## The function is currently defined as function (filename, read.tag.names = F, fix.chromosome.names = F, include.length.info = T) { if (read.tag.names) { rtn <- as.integer(1) } else { rtn <- as.integer(0) } tl <- lapply(.Call("read_helicostabf", path.expand(filename), rtn), function(d) { xo <- order(abs(d$t)) d$t <- d$t[xo] d$n <- d$n[xo] d$l <- d$l[xo] if (read.tag.names) { d$s <- d$s[xo] } return(d) }) if (fix.chromosome.names) { names(tl) <- gsub("\\.fa", "", names(tl)) } if (read.tag.names) { return(list(tags = lapply(tl, function(d) d$t), quality = lapply(tl, function(d) d$n), length = lapply(tl, function(d) d$l), names = lapply(tl, function(d) d$s))) } else { return(list(tags = lapply(tl, function(d) d$t), quality = lapply(tl, function(d) d$n), length = lapply(tl, function(d) d$l))) } } } } spp/man/get.mser.interpolation.Rd0000644000176200001440000000604113471426636016545 0ustar liggesusers\name{get.mser.interpolation} \alias{get.mser.interpolation} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Interpolate MSER dependency on the tag count } \description{ MSER generally decreases with increasing sequencing depth. This function interpolates the dependency of MSER on tag counts as a log-log linear function. The log-log fit is used to estimate the depth of sequencing required to reach desired \code{target.fold.enrichment}. } \usage{ get.mser.interpolation(signal.data, control.data, target.fold.enrichment = 5, n.chains = 10, n.steps = 6, step.size = 1e+05, chains = NULL, test.agreement = 0.99, return.chains = F, enrichment.background.scales = c(1), excluded.steps = c(seq(2, n.steps - 2)), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{signal.data}{ signal chromosome tag vector list } \item{control.data}{ control chromosome tag vector list } \item{target.fold.enrichment}{ target MSER for which the depth should be estimated} \item{n.steps}{ number of steps in each subset chain. } \item{step.size}{ Either number of tags or fraction of the dataset size, see \code{step.size} parameter for \code{\link{get.mser}}. } \item{test.agreement}{ Fraction of the detected peaks that should agree between the full and subsampled datasets. See \code{test.agreement} parameter for \code{\link{get.mser}}} \item{n.chains}{ number of random subset chains } \item{chains}{ optional structure of pre-calculated chains (e.g. generated by an earlier call with \code{return.chains=T}.} \item{return.chains}{ whether to return peak predictions calculated on random chains. These can be passed back using \code{chains} argument to skip subsampling/prediction steps, and just recalculate the depth estimate for a different MSER.} \item{enrichment.background.scales}{ see \code{enrichment.background.scales} parameter for \code{\link{get.mser}} } \item{excluded.steps}{ Intermediate subsampling steps that should be excluded from the chains to speed up the calculation. By default, all intermediate steps except for first two and last two are skipped. Adding intermediate steps improves interpolation at the expense of computational time.} \item{\dots}{ additional parameters are passed to \code{\link{get.mser}} } } \details{ To simulate sequencing growth, the method calculates peak predictions on random chains. Each chain is produced by sequential random subsampling of the original data. The number of steps in the chain indicates how many times the random subsampling will be performed. } \value{ Normally reurns a list, specifying for each backgroundscale: \item{prediction}{estimated sequencing depth required to reach specified target MSER} \item{log10.fit}{linear fit model, a result of \code{lm()} call} If \code{return.chains=T}, the above structure is returned under \code{interpolation} field, along with \code{chains} field containing results of \code{\link{find.binding.positions}} calls on subsampled chains. } spp/man/get.conservative.fold.enrichment.profile2.Rd0000644000176200001440000000736413472222336022221 0ustar liggesusers\name{get.conservative.fold.enrichment.profile2} \alias{get.conservative.fold.enrichment.profile2} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Return Conservative fold enrichment profile controlling for input and a single background scale } \description{ Returns a conservative upper/lower bound profile (log2) given signal tag list, background tag list and window scales controlling for input, and supporting only a single background scale. Novel version of get.conservative.fold.enrichment.profile() supporting a single background scale. } \usage{ get.conservative.fold.enrichment.profile2(ftl1, ftl2, btl1, btl2, fws, bws = 1 * fws, step = 50, tag.shift = 146/2, alpha = 0.05, background.density.scaling = T, bg.weight1 = NULL, bg.weight2 = NULL, posl = NULL, return.mle = F) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{ftl1}{ Parameter } \item{ftl2}{ Parameter } \item{btl1}{ Parameter } \item{btl2}{ Parameter } \item{fws}{ Parameter } \item{bws}{ Parameter } \item{step}{ Parameter } \item{tag.shift}{ Parameter } \item{alpha}{ Parameter } \item{background.density.scaling}{ Parameter } \item{bg.weight1}{ Parameter } \item{bg.weight2}{ Parameter } \item{posl}{ Parameter } \item{return.mle}{ Parameter } } \value{ A list of elements corresponding to chromosomes, with each element being an $x/$y data.frame giving the position and associated log2 signal/control enrichment estimate. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{get.smoothed.enrichment.mle}} } \examples{ \dontrun{ ##---- Should be DIRECTLY executable !! ---- ##-- ==> Define data, use random, ##-- or do help(data=index) for the standard data sets. ## The function is currently defined as function (ftl1, ftl2, btl1, btl2, fws, bws = 1 * fws, step = 50, tag.shift = 146/2, alpha = 0.05, background.density.scaling = T, bg.weight1 = NULL, bg.weight2 = NULL, posl = NULL, return.mle = F) { ftl1 <- ftl1[unlist(lapply(ftl1, length)) > 2] chrl <- names(ftl1) names(chrl) <- chrl if (!is.null(posl)) { chrl <- chrl[chrl \%in\% names(posl)] } if (is.null(bg.weight1)) { bg.weight1 <- dataset.density.ratio(ftl1, btl1, background.density.scaling = background.density.scaling) } if (is.null(bg.weight2)) { bg.weight2 <- dataset.density.ratio(ftl2, btl2, background.density.scaling = background.density.scaling) } lapply(chrl, function(chr) { x <- binomial.proportion.ratio.bounds(abs(ftl1[[chr]] + tag.shift), abs(btl1[[chr]] + tag.shift), abs(ftl2[[chr]] + tag.shift), abs(btl2[[chr]] + tag.shift), fws = fws, bws = bws, step = step, bg.weight1 = bg.weight1, bg.weight2 = bg.weight2, alpha = alpha, pos = if (is.null(posl)) { NULL } else { posl[[chr]] }) ps <- rep(0, length(x$mle)) vi <- which(!is.na(x$lb) & x$lb > 0) ps[vi] <- x$lb[vi] vi <- which(!is.na(x$ub) & x$ub < 0) ps[vi] <- x$ub[vi] if (is.null(posl)) { if (return.mle) { return(data.frame(x = x$x, y = ps, mle = x$mle, lb = x$lb, ub = x$ub)) } else { return(data.frame(x = x$x, y = ps)) } } else { if (return.mle) { return(data.frame(x = posl[[chr]], y = ps, mle = x$mle, lb = x$lb, ub = x$ub)) } else { return(data.frame(x = posl[[chr]], y = ps)) } } }) } } } spp/man/read.maqmap.tags.Rd0000644000176200001440000000154213471426636015257 0ustar liggesusers\name{read.maqmap.tags} \alias{read.maqmap.tags} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Read MAQ text alignment output file } \description{ Reads in MAQ alignment results in text format (that results from "maq mapview" command.) } \usage{ read.maqmap.tags(filename, read.tag.names = F, fix.chromosome.names = T) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{filename}{ MAQ text output file } \item{read.tag.names}{ Whether the tag names should be read in } \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of the sequence names } } \value{ \item{tags }{ A vector of 5' tag coordinates, with negative values corresponding to tags mapped to the negative strand. } \item{quality }{ Number of mismatches } \item{names }{ Tag names, if \code{read.tag.names} was set } }spp/man/read.arachne.tags.Rd0000644000176200001440000000235513472222362015376 0ustar liggesusers\name{read.arachne.tags} \alias{read.arachne.tags} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Read in Arachne tags } \description{ Read in ARACHNE Tag file } \usage{ read.arachne.tags(filename, fix.chromosome.names = F) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{filename}{ filename } \item{fix.chromosome.names}{ do we fix chromosome names } } \value{ A list like element } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ Fill in later } \examples{ \dontrun{ ##---- Should be DIRECTLY executable !! ---- ##-- ==> Define data, use random, ##-- or do help(data=index) for the standard data sets. ## The function is currently defined as function (filename, fix.chromosome.names = F) { tl <- lapply(.Call("read_arachne_long", path.expand(filename)), function(d) { xo <- order(abs(d$t)) d$t <- d$t[xo] d$n <- d$n[xo] d$l <- d$l[xo] return(d) }) if (fix.chromosome.names) { names(tl) <- gsub("\\.fa", "", names(tl)) } return(list(tags = lapply(tl, function(d) d$t), quality = lapply(tl, function(d) d$n), length = lapply(tl, function(d) d$l))) } } } spp/man/spp-package.Rd0000644000176200001440000001662013471426636014332 0ustar liggesusers\name{spp-package} \alias{spp-package} \alias{spp} \docType{package} \title{ ChIP-seq (Solexa) Processing Pipeline } \description{ A set of routines for reading short sequence alignments, calculating tag density, estimates of statistically significant enrichment/depletion along the chromosome, identifying point binding positions (peaks), and characterizing saturation properties related to sequencing depth. } \details{ \tabular{ll}{ Package: \tab spp\cr Type: \tab Package\cr Version: \tab 1.11\cr Date: \tab 2012-06-20\cr License: \tab What license is it under?\cr LazyLoad: \tab yes\cr } See example below for typical processing sequence.y } \author{Peter Kharchenko } \references{ Kharchenko P., Tolstorukov M., Park P. "Design and analysis of ChIP-seq experiments for DNA-binding proteins." Nature Biotech. doi:10.1038/nbt.1508 } \examples{ \dontrun{ # load the library library(spp); ## The following section shows how to initialize a cluster of 8 nodes for parallel processing ## To enable parallel processing, uncomment the next three lines, #and comment out "cluster<-NULL"; ## see "snow" package manual for details. #library(snow) #cluster <- makeCluster(2); #invisible(clusterCall(cluster,source,"routines.r")); cluster <- NULL; # read in tag alignments chip.data <- read.eland.tags("chip.eland.alignment"); input.data <- read.eland.tags("input.eland.alignment"); # get binding info from cross-correlation profile # srange gives the possible range for the size of the protected region; # srange should be higher than tag length; making the upper #boundary too high will increase calculation time # # bin - bin tags within the specified number of basepairs to speed up calculation; # increasing bin size decreases the accuracy of the determined parameters binding.characteristics <- get.binding.characteristics(chip.data, srange=c(50,500), bin=5, cluster=cluster); # plot cross-correlation profile pdf(file="example.crosscorrelation.pdf",width=5,height=5) par(mar = c(3.5,3.5,1.0,0.5), mgp = c(2,0.65,0), cex = 0.8); plot(binding.characteristics$cross.correlation, type='l', xlab="strand shift", ylab="cross-correlation"); abline(v=binding.characteristics$peak$x,lty=2,col=2) dev.off(); # select informative tags based on the binding characteristics chip.data <- select.informative.tags(chip.data,binding.characteristics); input.data <- select.informative.tags(input.data,binding.characteristics); # restrict or remove positions with anomalous number of tags relative # to the local density chip.data <- remove.local.tag.anomalies(chip.data); input.data <- remove.local.tag.anomalies(input.data); # output smoothed tag density (subtracting re-scaled input) into a WIG file # note that the tags are shifted by half of the peak separation distance smoothed.density <- get.smoothed.tag.density(chip.data, control.tags=input.data, bandwidth=200, step=100, tag.shift=round(binding.characteristics$peak$x/2)); writewig(smoothed.density, "example.density.wig","Example smoothed, background-subtracted tag density"); rm(smoothed.density); # output conservative enrichment estimates # alpha specifies significance level at which confidence intervals will be estimated enrichment.estimates <- get.conservative.fold.enrichment.profile(chip.data, input.data, fws=2*binding.characteristics$whs, step=100, alpha=0.01); writewig(enrichment.estimates, "example.enrichment.estimates.wig", "Example conservative fold-enrichment/depletion estimates shown on log2 scale"); rm(enrichment.estimates); # binding detection parameters # desired FDR. Alternatively, an E-value can be supplied to the #method calls below instead of the fdr parameter fdr <- 1e-2; # the binding.characteristics contains the optimized half-size for binding detection window detection.window.halfsize <- binding.characteristics$whs; # determine binding positions using wtd method bp <- find.binding.positions(signal.data=chip.data, control.data=input.data, fdr=fdr, method=tag.wtd, whs=detection.window.halfsize, cluster=cluster) # alternatively determined binding positions using lwcc # method (note: this takes longer than wtd) # bp <- find.binding.positions(signal.data=chip.data,control.data=input.data, #fdr=fdr,method=tag.lwcc,whs=detection.window.halfsize,cluster=cluster) print(paste("detected",sum(unlist(lapply(bp$npl,function(d) length(d$x)))),"peaks")); # output detected binding positions output.binding.results(bp,"example.binding.positions.txt"); # ------------------------------------------------------------------------------------------- # the set of commands in the following section illustrates methods for saturation analysis # these are separated from the previous section, since they are highly CPU intensive # ------------------------------------------------------------------------------------------- # determine MSER # note: this will take approximately 10-15x the amount of time the initial # binding detection did # The saturation criteria here is 0.99 consistency in the set of binding # positions when adding 1e5 tags. # To ensure convergence the number of subsampled chains (n.chains) should be higher (80) mser <- get.mser(chip.data, input.data, step.size=1e5, test.agreement=0.99, n.chains=8, cluster=cluster, fdr=fdr, method=tag.wtd, whs=detection.window.halfsize) print(paste("MSER at a current depth is",mser)); # note: an MSER value of 1 or very near one implies that the set of # detected binding positions satisfies saturation criteria without # additional selection by fold-enrichment ratios. In other words, # the dataset has reached saturation in a traditional sense (absolute saturation). # interpolate MSER dependency on tag count # note: this requires considerably more calculations than the previous # steps (~ 3x more than the first MSER calculation) # Here we interpolate MSER dependency to determine a point at which MSER of 2 is reached # The interpolation will be based on the difference in MSER at the # current depth, and a depth at 5e5 fewer tags (n.steps=6); # evaluation of the intermediate points is omitted here to # speed up the calculation (excluded.steps parameter) # A total of 7 chains is used here to speed up calculation, # whereas a higher number of chains (50) would give good convergence msers <- get.mser.interpolation(chip.data, input.data, step.size=1e5, test.agreement=0.99, target.fold.enrichment=2, n.chains=7, n.steps=6, excluded.steps=c(2:4), cluster=cluster, fdr=fdr, method=tag.wtd, whs=detection.window.halfsize) print(paste("predicted sequencing depth =", round(unlist(lapply(msers,function(x) x$prediction))/1e6,5)," million tags")) # note: the interpolation will return NA prediction if the dataset # has reached absolute saturation at the current depth. # note: use return.chains=T to also calculated random chains # returned under msers$chains field) - these can be passed back as # "get.mser.interpolation( ..., chains=msers$chains)" to calculate # predictions for another target.fold.enrichment value # without having to recalculate the random chain predictions. ## stop cluster if it was initialized #stopCluster(cluster); } } spp/man/writewig.Rd0000644000176200001440000000225713471426636014001 0ustar liggesusers\name{writewig} \alias{writewig} %- Also NEED an '\alias' for EACH other topic documented here. \title{ A function to save a list of chromosome-wise x/y data frames into a WIG file format. } \description{ Takes a list that contains an $x and $y data.frame for a number of chromosomes and writes it out to a WIG BED style format. } \usage{ writewig(dat, fname, feature, threshold = 5, zip = F) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{dat}{ Chromosome coordinate-value data. \code{dat} is a list, each member of a list is a data frame with $x and $y columns containing chromosome positions and associated values. The names of the list elements correspond to the chromosomes. } \item{fname}{ Filename to which the output should be written } \item{feature}{ Data description to be incorporated into the WIG header } \item{threshold}{ Optional threshold to be saved in the WIG file} \item{zip}{ Wheter to invoke a zip program to compress the file } } \seealso{ ~~objects to See Also as \code{\link{help}}, ~~~ } \examples{ \dontrun{ data <- list("chr1"=data.frame(x=c(100,130,200),y=c(1.2,4.0,2.3))); writewig(data,"filename"); } } spp/man/get.smoothed.tag.density.Rd0000644000176200001440000000501713471426636016765 0ustar liggesusers\name{get.smoothed.tag.density} \alias{get.smoothed.tag.density} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Calculate chromosome-wide profiles of smoothed tag density } \description{ Given tag positions, the method calculates for each chromosome a tag density profile, smoothed by the Gaussian kernel. If the optional control tags are provided, the difference between ChIP and control tag density is returned. } \usage{ get.smoothed.tag.density(signal.tags, control.tags = NULL, bandwidth = 150, bg.weight = NULL, tag.shift = 146/2, step = round(bandwidth/3), background.density.scaling = T, rngl = NULL, scale.by.dataset.size = F) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{signal.tags}{ signal chromosome tag coordinate vectors (e.g. output of \code{\link{select.informative.tags}} } \item{control.tags}{ optional control (input) tags } \item{bandwidth}{ standard deviation of the Gaussian kernel } \item{bg.weight}{ optional weight by which the background density should be multipled for scaling. If not supplied, the weight is calculated based on the ratio of the reduced ChIP to input dataset sizes. } \item{tag.shift}{ Distance by which the positive and negative strand tags should be shifted towards eachother. This normally corresponds to the half of the cross-correlation peak position (e.g. \code{get.binding.characteristics()}$peak$x/2) } \item{step}{ The distance between the regularly spaced points for which the values should be calculated. } \item{background.density.scaling}{ If TRUE, regions of significant tag enrichment will be masked out when calculating size ratio of the signal to control datasets (to estimate ratio of the background tag density). If FALSE, the dataset ratio will be equal to the ratio of the number of tags in each dataset.} \item{rngl}{rngl} \item{scale.by.dataset.size}{scale.by.dataset.size} } \value{ A list of elements corresponding to chromosomes, with each element being an $x/$y data.frame giving the position and associated tag density. Use \code{\link{writewig}} to output the structure to a WIG file. } \seealso{ \code{\link{writewig}} } \examples{ \dontrun{ smoothed.density <- get.smoothed.tag.density(chip.data, control.tags=input.data, bandwidth=200, step=100, tag.shift=round(binding.characteristics$peak$x/2)); writewig(smoothed.density, "example.density.wig", "Example smoothed, background-subtracted tag density"); } }spp/man/write.narrowpeak.binding.Rd0000644000176200001440000000210013471426636017036 0ustar liggesusers\name{write.narrowpeak.binding} \alias{write.narrowpeak.binding} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Write out determined binding peaks using narrowPeak format } \description{ Writes out determined binding positions into a narrowPeak file. The region will correspond to associated broad enrichment region, if such were added using add.broad.peak.regions method. Otherwise the region size will be determined using margin (which defaults to the window half size that was used to determine binding positions) Note: since v1.13, FDR is written out in -log10() scale. } \usage{ write.narrowpeak.binding(bd, fname, margin=bd$whs, npeaks) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{bd}{ output of the \code{\link{find.binding.positions}} } \item{fname}{ file name } \item{margin}{ explicit value of the margin to be used if the broad region information is absent (defaults to peak detection window half-size} \item{npeaks}{ optionally, limit the output to the specified number of top peaks } } spp/man/read.tagalign.tags.Rd0000644000176200001440000000320313472222427015556 0ustar liggesusers\name{read.tagalign.tags} \alias{read.tagalign.tags} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Read in tagalign tags } \description{ Fill in later } \usage{ read.tagalign.tags(filename, fix.chromosome.names = T, fix.quality = T) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{filename}{ Filename of tag file } \item{fix.chromosome.names}{ chromosome names } \item{fix.quality}{ fix quality } } \details{ ... } \value{ a list like structure } \references{ spp by kharchenko } \author{ Peter K. } \note{ Needs further editing } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ Buh! } \examples{ \dontrun{ ##---- Should be DIRECTLY executable !! ---- ##-- ==> Define data, use random, ##-- or do help(data=index) for the standard data sets. ## The function is currently defined as function (filename, fix.chromosome.names = T, fix.quality = T) { tl <- lapply(.Call("read_tagalign", path.expand(filename)), function(d) { xo <- order(abs(d$t)) d$t <- d$t[xo] d$n <- d$n[xo] if (fix.quality) { if (min(d$n) < 0.5) { d$n = ceiling(1000/4^d$n) } break.vals <- unique(sort(c(0, unique(d$n)))) d$n <- length(break.vals) - 1 - cut(d$n, breaks = break.vals, labels = F) } return(d) }) if (fix.chromosome.names) { names(tl) <- gsub("\\.fa", "", names(tl)) } return(list(tags = lapply(tl, function(d) d$t), quality = lapply(tl, function(d) d$n))) } } } spp/man/select.informative.tags.Rd0000644000176200001440000000232413471426636016671 0ustar liggesusers\name{select.informative.tags} \alias{select.informative.tags} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Choose informative tags } \description{ For datasets with tag alignment quality information (e.g. number of mismatches for Eland alignments), \code{\link{get.binding.characteristics}} determines whether inclusion of tags from each specific quality bin improves the cross-correlation profile. The present function is then used to actually select these informative tags, discarding all other information, including quality scores that are not used in further processing. } \usage{ select.informative.tags(data, binding.characteristics) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{data}{ Full alignment data (a list with $tags and $quality elements) } \item{binding.characteristics}{ result of a \code{\link{get.binding.characteristics}} call. If NULL value is supplied,all tags will be accepted. } } \value{ A chromosome-wise tag list. Each element of the list corresponds to a chromosome and is a numeric vector of 5' tag coordinates, with sign designating DNA strand. This form of tag data is used for most of the other processing. } spp/man/read.eland.tags.Rd0000644000176200001440000000252613471426636015071 0ustar liggesusers\name{read.eland.tags} \alias{read.eland.tags} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Read eland output file } \description{ Reads in ELAND output file, returning 5'-end tag coordinates and number of mismatches associated with each mapped tag. } \usage{ read.eland.tags(filename, read.tag.names = F, fix.chromosome.names = T, max.eland.tag.length = -1, extended=F, multi = F) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{filename}{ ELAND output file } \item{read.tag.names}{ Whether the tag names should be read in } \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of the sequence names } \item{max.eland.tag.length}{ Specifies max length of the tag sequence considered by ELAND. This needs to be specified if the tags are longer than the sequences considred by ELAND during alignment. } \item{extended}{ Whether the file is written out in "extended" format provided in GA pipeline 1.0. } \item{multi}{ Whether the file is written in "multi" format, showing multiple alignments of the reads } } \value{ \item{tags }{ A vector of 5' tag coordinates, with negative values corresponding to tags mapped to the negative strand. } \item{quality }{ Number of mismatches } \item{names }{ Tag names, if \code{read.tag.names} was set } } spp/man/read.meland.tags.Rd0000644000176200001440000000227713471426636015251 0ustar liggesusers\name{read.meland.tags} \alias{read.meland.tags} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Read modified BED tag alignment file that contains variable match length information } \description{ Reads in an extended BED tag alignment file. An example line given below: \code{49 . U1 . 1 . . 23 chr2 -234567} The line above specifies a 23-bp portion of the tag tag with id 49 was aligned with 1 mismatch to the negative strand of chr2 at position 234567. } \usage{ read.meland.tags(filename, read.tag.names = F, fix.chromosome.names = T) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{filename}{ name of the extended BED file } \item{read.tag.names}{ whether to read in tag names } \item{fix.chromosome.names}{ whether to remove ".fa" from the sequence name ends. } } \value{ \item{tags }{ A vector of 5' tag coordinates, with negative values corresponding to tags mapped to the negative strand. } \item{quality }{ Quality expressed as a float x.y, where x is tag.length - aligned.tag.portion.length, and y is the number of mismatches (must be less than 10). } \item{names }{ Tag names, if \code{read.tag.names} was set } }spp/man/get.smoothed.enrichment.mle2.Rd0000644000176200001440000000704113472222525017515 0ustar liggesusers\name{get.smoothed.enrichment.mle2} \alias{get.smoothed.enrichment.mle2} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Calculate background input controlled chromosome-wide profiles of smoothed enrichment estimate } \description{ Given signal and control tag positions, the method calculates log2 signal to control enrichment esimates (maximum likelihood) for each chromosome, based on the smoothed tag density profile (see \link{get.smoothed.tag.density}). } \usage{ get.smoothed.enrichment.mle2(signal.tags1, control.tags1, signal.tags2, control.tags2, tag.shift = 146/2, background.density.scaling = F, pseudocount = 1, bg.weight1 = NULL, bg.weight2 = NULL, rngl = NULL, chrl = NULL, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{signal.tags1}{ Parameter } \item{control.tags1}{ Parameter } \item{signal.tags2}{ Parameter } \item{control.tags2}{ Parameter } \item{tag.shift}{ Parameter } \item{background.density.scaling}{ Parameter } \item{pseudocount}{ Parameter } \item{bg.weight1}{ Parameter } \item{bg.weight2}{ Parameter } \item{rngl}{ Parameter } \item{chrl}{ Parameter } \item{\dots}{ Parameter } } \value{ A list of elements corresponding to chromosomes, with each element being an $x/$y data.frame giving the position and associated log2 signal/control enrichment estimate. } %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{get.smoothed.enrichment.mle}} } \examples{ \dontrun{ ##---- Should be DIRECTLY executable !! ---- ##-- ==> Define data, use random, ##-- or do help(data=index) for the standard data sets. ## The function is currently defined as function (signal.tags1, control.tags1, signal.tags2, control.tags2, tag.shift = 146/2, background.density.scaling = F, pseudocount = 1, bg.weight1 = NULL, bg.weight2 = NULL, rngl = NULL, chrl = NULL, ...) { if (is.null(chrl)) { chrl <- intersect(names(signal.tags1), names(signal.tags2)) names(chrl) <- chrl } if (is.null(rngl)) { rngl <- lapply(chrl, function(chr) range(c(range(abs(signal.tags1[[chr]] + tag.shift)), range(abs(signal.tags2[[chr]] + tag.shift))))) } else { chrl <- names(rngl) names(chrl) <- chrl } ssd1 <- get.smoothed.tag.density(signal.tags1, rngl = rngl, ..., scale.by.dataset.size = F) ssd2 <- get.smoothed.tag.density(signal.tags2, rngl = rngl, ..., scale.by.dataset.size = F) csd1 <- get.smoothed.tag.density(control.tags1, rngl = rngl, ..., scale.by.dataset.size = F) csd2 <- get.smoothed.tag.density(control.tags2, rngl = rngl, ..., scale.by.dataset.size = F) if (is.null(bg.weight1)) { bg.weight1 <- dataset.density.ratio(signal.tags1, control.tags1, background.density.scaling = background.density.scaling) } if (is.null(bg.weight2)) { bg.weight2 <- dataset.density.ratio(signal.tags2, control.tags2, background.density.scaling = background.density.scaling) } cmle <- lapply(chrl, function(chr) { d <- ssd1[[chr]] d$y <- log2(ssd1[[chr]]$y + pseudocount * bg.weight1) - log2(csd1[[chr]]$y + pseudocount) - log2(bg.weight1) - log2(ssd2[[chr]]$y + pseudocount * bg.weight2) + log2(csd2[[chr]]$y + pseudocount) + log2(bg.weight2) return(d) }) } } } spp/man/densum.Rd0000644000176200001440000000451313472222320013411 0ustar liggesusers\name{densum} \alias{densum} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Do Something } \description{ Densum } \usage{ densum(vin, bw = 5, dw = 3, match.wt.f = NULL, return.x = T, from = min(vin), to = max(vin), step = 1, new.code = T) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{vin}{ Parameter } \item{bw}{ Parameter } \item{dw}{ Parameter } \item{match.wt.f}{ Parameter } \item{return.x}{ Parameter } \item{from}{ Parameter } \item{to}{ Parameter } \item{step}{ Parameter } \item{new.code}{ Parameter } } \value{ Some sum } %% ~Make other sections like Warning with \section{Warning }{....} ~ \examples{ \dontrun{ ##---- Should be DIRECTLY executable !! ---- ##-- ==> Define data, use random, ##-- or do help(data=index) for the standard data sets. ## The function is currently defined as function (vin, bw = 5, dw = 3, match.wt.f = NULL, return.x = T, from = min(vin), to = max(vin), step = 1, new.code = T) { tc <- table(vin[vin >= from & vin <= to]) pos <- as.numeric(names(tc)) storage.mode(pos) <- "double" tc <- as.numeric(tc) storage.mode(tc) <- "double" n <- length(pos) if (!is.null(match.wt.f)) { tc <- tc * match.wt.f(pos) } rng <- c(from, to) if (rng[1] < 0) { stop("range extends into negative values") } if (range(pos)[1] < 0) { stop("position vector contains negative values") } storage.mode(n) <- storage.mode(rng) <- storage.mode(bw) <- storage.mode(dw) <- storage.mode(step) <- "integer" spos <- rng[1] storage.mode(spos) <- "double" dlength <- floor((rng[2] - rng[1])/step) + 1 if (dlength < 1) { stop("zero data range") } if (new.code) { storage.mode(step) <- storage.mode(dlength) <- storage.mode(bw) <- storage.mode(dw) <- "integer" dout <- .Call("ccdensum", pos, tc, spos, bw, dw, dlength, step) } else { dout <- numeric(dlength) storage.mode(dout) <- "double" storage.mode(dlength) <- "integer" .C("cdensum", n, pos, tc, spos, bw, dw, dlength, step, dout) } if (return.x) { return(list(x = c(rng[1], rng[1] + step * (dlength - 1)), y = dout, step = step)) } else { return(dout) } } } } spp/man/find.binding.positions.Rd0000644000176200001440000001631513471426636016517 0ustar liggesusers\name{find.binding.positions} \alias{find.binding.positions} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Determine significant point protein binding positions (peaks) } \description{ Given the signal and optional control (input) data, determine location of the statistically significant point binding positions. If the control data is not provided, the statistical significance can be assessed based on tag randomization. The method also provides options for masking regions exhibiting strong signals within the control data. } \usage{ find.binding.positions(signal.data, f=1,e.value = NULL, fdr = NULL, masked.data = NULL, control.data = NULL, whs = 200, min.dist = 200, window.size = 4e+07, cluster = NULL, debug = T, n.randomizations = 3, shuffle.window = 1, min.thr = 2, topN = NULL, tag.count.whs = 100, enrichment.z = 2, method = tag.wtd, tec.filter = T, tec.window.size = 10000, tec.z = 5, tec.masking.window.size=tec.window.size, tec.poisson.z=5,tec.poisson.ratio=5, tec = NULL, n.control.samples = 1, enrichment.scale.down.control =F, enrichment.background.scales = c(1, 5, 10), use.randomized.controls = F, background.density.scaling = T, mle.filter = F,min.mle.threshold = 1, ...) } %- maybe also 'usage' for other objects documented here. \arguments{ ~~ tag data ~~ \item{signal.data}{ signal tag vector list } \item{control.data}{ optional control (input) tag vector list } \item{f}{Fraction of signal read subsampled. Default=1, i.e. no subsampling} ~~ position stringency criteria ~~ \item{e.value}{ E-value defining the desired statistical significance of binding positions. } \item{fdr}{ FDR defining statistical significance of binding positions } \item{topN}{ instead of determining statistical significance thresholds, return the specified number of highest-scoring positions} ~~ other params ~~ \item{whs}{ window half-sized that should be used for binding detection (e.g. determined from cross-correlation profiles)} \item{masked.data}{ optional set of coordinates that should be masked (e.g. known non-unique regions) } \item{min.dist}{ minimal distance that must separate detected binding positions. In case multiple binding positions are detected within such distance, the position with the highest score is returned. } \item{window.size}{ size of the window used to segment the chromosome during calculations to reduce memory usage. } \item{cluster}{ optional \code{snow} cluster to parallelize the processing on } \item{debug}{ debug mode, whether to print debug messages } \item{min.thr}{ minimal score requirement for a peak } \item{background.density.scaling}{ If TRUE, regions of significant tag enrichment will be masked out when calculating size ratio of the signal to control datasets (to estimate ratio of the background tag density). If FALSE, the dataset ratio will be equal to the ratio of the number of tags in each dataset.} \item{tec}{ tec} \item{n.control.samples}{ n.control.samples} \item{enrichment.scale.down.control}{ enrichment.scale.down.control} \item{\dots}{ additional parameters should be the same as those passed to the \code{lwcc.prediction}} ~~ randomized controls ~~ \item{n.randomizations}{ number of tag randomziations that should be performed (when the control data is not provided) } \item{use.randomized.controls}{ Use randomized tag control, even if \code{control.data} is supplied. } \item{shuffle.window}{ during tag randomizations, tags will be split into groups of \code{shuffle.window} and will be maintained together throughout the randomization. } ~~ fold-enrichment confidence intervals ~~ \item{tag.count.whs}{ half-size of a window used to assess fold enrichment of a binding position} \item{enrichment.z}{ Z-score used to define the significance level of the fold-enrichment confidence intervals } \item{enrichment.background.scales}{ In estimating the peak fold-enrichment confidence intervals, the background tag density is estimated based on windows with half-sizes of \code{2*tag.count.whs*enrichment.background.scales}. } \item{method}{ either \code{tag.wtd} for WTD method, or \code{tag.lwcc} for MTC method} \item{mle.filter}{ If turned on, will exclude predicted positions whose MLE enrichment ratio (for any of the background scales) is below a specified min.mle.threshold } \item{min.mle.threshold}{ MLE enrichment ratio threshold that each predicted position must exceed if mle.filter is turned on. } ~~ masking regions of significant control enrichment ~~ \item{tec.filter}{ Whether to mask out the regions exhibiting significant enrichment in the control data in doing other calculations. The regions are identified using Poisson statistics within sliding windows, either relative to the scaled signal (tec.z), or relative to randomly-distributed expectation (tec.poisson.z).} \item{tec.window.size}{ size of the window used to determine significantly enrichent control regions } \item{tec.masking.window.size}{ size of the window used to mask the area around significantly enrichent control regions } \item{tec.z}{ Z-score defining statistical stringency by which a given window is determined to be significantly higher in the input than in the signal, and masked if that is the case.} \item{tec.poisson.z}{ Z-score defining statistical stringency by which a given window is determined to be significantly higher than the tec.poisson.ratio above the expected uniform input background. } \item{tec.poisson.ratio}{ Fold ratio by which input must exceed the level expected from the uniform distribution. } } \value{ \itemize{ \item{npl}{ A per-chromosome list containing data frames describing determined binding positions. Column description} \itemize{ \item{x:}{ position} \item{y:}{ score} \item{evalue:}{ E-value} \item{fdr:}{ FDR. For peaks higher than the maximum control peak,the highest dataset FDR is reported} \item{enr:}{ lower bound of the fold-enrichment ratio confidence interval. This is the estimate determined using scale of 1. Estimates corresponding to higher scales are returned in other enr columns with scale appearing in the name.} \item{enr.mle:}{ enrichment ratio maximum likely estimate} } \item{thr:}{ info on the chosen statistical threshold of the peak scores} } } \examples{ \dontrun{ # find binding positions using WTD method, 200bp half-window size, # control data, 1% FDR bp <-find.binding.positions(signal.data=chip.data, control.data=input.data, fdr=0.01, method=tag.wtd, whs=200); # find binding positions using MTC method, using 5 tag randomizations, # keeping pairs of tag positions together (shuffle.window=2) bp <- find.binding.positions(signal.data=chip.data, control.data=input.data, fdr=0.01,method=tag.lwcc, whs=200, use.randomized.controls=T, n.randomizations=5, shuffle.window=2) # print out the number of determined positions print(paste("detected",sum(unlist(lapply(bp$npl,function(d) length(d$x)))),"peaks")); } } spp/man/write.broadpeak.info.Rd0000644000176200001440000000107013471426636016143 0ustar liggesusers\name{write.broadpeak.info} \alias{write.broadpeak.info} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Write out determined broad enrichment regions using broadPeak format } \description{ Writes out broad regions of enrichment determined by the get.broad.enrichment.clusters method in a broadPeak format. } \usage{ write.broadpeak.info(bp, fname) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{bp}{broadpeak.results, output of the \code{\link{get.broad.enrichment.clusters}} } \item{fname}{ file name } } spp/man/read.bin.maqmap.tags.Rd0000644000176200001440000000151213471426636016023 0ustar liggesusers\name{read.bin.maqmap.tags} \alias{read.bin.maqmap.tags} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Read MAQ binary alignment map file } \description{ Reads in MAQ binary map alignment result file } \usage{ read.bin.maqmap.tags(filename, read.tag.names = F, fix.chromosome.names = T) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{filename}{ MAQ map output file (binary) } \item{read.tag.names}{ Whether the tag names should be read in } \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of the sequence names } } \value{ \item{tags }{ A vector of 5' tag coordinates, with negative values corresponding to tags mapped to the negative strand. } \item{quality }{ Number of mismatches } \item{names }{ Tag names, if \code{read.tag.names} was set } }spp/man/get.binding.characteristics.Rd0000644000176200001440000000573713471426636017510 0ustar liggesusers\name{get.binding.characteristics} \alias{get.binding.characteristics} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Calculate characteristics of observed DNA-binding signal from cross-correlation profiles } \description{ The methods calculates strand cross-correlation profile to determine binding peak separation distance and approximate window size that should be used for binding detection. If quality scores were given for the tags, which quality bins improve the cross-correlation pattern. } \usage{ get.binding.characteristics(data, srange = c(50, 500), bin = 5, cluster = NULL, debug = F, min.tag.count = 1000, acceptance.z.score = 3, remove.tag.anomalies = T, anomalies.z = 5,accept.all.tags=F) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{data}{ Tag/quality data: output of \code{read.eland.tags} or similar function } \item{srange}{ A range within which the binding peak separation is expected to fall. Should be larger than probe size to avoid artifacts. } \item{bin}{ Resolution (in basepairs) at which cross-corrrelation should be calculated. bin=1 is ideal, but takes longer to calculate. } \item{cluster}{ optional snow cluster for parallel processing } \item{debug}{ whether to print debug messages } \item{min.tag.count}{ minimal number of tags on the chromosome to be considered in the cross-correlation calculations } \item{acceptance.z.score}{ A Z-score used to determine if a given tag quality bin provides significant improvement to the strand cross-correlation } \item{remove.tag.anomalies}{ Whether to remove singular tag count peaks prior to calculation. This is recommended, since such positions may distort the cross-correlation profile and increase the necessary computational time. } \item{anomalies.z}{ Z-score for determining if the number of tags at a given position is significantly higher about background, and should be considered an anomaly.} \item{accept.all.tags}{ Whether tag alignment quality calculations should be skipped and all available tags should be accepted in the downstream analysis.} } \value{ \item{cross.correlation }{ Cross-correlation profile as an $x/$y data.frame} \item{peak}{ Position ($x) and height ($y) of automatically detected cross-correlation peak.} \item{whs}{ Optimized window half-size for binding detection (based on the width of the cross-correlation peak)} \item{quality.bin.acceptance}{ A list structure, describing the effect of inclusion of different tag quality bins on cross-correlation, and a resolution on which bins should be considered.} \itemize{ \item{informative.bins:}{ A boolean vector indicating whether the inclusion of tags from the tag quality bin specified in the name attribute signiificantly increases cross-correlation profile near the peak.} \item{quality.cc:}{ A list giving the cross-correlation profile after the inclusion of the tags from different quality bins} } } spp/man/read.bowtie.tags.Rd0000644000176200001440000000150013471426636015266 0ustar liggesusers\name{read.bowtie.tags} \alias{read.bowtie.tags} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Read bowtie text alignment output file } \description{ Reads in bowtie alignment results in text format } \usage{ read.bowtie.tags(filename, read.tag.names = F, fix.chromosome.names = F) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{filename}{ bowtie text output file } \item{read.tag.names}{ Whether the tag names should be read in } \item{fix.chromosome.names}{ Whether to remove ".fa" from the end of the sequence names } } \value{ \item{tags }{ A vector of 5' tag coordinates, with negative values corresponding to tags mapped to the negative strand. } \item{quality }{ Number of mismatches } \item{names }{ Tag names, if \code{read.tag.names} was set } }