TFMPvalue/0000755000176200001440000000000013276762732012074 5ustar liggesusersTFMPvalue/inst/0000755000176200001440000000000013276760366013052 5ustar liggesusersTFMPvalue/inst/TFMPvalueBuild/0000755000176200001440000000000013276760366015635 5ustar liggesusersTFMPvalue/inst/TFMPvalueBuild/build.R0000644000176200001440000000451513276760366017064 0ustar liggesusers##TFM-Pvalue g++ -I/Users/gtan/src/R-devel/include -DNDEBUG -I/usr/local/include -I"/Users/gtan/src/R-devel/library/Rcpp/include" -fPIC -g -O2 -c ArgumentException.cpp -o ArgumentException.o g++ -I/Users/gtan/src/R-devel/include -DNDEBUG -I/usr/local/include -I"/Users/gtan/src/R-devel/library/Rcpp/include" -fPIC -g -O2 -c FileException.cpp -o FileException.o g++ -I/Users/gtan/src/R-devel/include -DNDEBUG -I/usr/local/include -I"/Users/gtan/src/R-devel/library/Rcpp/include" -fPIC -g -O2 -c ParseException.cpp -o ParseException.o g++ -I/Users/gtan/src/R-devel/include -DNDEBUG -I/usr/local/include -I"/Users/gtan/src/R-devel/library/Rcpp/include" -fPIC -g -O2 -c Matrix.cpp -o Matrix.o g++ -I/Users/gtan/src/R-devel/include -DNDEBUG -I/usr/local/include -I"/Users/gtan/src/R-devel/library/Rcpp/include" -fPIC -g -O2 -c TFMpvalue.cpp -o TFMpvalue.o g++ -I/Users/gtan/src/R-devel/include -DNDEBUG -I/usr/local/include -I"/Users/gtan/src/R-devel/library/Rcpp/include" -fPIC -g -O2 -c TFMMain.cpp -o TFMMain.o g++ -dynamiclib -Wl,-headerpad_max_install_names -undefined dynamic_lookup -single_module -multiply_defined suppress -L/usr/local/lib -o TFMPvalue.so TFMMain.o TFMpvalue.o ArgumentException.o FileException.o ParseException.o Matrix.o -L/Users/gtan/src/R-devel/lib -lR -dylib_file libRblas.dylib:/Users/gtan/src/R-devel/lib/libRblas.dylib -Wl,-framework -Wl,CoreFoundation library(Rcpp) mat <- matrix(c(3, 5, 4, 2, 7, 0, 3, 4, 9, 1, 1, 3, 3, 6, 4, 1, 11, 0, 3, 0, 11, 0, 2, 1, 11, 0, 2, 1, 3, 3, 2, 6, 4, 1, 8, 1, 3, 4, 6, 1, 8, 5, 1, 0, 8, 1, 4, 1, 9,0,2,3,9,5,0,0,11,0,3,0,2,7,0,5), nrow = 4, dimnames = 
list(c("A","C","G","T") )) bg=c(A=0.25, C=0.25, G=0.25, T=0.25) score=8.77 type="PFM" pvalue=1e-5 dyn.load("/Users/gtan/Repositories/Bitbucket/TFMPvalue/src/TFMPvalue.so") .Call("sc2pv", mat, score, bg, type) .Call("pv2sc", mat, pvalue, bg, type) .Call("lazyScore", mat, pvalue, bg, type, 1e-5) ./TFMpvalue-fastpvalue -a 0.25 -t 0.25 -c 0.25 -g 0.25 -m MA0045.pfm -s 8.77 -G 1e-5 ./TFMpvalue-lazydistrib -a 0.25 -t 0.25 -c 0.25 -g 0.25 -m MA0045.pfm -p 1e-5 -G 1e-5 R CMD build TFMPvalue R CMD check --as-cran TFMPvalue_0.0.5.tar.gz R CMD install TFMPvalue_0.0.5.tar.gz library(TFMPvalue) library(RUnit) TFMsc2pv(pwm, score, type="PWM") TFMPvalue/tests/0000755000176200001440000000000013276760366013237 5ustar liggesusersTFMPvalue/tests/testthat/0000755000176200001440000000000013276760366015077 5ustar liggesusersTFMPvalue/tests/testthat/test_pv2sc.R0000644000176200001440000000101013276760366017306 0ustar liggesuserstest_that("test_pv2sc", { pfm <- matrix(c(3, 5, 4, 2, 7, 0, 3, 4, 9, 1, 1, 3, 3, 6, 4, 1, 11, 0, 3, 0, 11, 0, 2, 1, 11, 0, 2, 1, 3, 3, 2, 6, 4, 1, 8, 1, 3, 4, 6, 1, 8, 5, 1, 0, 8, 1, 4, 1, 9, 0, 2, 3, 9, 5, 0, 0, 11, 0, 3, 0, 2, 7, 0, 5), nrow = 4, dimnames = list(c("A", "C", "G", "T"))) bg <- c(A = 0.25, C = 0.25, G = 0.25, T = 0.25) pvalue <- 0.00001 type <- "PFM" score <- TFMpv2sc(pfm, pvalue, bg, type) expect_equal(score, 8.773708, tolerance=0.01) }) TFMPvalue/tests/testthat/test_sc2pv.R0000644000176200001440000000101213276760366017310 0ustar liggesuserstest_that("test_sc2pv", { pfm <- matrix(c(3, 5, 4, 2, 7, 0, 3, 4, 9, 1, 1, 3, 3, 6, 4, 1, 11, 0, 3, 0, 11, 0, 2, 1, 11, 0, 2, 1, 3, 3, 2, 6, 4, 1, 8, 1, 3, 4, 6, 1, 8, 5, 1, 0, 8, 1, 4, 1, 9, 0, 2, 3, 9, 5, 0, 0, 11, 0, 3, 0, 2, 7, 0, 5), nrow = 4, dimnames = list(c("A", "C", "G", "T"))) bg <- c(A = 0.25, C = 0.25, G = 0.25, T = 0.25) score <- 8.77 type <- "PFM" pvalue <- TFMsc2pv(pfm, score, bg, type) expect_equal(pvalue, 0.00001007156, tolerance=1e-5) }) 
TFMPvalue/tests/testthat/test_lazyScore.R0000644000176200001440000000126213276760366020235 0ustar liggesusers #test_lazyScore <- function(){ # pfm <- matrix(c(3, 5, 4, 2, 7, 0, 3, 4, 9, 1, 1, 3, 3, 6, 4, 1, 11, # 0, 3, 0, 11, 0, 2, 1, 11, 0, 2, 1, 3, 3, 2, 6, 4, 1, # 8, 1, 3, 4, 6, 1, 8, 5, 1, 0, 8, 1, 4, 1, 9, 0, 2, 3, # 9, 5, 0, 0, 11, 0, 3, 0, 2, 7, 0, 5), # nrow = 4, dimnames = list(c("A","C","G","T")) # ) # bg <- c(A=0.25, C=0.25, G=0.25, T=0.25) # pvalue <- 1e-5 # type <- "PFM" # granularity <- 1e-5 # #dyn.load("/Users/gtan/Repositories/Bitbucket/TFMPvalue/src/TFMPvalue.so") # score <- TFMLazyScore(pfm, pvalue, bg, type, granularity) # checkEqualsNumeric(8.77361, score, tolerance=0.01) # #} TFMPvalue/src/0000755000176200001440000000000013276761357012665 5ustar liggesusersTFMPvalue/src/ArgumentException.h0000644000176200001440000000061313276761357016477 0ustar liggesusers/* * ArgumentException.h * pvalue * * Created by Jean-Stéphane Varré on 02/07/07. * Copyright 2007 LIFL-USTL-INRIA. All rights reserved. * */ #ifndef __ARGUMENTEXCEPTION__ #define __ARGUMENTEXCEPTION__ #include using namespace std; class ArgumentException { public: ArgumentException () {} ArgumentException(const char *str) { cerr << str << endl;} }; #endif TFMPvalue/src/Makevars0000644000176200001440000000002213276761357014353 0ustar liggesusersPKG_CXX_STD=CXX11 TFMPvalue/src/FileException.cpp0000644000176200001440000000026413276761357016131 0ustar liggesusers/* * FileException.cpp * pvalue * * Created by Jean-StŽphane VarrŽ on 02/07/07. * Copyright 2007 LIFL-USTL-INRIA. All rights reserved. * */ #include "FileException.h" TFMPvalue/src/Matrix.cpp0000644000176200001440000003021213276761357014633 0ustar liggesusers/* * Matrix.cpp * pvalue * * Created by Jean-Stéphane Varré on 02/07/07. * Copyright 2007 LIFL-USTL-INRIA. All rights reserved. 
* */ #include "Matrix.h" //#define PRINTVERBOSE //#define SHOWCERR //#define VERBOSE void Matrix::computesIntegerMatrix (double granularity, bool sortColumns) { double minS = 0, maxS = 0; double scoreRange; // computes precision for (int i = 0; i < length; i++) { double min = mat[0][i]; double max = min; for (int k = 1; k < 4; k++ ) { min = ((min < mat[k][i])?min:(mat[k][i])); max = ((max > mat[k][i])?max:(mat[k][i])); } minS += min; maxS += max; } // score range scoreRange = maxS - minS + 1; if (granularity > 1.0) { this->granularity = granularity / scoreRange; } else if (granularity < 1.0) { this->granularity = 1.0 / granularity; } else { this->granularity = 1.0; } matInt = new qlonglong *[length]; for (int k = 0; k < 4; k++ ) { matInt[k] = new qlonglong[length]; for (int p = 0 ; p < length; p++) { matInt[k][p] = ROUND_TO_INT((double)(mat[k][p]*this->granularity)); } } #ifdef PRINTVERBOSE /*cout << "SCORE RANGE : " << minS << " -> " << maxS << endl; cout << "PRECISION " << this->granularity << endl; cout << "INTEGER MATRIX WITHOUT OFFSET" << endl; for (int k = 0; k < 4; k++ ) { for (int i = 0 ; i < length; i++) { cout << matInt[k][i] << "\t"; } cout << endl; }*/ #endif this->errorMax = 0.0; for (int i = 1; i < length; i++) { double maxE = mat[0][i] * this->granularity - (matInt[0][i]); for (int k = 1; k < 4; k++) { maxE = ((maxE < mat[k][i] * this->granularity - matInt[k][i])?(mat[k][i] * this->granularity - (matInt[k][i])):(maxE)); } this->errorMax += maxE; } #ifdef PRINTVERBOSE //cout << " ERROR MAX : " << this->errorMax << endl; #endif if (sortColumns) { // sort the columns : the first column is the one with the greatest value qlonglong min = 0; for (int i = 0; i < length; i++) { for (int k = 0; k < 4; k++) { min = MIN(min,matInt[k][i]); } } min --; qlonglong *maxs = new qlonglong [length]; for (int i = 0; i < length; i++) { maxs[i] = matInt[0][i]; for (int k = 1; k < 4; k++) { if (maxs[i] < matInt[k][i]) { maxs[i] = matInt[k][i]; } } } qlonglong **mattemp = 
new qlonglong *[4]; for (int k = 0; k < 4; k++) { mattemp[k] = new qlonglong [length]; } for (int i = 0; i < length; i++) { qlonglong max = maxs[0]; int p = 0; for (int j = 1; j < length; j++) { if (max < maxs[j]) { max = maxs[j]; p = j; } } maxs[p] = min; for (int k = 0; k < 4; k++) { mattemp[k][i] = matInt[k][p]; } } #ifdef PRINTVERBOSE /*cout << "INTEGER MATRIX WITHOUT OFFSET ORDERED" << endl; for (int k = 0; k < 4; k++) { for (int i = 0; i < length; i++) { cout << mattemp[k][i] << "\t"; } cout << endl; }*/ #endif for (int k = 0; k < 4; k++) { for (int i = 0; i < length; i++) { matInt[k][i] = mattemp[k][i]; } } for(int i=0; i<4; i++){ delete[] mattemp[i]; } delete[] mattemp; delete[] maxs; } // computes offsets this->offset = 0; offsets = new qlonglong [length]; for (int i = 0; i < length; i++) { qlonglong min = matInt[0][i]; for (int k = 1; k < 4; k++ ) { min = ((min < matInt[k][i])?min:(matInt[k][i])); } offsets[i] = -min; for (int k = 0; k < 4; k++ ) { matInt[k][i] += offsets[i]; } this->offset += offsets[i]; } #ifdef PRINTVERBOSE //cout << "OFFSET : " << this->offset << endl; #endif #ifdef PRINTVERBOSE /*cout << "INTEGER MATRIX WITH OFFSET" << endl; for (int k = 0; k < 4; k++ ) { for (int i = 0; i < length; i++) { cout << matInt[k][i] << "\t"; } cout << endl; }*/ #endif // look for the minimum score of the matrix for each column minScoreColumn = new qlonglong [length]; maxScoreColumn = new qlonglong [length]; sum = new qlonglong [length]; minScore = 0; maxScore = 0; for (int i = 0; i < length; i++) { minScoreColumn[i] = matInt[0][i]; maxScoreColumn[i] = matInt[0][i]; sum[i] = 0; for (int k = 1; k < 4; k++ ) { sum[i] = sum[i] + matInt[k][i]; if (minScoreColumn[i] > matInt[k][i]) { minScoreColumn[i] = matInt[k][i]; } if (maxScoreColumn[i] < matInt[k][i]) { maxScoreColumn[i] = matInt[k][i]; } } minScore = minScore + minScoreColumn[i]; maxScore = maxScore + maxScoreColumn[i]; //cout << "minScoreColumn[" << i << "] = " << minScoreColumn[i] << endl; //cout << 
"maxScoreColumn[" << i << "] = " << maxScoreColumn[i] << endl; } this->scoreRange = maxScore - minScore + 1; #ifdef PRINTVERBOSE //cout << "SCORE RANGE : " << minScore << " - " << maxScore << " : " << this->scoreRange << endl; #endif bestScore = new qlonglong[length]; worstScore = new qlonglong[length]; bestScore[length-1] = maxScore; worstScore[length-1] = minScore; for (int i = length - 2; i >= 0; i--) { bestScore[i] = bestScore[i+1] - maxScoreColumn[i+1]; worstScore[i] = worstScore[i+1] - minScoreColumn[i+1]; } } /** * Computes the pvalue associated with the threshold score requestedScore. */ void Matrix::lookForPvalue (qlonglong requestedScore, qlonglong min, qlonglong max, double *pmin, double *pmax) { map *nbocc = calcDistribWithMapMinMax(min,max); map::iterator iter; #ifdef SHOWCERR //cerr << " Looks for Pvalue between " << min << " and " << max << " for score " << requestedScore << endl; #endif // computes p values and stores them in nbocc[length] double sum = nbocc[length][max+1]; qlonglong s = max + 1; map::reverse_iterator riter = nbocc[length-1].rbegin(); while (riter != nbocc[length-1].rend()) { sum += riter->second; if (riter->first >= requestedScore) s = riter->first; nbocc[length][riter->first] = sum; riter++; } //cout << " s found : " << s << endl; iter = nbocc[length].find(s); while (iter != nbocc[length].begin() && iter->first >= s - errorMax) { iter--; } //cout << " s - E found : " << iter->first << endl; #ifdef MEMORYCOUNT // for tests, store the number of memory bloc necessary for (int pos = 0; pos <= length; pos++) { totalMapSize += nbocc[pos].size(); } #endif *pmax = nbocc[length][s]; *pmin = iter->second; delete[] nbocc; } /** * Computes the score associated with the pvalue requestedPvalue. 
*/ qlonglong Matrix::lookForScore (qlonglong min, qlonglong max, double requestedPvalue, double *rpv, double *rppv) { map *nbocc = calcDistribWithMapMinMax(min,max); map::iterator iter; #ifdef SHOWCERR //cerr << " Looks for score between " << min << " and " << max << endl; #endif // computes p values and stores them in nbocc[length] double sum = 0.0; map::reverse_iterator riter = nbocc[length-1].rbegin(); qlonglong alpha = riter->first+1; qlonglong alpha_E = alpha; nbocc[length][alpha] = 0.0; while (riter != nbocc[length-1].rend()) { sum += riter->second; //cout << "Pv(S) " << riter->first << " " << sum << " " << requestedPvalue << endl; nbocc[length][riter->first] = sum; if (sum >= requestedPvalue) { break; } riter++; } //cout << "BREAK Pv(S) " << riter->first << " " << sum << " " << requestedPvalue << endl; if (sum > requestedPvalue) { alpha_E = riter->first; riter--; alpha = riter->first; } else { if (riter == nbocc[length-1].rend()) { // path following the remark of the mail riter--; alpha = alpha_E = riter->first; } else { alpha = riter->first; riter++; sum += riter->second; alpha_E = riter->first; } nbocc[length][alpha_E] = sum; //cout << "Pv(S) " << riter->first << " " << sum << endl; } #ifdef VERBOSE //cerr << riter->first << " ALPHA found at score " << alpha << " and P-value " << nbocc[length][alpha] << endl; //cerr << riter->first << " ALPHA-E found at score " << alpha_E << " and P-value " << nbocc[length][alpha_E] << endl; #endif // affichage des pvaleurs /*iter = nbocc[length].begin(); while (iter != nbocc[length].end()) { cerr << iter->first << "[" << iter->second << "]" << endl; iter++; }*/ #ifdef MEMORYCOUNT // for tests, store the number of memory bloc necessary for (int pos = 0; pos <= length; pos++) { totalMapSize += nbocc[pos].size(); } #endif if (alpha - alpha_E > errorMax) alpha_E = alpha; *rpv = nbocc[length][alpha]; *rppv = nbocc[length][alpha_E]; delete[] nbocc; return alpha; } // computes the distribution of scores between score min and max 
as the DP algrithm proceeds // but instead of using a table we use a map to avoid computations for scores that cannot be reached map *Matrix::calcDistribWithMapMinMax (qlonglong min, qlonglong max) { // maps for each step of the computation // nbocc[length] stores the pvalue // nbocc[pos] for pos < length stores the qvalue map *nbocc = new map [length+1]; map::iterator iter; qlonglong *maxs = new qlonglong[length+1]; // @ pos i maximum score reachable with the suffix matrix from i to length-1 #ifdef VERBOSE //cerr << " Calc distrib between " << min << " and " << max << endl; #endif maxs[length] = 0; for (int i = length-1; i >= 0; i--) { maxs[i] = maxs[i+1] + maxScoreColumn[i]; } // initializes the map at position 0 for (int k = 0; k < 4; k++) { if (matInt[k][0]+maxs[1] >= min) { nbocc[0][matInt[k][0]] += background[k]; } } // computes q values for scores greater or equal than min nbocc[length-1][max+1] = 0.0; for (int pos = 1; pos < length; pos++) { iter = nbocc[pos-1].begin(); while (iter != nbocc[pos-1].end()) { for (int k = 0; k < 4; k++) { qlonglong sc = iter->first + matInt[k][pos]; if (sc+maxs[pos+1] >= min) { // the score min can be reached if (sc > max) { // the score will be greater than max for all suffixes nbocc[length-1][max+1] += nbocc[pos-1][iter->first] * background[k]; //pow(4,length-pos-1) ; totalOp++; } else { nbocc[pos][sc] += nbocc[pos-1][iter->first] * background[k]; totalOp++; } } } iter++; } //cerr << " map size for " << pos << " " << nbocc[pos].size() << endl; } delete[] maxs; return nbocc; } qlonglong Matrix::fastPvalue (Matrix *m, qlonglong alpha) { map *q = new map [m->length+1]; map::iterator iter; qlonglong P = 0; qlonglong *maxm = new qlonglong[m->length+1]; // @ pos i maximum score reachable with the suffix matrix from i to length-1 maxm[m->length] = 0; for (int i = m->length-1; i >= 0; i--) { maxm[i] = maxm[i+1] + m->maxScoreColumn[i]; } // initializes the map at position 0 for (int k = 0; k < 4; k++) { if (m->matInt[k][0]+maxm[1] >= 
alpha) { //cout << "FP: Set " << m->matInt[k][0] << " to "; q[0][m->matInt[k][0]] += 1; //cout << q[0][m->matInt[k][0]] << endl; } } // computes q values for scores strictly greater than alpha for (int pos = 1; pos < m->length; pos++) { iter = q[pos-1].begin(); while (iter != q[pos-1].end()) { for (int k = 0; k < 4; k++) { qlonglong scm = iter->first + m->matInt[k][pos]; if (scm > alpha) { //cout << "Update P from " << P; P += iter->second * (qlonglong)pow(4.0,m->length-pos-1); //cout << " to P " << P << endl; } else if (scm + maxm[pos+1] > alpha) { q[pos][scm] += iter->second; } } iter++; } q[pos-1].erase(q[pos-1].begin(),q[pos-1].end()); } delete[] maxm; return P; } TFMPvalue/src/TFMPvalue_init.c0000644000176200001440000000122513276761357015657 0ustar liggesusers#include #include #include // for NULL #include /* FIXME: Check these declarations against the C/Fortran source code. */ /* .Call calls */ extern SEXP lazyScore(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP pv2sc(SEXP, SEXP, SEXP, SEXP); extern SEXP sc2pv(SEXP, SEXP, SEXP, SEXP); static const R_CallMethodDef CallEntries[] = { {"lazyScore", (DL_FUNC) &lazyScore, 5}, {"pv2sc", (DL_FUNC) &pv2sc, 4}, {"sc2pv", (DL_FUNC) &sc2pv, 4}, {NULL, NULL, 0} }; void R_init_TFMPvalue(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } TFMPvalue/src/TFMMain.cpp0000644000176200001440000002010413276761357014621 0ustar liggesusers#include #include #include #include #include #include //#include #include #include #include "Matrix.h" #include "ArgumentException.h" #include "TFMpvalue.h" #include using namespace std; /******************************************************************** * Free the Matric class * *****************************************************************/ void freeMatrix(Matrix m, int nrow){ // free the memory allocated, not typical Rcpp way for(int i=0; i= maxGranularity; granularity /= 10){ //cout << "Computing rounded matrix with granularity " << 
granularity << endl; m.computesIntegerMatrix(granularity); max = requestedScore*m.granularity + m.offset + m.errorMax+1; min = requestedScore*m.granularity + m.offset - m.errorMax-1; score = requestedScore*m.granularity + m.offset; /*cout << "Score range : " << m.scoreRange << endl; cout << "Min : " << min << endl; cout << "Max : " << max << endl; cout << "Precision : " << m.granularity << endl; cout << "Error max : " << m.errorMax << endl; cout << "Computing pvalue for requested score " << requestedScore << " " << score << endl;*/ #ifdef MEMORYCOUNT m.totalMapSize = 0; m.totalOp = 0; #endif m.lookForPvalue(score,min,max,&ppv,&pv); /* cout << "Prev. Pvalue : " << ppv << endl; cout << "Pvaluex : " << pv << endl; cout << "Comp. score : " << score << endl;*/ #ifdef MEMORYCOUNT totalSize += m.totalMapSize; totalOp += m.totalOp; #endif //cout << "***********************************************" << endl; if (ppv == pv) { if (!forcedGranularity) { break; } } } Rcpp::NumericVector ans(1); ans[0] = pv; // free the memory allocated, not typical Rcpp way //for(i=0; i= maxGranularity; granularity /= decrgr) { m.computesIntegerMatrix(granularity); double ppv; score = m.lookForScore(min,max,requestedPvalue,&pv,&ppv); min = (score - ceil(m.errorMax+0.5)) * decrgr; max = (score + ceil(m.errorMax+0.5)) * decrgr; if (pv == ppv) { if (!forcedGranularity) { break; } } } Rcpp::NumericVector ans(1); ans[0] = ((score-m.offset)/m.granularity); freeMatrix(m, nrow); return Rcpp::wrap(ans); } /******************************************************************** * .Call() Entry point FastPvalue * *****************************************************************/ RcppExport SEXP FastPvalue(SEXP mat, SEXP Rscore, SEXP bg, SEXP type, SEXP Rgranularity){ Rcpp::NumericVector background(bg); Rcpp::NumericMatrix matrix(mat); Rcpp::NumericVector RscoreVec(Rscore); Rcpp::CharacterVector Type(type); Rcpp::NumericVector granularityVec(Rgranularity); // Fill with background Matrix m(background[0], 
background[1], background[2], background[3]); // Fill with matrix int i=0, j=0; m.mat = new double*[4]; int ncol = matrix.ncol(); int nrow = matrix.nrow(); m.length = ncol; for(i=0; i *nbOcc = new map [m.length+1]; map *pbuf = new map [m.length+1]; qlonglong score = m.maxScore+ceil(m.errorMax); qlonglong d = 0; double pv = 0; nbOcc[m.length][score] = pv; while (pv <= requestedPvalue) { score --; pv += _beckstette(m,&nbOcc,&pbuf,m.length-1,score,d); nbOcc[m.length][score] = pv; d++; } pv = nbOcc[m.length][score]; Rcpp::NumericVector ans(1); ans[0] = ((score-m.offset)/m.granularity); freeMatrix(m, nrow); delete[] nbOcc; delete[] pbuf; return Rcpp::wrap(ans); } TFMPvalue/src/Matrix.h0000644000176200001440000001456613276761357014316 0ustar liggesusers/* * Matrix.h * pvalue * * Created by Jean-Stéphane Varré on 02/07/07. * Copyright 2007 LIFL-USTL-INRIA. All rights reserved. * */ #ifndef __MATRIX__ #define __MATRIX__ #include #include #include #include #include #include #include "FileException.h" #include "ParseException.h" using namespace std; #ifdef __GNUC__ #ifdef _WIN32 typedef long long qlonglong; #else #include typedef int64_t qlonglong; #endif #else typedef long long qlonglong; #endif #define ROUND_TO_INT(n) ((qlonglong)floor(n)) #define MIN(a,b) ((a)<(b)?(a):(b)) #define MAX(a,b) ((a)>(b)?(a):(b)) extern map OPTIONS; //#define PRINTLOGRATIO class Matrix { private: /** * Split a string following delimiters */ void tokenize(const string& str, vector& tokens, const string& delimiters) { // Skip delimiters at beginning. string::size_type lastPos = str.find_first_not_of(delimiters, 0); // Find first "non-delimiter". string::size_type pos = str.find_first_of(delimiters, lastPos); while (string::npos != pos || string::npos != lastPos) { // Found a token, add it to the vector. tokens.push_back(str.substr(lastPos, pos - lastPos)); // Skip delimiters. 
Note the "not_of" lastPos = str.find_first_not_of(delimiters, pos); // Find next "non-delimiter" pos = str.find_first_of(delimiters, lastPos); } } public: // used for efficiency tests qlonglong totalMapSize; qlonglong totalOp; double ** mat; // the matrix as it is stored in the matrix file int length; double granularity; // the real granularity used, greater than 1 qlonglong ** matInt; // the discrete matrix with offset double errorMax; qlonglong *offsets; // offset of each column qlonglong offset; // sum of offsets qlonglong *minScoreColumn; // min discrete score at each column qlonglong *maxScoreColumn; // max discrete score at each column qlonglong *sum; qlonglong minScore; // min total discrete score (normally 0) qlonglong maxScore; // max total discrete score qlonglong scoreRange; // score range = max - min + 1 qlonglong *bestScore; qlonglong *worstScore; double background[4]; Matrix() { granularity = 1.0; offset = 0; background[0] = background[1] = background[2] = background[3] = 0.25; } Matrix(double pA, double pC, double pG, double pT) { granularity = 1.0; offset = 0; background[0] = pA; background[1] = pC; background[2] = pG; background[3] = pT; } void toLogOddRatio () { for (int p = 0; p < length; p++) { double sum = mat[0][p] + mat[1][p] + mat[2][p] + mat[3][p]; for (int k = 0; k < 4; k++) { mat[k][p] = log2((mat[k][p] + 0.25) /(sum + 1)) - log2(background[k]); } } #ifdef PRINTLOGRATIO /* for (int k = 0; k < 4; k++ ) { for (int i = 0 ; i < length; i++) { cout << mat[k][i] << "\t"; } cout << endl; }*/ #endif } /** * Transforms the initial matrix into an integer and offseted matrix. 
*/ void computesIntegerMatrix (double granularity, bool sortColumns = true); // computes the complete score distribution between score min and max void showDistrib (qlonglong min, qlonglong max) { map *nbocc = calcDistribWithMapMinMax(min,max); map::iterator iter; if (OPTIONS['h']) { //cout << "Scores and p-values between " << min << " and " << max << endl; } // computes p values and stores them in nbocc[length] double sum = 0; map::reverse_iterator riter = nbocc[length-1].rbegin(); while (riter != nbocc[length-1].rend()) { sum += riter->second; nbocc[length][riter->first] = sum; riter++; } iter = nbocc[length].begin(); while (iter != nbocc[length].end() && iter->first <= max) { //cout << (((iter->first)-offset)/granularity) << " " << (iter->second) << " " << nbocc[length-1][iter->first] << endl; iter ++; } } /** * Computes the pvalue associated with the threshold score requestedScore. */ void lookForPvalue (qlonglong requestedScore, qlonglong min, qlonglong max, double *pmin, double *pmax); /** * Computes the score associated with the pvalue requestedPvalue. 
*/ qlonglong lookForScore (qlonglong min, qlonglong max, double requestedPvalue, double *rpv, double *rppv); /** * Computes the distribution of scores between score min and max as the DP algrithm proceeds * but instead of using a table we use a map to avoid computations for scores that cannot be reached */ map *calcDistribWithMapMinMax (qlonglong min, qlonglong max); /** * Computes the pvalue for a given score and at a fixed granularity */ qlonglong fastPvalue (Matrix *m, qlonglong alpha); void readJasparMatrix (string filename) { ifstream f(filename.data()); if (!f) { throw new FileException(); } string str; this->length = 0; vector v; mat = new double*[4]; for (int j = 0; j < 4; j++) { getline(f,str); tokenize(str,v," \t|"); this->length = v.size(); this->mat[j] = new double[this->length]; for (unsigned int i = 0; i < v.size(); i++) { mat[j][i] = atof(v.at(i).data()); } v.clear(); } f.close(); #ifdef PRINTVERBOSE /*cout << "INITIAL MATRIX" << endl; for (int k = 0; k < 4; k++ ) { for (int i = 0 ; i < length; i++) { cout << mat[k][i] << "\t"; } cout << endl; }*/ #endif } void readHorizontalMatrix (string filename) { ifstream f(filename.data()); if (!f) { throw new FileException(); } string str; this->length = 0; // comment out for JASPAR matrices getline(f,str); // line with matrix name and family vector v; mat = new double*[4]; for (int j = 0; j < 4; j++) { getline(f,str); tokenize(str,v," \t|"); this->length = v.size() -1; // not -1 for JASPAR this->mat[j] = new double[this->length]; for (unsigned int i = 1; i < v.size(); i++) { // 1 if not JASPAR mat[j][i-1] = atof(v.at(i).data()); } v.clear(); } f.close(); } }; /* Matrix */ #endif TFMPvalue/src/ArgumentException.cpp0000644000176200001440000000027413276761357017035 0ustar liggesusers/* * ArgumentException.cpp * pvalue * * Created by Jean-StŽphane VarrŽ on 02/07/07. * Copyright 2007 LIFL-USTL-INRIA. All rights reserved. 
* */ #include "ArgumentException.h" TFMPvalue/src/ParseException.cpp0000644000176200001440000000026613276761357016326 0ustar liggesusers/* * ParseException.cpp * pvalue * * Created by Jean-StŽphane VarrŽ on 02/07/07. * Copyright 2007 LIFL-USTL-INRIA. All rights reserved. * */ #include "ParseException.h" TFMPvalue/src/FileException.h0000644000176200001440000000043113276761357015572 0ustar liggesusers/* * FileException.h * pvalue * * Created by Jean-Stéphane Varré on 02/07/07. * Copyright 2007 LIFL-USTL-INRIA. All rights reserved. * */ #ifndef __FILEEXCEPTION__ #define __FILEEXCEPTION__ #include using namespace std; class FileException { }; #endif TFMPvalue/src/TFMpvalue.cpp0000644000176200001440000004230413276761357015237 0ustar liggesusers/* * TFMpvalue.cpp * pvalue * * Created by Jean-Stéphane Varré on 02/07/07. * Copyright 2007 LIFL-USTL-INRIA. All rights reserved. * */ #include "TFMpvalue.h" map OPTIONS; static string REQUIRED[6] = { "a:t:c:g:m:p:", "a:t:c:g:m:s:" , "a:c:t:g:m:", "a:c:t:g:m:s:S:G:", "a:c:t:g:m:s:G:", "a:c:t:g:m:p:G:" }; static string OPTIONAL[6] = { "whi", "wh", "whG:", "wh", "wh", "wh" }; void stop () { string str; getline(cin,str); } void enumScoreFloatPvalue (Matrix *m, int pos, double score, map *t, qlonglong *nbocc, qlonglong pval) { if (*nbocc < pval) { if (pos == m->length) { (*t)[score] = 1; (*nbocc)++; } else { for (int k = 0; k < 4; k++) { enumScoreFloatPvalue(m,pos+1,score+m->mat[k][pos],t,nbocc,pval); } } } } void enumScoreFloat (Matrix *m, int pos, double score, map *t) { if (pos == m->length) { (*t)[score] += 1; } else { for (int k = 0; k < 4; k++) { enumScoreFloat(m,pos+1,score+m->mat[k][pos],t); } } } void enumScore (Matrix *m, int pos, qlonglong score, map*t) { if (pos == m->length) { (*t)[score] += 1; } else { for (int k = 0; k < 4; k++) { enumScore(m,pos+1,score+m->matInt[k][pos],t); } } } /** * LAZY DISTRIBUTION */ double _beckstette (Matrix m, map **nbOcc, map **pbuf, int pos, qlonglong score, qlonglong d); double 
_beckstettePbuf (Matrix m, map **nbOcc, map **pbuf, int pos, qlonglong score, qlonglong d) { //cout << "d=" << d << " Pbuf_" << pos << " (" << score << ") = "; if (pos == -1) { return 0; } map::iterator iterPbuf; iterPbuf = (*pbuf)[pos].find(score); double nb; if (iterPbuf == ((*pbuf)[pos]).end()) { nb = 0; } else { nb = iterPbuf->second; // set at the old value } // compute Pbuf[pos][score] for (int k = 0; k < 4; k++) { if (m.matInt[k][pos] < m.maxScoreColumn[pos] - d) { qlonglong s = score - m.matInt[k][pos]; //cout << "(" << k << "," << pos << ")" << "->" << matInt[k][pos] << " "; if (s <= m.bestScore[pos-1] && s >= 0) { nb += _beckstette(m,nbOcc,pbuf,pos-1,s,d) * m.background[k]; } } } (*pbuf)[pos][score] = nb; return nb; } double _beckstette (Matrix m, map **nbOcc, map **pbuf, int pos, qlonglong score, qlonglong d) { //cout << "Q_" << pos << " (" << score << ")" << endl; if (score < 0 || pos == -1) { if (score == 0) return 1; else return 0; } map::iterator iterNbOcc; iterNbOcc = (*nbOcc)[pos].find(score); if (iterNbOcc == ((*nbOcc)[pos]).end()) { // first compute pbuf double nb = _beckstettePbuf(m,nbOcc,pbuf,pos,score,d); // qlonglong nb = (*pbuf)[pos][score]; //cout << nb << endl; // then compute NbOcc for (int k = 0; k < 4; k++) { if (m.matInt[k][pos] >= m.maxScoreColumn[pos] - d) { qlonglong s = score - m.matInt[k][pos]; if (s <= m.bestScore[pos-1] && s >= 0) { nb += _beckstette(m,nbOcc,pbuf,pos-1,s,d) * m.background[k]; } } } (*nbOcc)[pos][score] = nb; } return (*nbOcc)[pos][score]; } void testLazyDistrib (Matrix m, double granularity, double requestedPvalue) { #ifdef MEMORYCOUNT qlonglong totalSize = 0; qlonglong totalOp = 0; #endif #ifdef VERBOSE //cerr << "### LAZY DISTRIB (pvalue=" << requestedPvalue << ", with granularity=" << granularity << ") ############################################" << endl; #endif m.computesIntegerMatrix(granularity,true); map *nbOcc = new map [m.length+1]; map *pbuf = new map [m.length+1]; qlonglong score = 
m.maxScore+ceil(m.errorMax); qlonglong d = 0; double pv = 0; nbOcc[m.length][score] = pv; while (pv <= requestedPvalue) { score --; //cout << requestedPvalue << "***** BECK for pv " << pv << " and score " << score << " " << (score - m.offset) / m.granularity << endl; pv += _beckstette(m,&nbOcc,&pbuf,m.length-1,score,d); // pv += nbOcc[length-1][score]; nbOcc[m.length][score] = pv; d++; } //cout << requested_pvalue << "***** BECK for pv " << pv << " and score " << score << " " << (score - offset) / granularity << endl; // score++; pv = nbOcc[m.length][score]; /* totalMapSize = 0; for (int pos = 0; pos <= length; pos++) { totalMapSize += nbOcc[pos].size() + pbuf[pos].size(); } */ if (OPTIONS['h']) { /*cout << "Score : " << ((score-m.offset)/m.granularity) << endl; cout << "Pvalue : " << pv << endl; cout << "Granularity : " << m.granularity << endl; #ifdef MEMORYCOUNT cout << "Total map size : " << totalSize << endl; cout << "Total op : " << totalOp << endl; #endif*/ } else { /*cout << ((score-m.offset)/m.granularity) << " "; cout << pv << " "; cout << m.granularity << " "; #ifdef MEMORYCOUNT cout << totalSize << " " << totalOp << " "; cout << endl; #endif*/ } } /** * FAST PVALUE */ void testFastPvalue (Matrix m, double granularity, double score) { #ifdef VERBOSE //cerr << "### FastPvalue (score " << score << ") #########################################" << endl; #endif #ifdef MEMORYCOUNT qlonglong totalSize = 0; qlonglong totalOp = 0; #endif m.computesIntegerMatrix(granularity,true); double pvalue = m.fastPvalue(&m,(qlonglong)(score * m.granularity + m.offset)); if (OPTIONS['h']) { /*cout << "Score : " << score << endl; cout << "Pvalue : " << pvalue << endl; cout << "Granularity : " << m.granularity << endl; #ifdef MEMORYCOUNT cout << "Total map size : " << totalSize << endl; cout << "Total op : " << totalOp << endl; #endif*/ } else { /*cout << score << " "; cout << pvalue << " "; cout << m.granularity << " "; #ifdef MEMORYCOUNT cout << totalSize << " " << totalOp << 
" "; cout << endl; #endif*/ } } void testScoreToPvalue (Matrix m, double initialGranularity, double requestedScore, bool forcedGranularity = false, double maxGranularity = 1e-9) { #ifdef VERBOSE //cerr << "### ScoreToPvalue (score " << requestedScore << ") #########################################" << endl; #endif #ifdef MEMORYCOUNT qlonglong totalSize = 0; qlonglong totalOp = 0; #endif qlonglong max; qlonglong min; double ppv; double pv; qlonglong score; for (double granularity = initialGranularity; granularity >= maxGranularity; granularity /= 10) { #ifdef VERBOSE //cerr << "Computing rounded matrix with granularity " << granularity << endl; #endif m.computesIntegerMatrix(granularity); max = requestedScore*m.granularity + m.offset + m.errorMax+1; min = requestedScore*m.granularity + m.offset - m.errorMax-1; score = requestedScore*m.granularity + m.offset; #ifdef VERBOSE /*cerr << "Score range : " << m.scoreRange << endl; cerr << "Min : " << min << endl; cerr << "Max : " << max << endl; cerr << "Precision : " << m.granularity << endl; cerr << "Error max : " << m.errorMax << endl; cerr << "Computing pvalue for requested score " << requestedScore << " " << score << endl;*/ #endif // computes pvalues for reachable score in range min - max #ifdef MEMORYCOUNT m.totalMapSize = 0; m.totalOp = 0; #endif m.lookForPvalue(score,min,max,&ppv,&pv); #ifdef VERBOSE /*cerr << "Prev. Pvalue : " << ppv << endl; cerr << "Pvaluex : " << pv << endl; cerr << "Comp. 
score : " << score << endl;*/ #endif #ifdef MEMORYCOUNT totalSize += m.totalMapSize; totalOp += m.totalOp; #endif #ifdef VERBOSE //cerr << "***********************************************" << endl; #endif if (ppv == pv) { #ifdef VERBOSE //cerr << "##### STOP score computed #####" << endl; #endif if (!forcedGranularity) { break; } } } if (OPTIONS['h']) { /*cout << "Score : " << ((score-m.offset)/m.granularity) << endl; cout << "Pvalue : " << pv << endl; cout << "Granularity : " << m.granularity << endl; #ifdef MEMORYCOUNT cout << "Total map size : " << totalSize << endl; cout << "Total op : " << totalOp << endl; #endif*/ } else { /*cout << ((score-m.offset)/m.granularity) << " "; cout << pv << " "; cout << m.granularity << " "; #ifdef MEMORYCOUNT cout << totalSize << " " << totalOp << " "; cout << endl; #endif*/ } } void testPvalueToScore (Matrix m, double initialGranularity, double requestedPvalue, bool forcedGranularity = false, double maxGranularity = 1e-10, bool sortColumns = false, qlonglong decrgr = 10) { #ifdef VERBOSE //cerr << "### PvalueToScore (pv " << requestedPvalue << ") #########################################" << endl; #endif #ifdef MEMORYCOUNT qlonglong totalSize; qlonglong totalOp; totalSize = 0; totalOp = 0; #endif m.computesIntegerMatrix(initialGranularity); qlonglong max = m.maxScore+ceil(m.errorMax+0.5); qlonglong min = m.minScore; double pv; qlonglong score; for (double granularity = initialGranularity; granularity >= maxGranularity; granularity /= decrgr) { #ifdef VERBOSE //cerr << "Computing rounded matrix with granularity " << granularity << endl; #endif m.computesIntegerMatrix(granularity); #ifdef VERBOSE /*cerr << "Score range : " << m.scoreRange << endl; cerr << "Min : " << min << " " << m.minScore << endl; cerr << "Max : " << max << endl; cerr << "Precision : " << m.granularity << endl; cerr << "Error max : " << m.errorMax << endl; cerr << "Computing score for requested pvalue " << requestedPvalue << endl;*/ #endif double ppv; #ifdef 
MEMORYCOUNT m.totalMapSize = 0; m.totalOp = 0; #endif score = m.lookForScore(min,max,requestedPvalue,&pv,&ppv); #ifdef VERBOSE /*cerr << "P-Pvalue : " << ppv << endl; cerr << "Pvalue : " << pv << endl; cerr << "Rounded score : " << score << endl; cerr << "Real score : " << ((score-m.offset)/m.granularity) << endl;*/ #ifdef MEMORYCOUNT //cerr << "Memory : " << m.totalMapSize << " " << totalSize << endl; #endif #endif #ifdef MEMORYCOUNT totalSize += m.totalMapSize; totalOp += m.totalOp; #endif min = (score - ceil(m.errorMax+0.5)) * decrgr; max = (score + ceil(m.errorMax+0.5)) * decrgr; #ifdef VERBOSE //cerr << "***********************************************" << endl; #endif if (pv == ppv) { #ifdef VERBOSE //cerr << "##### STOP Pvalue computed #####" << endl; #endif if (!forcedGranularity) { break; } } } if (OPTIONS['h']) { /*cout << "Score : " << ((score-m.offset)/m.granularity) << endl; cout << "Pvalue : " << pv << endl; cout << "Granularity : " << m.granularity << endl; #ifdef MEMORYCOUNT cout << "Total map size : " << totalSize << endl; cout << "Total op : " << totalOp << endl; #endif*/ } else { /*if (OPTIONS['i']) { cout << score << " "; } cout << ((score-m.offset)/m.granularity) << " "; cout << pv << " "; cout << m.granularity << " "; #ifdef MEMORYCOUNT cout << totalSize << " " << totalOp << " "; #endif cout << endl;*/ } } //testPvalueToScore void testDistrib(Matrix m, double granularity, double min, double max) { m.computesIntegerMatrix(granularity); m.showDistrib(min*m.granularity+m.offset,max*m.granularity+m.offset); } /*void usage (char * const argv[]) { cout << "Usage : " << argv[0] << " -a X -t X -g X -c X -m matrix_filename "; switch(PROGRAM) { case PV2SC: cout << " -p pvalue [-w]" << endl; cout << "Computes the score threshold associated with a P-value" << endl; break; case SC2PV: cout << "-s threshold [-w]" << endl; cout << "Computes the P-value associated with a score threshold" << endl; break; case FASTPVALUE: cout << "-s threshold -G [-w]" << endl; 
cout << "Computes the P-value associated with a score threshold for a given granularity" << endl; break; case ENUMSC: cout << "[-w -G]" << endl; cout << "Computes the number of scores of the matrix" << endl; break; case DISTRIB: cout << "-s min_score -S max_score -G granularity" << endl; cout << "Computes the distibution of scores between min_score and max_score" << endl; break; case LAZY: cout << "-p requested pvalue -G granularity" << endl; cout << "Computes the score threshold associated with P-value p using the algorithm of Beckstette 2006" << endl; break; } cout << " -a -t -c -g : background probabilies" << endl; cout << endl; cout << " -m : matrix file" << endl; #ifndef JASPAR cout << "comment on the first line" << endl; cout << "A| sc1A sc2A sc3A ..." << endl; cout << "C| sc1C sc2C sc3C ..." << endl; cout << "G| sc1G sc2G sc3G ..." << endl; cout << "T| sc1T sc2T sc3T ..." << endl; #else cout << "sc1A sc2A sc3A ..." << endl; cout << "sc1C sc2C sc3C ..." << endl; cout << "sc1G sc2G sc3G ..." << endl; cout << "sc1T sc2T sc3T ..." 
<< endl; #endif cout << endl; cout << " -w : the matrix is already a weight matrix, otherwise it is assumed to be a count matrix"<< endl; cout << endl; cout << " -p : requested pvalue" << endl; cout << endl; cout << " -s : score threshold" << endl; cout << endl; // cout << " -G : granularity for integer matrix (a floating number)" << endl; // cout << endl; } void arguments (int argc, char * const argv[]) { // parse options char option; map opt; char options[REQUIRED[PROGRAM].length()+OPTIONAL[PROGRAM].length()+1]; for (int i = 0; i < REQUIRED[PROGRAM].length(); i++) { options[i] = REQUIRED[PROGRAM][i]; } for (int i = 0; i < OPTIONAL[PROGRAM].length(); i++) { options[i+REQUIRED[PROGRAM].length()] = OPTIONAL[PROGRAM][i]; } options[REQUIRED[PROGRAM].length()+OPTIONAL[PROGRAM].length()+1] = '\0'; while (((option = getopt(argc,argv,options)) != EOF)) { if (option == '?') { throw new ArgumentException("Bad argument"); } OPTIONS[option] = optind-1; opt[option] = true; } for (int i = 0; i < REQUIRED[PROGRAM].length(); i++) { if (REQUIRED[PROGRAM][i] != ':' && !opt[REQUIRED[PROGRAM][i]]) throw new ArgumentException("Bad number of args"); } }*/ /*int main (int argc, char * const argv[]) { try { arguments(argc,argv); } catch (ArgumentException *e) { usage(argv); exit(1); } Matrix m(atof(argv[OPTIONS['a']]),atof(argv[OPTIONS['c']]),atof(argv[OPTIONS['g']]),atof(argv[OPTIONS['t']])); try { #ifndef JASPAR m.readHorizontalMatrix(argv[OPTIONS['m']]); #else m.readJasparMatrix(argv[OPTIONS['m']]); #endif } catch (FileException *e) { cerr << "Unable to open/read " << argv[OPTIONS['m']] << endl; exit(2); } catch (ParseException *e) { cerr << "The matrix " << argv[1] << " seems to be in a wrong format. The format of the matrix file is : " << endl; #ifndef JASPAR cout << "comment on the first line" << endl; cout << "A| sc1A sc2A sc3A ..." << endl; cout << "C| sc1C sc2C sc3C ..." << endl; cout << "G| sc1G sc2G sc3G ..." << endl; cout << "T| sc1T sc2T sc3T ..." 
<< endl; #else cout << "sc1A sc2A sc3A ..." << endl; cout << "sc1C sc2C sc3C ..." << endl; cout << "sc1G sc2G sc3G ..." << endl; cout << "sc1T sc2T sc3T ..." << endl; #endif cout << endl; exit(2); } if (OPTIONS['h']) { cout << "Matrix length : " << m.length << endl; } else { if (PROGRAM != DISTRIB) { cout << m.length << " "; } } if (!OPTIONS['w']) { m.toLogOddRatio(); } float start = clock(); switch (PROGRAM) { case PV2SC : testPvalueToScore(m,0.1,(atof(argv[OPTIONS['p']]))); break; case SC2PV : testScoreToPvalue(m,0.1,atof(argv[OPTIONS['s']])); break; case ENUMSC : { qlonglong nbsc = 0; if (OPTIONS['G']) { m.computesIntegerMatrix(atof(argv[OPTIONS['G']])); map t; enumScore(&m,0,0,&t); nbsc = t.size(); } else { map t; // long int sum = 0; enumScoreFloat(&m,0,0,&t);*/ /* map::reverse_iterator riter = t.rbegin(); while (riter->first >= atof(argv[3]) && riter != t.rend()) { sum += riter->second; riter++; } */ /* nbsc = t.size(); } if (OPTIONS['h']) { cout << "Number of different scores = " << nbsc << endl; cout << "Number of different words = " << (qlonglong)(pow(4.0,m.length)) << endl; } else { cout << nbsc << " " << (qlonglong)(pow(4.0,m.length)) << endl; } } break; case DISTRIB : testDistrib(m,atof(argv[OPTIONS['G']]),atof(argv[OPTIONS['s']]),atof(argv[OPTIONS['S']])); break; case FASTPVALUE: testFastPvalue(m,atof(argv[OPTIONS['G']]),atof(argv[OPTIONS['s']])); break; case LAZY: testLazyDistrib(m,atof(argv[OPTIONS['G']]),atof(argv[OPTIONS['p']])); break; } cout << "TIME:" << (clock()-start)/CLOCKS_PER_SEC; return 0; } */ TFMPvalue/src/TFMpvalue.h0000644000176200001440000000134713276761357014706 0ustar liggesusers#include #include #include #include #include #include //#include #include #include #include "Matrix.h" #include "ArgumentException.h" using namespace std; #define VERBOSE #define PRINTVERBOSE #define PV2SC 0 #define SC2PV 1 #define ENUMSC 2 #define DISTRIB 3 #define FASTPVALUE 4 #define LAZY 5 #define TOLOGRATIO 1 #define TOINTEGER 1 #ifdef VERBOSE #define 
MEMORYCOUNT #endif double _beckstette (Matrix m, map **nbOcc, map **pbuf, int pos, qlonglong score, qlonglong d); double _beckstettePbuf (Matrix m, map **nbOcc, map **pbuf, int pos, qlonglong score, qlonglong d); TFMPvalue/src/ParseException.h0000644000176200001440000000043513276761357015771 0ustar liggesusers/* * ParseException.h * pvalue * * Created by Jean-Stéphane Varré on 02/07/07. * Copyright 2007 LIFL-USTL-INRIA. All rights reserved. * */ #ifndef __PARSEEXCEPTION__ #define __PARSEEXCEPTION__ #include using namespace std; class ParseException { }; #endif TFMPvalue/src/Makevars.win0000644000176200001440000000002213276761357015147 0ustar liggesusersPKG_CXX_STD=CXX11 TFMPvalue/NAMESPACE0000644000176200001440000000033713276760366013317 0ustar liggesusersuseDynLib(TFMPvalue, .registration = TRUE) importFrom(Rcpp, evalCpp) ### ----------------------------------------------------------------- ### Export non-generic functions ### export(TFMLazyScore, TFMpv2sc, TFMsc2pv) TFMPvalue/R/0000755000176200001440000000000013276760366012276 5ustar liggesusersTFMPvalue/R/TFMPvalue-lazyScore.R0000644000176200001440000000112713276760366016176 0ustar liggesusers
### -----------------------------------------------------------------
### TFMLazyScore: score threshold for a requested P-value, computed by the
### native routine "lazyScore" at a fixed granularity.
###
### mat          matrix with row names A, C, G, T; a PFM is normalised with
###              normargMat(), a PWM is passed to the native code unchanged.
### pvalue       a single number in [0, 1].
### bg           background frequencies, a numeric vector named A, C, G, T.
### type         "PFM" (default) or "PWM".
### granularity  a single number larger than 0.
###
### Returns whatever the native "lazyScore" routine returns (the score).
### Stops with an error if any argument fails the checks below.
TFMLazyScore <- function(mat, pvalue, bg=c(A=0.25, C=0.25, G=0.25, T=0.25),
                         type=c("PFM", "PWM"), granularity=1e-5){
  type <- match.arg(type)
  if(length(pvalue) != 1L){
    stop("pvalue must be length of 1")
  }
  ## Test is.na() first: an NA p-value would otherwise make the `if`
  ## condition itself fail with an unrelated "missing value" error.
  if(is.na(pvalue) || pvalue > 1 || pvalue < 0){
    stop("pvalue must be between 0 and 1")
  }
  ## The native routine takes one granularity; reject vectors explicitly
  ## instead of relying on how `if` treats a condition of length > 1.
  if(length(granularity) != 1L){
    stop("granularity must be length of 1")
  }
  if(is.na(granularity) || granularity <= 0){
    stop("granularity must be larger than 0")
  }
  bg <- normargPriorParams(bg)
  ## Only count matrices are normalised here; a PWM is used as given.
  if(type == "PFM"){
    mat <- normargMat(mat)
  }
  .Call("lazyScore", mat, pvalue, bg, type, granularity,
        PACKAGE="TFMPvalue")
}
TFMPvalue/R/util.R0000644000176200001440000000261413276760366013401 0ustar liggesusers### Typical 'prior.params' vector: c(A=0.25, C=0.25, G=0.25, T=0.25) ### This is taken from Biostrings package matchPWM.R. ### Just to get rid of the node during the build.
DNA_BASES <- c("A", "C", "G", "T")

### Check a vector of background frequencies and return it re-ordered as
### A, C, G, T. Stops if the vector is not numeric, is not named exactly
### A, C, G and T, or contains NA or negative values.
normargPriorParams <- function(prior.params)
{
  if (!is.numeric(prior.params))
    stop("'prior.params' must be a numeric vector")
  if (length(prior.params) != length(DNA_BASES) ||
      !setequal(names(prior.params), DNA_BASES))
    stop("'prior.params' elements must be named A, C, G and T")
  ## Re-order the elements.
  prior.params <- prior.params[DNA_BASES]
  if (any(is.na(prior.params)) || any(prior.params < 0))
    stop("'prior.params' contains NAs and/or negative values")
  prior.params
}

### Check a count matrix and return its A, C, G, T rows, in that order.
### Rows with other names are tolerated only when they are all zero; they
### are dropped from the result.
normargMat <- function(x){
  if (is.null(rownames(x)))
    stop("invalid Matrix 'mat': no row names")
  ## Data frames and character matrices have row names too; reject them
  ## here rather than letting them reach the native code.
  if (!is.matrix(x) || !is.numeric(x))
    stop("invalid Matrix 'mat': must be a numeric matrix")
  if (!all(DNA_BASES %in% rownames(x)))
    stop("invalid Matrix 'mat': row names must contain A, C, G and T")
  if (any(duplicated(rownames(x))))
    stop("invalid Matrix 'mat': duplicated row names")
  if (ncol(x) == 0L)
    stop("invalid Matrix 'mat': no columns")
  if (any(is.na(x)) || any(x < 0L))
    stop("invalid Matrix 'mat': values cannot be NA or negative")
  if (any(x[!(rownames(x) %in% DNA_BASES), ] != 0L))
    stop("invalid Matrix 'mat': IUPAC ambiguity letters are represented")
  ## drop = FALSE keeps a one-column input as a matrix.
  x[DNA_BASES, , drop = FALSE]
}
TFMPvalue/R/TFMPvalue-pv2sc.R0000644000176200001440000000064213276760366015261 0ustar liggesusers
### TFMpv2sc: score threshold associated with a P-value, computed by the
### native routine "pv2sc".
###
### mat     matrix with row names A, C, G, T; a PFM is normalised with
###         normargMat(), a PWM is passed to the native code unchanged.
### pvalue  a single number in [0, 1].
### bg      background frequencies, a numeric vector named A, C, G, T.
### type    "PFM" (default) or "PWM".
TFMpv2sc <- function(mat, pvalue, bg=c(A=0.25, C=0.25, G=0.25, T=0.25),
                     type=c("PFM", "PWM")){
  ## TODO: validate 'mat' for type "PWM" as well; only PFMs go through
  ## normargMat().
  type <- match.arg(type)
  if(length(pvalue) != 1L){
    stop("pvalue must be length of 1")
  }
  ## Same range check as TFMLazyScore(); is.na() comes first so that an
  ## NA p-value gives this message instead of a failing `if` condition.
  if(is.na(pvalue) || pvalue > 1 || pvalue < 0){
    stop("pvalue must be between 0 and 1")
  }
  bg <- normargPriorParams(bg)
  if(type == "PFM"){
    mat <- normargMat(mat)
  }
  .Call("pv2sc", mat, pvalue, bg, type, PACKAGE="TFMPvalue")
}
TFMPvalue/R/TFMPvalue-sc2pv.R0000644000176200001440000000064113276760366015260 0ustar liggesusers
### TFMsc2pv: P-value associated with a score threshold, computed by the
### native routine "sc2pv".
###
### mat    matrix with row names A, C, G, T; a PFM is normalised with
###        normargMat(), a PWM is passed to the native code unchanged.
### score  a single, non-missing number.
### bg     background frequencies, a numeric vector named A, C, G, T.
### type   "PFM" (default) or "PWM".
TFMsc2pv <- function(mat, score, bg=c(A=0.25, C=0.25, G=0.25, T=0.25),
                     type=c("PFM", "PWM")){
  ## TODO: validate 'mat' for type "PWM" as well; only PFMs go through
  ## normargMat().
  type <- match.arg(type)
  if(length(score) != 1L){
    stop("score must be length of 1")
  }
  ## A missing score has no meaning for the native routine; stop here.
  if(is.na(score)){
    stop("score must not be NA")
  }
  bg <- normargPriorParams(bg)
  if(type == "PFM"){
    mat <- normargMat(mat)
  }
  .Call("sc2pv", mat, score, bg, type, PACKAGE="TFMPvalue")
}
TFMPvalue/MD50000644000176200001440000000276613276762732012417 0ustar liggesusers81fca4068c4075b8c4ae996add46ed55 *DESCRIPTION 00899d74de25880360753739f5fad85a *NAMESPACE 410d176a2d6eb5c4a912c6d53799e3e8 *R/TFMPvalue-lazyScore.R 7801f41dc3f90aabda366f5d671ba962 *R/TFMPvalue-pv2sc.R d686633ee57329cbba8dde923ee74e02 *R/TFMPvalue-sc2pv.R 1d3cb9113e7d5aa7d4809bde0e5aedc5 *R/util.R 009a8ca89d44b36998a490f259c7b4ea *inst/TFMPvalueBuild/build.R 3026c2d8ba9c238288ef9c7d57b422ce *man/TFMPvalue-lazyScore.Rd a80e07bbf7f6f1bf83939c94cce897cc *man/TFMPvalue-package.Rd 94ffff6dc2b273bd5f76f859b1dbb4a8 *man/TFMPvalue-pv2sc.Rd 3bbd196318ee1a74afccb69d60405ce6 *man/TFMPvalue-sc2pv.Rd f16f8166019475491167115de82a8eda *src/ArgumentException.cpp def9ee124e82c1345ade0dfa9805c4c0 *src/ArgumentException.h 267ef82c26a9c64b8ee7a8d1eb6c3833 *src/FileException.cpp 392381c4ca3be5bf99f779c7f894ab27 *src/FileException.h b236c1742bf694155c5e0a94bf59fcfd *src/Makevars b236c1742bf694155c5e0a94bf59fcfd *src/Makevars.win 0b28123412f041a15c646555eeb75547 *src/Matrix.cpp 68636eba6fe85eff0ccca847ab713dd3 *src/Matrix.h 1f207f93d290fcf844b53fa5d64ac8df *src/ParseException.cpp 77f11be664c3a11157caa3194f8b64b1 *src/ParseException.h 4b0627c2f585a9caf63ca67bb6ce43aa *src/TFMMain.cpp 44919b82bd6be80cb7e6e41648d830d6 *src/TFMPvalue_init.c 8074033715ba9fddfdbd81336c989841 *src/TFMpvalue.cpp 85073c71e95119f7f4279d21224af189 *src/TFMpvalue.h c2c0089bfac865cb5c74896b6f767c5e *tests/testthat/test_lazyScore.R e3a4e4e7215d10aa077e855930aaf30b *tests/testthat/test_pv2sc.R 2254b875903261ca144d93b853b5bae1 *tests/testthat/test_sc2pv.R TFMPvalue/DESCRIPTION0000644000176200001440000000244113276762732013603 0ustar liggesusersPackage: TFMPvalue Version: 0.0.8 Date: 2018-05-16 Title: Efficient and Accurate P-Value Computation for Position Weight Matrices Description: In putative Transcription Factor Binding Sites (TFBSs) identification from sequence/alignments, we are interested in
the significance of a certain match score. TFMPvalue provides accurate calculation of the P-value for a given score threshold for Position Weight Matrices, or of the score for a given P-value. It is an interface to code originally made available by Helene Touzet and Jean-Stephane Varre, 2007, Algorithms Mol Biol:2, 15. Touzet and Varre (2007) <DOI:10.1186/1748-7188-2-15>. Author: Ge Tan Maintainer: Ge Tan Copyright: 2007 LIFL-USTL-INRIA Imports: Rcpp(>= 0.11.1) Depends: R (>= 3.0.1) Suggests: testthat LinkingTo: Rcpp License: GPL-2 URL: https://github.com/ge11232002/TFMPvalue BugReports: https://github.com/ge11232002/TFMPvalue/issues Type: Package NeedsCompilation: yes SystemRequirements: C++11 LazyData: yes Collate: TFMPvalue-sc2pv.R TFMPvalue-pv2sc.R TFMPvalue-lazyScore.R util.R Packaged: 2018-05-16 07:51:11 UTC; gtan Repository: CRAN Date/Publication: 2018-05-16 08:03:38 UTC TFMPvalue/man/0000755000176200001440000000000013276760366012650 5ustar liggesusersTFMPvalue/man/TFMPvalue-lazyScore.Rd0000644000176200001440000000300213276760366016706 0ustar liggesusers\name{TFMLazyScore} \alias{TFMLazyScore} \title{ Compute the score from P-value. } \description{ Computes the score threshold associated with P-value p using the algorithm of Beckstette 2006. } \usage{ TFMLazyScore(mat, pvalue, bg=c(A=0.25, C=0.25, G=0.25, T=0.25), type=c("PFM", "PWM"), granularity=1e-5) } \arguments{ \item{mat}{ The input matrix. It can be a Position Frequency Matrix (PFM) or Position Weight Matrix (PWM) in log ratio. The matrix must have row names with "A", "C", "G", "T". } \item{pvalue}{ The required P-value. } \item{bg}{ The background frequency of the sequences. A numeric vector with names "A", "C", "G", "T". } \item{type}{ The type of input matrix. Can be "PFM" or "PWM". } \item{granularity}{ The granularity used in the computation. } } \value{ The score is returned based on the matrix, given P-value and granularity.
} \author{ Ge Tan } \examples{ \donttest{ ## This example is not tested due to running time > 5s pfm <- matrix(c(3, 5, 4, 2, 7, 0, 3, 4, 9, 1, 1, 3, 3, 6, 4, 1, 11, 0, 3, 0, 11, 0, 2, 1, 11, 0, 2, 1, 3, 3, 2, 6, 4, 1, 8, 1, 3, 4, 6, 1, 8, 5, 1, 0, 8, 1, 4, 1, 9, 0, 2, 3, 9, 5, 0, 0, 11, 0, 3, 0, 2, 7, 0, 5), nrow = 4, dimnames = list(c("A","C","G","T")) ) bg <- c(A=0.25, C=0.25, G=0.25, T=0.25) pvalue <- 1e-5 type <- "PFM" granularity <- 1e-5 TFMLazyScore(pfm, pvalue, bg, type, granularity) } } TFMPvalue/man/TFMPvalue-sc2pv.Rd0000644000176200001440000000262313276760366016000 0ustar liggesusers\name{TFMsc2pv} \alias{TFMsc2pv} \title{ Compute P-value from score. } \description{ Computes the P-value associated with a score threshold. } \usage{ TFMsc2pv(mat, score, bg=c(A=0.25, C=0.25, G=0.25, T=0.25), type=c("PFM", "PWM")) } \arguments{ \item{mat}{ The input matrix. It can be a Position Frequency Matrix (PFM) or Position Weight Matrix (PWM) in log ratio. The matrix must have row names with "A", "C", "G", "T". } \item{score}{ The required score. } \item{bg}{ The background frequency of the sequences. A numeric vector with names "A", "C", "G", "T". } \item{type}{ The type of input matrix. Can be "PFM" or "PWM". } } \value{ The P-value is returned based on the matrix, given the desired score. } \references{ Touzet, H., and Varre, J.-S. (2007). Efficient and accurate P-value computation for Position Weight Matrices. Algorithms Mol Biol 2, 15. 
} \author{ Ge Tan } \examples{ pfm <- matrix(c(3, 5, 4, 2, 7, 0, 3, 4, 9, 1, 1, 3, 3, 6, 4, 1, 11, 0, 3, 0, 11, 0, 2, 1, 11, 0, 2, 1, 3, 3, 2, 6, 4, 1, 8, 1, 3, 4, 6, 1, 8, 5, 1, 0, 8, 1, 4, 1, 9, 0, 2, 3, 9, 5, 0, 0, 11, 0, 3, 0, 2, 7, 0, 5), nrow = 4, dimnames = list(c("A","C","G","T")) ) bg <- c(A=0.25, C=0.25, G=0.25, T=0.25) score <- 8.77 type <- "PFM" pvalue <- TFMsc2pv(pfm, score, bg, type) } TFMPvalue/man/TFMPvalue-pv2sc.Rd0000644000176200001440000000262013276760366015775 0ustar liggesusers\name{TFMpv2sc} \alias{TFMpv2sc} \title{ Compute score from P-value. } \description{ Computes the score threshold associated with a P-value. } \usage{ TFMpv2sc(mat, pvalue, bg=c(A=0.25, C=0.25, G=0.25, T=0.25), type=c("PFM", "PWM")) } \arguments{ \item{mat}{ The input matrix. It can be a Position Frequency Matrix (PFM) or Position Weight Matrix (PWM) in log ratio. The matrix must have row names with "A", "C", "G", "T". } \item{pvalue}{ The required P-value. } \item{bg}{ The background frequency of the sequences. A numeric vector with names "A", "C", "G", "T". } \item{type}{ The type of input matrix. Can be "PFM" or "PWM". } } \value{ The score is returned based on the matrix, given P-value. } \references{ Touzet, H., and Varre, J.-S. (2007). Efficient and accurate P-value computation for Position Weight Matrices. Algorithms Mol Biol 2, 15. 
} \author{ Ge Tan } \examples{ pfm <- matrix(c(3, 5, 4, 2, 7, 0, 3, 4, 9, 1, 1, 3, 3, 6, 4, 1, 11, 0, 3, 0, 11, 0, 2, 1, 11, 0, 2, 1, 3, 3, 2, 6, 4, 1, 8, 1, 3, 4, 6, 1, 8, 5, 1, 0, 8, 1, 4, 1, 9, 0, 2, 3, 9, 5, 0, 0, 11, 0, 3, 0, 2, 7, 0, 5), nrow = 4, dimnames = list(c("A","C","G","T")) ) bg <- c(A=0.25, C=0.25, G=0.25, T=0.25) pvalue <- 1e-5 type <- "PFM" score <- TFMpv2sc(pfm, pvalue, bg, type) } TFMPvalue/man/TFMPvalue-package.Rd0000644000176200001440000000156213276760366016337 0ustar liggesusers\name{TFMPvalue-package} \alias{TFMPvalue-package} \docType{package} \title{ Efficient and accurate P-value computation for Position Weight Matrices } \description{ This package provides a novel algorithm that solves the P-value calculation problem given the score based on a Position Weight Matrix (PWM), or the reverse problem: finding the score given the desired P-value. This package is an interface to code originally made available by Helene Touzet and Jean-Stephane Varre, 2007, Algorithms Mol Biol:2, 15. } \details{ The original code is taken from \url{http://bioinfo.lifl.fr/TFM/TFMpvalue/TFM-Pvalue.tar.gz}, retrieved 26/03/2014. The algorithm is described in Touzet, H., and Varre, J.-S. (2007). Efficient and accurate P-value computation for Position Weight Matrices. Algorithms Mol Biol 2, 15. } \author{ Ge Tan } \keyword{package}