fingerprint/0000755000176200001440000000000013224521352012600 5ustar liggesusersfingerprint/inst/0000755000176200001440000000000011447374434013571 5ustar liggesusersfingerprint/inst/unitTests/0000755000176200001440000000000012224264435015564 5ustar liggesusersfingerprint/inst/unitTests/reportSummary.txt0000644000176200001440000000041411571714217021217 0ustar liggesusersRUNIT TEST PROTOCOL -- Thu Jun 2 10:05:03 2011 *********************************************** Number of test functions: 16 Number of errors: 0 Number of failures: 0 1 Test Suite : fingerprint fingerprint Unit Tests - 16 test functions, 0 errors, 0 failures fingerprint/inst/unitTests/Makefile0000644000176200001440000000036313224262715017226 0ustar liggesusersTOP=../.. PKG=${shell cd ${TOP};pwd} SUITE=doRUnit.R R=R all: inst test inst: # Install package cd ${TOP}/..;\ ${R} CMD INSTALL ${PKG} test: # Run unit tests export RCMDCHECK=FALSE;\ cd ${TOP}/tests;\ ${R} --vanilla --slave < ${SUITE} fingerprint/inst/unitTests/test.ecfp0000644000176200001440000001305512224260514017400 0ustar liggesusersmol01 17 0 16 3 1 1747237384 1499521844 -1539132615 1294255210 332760439 -1549163031 1035613116 1618154665 590925877 1872154524 -1143715940 203677720 -1272768868 136120670 136597326 -1460348762 -1262922302 -1201618245 -402549409 -1270820019 929601590 -1597477966 -1274743746 -1155471474 1258428229 -1838187238 -798628285 -1773728142 -773983804 -453677277 1674451008 65948508 991735244 -1412946825 846704869 -2103621484 -886204842 1725648567 -353343892 -585443181 -533273616 2031084733 -801248129 1752802620 -976015189 -992213424 2109043264 -790336137 630139722 -505031736 -1427697183 -2090462286 -1724769936 mol02 16 9 1 0 17 32 332760439 -1362791977 367998008 1035613116 -1277879912 1747237384 71476542 -124655670 203677720 1618154665 907007053 -1707366455 1969481564 -1597477966 1966552162 547884906 -1270820019 -2135641502 -497728148 1674451008 -453677277 -2005085798 2047992816 786486417 1523337873 -2045753164 859018953 
404853571 1383886699 -745001879 1985089045 -1445962196 mol03 16 1 0 17 3 32 7 332760439 367998008 1035613116 566058135 1747237384 580900652 907007053 1070061035 71476542 203677720 -124655670 -548602426 1618154665 -1707366455 1969481564 -1597477966 -881072729 547884906 -1564724132 -1270820019 -2004812302 -497728148 -2135641502 1674451008 -453677277 2047992816 786486417 1523337873 242457334 -2045753164 859018953 265023308 1381300059 404853571 -745001879 1985089045 mol04 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 -1549103449 1747237384 1035613116 1294255210 590925877 332760439 -124655670 260476081 1872392852 1872154524 71953198 367998008 71476542 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1925475824 885225145 -1598679931 1175232969 -1155471474 1258428229 1506190109 -581879738 -453677277 -745491832 551850122 -773983804 1674451008 991735244 689610531 -888075169 650647287 -1799143719 241406177 1119771930 -1139544385 1139671217 2111406068 -800045143 -10819545 384221478 -1206981816 -1508180856 -149636017 -505031736 -1427697183 -2090462286 -1724769936 mol05 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 -1549103449 1747237384 1035613116 1294255210 590925877 332760439 -124655670 260476081 1872392852 1872154524 71953198 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1925475824 885225145 -1598679931 1175232969 -1155471474 1258428229 1506190109 -581879738 -773983804 1674451008 -453677277 991735244 689610531 -888075169 650647287 -1799143719 241406177 1119771930 -1139544385 1139671217 2111406068 -800045143 -10819545 384221478 -505031736 -1427697183 -1205069278 -2090462286 -1724769936 -1698724694 -2093839777 mol06 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 -1549103449 1747237384 1035613116 1294255210 590925877 332760439 -124655670 260476081 1872392852 -836633685 1872154524 71953198 136597326 134603128 1579401580 
-1641408229 1997806766 192331578 -98859492 -1925475824 885225145 -1598679931 1175232969 -1155471474 1258428229 1506190109 -581879738 -1454111645 289095609 -453677277 -773983804 1674451008 991735244 689610531 -888075169 650647287 -1799143719 241406177 1119771930 -1139544385 1139671217 2111406068 -800045143 -10819545 384221478 1724895444 1790572653 1785362907 -505031736 -1427697183 -1724769936 -2090462286 mol07 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 -1549103449 1618154665 1747237384 1035613116 1294255210 -1539132615 590925877 332760439 -124655670 260476081 1872392852 1872154524 71953198 134603128 1579401580 -1641408229 1997806766 192331578 -1926447181 -98859492 885225145 -1598679931 1175232969 -1199556931 -1155471474 1258428229 -1462709112 1506190109 -1280036918 -1695756380 730557100 -773983804 1674451008 991735244 689610531 -888075169 650647287 1033863897 -1799143719 1119771930 -1139544385 1646645826 1040131620 2111406068 -800045143 1132802373 -10819545 137138064 -505031736 -1427697183 -2090462286 -1724769936 mol08 16 17 1 9 0 32 -1410079687 1747237384 675769755 178336375 -1362791977 -1343180157 1618154665 -1277879912 -1272768868 367998008 -587569116 71476542 -939475899 -1044865801 946229467 193705859 1852108031 557002734 1967609676 -822042736 713358128 -745491832 -964367925 -270564593 551850122 -2122102020 679321016 48182684 210231571 281647195 516865083 1706555375 -362593762 1475536852 -1294566343 461422072 -1516875559 566085027 mol09 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 1499521844 1747237384 1035613116 1294255210 332760439 590925877 -124655670 260476081 1872392852 1872154524 71953198 367998008 71476542 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1262922302 885225145 -1598679931 -402549409 1258428229 -1155471474 1506190109 -1838187238 -745491832 -773983804 551850122 1674451008 991735244 689610531 -888075169 650647287 -1799143719 846704869 1119771930 
-1139544385 -886204842 -800045143 2031084733 -10819545 1752802620 -976015189 -1508180856 -794597678 -175681259 -1427697183 -505031736 -1724769936 -2090462286 mol10 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 1499521844 1747237384 1035613116 1294255210 332760439 590925877 -124655670 260476081 1872392852 1872154524 71953198 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1262922302 885225145 -1598679931 -402549409 1258428229 -1155471474 1506190109 -1838187238 1674451008 -773983804 991735244 689610531 -888075169 650647287 -1799143719 846704869 1119771930 -1139544385 -886204842 -800045143 2031084733 -10819545 1752802620 -976015189 -1427697183 -505031736 -792685140 -1724769936 -2090462286 -2093839777 fingerprint/inst/unitTests/report.html0000644000176200001440000001145211571714217017772 0ustar liggesusers RUNIT TEST PROTOCOL--Thu Jun 2 10:05:03 2011

RUNIT TEST PROTOCOL--Thu Jun 2 10:05:03 2011

Number of test functions: 16

Number of errors: 0

Number of failures: 0


1 Test suite

Name Test functions Errors Failures
fingerprint fingerprint Unit Tests 16 0 0

Details

Test Suite: fingerprint fingerprint Unit Tests
Test function regexp: ^test.+
Test file regexp: ^runit.+\.[rR]$
Involved directory:
/Users/guhar/src/cdkr/fingerprint/tests/../inst/unitTests

Name Value
platform i386-apple-darwin9.8.0
arch i386
os darwin9.8.0
system i386, darwin9.8.0
status
major 2
minor 11.0
year 2010
month 04
day 22
svn rev 51801
language R
version.string R version 2.11.0 (2010-04-22)
host Rajarshi-Guha-MacBook-Pro.local
compiler NA
fingerprint/inst/unitTests/runit.fp.R0000644000176200001440000001556612224264435017471 0ustar liggesuserstest.new.fp <- function() { fp <- new("fingerprint", bits=c(1,2,3,4), nbit=8, provider='rg',name='foo') checkTrue(!is.null(fp)) } test.distance1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) d <- distance(fp1,fp2) checkEquals(d, 0) } test.distance2 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) d <- distance(fp1,fp2) checkEquals(d, 1) } test.and1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fpnew <- fp1 & fp2 bits <- fpnew@bits checkTrue( all(bits == c(1,2,3,4))) } test.and2 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fpnew <- fp1 & fp2 bits <- fpnew@bits checkEquals(length(bits),0) } test.or1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fpnew <- fp1 | fp2 bits <- fpnew@bits checkTrue(all(bits == c(1,2,3,4,5,6,7,8))) } test.or2 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fpnew <- fp1 | fp2 bits <- fpnew@bits checkTrue(all(bits == c(1,2,3,4))) } test.not <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) nfp1 <- !fp1 checkTrue(all(nfp1@bits == c(5,6,7,8))) checkTrue(all(fp1@bits == (!nfp1)@bits)) } test.xor1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fpnew <- xor(fp1,fp2) bits <- fpnew@bits checkEquals(length(bits),0) } test.xor2 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fpnew <- xor(fp1,fp2) bits <- fpnew@bits checkEquals(length(bits),8) checkTrue(all(bits == 
c(1,2,3,4,5,6,7,8))) } test.fold1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) nfp <- fold(fp1) checkTrue(all(nfp@bits == c(1,2,3,4))) } test.fold2 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4,8), nbit=8) nfp <- fold(fp1) checkTrue(all(nfp@bits == c(1,2,3,4))) } test.fp.to.matrix <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fp3 <- new("fingerprint", bits=c(1,2,3,5,6,7,8), nbit=8) m1 <- fp.to.matrix(list(fp1,fp2,fp3)) m2 <- rbind(c(1,1,1,1,0,0,0,0), c(0,0,0,0,1,1,1,1), c(1,1,1,0,1,1,1,1)) checkTrue(all(m1 == m2)) } test.tversky.1 <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) s <- distance(fp1, fp2, "tversky", a=1,b=1) checkEquals(1.0, s) } test.tversky.2 <- function() { fp1 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) s <- distance(fp1, fp2, "tversky", a=1,b=1) checkEquals(0.0, s) } test.tversky.3 <- function() { fp1 <- new("fingerprint", bits=c(4,6,7,8), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) stv <- distance(fp1, fp2, "tversky", a=1,b=1) sta <- distance(fp1, fp2) checkEquals(stv, sta) } test.tversky.4 <- function() { fp1 <- new("fingerprint", bits=c(4,6,7,8), nbit=8) fp2 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) stv <- distance(fp1, fp2, "tversky", a=0.5,b=0.5) std <- distance(fp1, fp2, "dice") checkEquals(stv, std) } test.fp.sim.matrix <- function() { fp1 <- new("fingerprint", bits=c(1,2,3,4), nbit=8) fp2 <- new("fingerprint", bits=c(5,6,7,8), nbit=8) fp3 <- new("fingerprint", bits=c(1,2,3,5,6,7,8), nbit=8) fpl <- list(fp1,fp2,fp3) sm <- round(fp.sim.matrix(fpl),2) am <- rbind(c(1,0,0.38), c(0,1,0.57), c(0.38,0.57,1)) checkTrue(all(sm == am)) } test.fp.balance <- function() { fp1 <- new("fingerprint", bits=c(1,2,3), nbit=6) fp2 <- balance(fp1) checkTrue(12 == length(fp2)) checkEquals(c(1,2,3,10,11,12), fp2@bits) 
} test.fps.reader <- function() { data.file <- file.path(system.file("unitTests", "bits.fps", package="fingerprint")) fps <- fp.read(data.file, lf=fps.lf) checkEquals(323, length(fps)) ## OK, we need to pull in the bit positions Andrew specified for (i in seq_along(fps)) { expected <- sort(as.numeric(strsplit(fps[[i]]@misc[[1]],",")[[1]])+1) observed <- sort(fps[[i]]@bits) checkEquals(expected, observed, msg = sprintf("%s had a mismatch in bit positions", fps[[i]]@name)) } } ####################################### ## ## Feature vector tests ## ####################################### test.feature <- function() { f1 <- new('feature', feature='F1') checkEquals(1, f1@count) f2 <- new('feature', feature='F2', count=as.integer(12)) checkEquals(12, f2@count) } test.feature.c <- function() { f1 <- new('feature', feature='F1', count=as.integer(2)) f2 <- new('feature', feature='F2', count=as.integer(3)) fl <- c(f1, f2) checkEquals(2, length(fl)) checkEquals("list", class(fl)) checkTrue(identical(f1, fl[[1]])) checkTrue(identical(f2, fl[[2]])) } test.feature.fp <- function() { feats <- sapply(letters[1:10], function(x) new('feature', feature=x, count=as.integer(1))) fv <- new('featvec', features=feats) checkEquals(10, length(fv)) } test.feature.dist1 <- function() { f1 <- sapply(letters[1:10], function(x) new('feature', feature=x, count=as.integer(1))) f2 <- sapply(letters[1:10], function(x) new('feature', feature=x, count=as.integer(1))) fv1 <- new('featvec', features=f1) fv2 <- new('featvec', features=f2) d <- distance(fv1, fv2, method='tanimoto') checkEquals(1, d) } test.feature.dist2 <- function() { f1 <- sapply(letters[1:10], function(x) new('feature', feature=x, count=as.integer(1))) f2 <- sapply(letters[11:20], function(x) new('feature', feature=x, count=as.integer(1))) fv1 <- new('featvec', features=f1) fv2 <- new('featvec', features=f2) d <- distance(fv1, fv2, method='tanimoto') checkEquals(0, d) } test.featvec.read <- function() { data.file <- 
file.path(system.file("unitTests", "test.ecfp", package="fingerprint")) fps <- fp.read(data.file, lf=ecfp.lf, binary=FALSE) checkEquals(10, length(fps)) lengths <- c(58L, 38L, 43L, 66L, 62L, 66L, 65L, 44L, 66L, 61L) ol <- sapply(fps, length) checkTrue(identical(lengths, ol)) } tester.getters.setters <- function() { f <- new("feature", feature='ABCD', count=as.integer(1)) checkEquals("ABCD", feature(f)) checkEquals(1, count(f)) feature(f) <- 'UXYZ' count(f) <- 10 checkEquals("UXYZ", feature(f)) checkEquals(10, count(f)) } fingerprint/inst/unitTests/bits.fps0000644000176200001440000006450111571516543017251 0ustar liggesusers#FPS1 #num_bits=166 #software=OEChem/1.7.4 (20100809) #type=RDMACCS-OpenEye/1 #source=bits.smi #date=2011-06-01T12:49:56 010000000000000000000000000000000000000000 Fake-0 0 000000000008000000000000000000000000000000 Fake-43 43 000000000400000000000000000000000000000000 [Li] 34 000200000000000000000000000000000000000000 [Be] 9 000002000000000000000000000000000000000000 [B] 17 000000000000000000000000000000000000000001 [N] 160 000000000000000000000000000000000000000008 [O] 163 000000000002000000000000000000002000000000 [F] 41,133 000000000400000000000000000000000000000000 [Na] 34 000200000000000000000000000000000000000000 [Mg] 9 000002000000000000000000000000000000000000 [Al] 17 000008000000000000000000000000000000000000 [Si] 19 000000100000000000000000000000000000000000 [P] 28 000000000000000000008000000000000000000000 [S] 87 000000000000000000000000400000002000000000 [Cl] 102,133 000000000400000000000000000000000000000000 [K] 34 000200000000000000000000000000000000000000 [Ca] 9 100000000000000000000000000000000000000000 [Sc] 4 100000000000000000000000000000000000000000 [Ti] 4 400000000000000000000000000000000000000000 [V] 6 400000000000000000000000000000000000000000 [Cr] 6 400000000000000000000000000000000000000000 [Mn] 6 000100000000000000000000000000000000000000 [Fe] 8 000100000000000000000000000000000000000000 [Co] 8 
000100000000000000000000000000000000000000 [Ni] 8 000800000000000000000000000000000000000000 [Cu] 11 000800000000000000000000000000000000000000 [Zn] 11 000002000000000000000000000000000000000000 [Ga] 17 040000000000000000000000000000000000000000 [Ge] 2 040000000000000000000000000000000000000000 [As] 2 040000000000000000000000000000000000000000 [Se] 2 000000000020000000000000000000002000000000 [Br] 45,133 000000000400000000000000000000000000000000 [Rb] 34 000200000000000000000000000000000000000000 [Sr] 9 100000000000000000000000000000000000000000 [Y] 4 100000000000000000000000000000000000000000 [Zr] 4 400000000000000000000000000000000000000000 [Nb] 6 400000000000000000000000000000000000000000 [Mo] 6 400000000000000000000000000000000000000000 [Tc] 6 000100000000000000000000000000000000000000 [Ru] 8 000100000000000000000000000000000000000000 [Rh] 8 000100000000000000000000000000000000000000 [Pd] 8 000800000000000000000000000000000000000000 [Ag] 11 000800000000000000000000000000000000000000 [Cd] 11 000002000000000000000000000000000000000000 [In] 17 040000000000000000000000000000000000000000 [Sn] 2 040000000000000000000000000000000000000000 [Sb] 2 040000000000000000000000000000000000000000 [Te] 2 000000040000000000000000000000002000000000 [I] 26,133 000000000400000000000000000000000000000000 [Cs] 34 000200000000000000000000000000000000000000 [Ba] 9 200000000000000000000000000000000000000000 [La] 5 200000000000000000000000000000000000000000 [Ce] 5 200000000000000000000000000000000000000000 [Pr] 5 200000000000000000000000000000000000000000 [Nd] 5 200000000000000000000000000000000000000000 [Pm] 5 200000000000000000000000000000000000000000 [Sm] 5 200000000000000000000000000000000000000000 [Eu] 5 200000000000000000000000000000000000000000 [Gd] 5 200000000000000000000000000000000000000000 [Tb] 5 200000000000000000000000000000000000000000 [Dy] 5 200000000000000000000000000000000000000000 [Ho] 5 200000000000000000000000000000000000000000 [Er] 5 
200000000000000000000000000000000000000000 [Tm] 5 200000000000000000000000000000000000000000 [Yb] 5 200000000000000000000000000000000000000000 [Lu] 5 100000000000000000000000000000000000000000 [Hf] 4 400000000000000000000000000000000000000000 [Ta] 6 400000000000000000000000000000000000000000 [W] 6 400000000000000000000000000000000000000000 [Re] 6 000100000000000000000000000000000000000000 [Os] 8 000100000000000000000000000000000000000000 [Ir] 8 000100000000000000000000000000000000000000 [Pt] 8 000800000000000000000000000000000000000000 [Au] 11 000800000000000000000000000000000000000000 [Hg] 11 040002000000000000000000000000000000000000 [Tl] 2,17 040000000000000000000000000000000000000000 [Pb] 2 040000000000000000000000000000000000000000 [Bi] 2 000000000400000000000000000000000000000000 [Fr] 34 000200000000000000000000000000000000000000 [Ra] 9 080000000000000000000000000000000000000000 [Ac] 3 080000000000000000000000000000000000000000 [Th] 3 080000000000000000000000000000000000000000 [Pa] 3 080000000000000000000000000000000000000000 [U] 3 080000000000000000000000000000000000000000 [Np] 3 080000000000000000000000000000000000000000 [Pu] 3 080000000000000000000000000000000000000000 [Am] 3 080000000000000000000000000000000000000000 [Cm] 3 080000000000000000000000000000000000000000 [Bk] 3 080000000000000000000000000000000000000000 [Cf] 3 080000000000000000000000000000000000000000 [Es] 3 080000000000000000000000000000000000000000 [Fm] 3 080000000000000000000000000000000000000000 [Md] 3 080000000000000000000000000000000000000000 [No] 3 0a0000000000000000000000000000000000000000 [Lr] 3,1 000000000000000000000000000000000000000016 c1ccccc1 161,162,164 000000000000000000000000000000100000010036 c1ccccc1.c1ccccc1 124,144,161,162,164,165 000000100000000000000000020000000001000016 c1ccccp1 28,97,136,161,162,164 00000000000000010000048000000000000100001a c1ccco1 56,82,95,163,136,161,164 000000000800000000008480000000000001000012 c1cccs1 35,82,87,95,136,161,164 
040000000000000000000480000000000001000012 [se]1cccc1 2,82,95,136,161,164 040000000000000000000480000000000001000012 [te]1cccc1 2,82,95,136,161,164 000400000000000000000000000020000000040010 C1CCC1 10,117,146,164 800400000004280038500020000080090621400011 N1NNN1 7,10,42,51,53,67,68,69,76,78,93,119,120,123,129,130,136,141,150,160,164 0010800000000000500220302000000a000418a009 ON(C)C 12,23,68,70,73,85,92,93,101,121,123,138,147,148,157,159,160,163 0010840000100000503400602400c04b0065082019 On1cncccc1 12,18,23,44,68,70,74,76,77,93,94,98,101,118,119,120,121,123,126,136,138,141,142,147,157,160,163,164 0010800000100001400400302600800b000108b01d c1ccon(C)c1 12,23,44,56,70,74,92,93,97,98,101,119,120,121,123,136,147,156,157,159,160,162,163,164 0010840000000001410004a03401810b008108181b c1cccon2c13.c2cc3 12,18,23,56,64,70,82,93,95,98,100,101,104,112,119,120,121,123,136,143,147,155,156,160,161,163,164 0010000000000140400220302000400a000018a009 O=[N+](C)C 12,48,62,70,73,85,92,93,101,118,121,123,147,148,157,159,160,163 0010800002000100502000302000400a000408a009 O[N+](=C)C 12,23,33,48,68,70,77,92,93,101,118,121,123,138,147,157,159,160,163 00200000000000001c008000000000080400000000 SS 13,66,67,68,87,123,130 004000000004000000000000000200040404025008 OC(O)O 14,42,105,122,130,138,145,156,158,163 004000000004000000000000000200040404025208 O=C(O)O 14,42,105,122,130,138,145,153,156,158,163 004000000004000000000000000200040404025208 OC(=O)O 14,42,105,122,130,138,145,153,156,158,163 004000000004000000000000000200040404025208 OC(O)=O 14,42,105,122,130,138,145,153,156,158,163 00400000000000018000048000028144004102421a c1coc(=O)o1 14,56,71,82,95,105,112,119,122,126,136,142,145,153,158,161,163,164 008020000000000100002000001000000001041118 C1CO1 15,21,56,85,108,136,146,152,156,163,164 000020000000000000000000000020000000040010 C1CC1 21,117,146,164 000000000200000000000000040000000000000000 C=C 33,98 000001000000000000000000000000000000000000 C#C 16 000004000000000000000000000020800100040010 
C1CCCCCC1 18,117,127,128,146,164 00000000000000000000000000082e800100148400 CCCCCCC 107,113,114,115,117,127,128,146,148,154,159 000010000214000000080808042010000404403809 C=C(N)O 20,33,42,44,75,83,91,98,109,116,130,138,150,155,156,157,160,163 000000000220000000080000040400002000000016 C=C(c1ccccc1)Br 33,45,75,98,106,133,161,162,164 000040000004000000000800002210040404407809 NC(O)O 22,42,83,105,109,116,122,130,138,150,155,156,157,158,160,163 000040000004000000000800002210040404407a09 NC(=O)O 22,42,83,105,109,116,122,130,138,150,153,155,156,157,158,160,163 000040000004000000200000002250040404405809 N=C(O)O 22,42,77,105,109,116,118,122,130,138,150,155,156,158,160,163 0000400000000801018004e80132936d006110dd1b CCOc1nnc(o1)C 22,51,56,64,79,82,91,93,94,95,96,105,108,109,112,113,116,119,120,122,123,125,126,136,141,142,148,152,154,155,156,158,159,160,161,163,164 000000010004000000100800000200000420402801 NC(N)N 24,42,76,83,105,130,141,150,155,157,160 00000002000002000008008604012040014484101c C1CC2=CC(C1C2)O 25,49,75,89,90,95,98,104,117,126,128,138,142,146,151,156,162,163,164 000000080004000000100a00080000000420402501 NCN 27,42,76,81,83,99,130,141,150,152,154,157,160 000000080004000000100a00080000000420402501 NC([H])N 27,42,76,81,83,99,130,141,150,152,154,157,160 000000080004000000100a00080000000420402501 NC([H])([H])N 27,42,76,81,83,99,130,141,150,152,154,157,160 000008200000000000022010008000000010188000 C[Si](C)(C)C 19,29,73,85,92,111,140,147,148,159 000000400002000010000820000000082000400001 NF 30,41,68,83,93,123,133,150,160 000000800100000014008830000000080000408001 CSN 31,32,66,68,83,87,92,93,123,150,159,160 00000000010000001c008820000000080400400001 NS 32,66,67,68,83,87,93,123,130,150,160 000000000100000014018020000040080000400001 N=S 32,66,68,72,87,93,118,123,150,160 000000000200000000000000040000000000000000 C=C 33,98 000000000200000000000000040000000000000000 [H]C([H])=C([H])[H] 33,98 000000000200000000200000000040000000400001 C=N 33,77,118,150,160 
000000000000000000200000000040000000400001 [CH]=N 77,118,150,160 000000000000000000200000000040000000400001 [CH]=N 77,118,150,160 00000000080000000000a480000020000103040110 S1CCCC1 35,82,85,87,95,117,128,136,137,146,152,164 00000000080000000000a480000020000103040110 S1C([H])C([H])C([H])C1 35,82,85,87,95,117,128,136,137,146,152,164 00000000080000000000a480000020000103040110 S1CCCC1[H] 35,82,85,87,95,117,128,136,137,146,152,164 000000000800000000008480000000000001000012 s1cccc1 35,82,87,95,136,161,164 000000001004000000100800002210000424403809 NC(O)N 36,42,76,83,105,109,116,130,138,141,150,155,156,157,160,163 000000001004000000300800002250000424403809 N=C(O)N 36,42,76,77,83,105,109,116,118,130,138,141,150,155,156,157,160,163 000000001004000000100800002210000420402a09 NC(=O)N 36,42,76,83,105,109,116,130,141,150,153,155,157,160,163 000000801940300015d08da0012291495461402a1b c1(nc(=O)[nH]s1)N 31,32,35,36,46,52,53,64,66,68,76,78,79,80,82,83,87,93,95,96,105,109,112,116,119,120,123,126,130,132,134,136,141,142,150,153,155,157,160,161,163,164 00000000200400000010080000000000042040a801 NC(C)N 37,42,76,83,130,141,150,155,157,159,160 00000000200400000030080000004000042040a801 N=C(C)N 37,42,76,77,83,118,130,141,150,155,157,159,160 000010002214000000180800040000000420402801 NC(=C)N 20,33,37,42,44,75,76,83,98,130,141,150,155,157,160 000000802940080025d08ca0000080095221402813 c1c(nns1)N 31,32,35,37,46,51,64,66,69,76,78,79,82,83,87,93,95,119,120,123,129,132,134,136,141,150,155,157,160,161,164 000000002000080011d004e0010080090021408a1b Cc1[nH]nc(n1)C=O 37,51,64,68,76,78,79,82,93,94,95,96,119,120,123,136,141,150,153,155,159,160,161,163,164 00000000c080411a1401800020820008820c0a4008 OS(=O)(=O)[O-] 38,39,47,48,54,57,59,60,66,68,72,87,101,105,111,123,129,135,138,139,145,147,158,163 00000000c080401a140180002002000802040a4008 OS(=O)[O] 38,39,47,54,57,59,60,66,68,72,87,101,105,123,129,138,145,147,158,163 00000000c080541a14018202a08220088e060e4508 C(CS(=O)(=O)O)S 
38,39,47,50,52,54,57,59,60,66,68,72,81,87,89,101,103,105,111,117,123,129,130,131,135,137,138,145,146,147,152,154,158,163 00000000800000001c008000200000080404000008 S-O 39,66,67,68,87,101,123,130,138,163 00000000800000001c008000200000080404000008 [H]S-O[H] 39,66,67,68,87,101,123,130,138,163 000000000001000000000000000000000000000001 C#N 40,160 000000000200000000200000000040000000400001 C=N 33,77,118,150,160 000000000200000000200000000040000000400001 [H]C=N[H] 33,77,118,150,160 000000000200000000200000000040000000400001 [H]C([H])=N[H] 33,77,118,150,160 000000080004000000000a00083010000404403509 NCO 27,42,81,83,99,108,109,116,130,138,150,152,154,156,157,160,163 000000080004000000000a00083010000404403509 [H][NH]CO 27,42,81,83,99,108,109,116,130,138,150,152,154,156,157,160,163 000000080004000000000a00083010000404403509 [H][N]([H])CO 27,42,81,83,99,108,109,116,130,138,150,152,154,156,157,160,163 000000000210000000000800040000000000402001 C=CN 33,44,83,98,150,157,160 000000000210000000200000040040000000400001 C=C=N 33,44,77,98,118,150,160 000000080044000000008a00080000000400402501 SCN 27,42,46,81,83,87,99,130,150,152,154,157,160 000080008044000054008820200000280600400009 SON 23,39,42,46,66,68,70,83,87,93,101,123,125,129,130,150,160,163 000000000040000000018800000000000000402001 S=CN 46,72,83,87,150,157,160 000000000044000000208000000040000400400001 SC=N 42,46,77,87,118,130,150,160 000000000040000000218000000040000000400001 S=C=N 46,72,77,87,118,150,160 0010800088400410550484a02400816b024108001b c1ccc[sH]1On1cccc1 12,23,35,39,46,50,60,64,66,68,70,74,82,87,93,95,98,101,112,119,120,121,123,125,126,129,136,142,147,160,161,163,164 000100400000010000000000400000082000000000 Cl[Rh+2] 8,30,48,102,123,133 000000000000010000000000000000000000000008 [O-2] 48,163 000000000000010000008000000000000000000000 [35S-2] 48,87 000100000000010000000000000000000000000000 [Pt+2] 8,48 0000000002000200000a0000040000000000108000 C=C(C)C 33,49,73,75,98,148,159 
000000000200020000080000040000000000008016 C=C(C)c1ccccc1 33,49,75,98,159,161,162,164 000000008000040014008010200000080004008008 CSO 39,50,66,68,87,92,101,123,138,159,163 00000000800004841400810020000008000400001e c1ccccc1SO 39,50,58,63,66,68,80,87,101,123,138,161,162,163,164 000000000000080018000820000000080420400001 NN 51,67,68,83,93,123,130,141,150,160 000000000000080018000820000000080420400001 [H]NN[H] 51,67,68,83,93,123,130,141,150,160 000000000000080018000820000000080420400001 [H]N([H])N([H])[H] 51,67,68,83,93,123,130,141,150,160 000000000000080018000020000040080420400001 N=N 51,67,68,93,118,123,130,141,150,160 000000000000080018000020000040080420400001 N=N[H] 51,67,68,93,118,123,130,141,150,160 000000000000080018000020000040080420400001 [H]N=N[H] 51,67,68,93,118,123,130,141,150,160 000000000000080000000020000000080020000001 N#N 51,93,123,141,160 000000000000080001000020020080090021000017 c1cnncc1 51,64,93,97,119,120,123,136,141,160,161,162,164 000000000000100000800a02884020000422442501 NCCCN 52,79,81,83,89,99,103,110,117,130,137,141,146,150,152,154,157,160 000000000000100000800a02884020000422442501 N([H])([H])C([H])([H])C([H])([H])C([H])([H])N([H])([H]) 52,79,81,83,89,99,103,110,117,130,137,141,146,150,152,154,157,160 000000001004100001900009032291418465425a1f c1c([nH]c(=O)[nH]c1=O)O 36,42,52,64,76,79,88,91,96,97,105,109,112,116,119,120,126,130,135,136,138,141,142,145,150,153,155,156,158,160,161,162,163,164 000000000000200080000200801020000c06045508 OCCO 53,71,81,103,108,117,130,131,137,138,146,152,154,156,158,163 000000002002300001d044c8132791512461400a1f c1[nH]c2c(n1)[nH]c(nc2=O)F 37,41,52,53,64,76,78,79,82,86,91,94,95,96,97,100,104,105,106,109,112,116,119,120,124,126,130,133,136,141,142,150,153,155,160,161,162,163,164 000000008004400214008000200000080604004008 OSO 39,42,54,57,66,68,87,101,123,129,130,138,158,163 0000800000048000700000302000000a060408e009 ON(O)C 23,42,55,68,69,70,92,93,101,121,123,129,130,138,147,157,158,159,160,163 
0000800000048000700000202000000a560408681f ON(O)c1ccccc1 23,42,55,68,69,70,93,101,121,123,129,130,132,134,138,147,155,157,158,160,161,162,163,164 00000000000000010000200002102080090304111c O1CCCCC1 56,85,97,108,117,127,128,131,136,137,146,152,156,162,163,164 00000000000000010000048000000000000100001a o1cccc1 56,82,95,136,161,163,164 080000100000000214008000000000080200000000 PS[U] 3,28,57,66,68,87,123,129 000000000000008400008100000000000000000016 Sc1ccccc1 58,63,80,87,161,162,164 000000000000000804018000200000080000000008 S=O 59,66,72,87,101,123,163 00000000800000001c008000200000080404000008 SO 39,66,67,68,87,101,123,130,138,163 000000088040641a14018a40a80000080e04486509 C(N)S(=O)O 27,39,46,50,53,54,57,59,60,66,68,72,81,83,87,94,99,101,103,123,129,130,131,138,147,150,152,154,157,158,160,163 00000000000000100002a010000000000010188000 CS(C)C 60,73,85,87,92,140,147,148,159 000000400820449e0401c58060860008a28128401a c1c(c(sc1S(=O)(=O)Cl)Br)Br 30,35,45,50,54,57,58,59,60,63,66,72,80,82,86,87,95,101,102,105,106,111,123,129,133,135,136,143,147,149,158,161,163,164 0000000008400021010085c000008011000180081b c1cc(oc1)c2nccs2 35,46,56,61,64,80,82,87,94,95,119,120,124,136,151,155,160,161,163,164 000020000000002002000000048000000000008016 CC1(C=C1)c2ccccc2 21,61,65,98,111,159,161,162,164 000000000000004050000020200040080000400009 N=O 62,68,70,93,101,118,123,150,160,163 0010800000000140414400202300c14b026108401f c1c[n+](=O)ccn1[O-] 12,23,48,62,64,70,74,78,93,96,97,101,112,118,119,120,121,123,126,129,136,141,142,147,158,160,161,162,163,164 000020000000008000008102800000000000040010 C1CC1S 21,63,80,87,89,103,146,164 00000000000000850000858000000000000100001a c1cocc1S 56,58,63,80,82,87,95,136,161,163,164 88042000080000b0140083028000a08801030c0110 C1CC1S1[U]CC1 3,7,10,21,35,60,61,63,66,68,80,81,87,89,103,117,119,123,127,128,136,137,146,147,152,164 000000000000000002020000008000000010108000 CC(C)(C)C 65,73,111,140,148,159 00000010000000001c008000000000080400000000 PS 
28,66,67,68,87,123,130 000000000100000014018020000040080000400001 S=N 32,66,68,72,87,93,118,123,150,160 00000000880004010400800026008008000100101c c1ccosc1 35,39,50,56,66,87,97,98,101,119,123,136,156,162,163,164 000000100000000018000000000000080400000000 PP 28,67,68,123,130 000000100000000018000000000000080400000000 P=P 28,67,68,123,130 000000500002000010000000000000082000000000 PF 28,30,41,68,123,133 000000100000000018000000200000080404000008 PO 28,67,68,101,123,130,138,163 000000100000000010000000200000080000000008 P=O 28,68,101,123,163 000000100000000000000010000000000000008000 PC 28,92,159 000000100004000038000020000000080600400001 PNP 28,42,67,68,69,93,123,129,130,150,160 000000100000000018000820000000080400400001 [H]NP 28,67,68,83,93,123,130,150,160 0000800000000940e10400202200d14b02e128481f c1cc[n+](=O)n(c1)[O-] 23,48,51,62,64,69,70,71,74,93,97,101,112,116,118,119,120,121,123,126,129,136,141,142,143,147,149,155,158,160,161,162,163,164 000000002000200141d00ce02100800956a160281b c1(c(non1)N)N 37,53,56,64,70,76,78,79,82,83,93,94,95,96,101,119,120,123,129,130,132,134,136,141,143,149,150,155,157,160,161,163,164 080000000000200090000000200000080604004008 O[U][Np]O 3,53,68,71,101,123,129,130,138,158,163 000000000000000000020000000002000000108400 CCC 73,113,148,154,159 000000000000000000020000000002000000108400 C([2H])([2H])([2H])C([2H])([2H])C([2H])([2H])[2H] 73,113,148,154,159 000000000000000000008200800026000000048500 SCCC 81,87,103,113,114,117,146,152,154,159 00802000000000000004301008000c0300010ca111 CN1CC1 15,21,74,84,85,92,99,114,115,120,121,136,146,147,152,157,159,160,164 008020000000000000002200080000010001442111 [H]N1CC1 15,21,81,85,99,120,136,146,150,152,157,160,164 000000000600000000080000040000000000008000 C=C([Li])C 33,34,75,98,159 000000000400000000020000000000000000108000 CC([Li])C 34,73,148,159 000000000200000000000000040000000000008000 C=C([H])C 33,98,159 400100000000200010400820000000080620400001 N[Fe][W]N 
6,8,53,68,78,83,93,123,129,130,141,150,160 0000000000000000014414800840a01300230c2513 n12cccc1.C2C3.n13cccc1 64,74,78,82,84,95,99,110,117,119,120,121,124,136,137,141,146,147,152,154,157,160,161,164 0000000000000000004434800840a08301230c2511 N12CCCC1.C2C3.N13CCCC1 74,78,82,84,85,95,99,110,117,119,120,121,127,128,136,137,141,146,147,152,154,157,160,164 00000010810030021c808820200010280620400009 NSOPN 28,32,39,52,53,57,66,67,68,79,83,87,93,101,116,123,125,129,130,141,150,160,163 0000000000000000018414800840a01300230c2513 n12cccc1.C2CC3.n13cccc1 64,74,79,82,84,95,99,110,117,119,120,121,124,136,137,141,146,147,152,154,157,160,161,164 0000000000000000008434800840a08301230c2511 N12CCCC1.C2CC3.N13CCCC1 74,79,82,84,85,95,99,110,117,119,120,121,127,128,136,137,141,146,147,152,154,157,160,164 000000000000000000008200000002000000008500 SC(C)[H] 81,87,113,152,154,159 000000080002000000000a00080000002000402501 NCF 27,41,81,83,99,133,150,152,154,157,160 000000000002000000000010000000002000008000 [H]CF 41,92,133,159 000000000002000000000000000002002000008500 CCF 41,113,133,152,154,159 000000080002000000000000000000002000000500 FCF 27,41,133,152,154 000000000000000000022010000000200000109008 COC 73,85,92,125,148,156,159,163 000000080002000000002010001004202000009508 COCF 27,41,85,92,108,114,125,133,152,154,156,159,163 000000000200000000000010040000200000009008 COC=C 33,92,98,125,156,159,163 000001000000000000000010000000200000009008 COC#C 16,92,125,156,159,163 000020000002000000004000000400002000040010 FC1CC1 21,41,86,106,133,146,164 000020000000000000004000400400002000040010 ClC1CC1 21,86,102,106,133,146,164 000020000020000000004000000400002000040010 BrC1CC1 21,45,86,106,133,146,164 000020040000000000004000000400002000040010 [I]C1CC1 21,26,86,106,133,146,164 000000100004000018000a22800002080600408501 NPPCC 28,42,67,68,81,83,89,93,103,113,123,129,130,150,152,154,159,160 000000100004000018000202a00002080604008508 OPPCC 28,42,67,68,81,89,101,103,113,123,129,130,138,152,154,159,163 
000000100000000018000820000000080600408001 NPP=CC 28,67,68,83,93,123,129,130,150,159,160 000000100004000018000830000000080600408001 NPPC[H] 28,42,67,68,83,92,93,123,129,130,150,159,160 000000500020000018000200800002082600008500 BrPPCC 28,30,45,67,68,81,103,113,123,129,130,133,152,154,159 80040000000000000000020288402c01000144a911 CC1CCN1 7,10,81,89,99,103,110,114,115,117,120,136,146,150,152,155,157,159,160,164 800400000000200000000246884020050405447b19 C1CN[C@H]1C(=O)O 7,10,53,81,89,90,94,99,103,110,117,120,122,130,136,138,146,150,152,153,155,156,157,158,160,163,164 00002000000100000240080280c000001020442811 C1CC1(C#N)N 21,40,65,78,83,89,103,110,111,132,141,146,150,155,157,160,164 000000100004000010000222000042080600408501 N=PPCC 28,42,68,81,89,93,113,118,123,129,130,150,152,154,159,160 000000100000000010000822000002080200408501 NP=PCC 28,68,83,89,93,113,123,129,150,152,154,159,160 000000100000000010000200200002080200008508 O=PPCC 28,68,81,101,113,123,129,152,154,159,163 0000001000802000900000062092022c860c0ad708 CCOP(=O)(C(=O)O)O 28,47,53,68,71,89,90,101,105,108,111,113,122,123,125,129,130,135,138,139,145,147,152,153,154,156,158,159,163 000480000000000050200026204060081004040819 C1CC(=NO)C1 10,23,68,70,77,89,90,93,101,110,117,118,123,132,138,146,155,160,163,164 000000000840200000408fce88e8b801042344ab19 CC1(NCCS1)C(=O)N 35,46,53,78,80,81,82,83,87,89,90,91,94,95,99,103,107,109,110,111,115,116,117,119,120,130,136,137,141,146,150,152,153,155,157,159,160,163,164 0000000800000001000026c68870b0010903443119 C1COCN1 27,56,81,82,85,89,90,94,95,99,103,108,109,110,116,117,119,120,128,131,136,137,146,150,152,156,157,160,163,164 000400000000008000008106800020000000040010 C1CC(C1)S 10,63,80,87,89,90,103,117,146,164 000020000200020000080206041000000104201518 C=C1CC1CO 21,33,49,75,81,89,90,98,108,128,138,149,152,154,156,163,164 000020000000000000004206409400002104201518 C1C(C1(Cl)Cl)CO 21,81,86,89,90,102,106,108,111,128,133,138,149,152,154,156,163,164 
0000040000100001002000080420d001000100b819 c1cccoc(C)n1 18,44,56,77,91,98,109,116,118,119,120,136,155,156,157,159,160,163,164 000000100000000000000010000000000000008000 PC 28,92,159 000000100000000000000010000000000000008000 P[CH3] 28,92,159 000000100000000000000010000000000000008000 P[C]([H])([H])[H] 28,92,159 000000100200000000000000000000000000000000 P=C 28,33 000000100000000000000020000000080000000001 [P]#[N] 28,93,123,160 000000100000000000000020000000080000000001 P#N 28,93,123,160 000000000000000000000000000000000000400001 [H]N[H] 150,160 0000000000000000010416c08850200308070c351b c1cccn1CCO 64,74,81,82,84,94,95,99,103,108,110,117,120,121,131,136,137,138,146,147,152,154,156,157,160,161,163,164 00000008004000000104b68209100003000708351b c1cccn1CSCO 27,46,64,74,81,82,84,85,87,89,95,96,99,108,120,121,136,137,138,147,152,154,156,157,160,161,163,164 000000080044000000008a00080000000400402501 SCN 27,42,46,81,83,87,99,130,150,152,154,157,160 00000000000000000104148008000203000108a513 CCn1cccc1 64,74,82,84,95,99,113,120,121,136,147,152,154,157,159,160,161,164 00000000000000000000081000000000000040a001 [H]CN 83,92,150,157,159,160 000000000044000000208000000040000400400001 SC=N 42,46,77,87,118,130,150,160 000004000000000000000000000020800100040010 C1CCCCCC1 18,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCCCCCC1 100,117,127,128,146,164 000000000000000000000000100020800100040010 C1CCCCCCCCCCCCC1 100,117,127,128,146,164 000000000000000000000000000020800100040010 C1CCCCCCCCCCCCCC1 117,127,128,146,164 000000000000000000000000000020800100040010 
C1CCCCCCCCCCCCCCC1 117,127,128,146,164 000000080004000000008200001000000404001508 S[CH2]O 27,42,81,87,108,130,138,152,154,156,163 000000000000000000000000000000000004001008 [H][CH]O 138,156,163 000000100000000000000200000002000000008500 CCP 28,81,113,152,154,159 000000000000000000000000000000000000008000 C[CH][2H] 159 00000000000000000002201000000000000050a001 [H]N(C)C 73,85,92,148,150,157,159,160 000000000000010000000000000000000004000008 [O-][2H] 48,138,163 000000000800000000008580100180100001000012 c1csc2c1csc2 35,80,82,87,95,100,104,119,124,136,161,164 000400000200020000080202041000800004001518 C=C1CC(C1)CO 10,33,49,75,81,89,98,108,127,138,152,154,156,163,164 80040000000000010000000000182a800901849518 CCC1CCO1 7,10,56,107,108,113,115,117,127,128,131,136,146,151,152,154,156,159,163,164 00802000000000010200000000982e800901949518 CCCC1(CO1)C 15,21,56,65,107,108,111,113,114,115,117,127,128,131,136,146,148,151,152,154,156,159,163,164 000020000200000000000000040020800100040410 C=CCCC1CC1 21,33,98,117,127,128,146,154,164 000000000200000000000000000000000000000208 O=C 33,153,163 000000000200000000000000000000008000004228 O=C.O=C 33,135,153,158,163,165 080000000004380038d00020020080090621400015 N1NN[U]NN1 3,42,51,52,53,67,68,69,76,78,79,93,97,119,120,123,129,130,136,141,150,160,162,164 000000000000000000008000000000000000000000 S[CH][2H] 87 000000000000000000000000000000000004000008 [OH] 138,163 000000000000000000000000000000000004000008 [O][3H] 138,163 000000000000000000000000000000000004000008 [3H][O][3H] 138,163 000000000000000000000000000000000000108000 CC 148,159 000000000000000000020000000002000000108400 CCC 73,113,148,154,159 000000000000000000000000000026000000148400 CCCC 113,114,117,146,148,154,159 000000000000000000000000000000000000108020 C.C.C.C.C.C.C.C.C.C.C.C 148,159,165 000000000000000000000000000000000000108000 C1.C1 148,159 fingerprint/inst/unitTests/report.txt0000644000176200001440000000246111571714217017645 0ustar liggesusersRUNIT TEST PROTOCOL -- Thu 
Jun 2 10:05:03 2011 *********************************************** Number of test functions: 16 Number of errors: 0 Number of failures: 0 1 Test Suite : fingerprint fingerprint Unit Tests - 16 test functions, 0 errors, 0 failures Details *************************** Test Suite: fingerprint fingerprint Unit Tests Test function regexp: ^test.+ Test file regexp: ^runit.+\.[rR]$ Involved directory: /Users/guhar/src/cdkr/fingerprint/tests/../inst/unitTests --------------------------- Test file: /Users/guhar/src/cdkr/fingerprint/tests/../inst/unitTests/runit.fp.R test.and1: (1 checks) ... OK (0 seconds) test.and2: (1 checks) ... OK (0 seconds) test.distance1: (1 checks) ... OK (0 seconds) test.distance2: (1 checks) ... OK (0 seconds) test.fold1: (1 checks) ... OK (0.01 seconds) test.fold2: (1 checks) ... OK (0.01 seconds) test.fp.balance: (2 checks) ... OK (0 seconds) test.fp.sim.matrix: (1 checks) ... OK (0.01 seconds) test.fp.to.matrix: (1 checks) ... OK (0 seconds) test.fps.reader: (324 checks) ... OK (0.56 seconds) test.new.fp: (1 checks) ... OK (0 seconds) test.not: (2 checks) ... OK (0 seconds) test.or1: (1 checks) ... OK (0 seconds) test.or2: (1 checks) ... OK (0 seconds) test.xor1: (1 checks) ... OK (0.01 seconds) test.xor2: (2 checks) ... OK (0.01 seconds) fingerprint/tests/0000755000176200001440000000000013224262707013750 5ustar liggesusersfingerprint/tests/doRUnit.R0000644000176200001440000000400013224262707015451 0ustar liggesusersif(require("RUnit", quietly=TRUE)) { ## --- Setup --- pkg <- "fingerprint" # <-- Change to package name! 
if(Sys.getenv("RCMDCHECK") == "FALSE") { ## Path to unit tests for standalone running under Makefile (not R CMD check) ## PKG/tests/../inst/unitTests path <- file.path(getwd(), "..", "inst", "unitTests") } else { ## Path to unit tests for R CMD check ## PKG.Rcheck/tests/../PKG/unitTests path <- system.file(package=pkg, "unitTests") } cat("\nRunning unit tests\n") print(list(pkg=pkg, getwd=getwd(), pathToUnitTests=path)) library(package=pkg, character.only=TRUE) ## If desired, load the name space to allow testing of private functions ## if (is.element(pkg, loadedNamespaces())) ## attach(loadNamespace(pkg), name=paste("namespace", pkg, sep=":"), pos=3) ## ## or simply call PKG:::myPrivateFunction() in tests ## --- Testing --- ## Define tests testSuite <- defineTestSuite(name=paste(pkg, "fingerprint Unit Tests"), dirs=path) ## Run tests <- runTestSuite(testSuite) ## Default report name pathReport <- file.path(path, "report") ## Report to stdout and text files cat("------------------- UNIT TEST SUMMARY ---------------------\n\n") printTextProtocol(tests, showDetails=FALSE) #printTextProtocol(tests, showDetails=FALSE, # fileName=paste(pathReport, "Summary.txt", sep="")) #printTextProtocol(tests, showDetails=TRUE, # fileName=paste(pathReport, ".txt", sep="")) ## Report to HTML file #printHTMLProtocol(tests, fileName=paste(pathReport, ".html", sep="")) ## Return stop() to cause R CMD check stop in case of ## - failures i.e. FALSE to unit tests or ## - errors i.e. 
R errors tmp <- getErrors(tests) if(tmp$nFail > 0 | tmp$nErr > 0) { stop(paste("\n\nunit testing failed (#test failures: ", tmp$nFail, ", #R errors: ", tmp$nErr, ")\n\n", sep="")) } } else { warning("cannot run unit tests -- package RUnit is not available") } fingerprint/src/0000755000176200001440000000000013224262715013374 5ustar liggesusersfingerprint/src/fpdistance.c0000644000176200001440000000330713224262715015663 0ustar liggesusers#include #define X(_m,_i,_j,_nrow) _m[ _i + _nrow * _j ] #define METRIC_TANIMOTO 1 #define METRIC_EUCLIDEAN 2 double d_tanimoto(double*,double*,int); double d_euclidean(double*,double*,int); void m_tanimoto(double *m, int *nrow, double *ret) { int i,j; for (i = 0; i < *nrow; i++) { for (j = i+1; j < *nrow; j++) { double mij = X(m, i,j, *nrow); double mii = X(m, i,i, *nrow); double mjj = X(m, j,j, *nrow); X(ret, i, j, *nrow) = X(ret, j, i, *nrow) = mij / (mii+mjj-mij); } } return; } /** fp1 and fp2 should be an array of 1's and 0's, of length equal to the size of the fingerprint **/ void fpdistance(double *fp1, double *fp2, int *nbit, int *metric, double *ret) { double r = 0.0; switch(*metric) { case METRIC_TANIMOTO: r = d_tanimoto(fp1, fp2, *nbit); break; case METRIC_EUCLIDEAN: r = d_euclidean(fp1, fp2, *nbit); } *ret = r; return; } /** http://www.daylight.com/dayhtml/doc/theory/theory.finger.html **/ double d_tanimoto(double *fp1, double *fp2, int nbit) { int i; int nc = 0; int na = 0; int nb = 0; if (nbit <= 0) return(-1.0); for (i = 0; i < nbit; i++) { if (fp1[i] == 1 && fp2[i] == 1) nc++; if (fp1[i] == 1 && fp2[i] == 0) na++; if (fp2[i] == 1 && fp1[i] == 0) nb++; } return ((double) nc) / (double) (na + nb + nc); } /** http://www.daylight.com/dayhtml/doc/theory/theory.finger.html **/ double d_euclidean(double *fp1, double *fp2, int nbit) { int i; int nc = 0; int nd = 0; if (nbit <= 0) return(-1.0); for (i = 0; i < nbit; i++) { if (fp1[i] == 1 && fp2[i] == 1) nc++; if (fp1[i] == 0 && fp2[i] == 0) nd++; } return sqrt(((double) nc + 
(double) nd) / (double) nbit); } fingerprint/src/registerDynamicSymbol.c0000644000176200001440000000030613224262715020056 0ustar liggesusers#include #include #include void R_init_markovchain(DllInfo* info) { R_registerRoutines(info, NULL, NULL, NULL, NULL); R_useDynamicSymbols(info, TRUE); } fingerprint/src/readfps.c0000644000176200001440000000434313224262715015170 0ustar liggesusers#include #include /* Bulk of the code provided by Andrew Dalke, modified by me to be usable from R */ int bit_is_on(char*,int); #define charmask(c) ((unsigned char)((c) & 0xff)) static int to_int(int c) { if (c >= '0' && c <= '9') { return c - '0'; } if (c >= 'A' && c <= 'F') { return c - 'A' + 10; } if (c >= 'a' && c <= 'f') { return c - 'a' + 10; } return -1; } SEXP parse_hex(SEXP hexstr, SEXP hexlen) { int i,j; const char *argbuf; int arglen; argbuf = (const char*) CHAR(STRING_ELT(hexstr,0)); arglen = INTEGER(hexlen)[0]; char* retbuf = (char*) R_alloc(arglen/2, sizeof(char)); for (i=j=0; i < arglen; i += 2) { int top = to_int(charmask(argbuf[i])); int bot = to_int(charmask(argbuf[i+1])); if (top == -1 || bot == -1) { return R_NilValue; } retbuf[j++] = (top << 4) + bot; } // determine the number of on bits int n_on = 0; for (i = 0; i < arglen*4; i++) if (bit_is_on(retbuf, i)) n_on++; // now, we save the positions of the bits int *bitpos = (int*) R_alloc(n_on, sizeof(int)); j = 0; for (i = 0; i < arglen*4; i++) { if (bit_is_on(retbuf, i)) bitpos[j++] = i; } SEXP retsexp; PROTECT(retsexp = allocVector(INTSXP, n_on)); for (i = 0; i < n_on; i++) INTEGER(retsexp)[i] = bitpos[i]; UNPROTECT(1); return(retsexp); } int bit_is_on(char *fp, int B) { return fp[B / 8] >> (B%8) & 0x01; } SEXP parse_jchem_binary(SEXP bstr, SEXP len) { int i,j; const char *argbuf; int arglen; argbuf = (const char*) CHAR(STRING_ELT(bstr,0)); arglen = INTEGER(len)[0]; // determine number of 1's int n_on = 0; i = 0; while (i < arglen) { if (argbuf[i++] == 9) break; } int startPos = i; while (i < arglen) { if 
(argbuf[i++] == 49) n_on++; } // no get the actual bit positions int *bitpos = (int*) R_alloc(n_on, sizeof(int)); int bitIdx = 0; j = 0; for (i = startPos; i < arglen; i++) { int c = argbuf[i]; if (c != 49 && c != 48) continue; if (c == 49) bitpos[j++] = bitIdx; bitIdx++; } SEXP retsexp; PROTECT(retsexp = allocVector(INTSXP, n_on)); for (i = 0; i < n_on; i++) INTEGER(retsexp)[i] = bitpos[i]; UNPROTECT(1); return(retsexp); } fingerprint/NAMESPACE0000644000176200001440000000113513160063475014025 0ustar liggesusersimportFrom("methods", "new") exportClasses("fingerprint") exportClasses("featvec") exportClasses("feature") exportMethods("fold", "euc.vector", "distance", "random.fingerprint", "as.character", "length", "feature", "count", 'feature<-', 'count<-') export("fp.sim.matrix", "fp.to.matrix", "fp.factor.matrix", "fp.read.to.matrix", "fp.read","shannon", ##"featvec.to.binaryfp", "moe.lf", "bci.lf", "cdk.lf", "ecfp.lf", "fps.lf", "jchem.binary.lf", "bit.spectrum", "balance", "bit.importance") useDynLib(fingerprint,.registration = TRUE) fingerprint/R/0000755000176200001440000000000013156525101013001 5ustar liggesusersfingerprint/R/balance.R0000644000176200001440000000072011447374434014524 0ustar liggesusersbalance <- function(fplist) { if (is.list(fplist)) { lapply(fplist, function(fp) { compl <- !fp new('fingerprint', nbit = 2 * length(fp), bits = c(fp@bits, compl@bits+length(fp)), provider='R', name='balanced') }) } else { fp <- fplist compl <- !fp new('fingerprint', nbit = 2 * length(fp), bits = c(fp@bits, compl@bits+length(fp)), provider='R', name='balanced') } } fingerprint/R/featurefp.R0000644000176200001440000000364712224262153015117 0ustar liggesusers## A feature fingerprint will be a vector of feature objects setClass("featvec", representation(features="list", provider="character", name="character", misc="list"), validity=function(object) { ## features must be a list of feature objects klasses <- unique(sapply(object@features, class)) if (length(klasses) != 
1 || klasses != 'feature') return("Must supply a list of 'feature' objects") iss4s <- sapply(object@features, isS4) if (!all(iss4s)) return("Must supply a list of 'feature' objects") return(TRUE) }, prototype(features=list(), provider="", name="", misc=list())) setMethod('show', 'featvec', function(object) { cat("Feature fingerprint\n") cat(" name = ", object@name, "\n") cat(" source = ", object@provider, "\n") cat(" features = ", paste(sapply(object@features, as.character), collapse=' '), "\n") }) setMethod('as.character', 'featvec', function(x) { return(paste(sapply(x@features, as.character), collapse=' ')) }) setMethod("length", "featvec", function(x) { length(x@features) }) ## featvec.to.binaryfp <- function(fps, bit.length = 256) { ## if (!all(sapply(fps, class) == 'featvec')) ## stop("Must supply a list of feature vector fingerprints") ## ## get all the features ## features <- sort(unique(unlist(lapply(fps, as.numeric)))) ## nbit <- length(features) ## if (nbit %% 2 == 1) nbit <- nbit + 1 ## ## based on the entire feature set, convert original fps to binary fps ## fps <- lapply(fps, function(x) { ## bitpos <- match(as.numeric(x), features) ## new("fingerprint", nbit=nbit, folded=FALSE, provider=x@provider,name=x@name, bits=bitpos) ## }) ## return(fps) ## } fingerprint/R/bitimp.R0000644000176200001440000000055311447374434014427 0ustar liggesusersbit.importance <- function(actives, background) { bs.actives <- bit.spectrum(actives) bs.background <- bit.spectrum(background) m <- length(actives) n <- length(background) pa <- (m*bs.actives+bs.background)/(m+1) pb <- (n*bs.background+bs.actives)/(n+1) kl <- pa * log(pa/pb) + (1-pa) * log( (1-pa)/(1-pb) ) kl[is.nan(kl)] <- NA return(kl) } fingerprint/R/feature.R0000644000176200001440000000342113003502273014553 0ustar liggesusers## Define a feature and its count setClass("feature", contains = 'integer', representation(feature='character', count='integer'), validity=function(object) { if (is.na(object@feature) || 
is.null(object@feature)) return("feature must be a string") if (object@count < 0) return("count must be zero or a positive integer") return(TRUE) }, prototype(feature='', count=as.integer(1)) ) setMethod('show', 'feature', function(object) { cat(sprintf('%s:%d', object@feature, object@count), '\n') }) setMethod('as.character', signature(x='feature'), function(x) sprintf("%s:%d", x@feature, x@count)) setMethod('c', signature(x='feature'), function(x, ..., recursive=FALSE) { elems <- list(x, ...) ret <- list() for (i in seq_along(elems)) { ret[[i]] <- new("feature", feature=elems[[i]]@feature, count=as.integer(elems[[i]]@count)) } return(ret) }) ## getters/setters setGeneric("feature", function(object) standardGeneric("feature")) setMethod("feature", "feature", function(object) object@feature) setGeneric("feature<-", function(this, value) standardGeneric("feature<-")) setReplaceMethod("feature", signature=signature("feature", "character"), function(this, value) { this@feature <- value this }) setGeneric("count", function(object) standardGeneric("count")) setMethod("count", "feature", function(object) object@count) setGeneric("count<-", function(this, value) standardGeneric("count<-")) setReplaceMethod("count", signature=signature("feature", "numeric"), function(this, value) { this@count <- as.integer(value) this }) fingerprint/R/ops.R0000644000176200001440000000311611447374434013742 0ustar liggesuserssetMethod("&", c("fingerprint", "fingerprint"), function(e1, e2) { if (e1@nbit != e2@nbit) stop("fp1 & fp2 must of the same bit length") andbits <- intersect(e1@bits, e2@bits) new("fingerprint", bits=andbits, nbit=e1@nbit, provider="R") }) setMethod("|", c("fingerprint", "fingerprint"), function(e1, e2) { if (e1@nbit != e2@nbit) stop("fp1 & fp2 must of the same bit length") orbits <- union(e1@bits, e2@bits) new("fingerprint", bits=orbits, nbit=e1@nbit, provider="R") }) setMethod("!", c("fingerprint"), function(x) { bs <- 1:(x@nbit) if (length(x@bits) > 0) b <- bs[ 
-x@bits ] else b <- bs ret <- new("fingerprint", bits=b, nbit=x@nbit, provider="R") return(ret) }) setMethod("xor", c("fingerprint", "fingerprint"), function(x,y) { if (x@nbit != y@nbit) stop("e1 & e2 must of the same bit length") tmp1 <- rep(FALSE, x@nbit) tmp2 <- rep(FALSE, y@nbit) tmp1[x@bits] <- TRUE tmp2[y@bits] <- TRUE tmp3 <- xor(tmp1,tmp2) xorbits <- which(tmp3) new("fingerprint", bits=xorbits, nbit=x@nbit, provider="R") }) fingerprint/R/fingerprint.R0000644000176200001440000000425411571707721015471 0ustar liggesuserssetClass("fingerprint", representation(bits="numeric", nbit="numeric", folded="logical", provider="character", name="character", misc="list"), validity=function(object) { if (any(object@bits > object@nbit)) return("Bit positions were greater than the specified bit length") else return(TRUE) }, prototype(bits=c(), nbit=0, folded=FALSE, provider="", name="", misc=list())) #setGeneric("show", function(object) standardGeneric("show")) setMethod("show", "fingerprint", function(object) { cat("Fingerprint object\n") cat(" name = ", object@name, "\n") cat(" length = ", object@nbit, "\n") cat(" folded = ", object@folded, "\n") cat(" source = ", object@provider, "\n") cat(" bits on = ", paste(sort(object@bits), collapse=' '), "\n") }) setMethod('as.character', "fingerprint", function(x) { s <- numeric(x@nbit) s[x@bits] <- 1 paste(s,sep='',collapse='') }) setMethod("length", "fingerprint", function(x) { x@nbit }) parseCall <- function (obj) { if (class(obj) != "call") { stop("Must supply a 'call' object") } srep <- deparse(obj) if (length(srep) > 1) srep <- paste(srep, sep = "", collapse = "") fname <- unlist(strsplit(srep, "\\("))[1] func <- unlist(strsplit(srep, paste(fname, "\\(", sep = "")))[2] func <- unlist(strsplit(func, "")) func <- paste(func[-length(func)], sep = "", collapse = "") func <- unlist(strsplit(func, ",")) vals <- list() nms <- c() cnt <- 1 for (args in func) { arg <- unlist(strsplit(args, "="))[1] val <- unlist(strsplit(args, 
"="))[2] arg <- gsub(" ", "", arg) val <- gsub(" ", "", val) vals[[cnt]] <- val nms[cnt] <- arg cnt <- cnt + 1 } names(vals) <- nms vals } fingerprint/R/bitspec.R0000644000176200001440000000131611551603371014561 0ustar liggesusersbit.spectrum <- function(fplist) { if (class(fplist) != 'list') stop("Must provide a list of fingerprint objects") if (any(unlist(lapply(fplist, class)) != 'fingerprint')) stop("Must provide a list of fingerprint objects"); nbit <- length(fplist[[1]]) spec <- numeric(nbit) for (i in 1:length(fplist)) { bits <- fplist[[i]]@bits spec[bits] <- spec[bits]+1 } spec / length(fplist) } shannon <- function(fplist) { if (class(fplist) != 'list') stop("Must provide a list of fingerprint objects") if (any(unlist(lapply(fplist, class)) != 'fingerprint')) stop("Must provide a list of fingerprint objects"); bs <- bit.spectrum(fplist) bs <- bs[ bs != 0 ] -1 * sum( bs * log2(bs) ) } fingerprint/R/misc.R0000644000176200001440000002110012777037330014063 0ustar liggesusers setGeneric("fold", function(fp) standardGeneric("fold")) setMethod("fold", "fingerprint", function(fp) { size <- fp@nbit if (size %% 2 != 0) { stop('Need to supply a fingerprint of even numbered length') } bfp <- rep(FALSE, size) bfp[fp@bits] <- TRUE subfplen <- size/2 b1 <- which(bfp[1:subfplen]) b2 <- which(bfp[(subfplen+1):size]) subfp1 <- new("fingerprint", nbit=subfplen, bits=b1, provider="R"); subfp2 <- new("fingerprint", nbit=subfplen, bits=b2, provider="R") foldedfp <- subfp1 | subfp2 foldedfp@folded <- TRUE return(foldedfp) }) setGeneric("euc.vector", function(fp) standardGeneric("euc.vector")) setMethod("euc.vector", "fingerprint", function(fp) { coord <- rep(0,length(fp)) coord[fp@bits] <- 1.0 / sqrt(length(fp)) coord }) setGeneric("distance", function(fp1,fp2,method,a,b) standardGeneric("distance")) setMethod("distance", c("featvec", "featvec", "missing", "missing", "missing"), function(fp1, fp2) { distance(fp1, fp2, "tanimoto" ) }) setMethod("distance", c("featvec", "featvec", 
"character", "missing", "missing"), function(fp1, fp2, method=c("tanimoto", "dice", "robust")) { method <- match.arg(method) n1 <- length(fp1) n2 <- length(fp2) ## extract the feature strings, ignoring counts for now f1 <- sapply(fp1@features, function(x) x@feature) f2 <- sapply(fp2@features, function(x) x@feature) n12 <- length(intersect(f1,f2)) if (method == 'tanimoto') { return(n12/(n1+n2-n12)) } else if (method == "robust") { return(0.5 + 0.5 * n12 * n12 / (n1*n2)) } else if (method == "dice") { return(2.0 * n12 / (n1+n2)) } }) setMethod("distance", c("fingerprint", "fingerprint", "missing", "missing", "missing"), function(fp1,fp2) { distance(fp1,fp2,"tanimoto") }) setMethod("distance", c("fingerprint", "fingerprint", "character", "numeric", "numeric"), function(fp1, fp2, method="tversky", a, b) { if (!is.null(method) && !is.na(method) && method != "tversky") distance(fp1, fp2, method) if ( length(fp1) != length(fp2)) stop("Fingerprints must of the same bit length") if (a < 0 || b < 0) stop("a and b must be positive") tmp <- fp1 & fp2 xiy <- length(tmp@bits) tmp <- fp1 | fp2 xuy <- length(tmp@bits) x <- length(fp1@bits) y <- length(fp2@bits) return( xiy / (a*x + b*y + (1-a-b)*xiy ) ) }) setMethod("distance", c("fingerprint", "fingerprint", "character", "missing", "missing"), function(fp1,fp2, method=c('tanimoto', 'euclidean', 'mt', 'simple', 'jaccard', 'dice', 'russelrao', 'rodgerstanimoto','cosine', 'achiai', 'carbo', 'baroniurbanibuser', 'kulczynski2', 'hamming', 'meanHamming', 'soergel', 'patternDifference', 'variance', 'size', 'shape', 'hamann', 'yule', 'pearson', 'dispersion', 'mcconnaughey', 'stiles', 'simpson', 'petke', 'stanimoto', 'seuclidean' )) { if (method == 'tversky') stop("If Tversky metric is desired, must specify a and b") if ( length(fp1) != length(fp2)) stop("Fingerprints must of the same bit length") method <- match.arg(method) n <- length(fp1) if (method == 'tanimoto') { f1 <- numeric(n) f2 <- numeric(n) f1[fp1@bits] <- 1 f2[fp2@bits] <- 1 
sim <- 0.0 ret <- .C("fpdistance", as.double(f1), as.double(f2), as.integer(n), as.integer(1), as.double(sim), PACKAGE="fingerprint") return (ret[[5]]) } else if (method == 'euclidean') { f1 <- numeric(n) f2 <- numeric(n) f1[fp1@bits] <- 1 f2[fp2@bits] <- 1 sim <- 0.0 ret <- .C("fpdistance", as.double(f1), as.double(f2), as.integer(n), as.integer(2), as.double(sim), PACKAGE="fingerprint") return (ret[[5]]) } size <- n ## in A & B tmp <- fp1 & fp2 c <- length(tmp@bits) ## in A not in B tmp <- (fp1 | fp2) & !fp2 a <- length(tmp@bits) ## in B not in A tmp <- (fp1 | fp2) & !fp1 b <- length(tmp@bits) ## not in A, not in B tmp <- !(fp1 | fp2) d <- length(tmp@bits) dist <- NULL ## Simlarity if (method == 'stanimoto') { dist <- c / (a+b+c) } else if (method == 'seuclidean') { dist <- sqrt((d+c) / (a+b+c+d)) } else if (method == 'dice') { dist <- c / (.5*a + .5*b + c) } else if (method == 'mt') { t1 <- c/(size-d) t0 <- d/(size-c) phat <- ((size-d) + c)/(2*size) dist <- (2-phat)*t1/3 + (1+phat)*t0/3 } else if (method == 'simple') { dist <- (c+d)/n } else if (method == 'jaccard') { dist <- c/(a+b+c) } else if (method == 'russelrao') { dist <- c/size } else if (method == 'rodgerstanimoto') { dist <- (c+d)/(2*a+2*b+c+d) } else if (method == 'cosine' || method == 'achiai' || method == 'carbo') { dist <- c/sqrt((a+c)*(b+c)) } else if (method == 'baroniurbanibuser') { dist <- (sqrt(c*d)+c)/(sqrt(c*d)+a+b+c) } else if (method == 'kulczynski2') { dist <- .5*(c/(a+c)+c/(b+c)) } ## Dissimilarity else if (method == 'hamming') { dist <- a+b } else if (method == 'meanHamming') { dist <- (a+b)/(a+b+c+d) }else if (method == 'soergel') { dist <- (a+b)/(a+b+c) } else if (method == 'patternDifference') { dist <- (a*b)/(a+b+c+d)^2 } else if (method == 'variance') { dist <- (a+b)/(4*n) } else if (method == 'size') { dist <- (a-b)^2/n^2 } else if (method == 'shape') { dist <- (a+b)/n-((a-b)/(n))^2 } ## Composite else if (method == 'hamann') { dist <- (c+d-a-b)/(a+b+c+d) } else if (method == 
'yule') { dist <- (c*d-a*b)/(c*d+a*b) } else if (method == 'pearson') { dist <- (c*d-a*b)/sqrt((a+c)*(b+c)*(a+d)*(b+d)) } else if (method == 'dispersion') { dist <- (c*d-a*b)/n^2 } else if (method == 'mcconaughey') { dist <- (c^2-a*b)/((a+c)*(b+c)) } else if (method == 'stiles') { dist <- log10(n*(abs(c*d-a*b)-n/2)^2/((a+c)*(b+c)*(a+d)*(b+d))) } ## Asymmetric else if (method == 'simpson') { dist <- c/min((a+c),(b+c)) } else if (method == 'petke') { dist <- c/max((a+c),(b+c)) } dist }) setGeneric("random.fingerprint", function(nbit, on) standardGeneric("random.fingerprint")) setMethod("random.fingerprint", c("numeric", "numeric"), function(nbit, on) { if (nbit <= 0) stop("Bit length must be positive integer") if (on <= 0) stop("Number of bits to be set to 1 must be positive integer") bits <- sample(1:nbit, size=on) new("fingerprint", nbit=nbit, bits=bits, provider="R", folded=FALSE) }) fingerprint/R/read.R0000644000176200001440000000714613156525101014047 0ustar liggesusersjchem.binary.lf <- function(line) { molid <- strsplit(line, "\t")[[1]][1] bitpos <- .Call("parse_jchem_binary", as.character(line), as.integer(nchar(line)) ) if (is.null(bitpos)) return(NULL) list(molid, bitpos+1, list()) ## we add 1, since C does bit positions from 0 } fps.lf <- function(line) { toks <- strsplit(line, "\\s")[[1]]; bitpos <- .Call("parse_hex", as.character(toks[1]), as.integer(nchar(toks[1]))) if (is.null(bitpos)) return(NULL) if (length(toks) > 2) { misc <- list(toks[-c(1,2)]) } else { misc <- list() } list(toks[2], bitpos+1, misc) ## we add 1, since C does bit positions from 0 } cdk.lf <- function(line) { p <- regexpr("{([0-9,\\s]*)}",line,perl=T) s <- gsub(',','',substr(line, p+1, p+attr(p,"match.length")-2)) s <- lapply( strsplit(s,' '), as.numeric ) molid <- gsub("\\s+","", strsplit(line, "\\{")[[1]][1]) list(molid, s[[1]], list()) } moe.lf <- function(line) { p <- regexpr("\"([0-9\\s]*)\"",line, perl=T) s <- substr(line, p+1, p+attr(p,"match.length")-2) s <- lapply( 
strsplit(s,' '), as.numeric ) list(NA, s[[1]], list()) } bci.lf <- function(line) { tokens <- strsplit(line, '\\s')[[1]] name <- tokens[1] tokens <- tokens[-c(1, length(tokens), length(tokens)-1)] list(name, as.numeric(tokens), list()) } ecfp.lf <- function(line) { tokens <- strsplit(line, '\\s')[[1]] name <- tokens[1] tokens <- tokens[-1] list(name, tokens, list()) } ## TODO we should be iterating over lines and not reading ## them all in fp.read <- function(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE, binary=TRUE) { lf.name <- deparse(substitute(lf)) provider <- lf.name fplist <- list() fcon <- file(description=f,open='r') lines = readLines(fcon,n=-1) if (header && lf.name != 'fps.lf') lines = lines[-1] if (lf.name == 'fps.lf') { binary <- TRUE size <- NULL ## process the header block nheaderline = 0 for (line in lines) { if (substr(line,1,1) != '#') break nheaderline <- nheaderline + 1 if (nheaderline == 1 && length(grep("#FPS1", line)) != 1) stop("Invalid FPS format") if (length(grep("#num_bits", line)) == 1) size <- as.numeric(strsplit(line, '=')[[1]][2]) if (length(grep("#software", line)) == 1) provider <- as.character(strsplit(line, '=')[[1]][2]) } lines <- lines[ (nheaderline+1):length(lines) ] if (is.null(size)) { # num_bit size <- nchar(strsplit(line, '\\s')[[1]][1]) * 4 } } c = 1 for (line in lines) { dat <- lf(line) if (is.null(dat)) { warning(sprintf("Couldn't parse: %s", line)) next } if (is.na(dat[[1]])) name <- "" else name <- dat[[1]] misc <- dat[[3]] ## usually empty if (binary) { fplist[[c]] <- new("fingerprint", nbit=size, bits=as.numeric(dat[[2]]), folded=FALSE, provider=provider, name=name, misc=misc) } else { ## convert the features to 'feature' objects feats <- lapply(dat[[2]], function(x) new("feature", feature=x)) fplist[[c]] <- new("featvec", features=feats, provider=provider, name=name, misc=misc) } c <- c+1 } close(fcon) fplist } ## Need to supply the length of the bit string since fp.read does ## not provide that 
information fp.read.to.matrix <- function(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE) { fplist <- fp.read(f, size, lf, header) fpmat <- fp.to.matrix(fplist) fpmat } fingerprint/R/zzz.R0000644000176200001440000000004112224316213013751 0ustar liggesusers.onLoad <- function(lib, pkg) {} fingerprint/R/matrix.R0000644000176200001440000000417013156240271014434 0ustar liggesusersfp.sim.matrix <- function(fplist, fplist2=NULL, method='tanimoto') { sim <- NA if (!is.null(fplist2)) { sim <- do.call('rbind', lapply(fplist, function(fp) unlist(lapply(fplist2, function(x) distance(x,fp, method=method))))) ##diag(sim) <- 1.0 return(sim) } if (method == 'dice') { sim <- .dice.sim.mat(fplist) } else if (method == 'tanimoto') { sim <- .tanimoto.sim.mat(fplist) } else { sim <- matrix(0,nrow=length(fplist), ncol=length(fplist)) for (i in 1:(length(fplist)-1)) { v <- unlist(lapply( fplist[(i+1):length(fplist)], distance, fp2=fplist[[i]], method=method)) sim[i,(i+1):length(fplist)] <- v sim[(i+1):length(fplist),i] <- v } } diag(sim) <- 1.0 return(sim) } ## Takes the fingerprints, P bits, for a set of N molecules supplied as ## a list structure and creates an N x P matrix fp.to.matrix <- function( fplist ) { size <- fplist[[1]]@nbit m <- matrix(0, nrow=length(fplist), ncol=size) cnt <- 1 for ( i in fplist ) { m[cnt,i@bits] <- 1 cnt <- cnt + 1 } m } fp.factor.matrix <- function( fplist ) { size <- fplist[[1]]@nbit m <- data.frame(fp.to.matrix(fplist)) m[] <- lapply(m, factor, levels=0:1) m } .dice.sim.mat <- function(fplist) { m <- fp.to.matrix(fplist) mat<-m%*%t(m) len<-length(m[,1]) s<-mat.or.vec(len,len) rs<-rowSums(m) #since its is binary just add the row values. 
for (i in 1:(len-1)) { for (j in (i+1):len) { s[i,j]=(2*(mat[i,j])/(rs[i]+rs[j])) s[j,i]=s[i,j] } } diag(s) <- 1.0 return(s) } .tanimoto.sim.mat <- function(fplist){ m <- fp.to.matrix(fplist) mat<-m%*%t(m) len<-length(m[,1]) s<-mat.or.vec(len,len) ret <- .C("m_tanimoto", as.double(mat), as.integer(len), as.double(s), PACKAGE="fingerprint") ret <- matrix(ret[[3]], nrow=len, ncol=len, byrow=TRUE) return(ret) ## for (i in 1:len){ ## for (j in 1:len){ ## s[i,j]<- mat[i,j]/(mat[i,i]+mat[j,j]-mat[i,j]) # Formula for Tanimoto Calculation ## } ## } ## return(s) } fingerprint/MD50000644000176200001440000000457413224521352013122 0ustar liggesusersaa63c800ee74199d6ec4219e2803a9da *ChangeLog baf4c2adc631eea046d95eaa804c1ade *DESCRIPTION 273a1345a5199deee0497cc556812bde *INDEX 836321b1e72589b01e942d88414a1401 *NAMESPACE f28cc7591c8d64e99d90d9f91383a1c6 *R/balance.R 2c642bc1f05561df93d2c76a9fae1748 *R/bitimp.R 0c94477e5334b69df55f95f0e5b86afc *R/bitspec.R 702410c34c11fa12b05cda292e710a5b *R/feature.R c4e0d4b459b8de6a1a982b6cc873d964 *R/featurefp.R b1ea2b3bb57e131aa33a2b58ebc8c85a *R/fingerprint.R 654f051f7cffd07f87bab08eef19dd14 *R/matrix.R 2ff890be163c9de10671cc8f11e0b868 *R/misc.R 8972e18553803e569bf2eb11d586f7af *R/ops.R b57f70112f24a45b0b58464b880003c4 *R/read.R fe6078d069c08a91d845aaf6fafc073f *R/zzz.R 334f0de6ed55dc79f59addf091097353 *inst/unitTests/Makefile f22df9f3397f61d3e9679ad049e14c83 *inst/unitTests/bits.fps 23d32aef831e39a3c1dfe0f0c1a442b7 *inst/unitTests/report.html 5b24b494bcddc74dc9ec078e32306dc5 *inst/unitTests/report.txt 745f49fd8ffeedc56e89413c6487279d *inst/unitTests/reportSummary.txt c7b11a520d31f1e0a0a38ccaee75b0f1 *inst/unitTests/runit.fp.R c7fc4f4bf4a1061b4d609cf20c3b0f34 *inst/unitTests/test.ecfp 0efea50e6b9d5435b4b9704d4391cacd *man/balance.Rd d76542452b5b085238f6708e4c24593c *man/bitimp.Rd d839bab10da6f334946a447b19f4b559 *man/bitspec.Rd b122ce42392d47066017465710d513f7 *man/c.Rd 9807f646ce5102d3e5ea14a9c7e77cbf *man/count-methods.Rd 
6b353c421c70366b7eab59c1e7633338 *man/distance-methods.Rd 5c5c1ad2b1efbef8378736a3155f3ace *man/facmat.Rd 8ae5afaebabcc29868fa0d10e6fa0f88 *man/feature-methods.Rd d8c5a144cb28883bc0ce058b79ace5f6 *man/feature.Rd 79473fa713f0131f2a8eeaf07a92a5cb *man/featvec.Rd 49b6393f52ba81a7d27388712fa5311a *man/fingerprint.Rd 6494638b1a4553d6535a5ffb7ce3bd17 *man/fold.Rd 628d41caa54adf763db1ab716b5733b8 *man/fplogical.Rd 37e3fca205a20eed21d28da853c0f3ef *man/length.Rd f195b3813a235780f5de974030e1635d *man/linefunc.Rd 4ff20ca34f27226cf8eea5286100602f *man/mat.Rd 9c6c3ba8ac30ddb3b14803c93dce2a74 *man/read.Rd ce65a09d4194e1d28a28c83a6bc77866 *man/rndfp.Rd a0e79a3d465933089eed74a7f56ac5c4 *man/shannon.Rd 9f69d49ea6ad6a8b2c69b696a8c2715a *man/show.Rd a0c7e4abd4c8f6c862240cc08764cd50 *man/sim.Rd 531d7aae2eb984e674968da4b856a60e *man/string.Rd ca09bc634ca0b37f30dda125555410f6 *man/vec.Rd 7afe32c102f466fbc046191637bc6229 *src/fpdistance.c 3c7a939828404a253b652bf1191c9f60 *src/readfps.c f0b1c7e2062664dbefc7d59abc6a91c4 *src/registerDynamicSymbol.c 7dae096c648f491093bf635c811d5658 *tests/doRUnit.R fingerprint/DESCRIPTION0000644000176200001440000000225113224521352014306 0ustar liggesusersPackage: fingerprint Version: 3.5.7 Date: 2018-01-06 Title: Functions to Operate on Binary Fingerprint Data Author: Rajarshi Guha Maintainer: Rajarshi Guha BugReports: https://github.com/rajarshi/cdkr/issues Description: Functions to manipulate binary fingerprints of arbitrary length. A fingerprint is represented by an object of S4 class 'fingerprint' which is internally represented a vector of integers, such that each element represents the position in the fingerprint that is set to 1. The bitwise logical functions in R are overridden so that they can be used directly with 'fingerprint' objects. A number of distance metrics are also available (many contributed by Michael Fadock). Fingerprints can be converted to Euclidean vectors (i.e., points on the unit hypersphere) and can also be folded using OR. 
Arbitrary fingerprint formats can be handled via line handlers. Currently handlers are provided for CDK, MOE and BCI fingerprint data. License: GPL Depends: methods LazyLoad: yes Suggests: RUnit NeedsCompilation: yes Packaged: 2018-01-07 00:11:57 UTC; guhar Repository: CRAN Date/Publication: 2018-01-07 22:44:58 UTC fingerprint/ChangeLog0000644000176200001440000001105212233504637014357 0ustar liggesusers2013-10-28 Guha * src/fpdistance.c (m_tanimoto): Reduced the number of iterations based on comments from John May 2013-10-27 Guha * R/matrix.R (.tanimoto.sim.mat): Updated Tanimoto matrix code form Abhik Seal to use C code for nested loops. 2013-10-22 Rajarshi Guha * R/matrix.R (fp.sim.matrix): Updated to use the new matrix multiplication based Tanimoto calculation for similarity matrices contributed by Abhik Seal 2013-10-06 Rajarshi Guha * R/zzz.R (.onLoad): Removed unecessary usage of require * R/misc.R: Removed debug code 2013-10-05 Rajarshi Guha * Added the 'feature' class to represent alphanumeric features (usually substructures but could be arbitrary hashes) and their counts * Updated the 'featvec' fingerprint class to use 'feature' objects * Removed featvec.to.binary since the featvec fingerprint type can also include non numeric features 2013-04-05 Rajarshi Guha * Updated package to remove use of deprecated methods 2012-10-30 Rajarshi Guha * R/matrix.R (fp.sim.matrix): Added code provided by Abhik Seal to speed up pairwise similarity matrix calculation when the Dice metric is specified. 2012-02-21 Rajarshi Guha * R/read.R (jchem.binary.lf): Added a line parser for JChem binary string formatted fingerprints. 
This is based on a C function to parse the fingerprint portion of a line 2011-07-26 Rajarshi Guha * man/sim.Rd: Updated man page for fp.sim.matrix to indicate the use of two fingerprint lists * R/matrix.R (fp.sim.matrix): Updated similarity matrix calculation to support cross-similarity (ie, similarity matrix from two (possibly different lengths) lists of fingerprints 2011-06-03 Rajarshi Guha * src/fpdistance.c: Cleaned up uncessary headers and unused variables * src/readfps.c: Cleaned up unecessary headers 2011-06-02 Rajarshi Guha * R/read.R (fp.read): Updated line functions to return a third component that can be used to return the remainder of a line if a format allows other items than just a title and fingerprint. (fps.lf): Updated FPS line function to actually return remaining components of a fingerprint line. Updated main reader to set the misc field of a fingerprint object to hold this list 2011-06-01 Rajarshi Guha * src/readfps.c (parse_hex): Added a C function to parse hex-encoded fingerprints from the FPS format and return the bit positions that are set to 1 * R/read.R (fps.lf): Added a new line parser to handle the fingerprint lines from the FPS format (fp.read): Updated main fingerprint reader to handle the multi-line header from FPS format fingerprint files 2011-04-14 Rajarshi Guha * R/bitspec.R (shannon): Added a method to evaluate the Shannon entropy for a list of fingerprints. Also added a man page 2010-11-07 Rajarshi Guha * R/read.R: Updaetd the CDK line parser to extract the molecule id and return it so that the fingerpint object contains the molecule id 2010-10-20 Rajarshi Guha * R/read.R (fp.read): Updated to support reading of feature fingerprints. The user must now indicate whether a binary or a feature fingerprint is being read. Also added a new line parsing function to process generic feature fingerprints. * R/misc.R: Added similarity metrics for feature fingerprints. Currently tanimoto, dice and robust metrics are supported. 
* R/featurefp.R: Added a class to support fingerprints that are represented as lists of numeric or string features such as circular fingerprints. Also added a method to convert a collection of feature fingerprints to a fixed-length binary string representation. * man/featvec.Rd: Added man page for te featvec class, used to represent fingerprints characterized as numeric or character features 2010-06-02 Rajarshi Guha * R/balance.R (balance): Added a method to generate balanced fingerprints, which have 50% bit density, but are 2x the size of the input fingerprints. * R/bitimp.R (bit.importance): Added a function to evaluate the importance of each bit in a binary fingerprint in terms of the Kullback Liebler divergence between a set of actives and a background collection. fingerprint/man/0000755000176200001440000000000012224277277013370 5ustar liggesusersfingerprint/man/vec.Rd0000644000176200001440000000157411447374434014442 0ustar liggesusers\name{euc.vector} \alias{euc.vector} \title{ Euclidean Representation of Binary Fingerprints } \description{ Ordinarily, a binary fingerprint can be considered to represent a corner of a nD hypercube. However in many cases using such a representation can lead to a very sparse space. Consequently one approach is to convert the fingerprint so that it represents points on a nD unit hypersphere. The resultant fingerprint is then a nD coordinate. } \usage{ euc.vector(fp) } \arguments{ \item{fp}{ An object of class \code{fingerprint}. } } \value{ A numeric of length equal to the bit length of the fingerprint. 
The result corresponds to a unit vector for a point on the nD hypersphere } \examples{ # make a fingerprint vector fp <- new("fingerprint", nbit=8, bits=c(1,3,4,5,7)) vec <- euc.vector(fp) } \keyword{logic} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/show.Rd0000644000176200001440000000102112224147737014626 0ustar liggesusers\name{show} \alias{show,fingerprint-method} \alias{show,featvec-method} \alias{show,feature-method} \title{ String Representation of a Fingerprint or Feature } \description{ Simply summarize the fingerprint or feature } \usage{ \S4method{show}{fingerprint}(object) \S4method{show}{featvec}(object) \S4method{show}{feature}(object) } \arguments{ \item{object}{ An object of class \code{fingerprint}, \code{featvec} or \code{feature} } } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \keyword{logic} fingerprint/man/linefunc.Rd0000644000176200001440000000407411721013501015442 0ustar liggesusers\name{cdk.lf, moe.lf, bci.lf} \alias{cdk.lf} \alias{moe.lf} \alias{bci.lf} \alias{ecfp.lf} \alias{fps.lf} \alias{jchem.binary.lf} \title{ Functions to parse lines from fingerprint files } \description{ These functions take a single line and parse it to produce a vector of integers which represents the position of the 'on' bits in a fingerprint. This allows the user to use \code{fp.read} with arbitrary fingerprint files. A new file format can be handled by defining a new line parser function. Currently the first three functions process fingerprint files obtained from the CDK (\url{http://cdk.sourceforge.net}), MOE (\url{http://chemcomp.com}), BCI (\url{http://www.digitalchemistry.co.uk/}) and the FPS format (\url{http://code.google.com/p/chem-fingerprints/wiki/FPS}). The last function can be used for any fingerprint that generates hashed features (such as ECFPs or other circular fingerprints). For these cases, it is assumed that features are unsigned integers, so string features are not handled.
Note that when the \code{fps.lf} function is specified, items such as the number of bits or the header flag do not need to be specified, as the format requires a header block containing some of these items. } \usage{ cdk.lf(line) moe.lf(line) bci.lf(line) ecfp.lf(line) fps.lf(line) jchem.binary.lf(line) } \arguments{ \item{line}{ The line to parse } } \value{ A list with three componenents - the name associated with the fingerprint (if available) and a vector of integers representing bits set to 1 (for the case of the first three methods) or a vector of characters representing hashed features (characteristic of circular fingerprints) or more generally, any string feature. The third component is a (possibly empty) list, which contains the remaining components of a line, when the format allows items other than an a title and the fingerprint (such as the FPS format). The content of the third component is dependent on the line function that is being used. } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \keyword{logic} fingerprint/man/distance-methods.Rd0000644000176200001440000001157112020462607017102 0ustar liggesusers\name{distance-methods} \docType{methods} \alias{distance} \alias{distance-methods} \alias{distance,featvec,featvec,character,missing,missing-method} \alias{distance,featvec,featvec,missing,missing,missing-method} \alias{distance,fingerprint,fingerprint,character,missing,missing-method} \alias{distance,fingerprint,fingerprint,character,numeric,numeric-method} \alias{distance,fingerprint,fingerprint,missing,missing,missing-method} \title{Calculates the Similarity or Dissimilarity Between Two Fingerprints} \description{ A number of distance metrics can be calculated for binary fingerprints. Some of these are actually similarity metrics and thus represent the reverse of a distance metric. 
The following are distance (dissimilarity) metrics \itemize{ \item Hamming \item Mean Hamming \item Soergel \item Pattern Difference \item Variance \item Size \item Shape } The following metrics are similarity metrics and so the distance can be obtained by subtracting the value from 1.0 \itemize{ \item Tanimoto \item Dice \item Modified Tanimoto \item Simple \item Jaccard \item Russel-Rao \item Rodgers Tanimoto \item Cosine \item Achiai \item Carbo \item Baroniurbanibuser \item Kulczynski2 \item Robust } Finally the method also provides a set of composite and asymmetric distance metrics \itemize{ \item Hamann \item Yule \item Pearson \item Dispersion \item McConnaughey \item Stiles \item Simpson \item Petke \item Tversky } The default metric is the Tanimoto coefficient. } \section{Methods}{ \describe{ \item{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "character", a = "missing", b = "missing")}}{ Similarity method for feature vector type fingerprints, supporting \code{tanimoto}, \code{robust} and \code{dice} metrics. } \item{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "missing", a = "missing", b = "missing")}}{ Evaluate Tanimoto similarity between two feature vector fingerprints } \item{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "character", a = "missing", b = "missing")}}{ Evaluate similarity (or dissimilarity) between two binary fingerprints. See below for a list of possible similarity (or dissimilarity) metrics } \item{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "character", a = "numeric", b = "numeric")}}{ Evaluate Tversky similarity between two binary fingerprints.
} \item{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "missing", a = "missing", b = "missing")}}{ Evaluate Tanimoto similarity between two binary fingerprints } }} \usage{ distance(fp1, fp2, method, a, b) } \arguments{ \item{fp1}{ An object of class \code{fingerprint} or \code{featvec} } \item{fp2}{ An object of class \code{fingerprint} or \code{featvec} } \item{a}{Parameter for the Tversky index} \item{b}{Parameter for the Tversky index} \item{method}{ The type of distance metric desired. Partial matching is supported and the default is \code{tanimoto}. Alternative values are \itemize{ \item \code{euclidean} \item \code{hamming} \item \code{meanHamming} \item \code{soergel} \item \code{patternDifference} \item \code{variance} \item \code{size} \item \code{shape} \item \code{jaccard} \item \code{dice} \item \code{mt} \item \code{simple} \item \code{russelrao} \item \code{rodgerstanimoto} \item \code{cosine} \item \code{achiai} \item \code{carbo} \item \code{baroniurbanibuser} \item \code{kulczynski2} \item \code{robust} \item \code{hamann} \item \code{yule} \item \code{pearson} \item \code{dispersion} \item \code{mcconnaughey} \item \code{stiles} \item \code{simpson} \item \code{petke} \item \code{tversky} } If the two fingerprints are of class \code{featvec} then the following methods may be specified: \code{tanimoto}, \code{robust} and \code{dice}.
} } \value{ Numeric value representing the distance in the specified metric between the supplied fingerprint objects } \examples{ # make a 2 fingerprint vectors fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6)) fp2 <- new("fingerprint", nbit=6, bits=c(1,2,5,6)) # calculate the tanimoto coefficient distance(fp1,fp2) # should be 1 # Invert the second fingerprint fp3 <- !fp2 distance(fp1,fp3) # should be 0 } \references{Fligner, M.A.; Verducci, J.S.; Blower, P.E.; A Modification of the Jaccard-Tanimoto Similarity Index for Diverse Selection of Chemical Compounds Using Binary Strings, \emph{Technometrics}, 2002, \emph{44}(2), 110-119 Monve, V.; Introduction to Similarity Searching in Chemistry, \emph{MATCH - Comm. Math. Comp. Chem.}, 2004, \emph{51}, 7-38 } \keyword{logic} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/shannon.Rd0000644000176200001440000000142711551601031015304 0ustar liggesusers\name{shannon} \alias{shannon} \alias{entropy} \title{ Evaluate Shannon Entropy for a Set of Fingerprints } \description{ This method evaluates the Shannon entropy for a set of fingerprints and utilizes the \code{\link{bit.spectrum}} method to obtain the relative frequencies of individual bits } \usage{ shannon(fplist) } \arguments{ \item{fplist}{ A list structure with each element being an object of class \code{fingerprint}. These will can be constructed by hand or read from disk via \code{\link{fp.read}}. All fingerprints in the list should be of the same length. 
} } \value{ The Shannon entropy for the set of fingerprints } \seealso{ \code{\link{bit.spectrum}}, \code{\link{fp.read}} } \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/count-methods.Rd0000644000176200001440000000226012224277233016440 0ustar liggesusers\name{count-methods} \docType{methods} \alias{count-methods} \alias{count} \alias{count,feature-method} \alias{count<--methods} \alias{count<-} \alias{count<-,feature,numeric-method} \title{Get or Set Count of Occurence of a Feature} \description{ Get or set the count of occurence associated with a \code{\link{feature-class}} object. The default value for the getter (as defined in the prototype) is 1. } \section{Methods}{ \describe{ \item{\code{signature(object = "feature")}}{Return the count associated with the feature object} \item{\code{signature(x = "feature", value = "numeric")}}{Set the count associated with the feature object} } } \usage{ \S4method{count}{feature}(object) \S4method{count}{feature,numeric}(x) <- value } \arguments{ \item{object}{ An object of class \code{\link{feature-class}} } \item{x}{ An object of class \code{\link{feature-class}} } \item{value}{ A numeric (which will be coerced to \code{integer}) indicating the count associated with the feature } } \value{ An integer representing count of occurence of the feature } \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/mat.Rd0000644000176200001440000000241611447374434014442 0ustar liggesusers\name{fp.to.matrix} \alias{fp.to.matrix} \title{ Converts a List of Fingerprints to a Matrix } \description{ In general, fingerprint data is read from a file or obtained via calls to an external generator and the return value is a list of fingerprints. This function takes the list and returns a matrix having number of rows equal to the number of fingerprints and the number of columns equal to the length of the fingerprint. 
Each element is 1 or 0 (1's being specified by the positions in each fingerprint vector) } \usage{ fp.to.matrix(fplist) } \arguments{ \item{fplist}{ A list structure with each element being an object of class \code{fingerprint}. These will can be constructed by hand or read from disk via \code{\link{fp.read}} } } \value{ A matrix with dimensions equal to \code{length(fplist), bit length)} where bit length is a property of the fingerprint objects in the list. } \seealso{ \code{\link{distance}}, \code{\link{fp.read}} } \examples{ # make fingerprint objects fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6)) fp2 <- new("fingerprint", nbit=6, bits=c(1,4,5,6)) fp3 <- new("fingerprint", nbit=6, bits=c(2,3,4,5,6)) fp.to.matrix( list(fp1,fp2,fp3) ) } \keyword{logic} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/length.Rd0000644000176200001440000000077211447374434015145 0ustar liggesusers\name{length} \alias{length} \alias{length,fingerprint-method} \title{ Fingerprint Bit Length } \description{ Returns the length of the fingerprint. That is, this is the length of the entire bit string and not simply the number of bits that are on. } \usage{ \S4method{length}{fingerprint}(x) } \arguments{ \item{x}{ An object of class \code{fingerprint} } } \value{ The length of the bit string } \keyword{logic} \keyword{methods} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/string.Rd0000644000176200001440000000166712224147550015165 0ustar liggesusers\name{as.character} \alias{as.character} \alias{as.character,fingerprint-method} \alias{as.character,featvec-method} \alias{as.character,feature-method} \title{ Generates a String Representation of a Fingerprint } \description{ The function returns a string of 1's and 0's or a character vector of features depending on the nature of the fingerprint supplied. 
} \usage{ \S4method{as.character}{fingerprint}(x) \S4method{as.character}{featvec}(x) \S4method{as.character}{feature}(x) } \arguments{ \item{x}{ An object of class \code{fingerprint}, \code{featvec} or \code{feature} } } \value{ A string of 1's and 0's or else a character vector of features (with their counts) } \examples{ # make a fingerprint vector fp <- new("fingerprint", nbit=32, bits=sample(1:32, 20)) # print out the string representation as.character(fp) } \keyword{logic} \keyword{methods} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/facmat.Rd0000644000176200001440000000166511447374434015121 0ustar liggesusers\name{fp.factor.matrix} \alias{fp.factor.matrix} \title{ Converts a List of Fingerprints to a data.frame of Factors } \description{ This function will convert a \code{list} of fingerprint objects to a \code{data.frame} of factors with levels 1 and 0. } \usage{ fp.factor.matrix(fplist) } \arguments{ \item{fplist}{ A list structure with each element being an object of class \code{fingerprint}. 
These can be constructed by hand or read from disk via \code{\link{fp.read}} } } \value{ A \code{data.frame} with \code{length(fplist)} rows and one column per bit position of the fingerprints; each column is a factor with levels 0 and 1 } \seealso{ \code{\link{distance}}, \code{\link{fp.read}} } \examples{ # make fingerprint objects fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6)) fp2 <- new("fingerprint", nbit=6, bits=c(1,4,5,6)) fp3 <- new("fingerprint", nbit=6, bits=c(2,3,4,5,6)) fp.factor.matrix( list(fp1,fp2,fp3) ) } \keyword{logic} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/c.Rd0000644000176200001440000000076012224151362014066 0ustar liggesusers\name{c} \alias{c,feature-method} \title{ Combine Multiple Features to Give a List of Features } \description{ Combine multiple \code{feature} objects to give a list of feature objects } \usage{ \S4method{c}{feature}(x, ..., recursive = FALSE) } \arguments{ \item{x}{ An object of class \code{feature} } \item{...}{ One or more \code{feature} objects } \item{recursive}{ Ignored } } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \keyword{logic} fingerprint/man/feature-methods.Rd0000644000176200001440000000227512224277277016761 0ustar liggesusers\name{feature-methods} \docType{methods} \alias{feature} \alias{feature-methods} \alias{feature,feature-method} \alias{feature<--methods} \alias{feature<-} \alias{feature<-,feature,character-method} \title{Get or Set the Character String Representing the Feature} \description{ Get or set the character string representing a feature of a \code{\link{feature-class}} object. The default value for the getter (as defined in the prototype) is the empty string.
} \section{Methods}{ \describe{ \item{\code{signature(object = "feature")}}{Return the feature associated with the feature object} \item{\code{signature(x = "feature", value = "character")}}{Set the feature associated with the feature object} } } \usage{ \S4method{feature}{feature}(object) \S4method{feature}{feature,character}(x) <- value } \arguments{ \item{object}{ An object of class \code{\link{feature-class}} } \item{x}{ An object of class \code{\link{feature-class}} } \item{value}{ The character string to replace the current feature string with } } \value{ An character string representing the feature } \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/fplogical.Rd0000644000176200001440000000147011447374434015620 0ustar liggesusers\name{fplogical} \alias{!} \alias{|} \alias{&} \alias{xor} \alias{|,fingerprint,fingerprint-method} \alias{&,fingerprint,fingerprint-method} \alias{xor,fingerprint,fingerprint-method} \alias{!,fingerprint-method} \title{ Logical Operators for Fingerprints } \description{ These functions perform logical operatiosn (AND, OR, NOT, XOR) on the supplied binary fingerprints. Thus for two fingerprints A and B we have \describe{ \item{\code{&}}{Logical AND} \item{\code{|}}{Logical OR} \item{\code{xor}}{Logical XOR} \item{\code{!}}{Logical NOT (negation)} } } \arguments{ \item{e1}{ An object of class \code{fingerprint} } \item{e2}{ An object of class \code{fingerprint} } } \value{ A fingerprint object } \keyword{logic} \keyword{methods} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/bitspec.Rd0000644000176200001440000000307011551601023015266 0ustar liggesusers\name{bit.spectrum} \alias{bit.spectrum} \title{ Generate a Bit Spectrum from a List of Fingerprints } \description{ The idea of comparing datasets using fingerprints was described in Guha \& Schurer (2008). The idea is that one can summarize the dataset by counting the frequency of occurrence of each bit position. 
The frequency is normalized by the number of fingerprints considered. Thus a collection of N fingerprints can be converted to a single vector of numbers highlighting the most frequent bits with respect to a given dataset. A plot of this vector looks like a traditional spectrum and hence the name. The bit spectra for two datasets (assuming that the same types of fingerprints have been used) allows one to compare the similarity of the datasets, without having to do a full pairwise similarity calculation. The difference between the structural features of the datasets can be quantified by evaluating the distance between the two bit spectra. } \usage{ bit.spectrum(fplist) } \arguments{ \item{fplist}{ A list structure with each element being an object of class \code{fingerprint}. These will can be constructed by hand or read from disk via \code{\link{fp.read}}. All fingerprints in the list should be of the same length. } } \value{ A numeric vector of length equal to the size of the fingerprints. } \seealso{ \code{\link{distance}}, \code{\link{fp.read}} } \references{ Guha, R.; Schurer, S.; \emph{J. Comp. Aid. Molec. Des.}, \bold{2008}, \emph{22}, 367-384. } \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/feature.Rd0000644000176200001440000000234412224270024015274 0ustar liggesusers\name{feature-class} \docType{class} \alias{feature-class} \title{Class "feature"} \description{This class represents features - arbitrary alphanumeric sequences that are used to characterize molecular substructures (though there is no real restriction to molecules). A feature is associated with an integer count, indicating the occurence of that feature in a molecule. The default value is 1. } \section{Objects from the Class}{ Objects can be created by calls of the form \code{new("feature", ...)}. 
} \section{Slots}{ \describe{ \item{\code{feature}:}{Object of class \code{"character"} ~ The string representation of a feature } \item{\code{count}:}{Object of class \code{"integer"} ~ The occurrence of the feature. Default is 1} \item{\code{.Data}:}{???} } } \section{Methods}{ \describe{ \item{count}{\code{signature(object = "feature")}: Return the count associated with the feature} } } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \seealso{ \code{\link{featvec-class}} } \examples{ ## create a new feature f <- new("feature", feature='ABCD', count=as.integer(1)) ## modify the feature string and the count feature(f) <- 'UXYZ' count(f) <- 10 } \keyword{classes} \keyword{logic}fingerprint/man/read.Rd0000644000176200001440000000377011571331512014564 0ustar liggesusers\name{fp.read, fp.read.to.matrix} \alias{fp.read} \alias{fp.read.to.matrix} \title{ Functions to Read Fingerprints From Files } \description{ \code{fp.read} reads in a set of fingerprints from a file. Fingerprint output from the CDK, MOE and BCI can be handled. Each fingerprint is represented as a \code{fingerprint} object. \code{fp.read} returns a \code{list} structure, each element being a \code{fingerprint} or \code{featvec} object, depending on the value of the \code{binary} argument. \code{fp.read.to.matrix} is a utility function that reads the fingerprints directly to matrix form (columns are the bit positions and the rows are the objects whose fingerprints have been evaluated). Note that this method does not currently work with feature vector fingerprints. } \usage{ fp.read(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE, binary=TRUE) fp.read.to.matrix(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE) } \arguments{ \item{f}{ File containing the fingerprints } \item{size}{ The bit length of the fingerprints being considered } \item{lf}{ A line reading function that parses a single line from a fingerprint file.
A number of functions are provided that parse the fingerprints from the output of the CDK, MOE and the BCI toolkit. In addition, support is now available for the FPS format from the chemfp project (\url{http://code.google.com/p/chem-fingerprints}). } \item{header}{ Indicates whether the first line of the fingerprint file is a header line } \item{binary}{ If \code{TRUE} indicates that a binary fingerprint will be read in. Otherwise indicates that a feature vector style fingerprint (such as from a circular fingerprint) is being read in } } \seealso{ \code{\link{cdk.lf}}, \code{\link{moe.lf}}, \code{\link{bci.lf}}, \code{\link{ecfp.lf}}, \code{\link{fps.lf}} } \value{ A \code{list} or \code{matrix} of fingerprints } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \keyword{logic} fingerprint/man/fold.Rd0000644000176200001440000000170611447374434014606 0ustar liggesusers \name{fold} \alias{fold} \title{ Fold a fingerprint } \description{ In many situations a fingerprint is generated using a large length (such as 1024 bits or more). As a result of this, the fingerprints for a dataset can be very sparse. One approach to increasing bit density of such fingerprints is to fold them. This is performed by dividing the original fingerprint bitstring into two substrings of equal length and then perform an OR on the two substrings. It should be noted that many fingerprint generating routines will perform this internally. } \usage{ fold(fp) } \arguments{ \item{fp}{ The fingerprint to fold. Should be of class \code{fingerprint}. } } \value{ An object of class \code{fingerprint} representing the folded fingerprint. 
} \examples{ # make a fingerprint vector fp <- new("fingerprint", nbit=64, bits=sample(1:64, 30)) fold(fp) } \keyword{logic} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/man/bitimp.Rd0000644000176200001440000000221211447374434015137 0ustar liggesusers\name{bit.importance} \alias{bit.importance} \title{ Evaluate the Discriminatory Power of Individual Bits in a Binary Fingerprint } \description{ This method evaluates the Kullback-Leibler (KL) divergence to rank the individual bits in a binary fingerprint in their ability to discriminate between database and active compounds. This method is implemented based on Nisius and Bajorath and includes an m-estimate correction. } \usage{ bit.importance(actives, background) } \arguments{ \item{actives}{A list of fingerprints for the actives} \item{background}{A list of fingerprints representing the background collection} } \value{ A numeric vector of length equal to the size of the fingerprints. Each element of the vector is the KL divergence for the corresponding bit. If a bit position is never set to 1 in any of the compounds from the actives and the background, then the KL divergence for that position is undefined and \code{NA} is returned. } \seealso{ \code{\link{bit.spectrum}} } \references{ Nisius, B.; Bajorath, J.; \emph{ChemMedChem}, \bold{2010}, \emph{5}, 859-868. } \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/featvec.Rd0000644000176200001440000000445412224262341015265 0ustar liggesusers\name{featvec-class} \docType{class} \alias{featvec-class} \alias{distance,featvec,featvec,missing-method} \alias{distance,featvec,featvec,character-method} \alias{length,featvec-method} \title{Class "featvec"} \description{This class represents feature vector style fingerprints, where, rather than a bit string, the fingerprint is represented as a sequence of (signed) integers or strings. Each element of the collection is a representation of a structural feature. 
For cases where the features are integers, this usually corresponds to a hash of the original feature string. } \section{Objects from the Class}{ Objects can be created by calls of the form \code{new("featvec", ...)}. In contrast to traditional binary fingerprints, operations on feature vectors are slightly different and essentially correspond to operations on sets. Thus the logical and (&) would correspond to the intersection of the two feature vectors. } \section{Slots}{ \describe{ \item{\code{features}:}{Object of class \code{"character"} ~~ A vector containing the numeric or character features. Numeric features are treated as character strings } \item{\code{provider}:}{Object of class \code{"character"} ~~ Indicates the source of the fingerprint. Can be useful to keep track of what software generated the fingerprint.} \item{\code{name}:}{Object of class \code{"character"} ~~ The name associated with the fingerprint. If no name is available this gets set to an empty string} \item{\code{misc}:}{A list to hold arbitrary items associated with a fingerprint (such as extra fields from a fingerprint file)} } } \section{Methods}{ \describe{ \item{distance}{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "missing")}: ... } \item{distance}{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "character")}: ... } \item{as.character}{\code{signature(fp = "featvec")}: ... } \item{length}{\code{signature(fp = "featvec")}: ... } \item{show}{\code{signature(fp = "featvec")}: ... 
} } } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \seealso{ \code{\link{fp.read}}, \code{\link{fp.read.to.matrix}} \code{\link{fp.sim.matrix}}, \code{\link{fp.to.matrix}}, \code{\link{fp.factor.matrix}} \code{\link{random.fingerprint}} } \keyword{classes} \keyword{logic}fingerprint/man/balance.Rd0000644000176200001440000000234611447374434015250 0ustar liggesusers\name{balance} \alias{balance} \title{ Generate a Balanced Code Fingerprint } \description{ It has been noted that the bit density in a fingerprint can affect its ability to retrieve similar compounds from a database primarily due to complexity effects. One approach to alleviating these effects is to generate fingerprints that have a bit density of 50\% (i.e., half the bits are set to 1). This method implements the balanced code approach described by Nisius and Bajorath to convert an ordinary binary fingerprint (whose bit density is not 50\%) to one that has a bit density of 50\%. This is achieved by appending the complement of the input fingerprint to itself (resulting in a fingerprint twice the size of the original). } \usage{ balance(fplist) } \arguments{ \item{fplist}{A single fingerprint or a list of fingerprints} } \value{ A single fingerprint object or a list of fingerprint objects that are "balanced", in that they have a bit density of 50\%. Their size is 2x the size of the input fingerprints. } \seealso{ \code{\link{bit.spectrum}}, \code{\link{bit.importance}} } \references{ Nisius, B.; Bajorath, J.; \emph{ChemMedChem}, \bold{2010}, \emph{5}, 859-868. 
} \keyword{programming} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/fingerprint.Rd0000644000176200001440000000627612020461306016177 0ustar liggesusers\name{fingerprint-class} \docType{class} \alias{fingerprint-class} \alias{euc.vector,fingerprint-method} \alias{fold,fingerprint-method} \alias{random.fingerprint,numeric,numeric-method} \title{Class "fingerprint"} \description{This class represents binary fingerprints, usually generated by a variety of cheminformatics software, but not restricted to such } \section{Objects from the Class}{ Objects can be created by calls of the form \code{new("fingerprint", ...)}. Fingerprints can traditionally be thought of as a vector of 1's and 0's. However for large fingerprints this is inefficient and instead we simply store the positions of the bits that are on. Certain operations also need to know the length of the original bit string and this length is stored in the object at construction. Even though we store extra information along with the bit positions, conceptually we still consider the objects as simple bit strings. Thus the usual bitwise logical operations (&, |, !, xor) can be applied to objects of this class. } \section{Slots}{ \describe{ \item{\code{bits}:}{Object of class \code{"numeric"} ~~ A vector indicating the bit positions that are on. } \item{\code{nbit}:}{Object of class \code{"numeric"} ~~ Indicates the length of the original bit string.} \item{\code{folded}:}{Object of class \code{"logical"} ~~ Indicates whether the fingerprint has been folded.} \item{\code{provider}:}{Object of class \code{"character"} ~~ Indicates the source of the fingerprint. Can be useful to keep track of what software generated the fingerprint.} \item{\code{name}:}{Object of class \code{"character"} ~~ The name associated with the fingerprint. 
If no name is available this gets set to an empty string} \item{\code{misc}:}{Object of class \code{"list"} ~~ A holder for arbitrary items that may have been stored along with the fingerprint. Only certain formats allow extra items to be stored with the fingerprint, so in many cases this field is just an empty list} } } \section{Methods}{ \describe{ \item{distance}{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "missing", a = "missing", b = "missing")}: ... } \item{distance}{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "character", a = "missing", b = "missing")}: ... } \item{euc.vector}{\code{signature(fp = "fingerprint")}: ... } \item{fold}{\code{signature(fp = "fingerprint")}: ... } \item{random.fingerprint}{\code{signature(nbit = "numeric", on = "numeric")}: ... } } } \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} \seealso{ \code{\link{fp.read}}, \code{\link{fp.read.to.matrix}} \code{\link{fp.sim.matrix}}, \code{\link{fp.to.matrix}}, \code{\link{fp.factor.matrix}} \code{\link{random.fingerprint}} } \examples{ ## make fingerprints x <- new("fingerprint", nbit=128, bits=sample(1:128, 100)) y <- x distance(x,y) # should be 1 x <- new("fingerprint", nbit=128, bits=sample(1:128, 100)) distance(x,y) folded <- fold(x) ## binary operations on fingerprints x <- new("fingerprint", nbit=8, bits=c(1,2,3,6,8)) y <- new("fingerprint", nbit=8, bits=c(1,2,4,5,7,8)) x & y x | y !x } \keyword{classes} \keyword{logic}fingerprint/man/sim.Rd0000644000176200001440000000363511613543531014444 0ustar liggesusers\name{fp.sim.matrix} \alias{fp.sim.matrix} \title{ Calculates a Similarity Matrix for a Set of Fingerprints } \description{ Given a set of fingerprints, a pairwise similarity can be calculated using the various distance metrics defined for binary strings. This function calculates the pairwise similarity matrix for a set of \code{fingerprint} or \code{featvec} objects supplied in a \code{list} structure. 
Any of the distance metrics provided by \code{\link{distance}} can be used and the default is the Tanimoto metric. Note that if the Euclidean distance is specified then the resultant matrix is a distance matrix and not a similarity matrix } \usage{ fp.sim.matrix(fplist, fplist2=NULL, method='tanimoto') } \arguments{ \item{fplist}{ A list structure with each element being an object of class \code{fingerprint} or \code{featvec}. These can be constructed by hand or read from disk via \code{\link{fp.read}} } \item{fplist2}{A list structure with each element being an object of class \code{fingerprint} or \code{featvec}. If \code{NULL} then traditional pairwise similarity is calculated with each member in \code{fplist}, otherwise the resultant N x M matrix is derived from the similarity between each member of \code{fplist} and \code{fplist2}} \item{method}{ The type of distance metric to use. The default is \code{tanimoto}. Partial matching is supported. } } \value{ A matrix with dimensions equal to \code{(length(fplist), length(fplist))} if \code{fplist2} is NULL, otherwise \code{(length(fplist), length(fplist2))} } \seealso{ \code{\link{distance}}, \code{\link{fp.read}} } \examples{ # make fingerprint objects fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6)) fp2 <- new("fingerprint", nbit=6, bits=c(1,4,5,6)) fp3 <- new("fingerprint", nbit=6, bits=c(2,3,4,5,6)) fp.sim.matrix( list(fp1,fp2,fp3) ) } \keyword{logic} \author{Rajarshi Guha \email{rajarshi.guha@gmail.com}} fingerprint/man/rndfp.Rd0000644000176200001440000000141311447374434014766 0ustar liggesusers\name{random.fingerprint} \alias{random.fingerprint} \title{ Generate Randomized Fingerprints } \description{ A utility function that can be used to generate binary fingerprints of a specified length with a specified number of bit positions (selected randomly) set to 1. 
Currently bit positions are selected uniformly } \usage{ random.fingerprint(nbit,on) } \arguments{ \item{nbit}{ The length of the fingerprint, that is, the total number of bits. Must be a positive integer. } \item{on}{ How many positions should be set to 1 } } \value{ An object of class \code{fingerprint} } \examples{ # make a fingerprint vector fp <- random.fingerprint(32, 16) as.character(fp) } \keyword{logic} \author{Rajarshi Guha \email{rguha@indiana.edu}} fingerprint/INDEX0000644000176200001440000000270411457725752013415 0ustar liggesusers! Logical Operators for Fingerprints as.character Generates a String Representation of a Fingerprint balance Generate a Balanced Code Fingerprint bit.importance Evaluate the Discriminatory Power of Individual Bits in a Binary Fingerprint bit.spectrum Generate a Bit Spectrum from a List of Fingerprints cdk.lf Functions to parse lines from fingerprint files distance Calculates the Similarity or Dissimilarity Between Two Fingerprints euc.vector Euclidean Representation of Binary Fingerprints featvec-class Class "featvec" featvec.to.binaryfp Convert a Set of Feature Fingerprints to Binary Fingerprints fingerprint-class Class "fingerpint" fold Fold a fingerprint fp.factor.matrix Converts a List of Fingerprints to a data.frame of Factors fp.read Functions to Read Fingerprints From Files fp.sim.matrix Calculates a Similarity Matrix for a Set of Fingerprints fp.to.matrix Converts a List of Fingerprints to a Matrix length Fingerprint Bit Length random.fingerprint Generate Randomized Fingerprints show,fingerprint-method String Representation of a Fingerprint