fingerprint/ 0000755 0001762 0000144 00000000000 13224521352 012600 5 ustar ligges users fingerprint/inst/ 0000755 0001762 0000144 00000000000 11447374434 013571 5 ustar ligges users fingerprint/inst/unitTests/ 0000755 0001762 0000144 00000000000 12224264435 015564 5 ustar ligges users fingerprint/inst/unitTests/reportSummary.txt 0000644 0001762 0000144 00000000414 11571714217 021217 0 ustar ligges users RUNIT TEST PROTOCOL -- Thu Jun 2 10:05:03 2011
***********************************************
Number of test functions: 16
Number of errors: 0
Number of failures: 0
1 Test Suite :
fingerprint fingerprint Unit Tests - 16 test functions, 0 errors, 0 failures
fingerprint/inst/unitTests/Makefile 0000644 0001762 0000144 00000000363 13224262715 017226 0 ustar ligges users TOP=../..
# Absolute path of the package root (TOP is defined earlier in this file).
PKG=${shell cd ${TOP};pwd}
# Driver script fed to R by the `test` target.
SUITE=doRUnit.R
# R executable; override on the command line (e.g. `make R=R-devel`).
R=R

# These targets are commands, not files: always run them even if a file or
# directory with the same name happens to exist.
.PHONY: all inst test

all: inst test

inst: # Install package
	cd ${TOP}/.. && \
	${R} CMD INSTALL ${PKG}

# `&&` stops the recipe as soon as one step fails, so R is never started
# from the wrong directory when the `cd` does not succeed.
test: # Run unit tests
	export RCMDCHECK=FALSE && \
	cd ${TOP}/tests && \
	${R} --vanilla --slave < ${SUITE}
fingerprint/inst/unitTests/test.ecfp 0000644 0001762 0000144 00000013055 12224260514 017400 0 ustar ligges users mol01 17 0 16 3 1 1747237384 1499521844 -1539132615 1294255210 332760439 -1549163031 1035613116 1618154665 590925877 1872154524 -1143715940 203677720 -1272768868 136120670 136597326 -1460348762 -1262922302 -1201618245 -402549409 -1270820019 929601590 -1597477966 -1274743746 -1155471474 1258428229 -1838187238 -798628285 -1773728142 -773983804 -453677277 1674451008 65948508 991735244 -1412946825 846704869 -2103621484 -886204842 1725648567 -353343892 -585443181 -533273616 2031084733 -801248129 1752802620 -976015189 -992213424 2109043264 -790336137 630139722 -505031736 -1427697183 -2090462286 -1724769936
mol02 16 9 1 0 17 32 332760439 -1362791977 367998008 1035613116 -1277879912 1747237384 71476542 -124655670 203677720 1618154665 907007053 -1707366455 1969481564 -1597477966 1966552162 547884906 -1270820019 -2135641502 -497728148 1674451008 -453677277 -2005085798 2047992816 786486417 1523337873 -2045753164 859018953 404853571 1383886699 -745001879 1985089045 -1445962196
mol03 16 1 0 17 3 32 7 332760439 367998008 1035613116 566058135 1747237384 580900652 907007053 1070061035 71476542 203677720 -124655670 -548602426 1618154665 -1707366455 1969481564 -1597477966 -881072729 547884906 -1564724132 -1270820019 -2004812302 -497728148 -2135641502 1674451008 -453677277 2047992816 786486417 1523337873 242457334 -2045753164 859018953 265023308 1381300059 404853571 -745001879 1985089045
mol04 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 -1549103449 1747237384 1035613116 1294255210 590925877 332760439 -124655670 260476081 1872392852 1872154524 71953198 367998008 71476542 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1925475824 885225145 -1598679931 1175232969 -1155471474 1258428229 1506190109 -581879738 -453677277 -745491832 551850122 -773983804 1674451008 991735244 689610531 -888075169 650647287 -1799143719 241406177 1119771930 -1139544385 1139671217 2111406068 -800045143 -10819545 384221478 -1206981816 -1508180856 -149636017 -505031736 -1427697183 -2090462286 -1724769936
mol05 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 -1549103449 1747237384 1035613116 1294255210 590925877 332760439 -124655670 260476081 1872392852 1872154524 71953198 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1925475824 885225145 -1598679931 1175232969 -1155471474 1258428229 1506190109 -581879738 -773983804 1674451008 -453677277 991735244 689610531 -888075169 650647287 -1799143719 241406177 1119771930 -1139544385 1139671217 2111406068 -800045143 -10819545 384221478 -505031736 -1427697183 -1205069278 -2090462286 -1724769936 -1698724694 -2093839777
mol06 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 -1549103449 1747237384 1035613116 1294255210 590925877 332760439 -124655670 260476081 1872392852 -836633685 1872154524 71953198 136597326 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1925475824 885225145 -1598679931 1175232969 -1155471474 1258428229 1506190109 -581879738 -1454111645 289095609 -453677277 -773983804 1674451008 991735244 689610531 -888075169 650647287 -1799143719 241406177 1119771930 -1139544385 1139671217 2111406068 -800045143 -10819545 384221478 1724895444 1790572653 1785362907 -505031736 -1427697183 -1724769936 -2090462286
mol07 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 -1549103449 1618154665 1747237384 1035613116 1294255210 -1539132615 590925877 332760439 -124655670 260476081 1872392852 1872154524 71953198 134603128 1579401580 -1641408229 1997806766 192331578 -1926447181 -98859492 885225145 -1598679931 1175232969 -1199556931 -1155471474 1258428229 -1462709112 1506190109 -1280036918 -1695756380 730557100 -773983804 1674451008 991735244 689610531 -888075169 650647287 1033863897 -1799143719 1119771930 -1139544385 1646645826 1040131620 2111406068 -800045143 1132802373 -10819545 137138064 -505031736 -1427697183 -2090462286 -1724769936
mol08 16 17 1 9 0 32 -1410079687 1747237384 675769755 178336375 -1362791977 -1343180157 1618154665 -1277879912 -1272768868 367998008 -587569116 71476542 -939475899 -1044865801 946229467 193705859 1852108031 557002734 1967609676 -822042736 713358128 -745491832 -964367925 -270564593 551850122 -2122102020 679321016 48182684 210231571 281647195 516865083 1706555375 -362593762 1475536852 -1294566343 461422072 -1516875559 566085027
mol09 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 1499521844 1747237384 1035613116 1294255210 332760439 590925877 -124655670 260476081 1872392852 1872154524 71953198 367998008 71476542 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1262922302 885225145 -1598679931 -402549409 1258428229 -1155471474 1506190109 -1838187238 -745491832 -773983804 551850122 1674451008 991735244 689610531 -888075169 650647287 -1799143719 846704869 1119771930 -1139544385 -886204842 -800045143 2031084733 -10819545 1752802620 -976015189 -1508180856 -794597678 -175681259 -1427697183 -505031736 -1724769936 -2090462286
mol10 16 8 0 17 1 3 5 32 203677720 -1338588315 -1410049896 -828984032 -1029533685 1618154665 1499521844 1747237384 1035613116 1294255210 332760439 590925877 -124655670 260476081 1872392852 1872154524 71953198 134603128 1579401580 -1641408229 1997806766 192331578 -98859492 -1262922302 885225145 -1598679931 -402549409 1258428229 -1155471474 1506190109 -1838187238 1674451008 -773983804 991735244 689610531 -888075169 650647287 -1799143719 846704869 1119771930 -1139544385 -886204842 -800045143 2031084733 -10819545 1752802620 -976015189 -1427697183 -505031736 -792685140 -1724769936 -2090462286 -2093839777
fingerprint/inst/unitTests/report.html 0000644 0001762 0000144 00000011452 11571714217 017772 0 ustar ligges users
RUNIT TEST PROTOCOL--Thu Jun 2 10:05:03 2011
RUNIT TEST PROTOCOL--Thu Jun 2 10:05:03 2011
Number of test functions: 16
Number of errors: 0
Number of failures: 0
1 Test suite
Details
Test Suite: fingerprint fingerprint Unit Tests
Test function regexp: ^test.+
Test file regexp: ^runit.+\.[rR]$
Involved directory:
/Users/guhar/src/cdkr/fingerprint/tests/../inst/unitTests
Name |
Value |
platform |
i386-apple-darwin9.8.0 |
arch |
i386 |
os |
darwin9.8.0 |
system |
i386, darwin9.8.0 |
status |
|
major |
2 |
minor |
11.0 |
year |
2010 |
month |
04 |
day |
22 |
svn rev |
51801 |
language |
R |
version.string |
R version 2.11.0 (2010-04-22) |
host |
Rajarshi-Guha-MacBook-Pro.local |
compiler |
NA |
fingerprint/inst/unitTests/runit.fp.R 0000644 0001762 0000144 00000015566 12224264435 017471 0 ustar ligges users test.new.fp <- function()
{
fp <- new("fingerprint", bits=c(1,2,3,4), nbit=8, provider='rg',name='foo')
checkTrue(!is.null(fp))
}
test.distance1 <- function() {
  ## Two fingerprints with no on-bits in common: distance() with its default
  ## method is expected to return 0.
  lower.half <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  upper.half <- new("fingerprint", nbit=8, bits=c(5,6,7,8))
  checkEquals(distance(lower.half, upper.half), 0)
}
test.distance2 <- function() {
  ## Two fingerprints with identical on-bits: distance() with its default
  ## method is expected to return 1.
  first <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  second <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  checkEquals(distance(first, second), 1)
}
test.and1 <- function() {
  ## AND of two fingerprints with identical on-bits must give back those bits.
  expected <- c(1,2,3,4)
  fp1 <- new("fingerprint",
             bits=expected, nbit=8)
  fp2 <- new("fingerprint",
             bits=expected, nbit=8)
  fpnew <- fp1 & fp2
  bits <- fpnew@bits
  ## Check the length first: all(bits == expected) on its own is TRUE for an
  ## empty result and silently recycles when the lengths differ.
  checkEquals(length(bits), length(expected))
  checkTrue(all(bits == expected))
}
test.and2 <- function() {
  ## AND of two fingerprints with disjoint on-bits: no bit may remain set.
  lower.half <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  upper.half <- new("fingerprint", nbit=8, bits=c(5,6,7,8))
  common <- (lower.half & upper.half)@bits
  checkEquals(length(common), 0)
}
test.or1 <- function() {
  ## OR of two fingerprints with disjoint on-bits must set every bit of both.
  expected <- c(1,2,3,4,5,6,7,8)
  fp1 <- new("fingerprint",
             bits=c(1,2,3,4), nbit=8)
  fp2 <- new("fingerprint",
             bits=c(5,6,7,8), nbit=8)
  fpnew <- fp1 | fp2
  bits <- fpnew@bits
  ## Check the length first: all(bits == expected) on its own is TRUE for an
  ## empty result and silently recycles when the lengths differ.
  checkEquals(length(bits), length(expected))
  checkTrue(all(bits == expected))
}
test.or2 <- function() {
  ## OR of two fingerprints with identical on-bits must give back those bits.
  expected <- c(1,2,3,4)
  fp1 <- new("fingerprint",
             bits=expected, nbit=8)
  fp2 <- new("fingerprint",
             bits=expected, nbit=8)
  fpnew <- fp1 | fp2
  bits <- fpnew@bits
  ## Check the length first: all(bits == expected) on its own is TRUE for an
  ## empty result and silently recycles when the lengths differ.
  checkEquals(length(bits), length(expected))
  checkTrue(all(bits == expected))
}
test.not <- function() {
  ## Negating an 8-bit fingerprint with bits 1-4 set must set bits 5-8, and
  ## negating the result again must restore the original bits.
  fp1 <- new("fingerprint",
             bits=c(1,2,3,4), nbit=8)
  nfp1 <- !fp1
  ## Length checks guard the all(...) comparisons, which are TRUE for empty
  ## vectors and recycle silently when the lengths differ.
  checkEquals(length(nfp1@bits), 4)
  checkTrue(all(nfp1@bits == c(5,6,7,8)))
  restored <- (!nfp1)@bits
  checkEquals(length(restored), length(fp1@bits))
  checkTrue(all(fp1@bits == restored))
}
test.xor1 <- function() {
  ## XOR of two fingerprints with identical on-bits: no bit may remain set.
  first <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  second <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  differing <- xor(first, second)@bits
  checkEquals(length(differing), 0)
}
test.xor2 <- function() {
  ## XOR of two fingerprints with disjoint on-bits: all eight bits are set.
  lower.half <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  upper.half <- new("fingerprint", nbit=8, bits=c(5,6,7,8))
  differing <- xor(lower.half, upper.half)@bits
  checkEquals(length(differing), 8)
  checkTrue(all(differing == c(1,2,3,4,5,6,7,8)))
}
test.fold1 <- function() {
  ## Folding an 8-bit fingerprint whose on-bits are 1-4 must keep bits 1-4.
  expected <- c(1,2,3,4)
  fp1 <- new("fingerprint",
             bits=c(1,2,3,4), nbit=8)
  nfp <- fold(fp1)
  ## Check the length first: all(nfp@bits == expected) on its own is TRUE for
  ## an empty result and silently recycles when the lengths differ.
  checkEquals(length(nfp@bits), length(expected))
  checkTrue(all(nfp@bits == expected))
}
test.fold2 <- function() {
  ## Folding an 8-bit fingerprint with bits 1-4 and 8 set is expected to give
  ## bits 1-4 (no additional bit appears in the folded result).
  expected <- c(1,2,3,4)
  fp1 <- new("fingerprint",
             bits=c(1,2,3,4,8), nbit=8)
  nfp <- fold(fp1)
  ## Check the length first: all(nfp@bits == expected) on its own is TRUE for
  ## an empty result and silently recycles when the lengths differ.
  checkEquals(length(nfp@bits), length(expected))
  checkTrue(all(nfp@bits == expected))
}
test.fp.to.matrix <- function() {
  ## fp.to.matrix() on a list of fingerprints: each row of the expected
  ## matrix is the 0/1 bit pattern of the corresponding fingerprint.
  fingerprints <- list(
    new("fingerprint", nbit=8, bits=c(1,2,3,4)),
    new("fingerprint", nbit=8, bits=c(5,6,7,8)),
    new("fingerprint", nbit=8, bits=c(1,2,3,5,6,7,8)))
  observed <- fp.to.matrix(fingerprints)
  row1 <- c(1,1,1,1,0,0,0,0)
  row2 <- c(0,0,0,0,1,1,1,1)
  row3 <- c(1,1,1,0,1,1,1,1)
  expected <- rbind(row1, row2, row3, deparse.level=0)
  checkTrue(all(observed == expected))
}
test.tversky.1 <- function() {
  ## Tversky with a = b = 1 on fingerprints with identical on-bits gives 1.
  first <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  second <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  similarity <- distance(first, second, "tversky", a=1, b=1)
  checkEquals(1.0, similarity)
}
test.tversky.2 <- function() {
  ## Tversky with a = b = 1 on fingerprints with disjoint on-bits gives 0.
  upper.half <- new("fingerprint", nbit=8, bits=c(5,6,7,8))
  lower.half <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  similarity <- distance(upper.half, lower.half, "tversky", a=1, b=1)
  checkEquals(0.0, similarity)
}
test.tversky.3 <- function() {
  ## Tversky with a = b = 1 must agree with distance()'s default method.
  x <- new("fingerprint", nbit=8, bits=c(4,6,7,8))
  y <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  tversky.value <- distance(x, y, "tversky", a=1, b=1)
  default.value <- distance(x, y)
  checkEquals(tversky.value, default.value)
}
test.tversky.4 <- function() {
  ## Tversky with a = b = 0.5 must agree with the "dice" method.
  x <- new("fingerprint", nbit=8, bits=c(4,6,7,8))
  y <- new("fingerprint", nbit=8, bits=c(1,2,3,4))
  tversky.value <- distance(x, y, "tversky", a=0.5, b=0.5)
  dice.value <- distance(x, y, "dice")
  checkEquals(tversky.value, dice.value)
}
test.fp.sim.matrix <- function() {
  ## Pairwise similarity matrix of three fingerprints, rounded to two
  ## decimals and compared entry by entry with the expected values.
  fingerprints <- list(
    new("fingerprint", nbit=8, bits=c(1,2,3,4)),
    new("fingerprint", nbit=8, bits=c(5,6,7,8)),
    new("fingerprint", nbit=8, bits=c(1,2,3,5,6,7,8)))
  observed <- round(fp.sim.matrix(fingerprints), 2)
  expected <- rbind(c(1,    0,    0.38),
                    c(0,    1,    0.57),
                    c(0.38, 0.57, 1))
  checkTrue(all(observed == expected))
}
test.fp.balance <- function() {
  ## balance() on a 6-bit fingerprint with bits 1-3 set: the result has
  ## length 12 and on-bits 1, 2, 3, 10, 11, 12.
  original <- new("fingerprint", nbit=6, bits=c(1,2,3))
  balanced <- balance(original)
  checkTrue(length(balanced) == 12)
  checkEquals(c(1,2,3,10,11,12), balanced@bits)
}
test.fps.reader <- function() {
  ## Read the bundled bits.fps fixture and verify every record.
  fps.path <- file.path(system.file("unitTests", "bits.fps", package="fingerprint"))
  fps <- fp.read(fps.path, lf=fps.lf)
  checkEquals(323, length(fps))

  ## The first misc entry of each record holds a comma-separated list of bit
  ## positions; the test adds 1 to each before comparing with the parsed bits.
  for (idx in seq_along(fps)) {
    record <- fps[[idx]]
    listed <- strsplit(record@misc[[1]], ",")[[1]]
    expected <- sort(as.numeric(listed) + 1)
    observed <- sort(record@bits)
    checkEquals(expected, observed, msg = sprintf("%s had a mismatch in bit positions", record@name))
  }
}
#######################################
##
## Feature vector tests
##
#######################################
test.feature <- function() {
  ## A feature created without a count reports 1; an explicit count is kept.
  implicit <- new('feature', feature='F1')
  checkEquals(1, implicit@count)

  explicit <- new('feature', feature='F2', count=12L)
  checkEquals(12, explicit@count)
}
test.feature.c <- function() {
  ## c() on two feature objects yields a plain list holding both, in order.
  first <- new('feature', feature='F1', count=2L)
  second <- new('feature', feature='F2', count=3L)
  combined <- c(first, second)

  checkEquals(2, length(combined))
  checkEquals("list", class(combined))
  checkTrue(identical(first, combined[[1]]))
  checkTrue(identical(second, combined[[2]]))
}
test.feature.fp <- function() {
  ## A featvec built from ten features reports length 10.
  make.feature <- function(label) new('feature', feature=label, count=1L)
  ten.features <- sapply(letters[1:10], make.feature)
  vec <- new('featvec', features=ten.features)
  checkEquals(10, length(vec))
}
test.feature.dist1 <- function() {
  ## Tanimoto between two feature vectors with the same ten features is 1.
  make.feature <- function(label) new('feature', feature=label, count=1L)
  left <- new('featvec', features=sapply(letters[1:10], make.feature))
  right <- new('featvec', features=sapply(letters[1:10], make.feature))
  similarity <- distance(left, right, method='tanimoto')
  checkEquals(1, similarity)
}
test.feature.dist2 <- function() {
  ## Tanimoto between feature vectors sharing no feature (a-j vs k-t) is 0.
  make.feature <- function(label) new('feature', feature=label, count=1L)
  left <- new('featvec', features=sapply(letters[1:10], make.feature))
  right <- new('featvec', features=sapply(letters[11:20], make.feature))
  similarity <- distance(left, right, method='tanimoto')
  checkEquals(0, similarity)
}
test.featvec.read <- function() {
  ## Read the bundled test.ecfp fixture as feature vectors and check both the
  ## number of records and the length reported for each one.
  ecfp.path <- file.path(system.file("unitTests", "test.ecfp", package="fingerprint"))
  records <- fp.read(ecfp.path, lf=ecfp.lf, binary=FALSE)
  checkEquals(10, length(records))

  expected.sizes <- c(58L, 38L, 43L, 66L, 62L, 66L, 65L, 44L, 66L, 61L)
  observed.sizes <- sapply(records, length)
  checkTrue(identical(expected.sizes, observed.sizes))
}
tester.getters.setters <- function() {
  ## Exercise the feature()/count() accessors and their replacement forms.
  ## NOTE(review): the name starts with "tester" rather than "test."; it is
  ## still matched by a `^test.+` function pattern -- confirm that is intended.
  feat <- new("feature", feature='ABCD', count=1L)
  checkEquals("ABCD", feature(feat))
  checkEquals(1, count(feat))

  ## Overwrite both fields, then read them back.
  feature(feat) <- 'UXYZ'
  count(feat) <- 10
  checkEquals("UXYZ", feature(feat))
  checkEquals(10, count(feat))
}
fingerprint/inst/unitTests/bits.fps 0000644 0001762 0000144 00000064501 11571516543 017251 0 ustar ligges users #FPS1
#num_bits=166
#software=OEChem/1.7.4 (20100809)
#type=RDMACCS-OpenEye/1
#source=bits.smi
#date=2011-06-01T12:49:56
010000000000000000000000000000000000000000 Fake-0 0
000000000008000000000000000000000000000000 Fake-43 43
000000000400000000000000000000000000000000 [Li] 34
000200000000000000000000000000000000000000 [Be] 9
000002000000000000000000000000000000000000 [B] 17
000000000000000000000000000000000000000001 [N] 160
000000000000000000000000000000000000000008 [O] 163
000000000002000000000000000000002000000000 [F] 41,133
000000000400000000000000000000000000000000 [Na] 34
000200000000000000000000000000000000000000 [Mg] 9
000002000000000000000000000000000000000000 [Al] 17
000008000000000000000000000000000000000000 [Si] 19
000000100000000000000000000000000000000000 [P] 28
000000000000000000008000000000000000000000 [S] 87
000000000000000000000000400000002000000000 [Cl] 102,133
000000000400000000000000000000000000000000 [K] 34
000200000000000000000000000000000000000000 [Ca] 9
100000000000000000000000000000000000000000 [Sc] 4
100000000000000000000000000000000000000000 [Ti] 4
400000000000000000000000000000000000000000 [V] 6
400000000000000000000000000000000000000000 [Cr] 6
400000000000000000000000000000000000000000 [Mn] 6
000100000000000000000000000000000000000000 [Fe] 8
000100000000000000000000000000000000000000 [Co] 8
000100000000000000000000000000000000000000 [Ni] 8
000800000000000000000000000000000000000000 [Cu] 11
000800000000000000000000000000000000000000 [Zn] 11
000002000000000000000000000000000000000000 [Ga] 17
040000000000000000000000000000000000000000 [Ge] 2
040000000000000000000000000000000000000000 [As] 2
040000000000000000000000000000000000000000 [Se] 2
000000000020000000000000000000002000000000 [Br] 45,133
000000000400000000000000000000000000000000 [Rb] 34
000200000000000000000000000000000000000000 [Sr] 9
100000000000000000000000000000000000000000 [Y] 4
100000000000000000000000000000000000000000 [Zr] 4
400000000000000000000000000000000000000000 [Nb] 6
400000000000000000000000000000000000000000 [Mo] 6
400000000000000000000000000000000000000000 [Tc] 6
000100000000000000000000000000000000000000 [Ru] 8
000100000000000000000000000000000000000000 [Rh] 8
000100000000000000000000000000000000000000 [Pd] 8
000800000000000000000000000000000000000000 [Ag] 11
000800000000000000000000000000000000000000 [Cd] 11
000002000000000000000000000000000000000000 [In] 17
040000000000000000000000000000000000000000 [Sn] 2
040000000000000000000000000000000000000000 [Sb] 2
040000000000000000000000000000000000000000 [Te] 2
000000040000000000000000000000002000000000 [I] 26,133
000000000400000000000000000000000000000000 [Cs] 34
000200000000000000000000000000000000000000 [Ba] 9
200000000000000000000000000000000000000000 [La] 5
200000000000000000000000000000000000000000 [Ce] 5
200000000000000000000000000000000000000000 [Pr] 5
200000000000000000000000000000000000000000 [Nd] 5
200000000000000000000000000000000000000000 [Pm] 5
200000000000000000000000000000000000000000 [Sm] 5
200000000000000000000000000000000000000000 [Eu] 5
200000000000000000000000000000000000000000 [Gd] 5
200000000000000000000000000000000000000000 [Tb] 5
200000000000000000000000000000000000000000 [Dy] 5
200000000000000000000000000000000000000000 [Ho] 5
200000000000000000000000000000000000000000 [Er] 5
200000000000000000000000000000000000000000 [Tm] 5
200000000000000000000000000000000000000000 [Yb] 5
200000000000000000000000000000000000000000 [Lu] 5
100000000000000000000000000000000000000000 [Hf] 4
400000000000000000000000000000000000000000 [Ta] 6
400000000000000000000000000000000000000000 [W] 6
400000000000000000000000000000000000000000 [Re] 6
000100000000000000000000000000000000000000 [Os] 8
000100000000000000000000000000000000000000 [Ir] 8
000100000000000000000000000000000000000000 [Pt] 8
000800000000000000000000000000000000000000 [Au] 11
000800000000000000000000000000000000000000 [Hg] 11
040002000000000000000000000000000000000000 [Tl] 2,17
040000000000000000000000000000000000000000 [Pb] 2
040000000000000000000000000000000000000000 [Bi] 2
000000000400000000000000000000000000000000 [Fr] 34
000200000000000000000000000000000000000000 [Ra] 9
080000000000000000000000000000000000000000 [Ac] 3
080000000000000000000000000000000000000000 [Th] 3
080000000000000000000000000000000000000000 [Pa] 3
080000000000000000000000000000000000000000 [U] 3
080000000000000000000000000000000000000000 [Np] 3
080000000000000000000000000000000000000000 [Pu] 3
080000000000000000000000000000000000000000 [Am] 3
080000000000000000000000000000000000000000 [Cm] 3
080000000000000000000000000000000000000000 [Bk] 3
080000000000000000000000000000000000000000 [Cf] 3
080000000000000000000000000000000000000000 [Es] 3
080000000000000000000000000000000000000000 [Fm] 3
080000000000000000000000000000000000000000 [Md] 3
080000000000000000000000000000000000000000 [No] 3
0a0000000000000000000000000000000000000000 [Lr] 3,1
000000000000000000000000000000000000000016 c1ccccc1 161,162,164
000000000000000000000000000000100000010036 c1ccccc1.c1ccccc1 124,144,161,162,164,165
000000100000000000000000020000000001000016 c1ccccp1 28,97,136,161,162,164
00000000000000010000048000000000000100001a c1ccco1 56,82,95,163,136,161,164
000000000800000000008480000000000001000012 c1cccs1 35,82,87,95,136,161,164
040000000000000000000480000000000001000012 [se]1cccc1 2,82,95,136,161,164
040000000000000000000480000000000001000012 [te]1cccc1 2,82,95,136,161,164
000400000000000000000000000020000000040010 C1CCC1 10,117,146,164
800400000004280038500020000080090621400011 N1NNN1 7,10,42,51,53,67,68,69,76,78,93,119,120,123,129,130,136,141,150,160,164
0010800000000000500220302000000a000418a009 ON(C)C 12,23,68,70,73,85,92,93,101,121,123,138,147,148,157,159,160,163
0010840000100000503400602400c04b0065082019 On1cncccc1 12,18,23,44,68,70,74,76,77,93,94,98,101,118,119,120,121,123,126,136,138,141,142,147,157,160,163,164
0010800000100001400400302600800b000108b01d c1ccon(C)c1 12,23,44,56,70,74,92,93,97,98,101,119,120,121,123,136,147,156,157,159,160,162,163,164
0010840000000001410004a03401810b008108181b c1cccon2c13.c2cc3 12,18,23,56,64,70,82,93,95,98,100,101,104,112,119,120,121,123,136,143,147,155,156,160,161,163,164
0010000000000140400220302000400a000018a009 O=[N+](C)C 12,48,62,70,73,85,92,93,101,118,121,123,147,148,157,159,160,163
0010800002000100502000302000400a000408a009 O[N+](=C)C 12,23,33,48,68,70,77,92,93,101,118,121,123,138,147,157,159,160,163
00200000000000001c008000000000080400000000 SS 13,66,67,68,87,123,130
004000000004000000000000000200040404025008 OC(O)O 14,42,105,122,130,138,145,156,158,163
004000000004000000000000000200040404025208 O=C(O)O 14,42,105,122,130,138,145,153,156,158,163
004000000004000000000000000200040404025208 OC(=O)O 14,42,105,122,130,138,145,153,156,158,163
004000000004000000000000000200040404025208 OC(O)=O 14,42,105,122,130,138,145,153,156,158,163
00400000000000018000048000028144004102421a c1coc(=O)o1 14,56,71,82,95,105,112,119,122,126,136,142,145,153,158,161,163,164
008020000000000100002000001000000001041118 C1CO1 15,21,56,85,108,136,146,152,156,163,164
000020000000000000000000000020000000040010 C1CC1 21,117,146,164
000000000200000000000000040000000000000000 C=C 33,98
000001000000000000000000000000000000000000 C#C 16
000004000000000000000000000020800100040010 C1CCCCCC1 18,117,127,128,146,164
00000000000000000000000000082e800100148400 CCCCCCC 107,113,114,115,117,127,128,146,148,154,159
000010000214000000080808042010000404403809 C=C(N)O 20,33,42,44,75,83,91,98,109,116,130,138,150,155,156,157,160,163
000000000220000000080000040400002000000016 C=C(c1ccccc1)Br 33,45,75,98,106,133,161,162,164
000040000004000000000800002210040404407809 NC(O)O 22,42,83,105,109,116,122,130,138,150,155,156,157,158,160,163
000040000004000000000800002210040404407a09 NC(=O)O 22,42,83,105,109,116,122,130,138,150,153,155,156,157,158,160,163
000040000004000000200000002250040404405809 N=C(O)O 22,42,77,105,109,116,118,122,130,138,150,155,156,158,160,163
0000400000000801018004e80132936d006110dd1b CCOc1nnc(o1)C 22,51,56,64,79,82,91,93,94,95,96,105,108,109,112,113,116,119,120,122,123,125,126,136,141,142,148,152,154,155,156,158,159,160,161,163,164
000000010004000000100800000200000420402801 NC(N)N 24,42,76,83,105,130,141,150,155,157,160
00000002000002000008008604012040014484101c C1CC2=CC(C1C2)O 25,49,75,89,90,95,98,104,117,126,128,138,142,146,151,156,162,163,164
000000080004000000100a00080000000420402501 NCN 27,42,76,81,83,99,130,141,150,152,154,157,160
000000080004000000100a00080000000420402501 NC([H])N 27,42,76,81,83,99,130,141,150,152,154,157,160
000000080004000000100a00080000000420402501 NC([H])([H])N 27,42,76,81,83,99,130,141,150,152,154,157,160
000008200000000000022010008000000010188000 C[Si](C)(C)C 19,29,73,85,92,111,140,147,148,159
000000400002000010000820000000082000400001 NF 30,41,68,83,93,123,133,150,160
000000800100000014008830000000080000408001 CSN 31,32,66,68,83,87,92,93,123,150,159,160
00000000010000001c008820000000080400400001 NS 32,66,67,68,83,87,93,123,130,150,160
000000000100000014018020000040080000400001 N=S 32,66,68,72,87,93,118,123,150,160
000000000200000000000000040000000000000000 C=C 33,98
000000000200000000000000040000000000000000 [H]C([H])=C([H])[H] 33,98
000000000200000000200000000040000000400001 C=N 33,77,118,150,160
000000000000000000200000000040000000400001 [CH]=N 77,118,150,160
000000000000000000200000000040000000400001 [CH]=N 77,118,150,160
00000000080000000000a480000020000103040110 S1CCCC1 35,82,85,87,95,117,128,136,137,146,152,164
00000000080000000000a480000020000103040110 S1C([H])C([H])C([H])C1 35,82,85,87,95,117,128,136,137,146,152,164
00000000080000000000a480000020000103040110 S1CCCC1[H] 35,82,85,87,95,117,128,136,137,146,152,164
000000000800000000008480000000000001000012 s1cccc1 35,82,87,95,136,161,164
000000001004000000100800002210000424403809 NC(O)N 36,42,76,83,105,109,116,130,138,141,150,155,156,157,160,163
000000001004000000300800002250000424403809 N=C(O)N 36,42,76,77,83,105,109,116,118,130,138,141,150,155,156,157,160,163
000000001004000000100800002210000420402a09 NC(=O)N 36,42,76,83,105,109,116,130,141,150,153,155,157,160,163
000000801940300015d08da0012291495461402a1b c1(nc(=O)[nH]s1)N 31,32,35,36,46,52,53,64,66,68,76,78,79,80,82,83,87,93,95,96,105,109,112,116,119,120,123,126,130,132,134,136,141,142,150,153,155,157,160,161,163,164
00000000200400000010080000000000042040a801 NC(C)N 37,42,76,83,130,141,150,155,157,159,160
00000000200400000030080000004000042040a801 N=C(C)N 37,42,76,77,83,118,130,141,150,155,157,159,160
000010002214000000180800040000000420402801 NC(=C)N 20,33,37,42,44,75,76,83,98,130,141,150,155,157,160
000000802940080025d08ca0000080095221402813 c1c(nns1)N 31,32,35,37,46,51,64,66,69,76,78,79,82,83,87,93,95,119,120,123,129,132,134,136,141,150,155,157,160,161,164
000000002000080011d004e0010080090021408a1b Cc1[nH]nc(n1)C=O 37,51,64,68,76,78,79,82,93,94,95,96,119,120,123,136,141,150,153,155,159,160,161,163,164
00000000c080411a1401800020820008820c0a4008 OS(=O)(=O)[O-] 38,39,47,48,54,57,59,60,66,68,72,87,101,105,111,123,129,135,138,139,145,147,158,163
00000000c080401a140180002002000802040a4008 OS(=O)[O] 38,39,47,54,57,59,60,66,68,72,87,101,105,123,129,138,145,147,158,163
00000000c080541a14018202a08220088e060e4508 C(CS(=O)(=O)O)S 38,39,47,50,52,54,57,59,60,66,68,72,81,87,89,101,103,105,111,117,123,129,130,131,135,137,138,145,146,147,152,154,158,163
00000000800000001c008000200000080404000008 S-O 39,66,67,68,87,101,123,130,138,163
00000000800000001c008000200000080404000008 [H]S-O[H] 39,66,67,68,87,101,123,130,138,163
000000000001000000000000000000000000000001 C#N 40,160
000000000200000000200000000040000000400001 C=N 33,77,118,150,160
000000000200000000200000000040000000400001 [H]C=N[H] 33,77,118,150,160
000000000200000000200000000040000000400001 [H]C([H])=N[H] 33,77,118,150,160
000000080004000000000a00083010000404403509 NCO 27,42,81,83,99,108,109,116,130,138,150,152,154,156,157,160,163
000000080004000000000a00083010000404403509 [H][NH]CO 27,42,81,83,99,108,109,116,130,138,150,152,154,156,157,160,163
000000080004000000000a00083010000404403509 [H][N]([H])CO 27,42,81,83,99,108,109,116,130,138,150,152,154,156,157,160,163
000000000210000000000800040000000000402001 C=CN 33,44,83,98,150,157,160
000000000210000000200000040040000000400001 C=C=N 33,44,77,98,118,150,160
000000080044000000008a00080000000400402501 SCN 27,42,46,81,83,87,99,130,150,152,154,157,160
000080008044000054008820200000280600400009 SON 23,39,42,46,66,68,70,83,87,93,101,123,125,129,130,150,160,163
000000000040000000018800000000000000402001 S=CN 46,72,83,87,150,157,160
000000000044000000208000000040000400400001 SC=N 42,46,77,87,118,130,150,160
000000000040000000218000000040000000400001 S=C=N 46,72,77,87,118,150,160
0010800088400410550484a02400816b024108001b c1ccc[sH]1On1cccc1 12,23,35,39,46,50,60,64,66,68,70,74,82,87,93,95,98,101,112,119,120,121,123,125,126,129,136,142,147,160,161,163,164
000100400000010000000000400000082000000000 Cl[Rh+2] 8,30,48,102,123,133
000000000000010000000000000000000000000008 [O-2] 48,163
000000000000010000008000000000000000000000 [35S-2] 48,87
000100000000010000000000000000000000000000 [Pt+2] 8,48
0000000002000200000a0000040000000000108000 C=C(C)C 33,49,73,75,98,148,159
000000000200020000080000040000000000008016 C=C(C)c1ccccc1 33,49,75,98,159,161,162,164
000000008000040014008010200000080004008008 CSO 39,50,66,68,87,92,101,123,138,159,163
00000000800004841400810020000008000400001e c1ccccc1SO 39,50,58,63,66,68,80,87,101,123,138,161,162,163,164
000000000000080018000820000000080420400001 NN 51,67,68,83,93,123,130,141,150,160
000000000000080018000820000000080420400001 [H]NN[H] 51,67,68,83,93,123,130,141,150,160
000000000000080018000820000000080420400001 [H]N([H])N([H])[H] 51,67,68,83,93,123,130,141,150,160
000000000000080018000020000040080420400001 N=N 51,67,68,93,118,123,130,141,150,160
000000000000080018000020000040080420400001 N=N[H] 51,67,68,93,118,123,130,141,150,160
000000000000080018000020000040080420400001 [H]N=N[H] 51,67,68,93,118,123,130,141,150,160
000000000000080000000020000000080020000001 N#N 51,93,123,141,160
000000000000080001000020020080090021000017 c1cnncc1 51,64,93,97,119,120,123,136,141,160,161,162,164
000000000000100000800a02884020000422442501 NCCCN 52,79,81,83,89,99,103,110,117,130,137,141,146,150,152,154,157,160
000000000000100000800a02884020000422442501 N([H])([H])C([H])([H])C([H])([H])C([H])([H])N([H])([H]) 52,79,81,83,89,99,103,110,117,130,137,141,146,150,152,154,157,160
000000001004100001900009032291418465425a1f c1c([nH]c(=O)[nH]c1=O)O 36,42,52,64,76,79,88,91,96,97,105,109,112,116,119,120,126,130,135,136,138,141,142,145,150,153,155,156,158,160,161,162,163,164
000000000000200080000200801020000c06045508 OCCO 53,71,81,103,108,117,130,131,137,138,146,152,154,156,158,163
000000002002300001d044c8132791512461400a1f c1[nH]c2c(n1)[nH]c(nc2=O)F 37,41,52,53,64,76,78,79,82,86,91,94,95,96,97,100,104,105,106,109,112,116,119,120,124,126,130,133,136,141,142,150,153,155,160,161,162,163,164
000000008004400214008000200000080604004008 OSO 39,42,54,57,66,68,87,101,123,129,130,138,158,163
0000800000048000700000302000000a060408e009 ON(O)C 23,42,55,68,69,70,92,93,101,121,123,129,130,138,147,157,158,159,160,163
0000800000048000700000202000000a560408681f ON(O)c1ccccc1 23,42,55,68,69,70,93,101,121,123,129,130,132,134,138,147,155,157,158,160,161,162,163,164
00000000000000010000200002102080090304111c O1CCCCC1 56,85,97,108,117,127,128,131,136,137,146,152,156,162,163,164
00000000000000010000048000000000000100001a o1cccc1 56,82,95,136,161,163,164
080000100000000214008000000000080200000000 PS[U] 3,28,57,66,68,87,123,129
000000000000008400008100000000000000000016 Sc1ccccc1 58,63,80,87,161,162,164
000000000000000804018000200000080000000008 S=O 59,66,72,87,101,123,163
00000000800000001c008000200000080404000008 SO 39,66,67,68,87,101,123,130,138,163
000000088040641a14018a40a80000080e04486509 C(N)S(=O)O 27,39,46,50,53,54,57,59,60,66,68,72,81,83,87,94,99,101,103,123,129,130,131,138,147,150,152,154,157,158,160,163
00000000000000100002a010000000000010188000 CS(C)C 60,73,85,87,92,140,147,148,159
000000400820449e0401c58060860008a28128401a c1c(c(sc1S(=O)(=O)Cl)Br)Br 30,35,45,50,54,57,58,59,60,63,66,72,80,82,86,87,95,101,102,105,106,111,123,129,133,135,136,143,147,149,158,161,163,164
0000000008400021010085c000008011000180081b c1cc(oc1)c2nccs2 35,46,56,61,64,80,82,87,94,95,119,120,124,136,151,155,160,161,163,164
000020000000002002000000048000000000008016 CC1(C=C1)c2ccccc2 21,61,65,98,111,159,161,162,164
000000000000004050000020200040080000400009 N=O 62,68,70,93,101,118,123,150,160,163
0010800000000140414400202300c14b026108401f c1c[n+](=O)ccn1[O-] 12,23,48,62,64,70,74,78,93,96,97,101,112,118,119,120,121,123,126,129,136,141,142,147,158,160,161,162,163,164
000020000000008000008102800000000000040010 C1CC1S 21,63,80,87,89,103,146,164
00000000000000850000858000000000000100001a c1cocc1S 56,58,63,80,82,87,95,136,161,163,164
88042000080000b0140083028000a08801030c0110 C1CC1S1[U]CC1 3,7,10,21,35,60,61,63,66,68,80,81,87,89,103,117,119,123,127,128,136,137,146,147,152,164
000000000000000002020000008000000010108000 CC(C)(C)C 65,73,111,140,148,159
00000010000000001c008000000000080400000000 PS 28,66,67,68,87,123,130
000000000100000014018020000040080000400001 S=N 32,66,68,72,87,93,118,123,150,160
00000000880004010400800026008008000100101c c1ccosc1 35,39,50,56,66,87,97,98,101,119,123,136,156,162,163,164
000000100000000018000000000000080400000000 PP 28,67,68,123,130
000000100000000018000000000000080400000000 P=P 28,67,68,123,130
000000500002000010000000000000082000000000 PF 28,30,41,68,123,133
000000100000000018000000200000080404000008 PO 28,67,68,101,123,130,138,163
000000100000000010000000200000080000000008 P=O 28,68,101,123,163
000000100000000000000010000000000000008000 PC 28,92,159
000000100004000038000020000000080600400001 PNP 28,42,67,68,69,93,123,129,130,150,160
000000100000000018000820000000080400400001 [H]NP 28,67,68,83,93,123,130,150,160
0000800000000940e10400202200d14b02e128481f c1cc[n+](=O)n(c1)[O-] 23,48,51,62,64,69,70,71,74,93,97,101,112,116,118,119,120,121,123,126,129,136,141,142,143,147,149,155,158,160,161,162,163,164
000000002000200141d00ce02100800956a160281b c1(c(non1)N)N 37,53,56,64,70,76,78,79,82,83,93,94,95,96,101,119,120,123,129,130,132,134,136,141,143,149,150,155,157,160,161,163,164
080000000000200090000000200000080604004008 O[U][Np]O 3,53,68,71,101,123,129,130,138,158,163
000000000000000000020000000002000000108400 CCC 73,113,148,154,159
000000000000000000020000000002000000108400 C([2H])([2H])([2H])C([2H])([2H])C([2H])([2H])[2H] 73,113,148,154,159
000000000000000000008200800026000000048500 SCCC 81,87,103,113,114,117,146,152,154,159
00802000000000000004301008000c0300010ca111 CN1CC1 15,21,74,84,85,92,99,114,115,120,121,136,146,147,152,157,159,160,164
008020000000000000002200080000010001442111 [H]N1CC1 15,21,81,85,99,120,136,146,150,152,157,160,164
000000000600000000080000040000000000008000 C=C([Li])C 33,34,75,98,159
000000000400000000020000000000000000108000 CC([Li])C 34,73,148,159
000000000200000000000000040000000000008000 C=C([H])C 33,98,159
400100000000200010400820000000080620400001 N[Fe][W]N 6,8,53,68,78,83,93,123,129,130,141,150,160
0000000000000000014414800840a01300230c2513 n12cccc1.C2C3.n13cccc1 64,74,78,82,84,95,99,110,117,119,120,121,124,136,137,141,146,147,152,154,157,160,161,164
0000000000000000004434800840a08301230c2511 N12CCCC1.C2C3.N13CCCC1 74,78,82,84,85,95,99,110,117,119,120,121,127,128,136,137,141,146,147,152,154,157,160,164
00000010810030021c808820200010280620400009 NSOPN 28,32,39,52,53,57,66,67,68,79,83,87,93,101,116,123,125,129,130,141,150,160,163
0000000000000000018414800840a01300230c2513 n12cccc1.C2CC3.n13cccc1 64,74,79,82,84,95,99,110,117,119,120,121,124,136,137,141,146,147,152,154,157,160,161,164
0000000000000000008434800840a08301230c2511 N12CCCC1.C2CC3.N13CCCC1 74,79,82,84,85,95,99,110,117,119,120,121,127,128,136,137,141,146,147,152,154,157,160,164
000000000000000000008200000002000000008500 SC(C)[H] 81,87,113,152,154,159
000000080002000000000a00080000002000402501 NCF 27,41,81,83,99,133,150,152,154,157,160
000000000002000000000010000000002000008000 [H]CF 41,92,133,159
000000000002000000000000000002002000008500 CCF 41,113,133,152,154,159
000000080002000000000000000000002000000500 FCF 27,41,133,152,154
000000000000000000022010000000200000109008 COC 73,85,92,125,148,156,159,163
000000080002000000002010001004202000009508 COCF 27,41,85,92,108,114,125,133,152,154,156,159,163
000000000200000000000010040000200000009008 COC=C 33,92,98,125,156,159,163
000001000000000000000010000000200000009008 COC#C 16,92,125,156,159,163
000020000002000000004000000400002000040010 FC1CC1 21,41,86,106,133,146,164
000020000000000000004000400400002000040010 ClC1CC1 21,86,102,106,133,146,164
000020000020000000004000000400002000040010 BrC1CC1 21,45,86,106,133,146,164
000020040000000000004000000400002000040010 [I]C1CC1 21,26,86,106,133,146,164
000000100004000018000a22800002080600408501 NPPCC 28,42,67,68,81,83,89,93,103,113,123,129,130,150,152,154,159,160
000000100004000018000202a00002080604008508 OPPCC 28,42,67,68,81,89,101,103,113,123,129,130,138,152,154,159,163
000000100000000018000820000000080600408001 NPP=CC 28,67,68,83,93,123,129,130,150,159,160
000000100004000018000830000000080600408001 NPPC[H] 28,42,67,68,83,92,93,123,129,130,150,159,160
000000500020000018000200800002082600008500 BrPPCC 28,30,45,67,68,81,103,113,123,129,130,133,152,154,159
80040000000000000000020288402c01000144a911 CC1CCN1 7,10,81,89,99,103,110,114,115,117,120,136,146,150,152,155,157,159,160,164
800400000000200000000246884020050405447b19 C1CN[C@H]1C(=O)O 7,10,53,81,89,90,94,99,103,110,117,120,122,130,136,138,146,150,152,153,155,156,157,158,160,163,164
00002000000100000240080280c000001020442811 C1CC1(C#N)N 21,40,65,78,83,89,103,110,111,132,141,146,150,155,157,160,164
000000100004000010000222000042080600408501 N=PPCC 28,42,68,81,89,93,113,118,123,129,130,150,152,154,159,160
000000100000000010000822000002080200408501 NP=PCC 28,68,83,89,93,113,123,129,150,152,154,159,160
000000100000000010000200200002080200008508 O=PPCC 28,68,81,101,113,123,129,152,154,159,163
0000001000802000900000062092022c860c0ad708 CCOP(=O)(C(=O)O)O 28,47,53,68,71,89,90,101,105,108,111,113,122,123,125,129,130,135,138,139,145,147,152,153,154,156,158,159,163
000480000000000050200026204060081004040819 C1CC(=NO)C1 10,23,68,70,77,89,90,93,101,110,117,118,123,132,138,146,155,160,163,164
000000000840200000408fce88e8b801042344ab19 CC1(NCCS1)C(=O)N 35,46,53,78,80,81,82,83,87,89,90,91,94,95,99,103,107,109,110,111,115,116,117,119,120,130,136,137,141,146,150,152,153,155,157,159,160,163,164
0000000800000001000026c68870b0010903443119 C1COCN1 27,56,81,82,85,89,90,94,95,99,103,108,109,110,116,117,119,120,128,131,136,137,146,150,152,156,157,160,163,164
000400000000008000008106800020000000040010 C1CC(C1)S 10,63,80,87,89,90,103,117,146,164
000020000200020000080206041000000104201518 C=C1CC1CO 21,33,49,75,81,89,90,98,108,128,138,149,152,154,156,163,164
000020000000000000004206409400002104201518 C1C(C1(Cl)Cl)CO 21,81,86,89,90,102,106,108,111,128,133,138,149,152,154,156,163,164
0000040000100001002000080420d001000100b819 c1cccoc(C)n1 18,44,56,77,91,98,109,116,118,119,120,136,155,156,157,159,160,163,164
000000100000000000000010000000000000008000 PC 28,92,159
000000100000000000000010000000000000008000 P[CH3] 28,92,159
000000100000000000000010000000000000008000 P[C]([H])([H])[H] 28,92,159
000000100200000000000000000000000000000000 P=C 28,33
000000100000000000000020000000080000000001 [P]#[N] 28,93,123,160
000000100000000000000020000000080000000001 P#N 28,93,123,160
000000000000000000000000000000000000400001 [H]N[H] 150,160
0000000000000000010416c08850200308070c351b c1cccn1CCO 64,74,81,82,84,94,95,99,103,108,110,117,120,121,131,136,137,138,146,147,152,154,156,157,160,161,163,164
00000008004000000104b68209100003000708351b c1cccn1CSCO 27,46,64,74,81,82,84,85,87,89,95,96,99,108,120,121,136,137,138,147,152,154,156,157,160,161,163,164
000000080044000000008a00080000000400402501 SCN 27,42,46,81,83,87,99,130,150,152,154,157,160
00000000000000000104148008000203000108a513 CCn1cccc1 64,74,82,84,95,99,113,120,121,136,147,152,154,157,159,160,161,164
00000000000000000000081000000000000040a001 [H]CN 83,92,150,157,159,160
000000000044000000208000000040000400400001 SC=N 42,46,77,87,118,130,150,160
000004000000000000000000000020800100040010 C1CCCCCC1 18,117,127,128,146,164
000000000000000000000000100020800100040010 C1CCCCCCC1 100,117,127,128,146,164
000000000000000000000000100020800100040010 C1CCCCCCCC1 100,117,127,128,146,164
000000000000000000000000100020800100040010 C1CCCCCCCCC1 100,117,127,128,146,164
000000000000000000000000100020800100040010 C1CCCCCCCCCC1 100,117,127,128,146,164
000000000000000000000000100020800100040010 C1CCCCCCCCCCC1 100,117,127,128,146,164
000000000000000000000000100020800100040010 C1CCCCCCCCCCCC1 100,117,127,128,146,164
000000000000000000000000100020800100040010 C1CCCCCCCCCCCCC1 100,117,127,128,146,164
000000000000000000000000000020800100040010 C1CCCCCCCCCCCCCC1 117,127,128,146,164
000000000000000000000000000020800100040010 C1CCCCCCCCCCCCCCC1 117,127,128,146,164
000000080004000000008200001000000404001508 S[CH2]O 27,42,81,87,108,130,138,152,154,156,163
000000000000000000000000000000000004001008 [H][CH]O 138,156,163
000000100000000000000200000002000000008500 CCP 28,81,113,152,154,159
000000000000000000000000000000000000008000 C[CH][2H] 159
00000000000000000002201000000000000050a001 [H]N(C)C 73,85,92,148,150,157,159,160
000000000000010000000000000000000004000008 [O-][2H] 48,138,163
000000000800000000008580100180100001000012 c1csc2c1csc2 35,80,82,87,95,100,104,119,124,136,161,164
000400000200020000080202041000800004001518 C=C1CC(C1)CO 10,33,49,75,81,89,98,108,127,138,152,154,156,163,164
80040000000000010000000000182a800901849518 CCC1CCO1 7,10,56,107,108,113,115,117,127,128,131,136,146,151,152,154,156,159,163,164
00802000000000010200000000982e800901949518 CCCC1(CO1)C 15,21,56,65,107,108,111,113,114,115,117,127,128,131,136,146,148,151,152,154,156,159,163,164
000020000200000000000000040020800100040410 C=CCCC1CC1 21,33,98,117,127,128,146,154,164
000000000200000000000000000000000000000208 O=C 33,153,163
000000000200000000000000000000008000004228 O=C.O=C 33,135,153,158,163,165
080000000004380038d00020020080090621400015 N1NN[U]NN1 3,42,51,52,53,67,68,69,76,78,79,93,97,119,120,123,129,130,136,141,150,160,162,164
000000000000000000008000000000000000000000 S[CH][2H] 87
000000000000000000000000000000000004000008 [OH] 138,163
000000000000000000000000000000000004000008 [O][3H] 138,163
000000000000000000000000000000000004000008 [3H][O][3H] 138,163
000000000000000000000000000000000000108000 CC 148,159
000000000000000000020000000002000000108400 CCC 73,113,148,154,159
000000000000000000000000000026000000148400 CCCC 113,114,117,146,148,154,159
000000000000000000000000000000000000108020 C.C.C.C.C.C.C.C.C.C.C.C 148,159,165
000000000000000000000000000000000000108000 C1.C1 148,159
fingerprint/inst/unitTests/report.txt 0000644 0001762 0000144 00000002461 11571714217 017645 0 ustar ligges users RUNIT TEST PROTOCOL -- Thu Jun 2 10:05:03 2011
***********************************************
Number of test functions: 16
Number of errors: 0
Number of failures: 0
1 Test Suite :
fingerprint fingerprint Unit Tests - 16 test functions, 0 errors, 0 failures
Details
***************************
Test Suite: fingerprint fingerprint Unit Tests
Test function regexp: ^test.+
Test file regexp: ^runit.+\.[rR]$
Involved directory:
/Users/guhar/src/cdkr/fingerprint/tests/../inst/unitTests
---------------------------
Test file: /Users/guhar/src/cdkr/fingerprint/tests/../inst/unitTests/runit.fp.R
test.and1: (1 checks) ... OK (0 seconds)
test.and2: (1 checks) ... OK (0 seconds)
test.distance1: (1 checks) ... OK (0 seconds)
test.distance2: (1 checks) ... OK (0 seconds)
test.fold1: (1 checks) ... OK (0.01 seconds)
test.fold2: (1 checks) ... OK (0.01 seconds)
test.fp.balance: (2 checks) ... OK (0 seconds)
test.fp.sim.matrix: (1 checks) ... OK (0.01 seconds)
test.fp.to.matrix: (1 checks) ... OK (0 seconds)
test.fps.reader: (324 checks) ... OK (0.56 seconds)
test.new.fp: (1 checks) ... OK (0 seconds)
test.not: (2 checks) ... OK (0 seconds)
test.or1: (1 checks) ... OK (0 seconds)
test.or2: (1 checks) ... OK (0 seconds)
test.xor1: (1 checks) ... OK (0.01 seconds)
test.xor2: (2 checks) ... OK (0.01 seconds)
fingerprint/tests/ 0000755 0001762 0000144 00000000000 13224262707 013750 5 ustar ligges users fingerprint/tests/doRUnit.R 0000644 0001762 0000144 00000004000 13224262707 015451 0 ustar ligges users if (!require("RUnit", quietly=TRUE)) {
  ## The test driver depends on RUnit; without it we only emit a warning.
  warning("cannot run unit tests -- package RUnit is not available")
} else {
  ## --- Setup ---
  pkg <- "fingerprint" # <-- Change to package name!

  ## Locate the directory holding the unit tests:
  ##  - RCMDCHECK == "FALSE": standalone run under the Makefile
  ##    (not R CMD check), i.e. PKG/tests/../inst/unitTests
  ##  - otherwise: run by R CMD check, i.e. PKG.Rcheck/tests/../PKG/unitTests
  path <- if (Sys.getenv("RCMDCHECK") == "FALSE") {
    file.path(getwd(), "..", "inst", "unitTests")
  } else {
    system.file(package=pkg, "unitTests")
  }

  cat("\nRunning unit tests\n")
  print(list(pkg=pkg, getwd=getwd(), pathToUnitTests=path))
  library(package=pkg, character.only=TRUE)

  ## If desired, load the name space to allow testing of private functions
  ## if (is.element(pkg, loadedNamespaces()))
  ## attach(loadNamespace(pkg), name=paste("namespace", pkg, sep=":"), pos=3)
  ##
  ## or simply call PKG:::myPrivateFunction() in tests

  ## --- Testing ---
  ## Define the suite over every test file found in 'path', then run it
  testSuite <- defineTestSuite(name=paste(pkg, "fingerprint Unit Tests"),
                               dirs=path)
  tests <- runTestSuite(testSuite)

  ## Default report name (only used by the disabled file reports below)
  pathReport <- file.path(path, "report")

  ## Report to stdout and text files
  cat("------------------- UNIT TEST SUMMARY ---------------------\n\n")
  printTextProtocol(tests, showDetails=FALSE)
  #printTextProtocol(tests, showDetails=FALSE,
  # fileName=paste(pathReport, "Summary.txt", sep=""))
  #printTextProtocol(tests, showDetails=TRUE,
  # fileName=paste(pathReport, ".txt", sep=""))

  ## Report to HTML file
  #printHTMLProtocol(tests, fileName=paste(pathReport, ".html", sep=""))

  ## Raise an error so that R CMD check stops in case of
  ## - failures i.e. FALSE to unit tests or
  ## - errors i.e. R errors
  tmp <- getErrors(tests)
  if (tmp$nFail > 0 || tmp$nErr > 0) {
    stop(paste("\n\nunit testing failed (#test failures: ", tmp$nFail,
               ", #R errors: ", tmp$nErr, ")\n\n", sep=""))
  }
}
fingerprint/src/ 0000755 0001762 0000144 00000000000 13224262715 013374 5 ustar ligges users fingerprint/src/fpdistance.c 0000644 0001762 0000144 00000003307 13224262715 015663 0 ustar ligges users #include
#define X(_m,_i,_j,_nrow) _m[ _i + _nrow * _j ]
#define METRIC_TANIMOTO 1
#define METRIC_EUCLIDEAN 2
double d_tanimoto(double*,double*,int);
double d_euclidean(double*,double*,int);
/*
 * Fill the off-diagonal cells of ret with m[i,j] / (m[i,i] + m[j,j] - m[i,j]).
 *
 * m and ret are both addressed through the X() macro as (*nrow) x (*nrow)
 * matrices with the row index varying fastest. Each value is stored at
 * both (i,j) and (j,i); the diagonal of ret is never written.
 * NOTE(review): presumably m holds pairwise common-bit counts with the
 * per-fingerprint bit counts on the diagonal -- confirm against the caller.
 */
void m_tanimoto(double *m, int *nrow, double *ret) {
    const int n = *nrow;
    int row, col;

    for (row = 0; row < n; row++) {
        const double self_row = X(m, row, row, n);
        for (col = row + 1; col < n; col++) {
            const double shared = X(m, row, col, n);
            const double self_col = X(m, col, col, n);
            const double sim = shared / (self_row + self_col - shared);
            X(ret, col, row, n) = sim;
            X(ret, row, col, n) = sim;
        }
    }
}
/**
 Compute a single value comparing two fingerprints.

 fp1 and fp2 should be arrays of 1's and 0's, of length equal to the
 size of the fingerprint (*nbit). *metric selects the measure
 (METRIC_TANIMOTO or METRIC_EUCLIDEAN); the value is written to *ret.
 Any other metric code leaves 0.0 in *ret.
**/
void fpdistance(double *fp1, double *fp2, int *nbit, int *metric, double *ret) {
    const int which = *metric;
    double value = 0.0;

    if (which == METRIC_TANIMOTO) {
        value = d_tanimoto(fp1, fp2, *nbit);
    } else if (which == METRIC_EUCLIDEAN) {
        value = d_euclidean(fp1, fp2, *nbit);
    }
    *ret = value;
}
/**
 Tanimoto coefficient of two 0/1 arrays of length nbit:
 (positions set in both) / (positions set in at least one).
 Entries that are neither exactly 0 nor exactly 1 are not counted.
 Returns -1.0 when nbit <= 0.

 http://www.daylight.com/dayhtml/doc/theory/theory.finger.html
**/
double d_tanimoto(double *fp1, double *fp2, int nbit) {
    int both = 0;   /* set in fp1 and fp2 */
    int only1 = 0;  /* set in fp1 only */
    int only2 = 0;  /* set in fp2 only */
    int k;

    if (nbit <= 0) return -1.0;

    for (k = 0; k < nbit; k++) {
        const double x = fp1[k];
        const double y = fp2[k];
        if (x == 1) {
            if (y == 1) both++;
            else if (y == 0) only1++;
        } else if (x == 0 && y == 1) {
            only2++;
        }
    }
    return ((double) both) / (double) (only1 + only2 + both);
}
/**
http://www.daylight.com/dayhtml/doc/theory/theory.finger.html
**/
double d_euclidean(double *fp1, double *fp2, int nbit) {
int i;
int nc = 0;
int nd = 0;
if (nbit <= 0) return(-1.0);
for (i = 0; i < nbit; i++) {
if (fp1[i] == 1 && fp2[i] == 1) nc++;
if (fp1[i] == 0 && fp2[i] == 0) nd++;
}
return sqrt(((double) nc + (double) nd) / (double) nbit);
}
fingerprint/src/registerDynamicSymbol.c 0000644 0001762 0000144 00000000306 13224262715 020056 0 ustar ligges users #include
#include
#include
/*
 * Shared-library initialisation hook for the 'fingerprint' package.
 *
 * R looks for a routine called R_init_<library name> when it loads a
 * package's shared object, so the hook must be named after this package;
 * under its previous name (R_init_markovchain) it was never invoked.
 * No routine tables are registered (all NULL) and dynamic symbol lookup
 * is left enabled, so native routines are still resolved by name.
 */
void R_init_fingerprint(DllInfo* info) {
    R_registerRoutines(info, NULL, NULL, NULL, NULL);
    R_useDynamicSymbols(info, TRUE);
}
fingerprint/src/readfps.c 0000644 0001762 0000144 00000004343 13224262715 015170 0 ustar ligges users #include
#include
/* Bulk of the code provided by Andrew Dalke, modified
by me to be usable from R */
int bit_is_on(char*,int);
#define charmask(c) ((unsigned char)((c) & 0xff))
/* Value (0-15) of one hexadecimal digit character, or -1 if c is not one. */
static int to_int(int c) {
    if ('0' <= c && c <= '9') return c - '0';
    if ('a' <= c && c <= 'f') return 10 + (c - 'a');
    if ('A' <= c && c <= 'F') return 10 + (c - 'A');
    return -1;
}
/*
 * Decode a hex-encoded fingerprint into the 0-based positions of its set bits.
 *
 * hexstr: character vector; only its first element is read.
 * hexlen: integer vector; its first element is the number of hex characters.
 *
 * Each pair of hex characters forms one byte (first character = high
 * nibble); within a byte, bit 0 is the least significant bit.
 * Returns an integer vector of the set positions in increasing order, or
 * R_NilValue as soon as a character that is not a hex digit is met.
 */
SEXP parse_hex(SEXP hexstr, SEXP hexlen) {
    const char *hex = (const char*) CHAR(STRING_ELT(hexstr, 0));
    const int nhex = INTEGER(hexlen)[0];
    const int total_bits = nhex * 4;
    char *bytes = (char*) R_alloc(nhex / 2, sizeof(char));
    int pos;
    int nbytes = 0;

    /* hex text -> raw bytes */
    for (pos = 0; pos < nhex; pos += 2) {
        int hi = to_int(charmask(hex[pos]));
        int lo = to_int(charmask(hex[pos + 1]));
        if (hi == -1 || lo == -1) {
            return R_NilValue;
        }
        bytes[nbytes++] = (hi << 4) + lo;
    }

    /* first pass: how many bits are on, so the result can be sized */
    int n_set = 0;
    for (pos = 0; pos < total_bits; pos++) {
        if (bit_is_on(bytes, pos)) n_set++;
    }

    /* second pass: record the positions straight into the result vector */
    SEXP result;
    PROTECT(result = allocVector(INTSXP, n_set));
    int *out = INTEGER(result);
    int k = 0;
    for (pos = 0; pos < total_bits; pos++) {
        if (bit_is_on(bytes, pos)) out[k++] = pos;
    }
    UNPROTECT(1);
    return result;
}
/*
 * Return 1 if bit B of the byte buffer fp is set, 0 otherwise.
 * Bits are numbered from the least significant bit of fp[0].
 */
int bit_is_on(char *fp, int B) {
    const int byte_index = B / 8;
    const int shift = B % 8;
    return (fp[byte_index] >> shift) & 0x01;
}
/*
 * Extract the 0-based positions of the '1' characters in a line of
 * '0'/'1' text.
 *
 * bstr: character vector; only its first element is read.
 * len:  integer vector; its first element is the number of characters.
 *
 * Everything up to and including the first tab is skipped. After it,
 * only '0' and '1' characters advance the bit index; any other character
 * is ignored. Returns an integer vector of the positions of the '1's.
 */
SEXP parse_jchem_binary(SEXP bstr, SEXP len) {
    const char *text = (const char*) CHAR(STRING_ELT(bstr, 0));
    const int ntext = INTEGER(len)[0];
    int pos = 0;

    /* move to the character just after the first tab (or to the end) */
    while (pos < ntext && text[pos] != '\t') pos++;
    if (pos < ntext) pos++;
    const int start = pos;

    /* determine the number of 1's */
    int n_set = 0;
    for (; pos < ntext; pos++) {
        if (text[pos] == '1') n_set++;
    }

    /* now get the actual bit positions */
    SEXP result;
    PROTECT(result = allocVector(INTSXP, n_set));
    int *out = INTEGER(result);
    int bit = 0;
    int k = 0;
    for (pos = start; pos < ntext; pos++) {
        const char ch = text[pos];
        if (ch == '1') {
            out[k++] = bit++;
        } else if (ch == '0') {
            bit++;
        }
    }
    UNPROTECT(1);
    return result;
}
fingerprint/NAMESPACE 0000644 0001762 0000144 00000001135 13160063475 014025 0 ustar ligges users importFrom("methods", "new")
exportClasses("fingerprint")
exportClasses("featvec")
exportClasses("feature")
exportMethods("fold", "euc.vector", "distance",
"random.fingerprint", "as.character",
"length",
"feature", "count", 'feature<-', 'count<-')
export("fp.sim.matrix", "fp.to.matrix", "fp.factor.matrix",
"fp.read.to.matrix", "fp.read","shannon",
##"featvec.to.binaryfp",
"moe.lf", "bci.lf", "cdk.lf", "ecfp.lf", "fps.lf", "jchem.binary.lf",
"bit.spectrum", "balance", "bit.importance")
useDynLib(fingerprint,.registration = TRUE)
fingerprint/R/ 0000755 0001762 0000144 00000000000 13156525101 013001 5 ustar ligges users fingerprint/R/balance.R 0000644 0001762 0000144 00000000720 11447374434 014524 0 ustar ligges users balance <- function(fplist) {
  ## Build "balanced" fingerprints: each result is twice as long as its
  ## input and consists of the original bits followed by the bits of the
  ## complement (!fp), shifted up by the original length.
  ##
  ## fplist: a single fingerprint, or a list of fingerprints.
  ## Returns a fingerprint for a single input, a list of them for a list.
  balance.one <- function(fp) {
    n <- length(fp)
    inverted <- !fp
    new('fingerprint',
        nbit = 2 * n,
        bits = c(fp@bits, inverted@bits + n),
        provider='R', name='balanced')
  }
  if (is.list(fplist)) lapply(fplist, balance.one) else balance.one(fplist)
}
fingerprint/R/featurefp.R 0000644 0001762 0000144 00000003647 12224262153 015117 0 ustar ligges users ## A feature fingerprint will be a vector of feature objects
## Class 'featvec': a feature fingerprint.
##   features - list of 'feature' (S4) objects
##   provider - character
##   name     - character
##   misc     - list
## Validity requires every element of 'features' to be an S4 object whose
## class is exactly 'feature'.
setClass("featvec",
         representation(features="list",
                        provider="character",
                        name="character",
                        misc="list"),
         validity=function(object) {
           msg <- "Must supply a list of 'feature' objects"
           ## all elements must share the single class 'feature'
           klasses <- unique(sapply(object@features, class))
           if (length(klasses) != 1 || klasses != 'feature') return(msg)
           ## ... and must be S4 instances
           if (!all(sapply(object@features, isS4))) return(msg)
           TRUE
         },
         prototype=prototype(features=list(),
                             provider="",
                             name="",
                             misc=list()))
## Print a short summary of a feature fingerprint: its name, its provider
## (labelled "source") and the space-separated text form of its features.
setMethod('show', 'featvec',
          function(object) {
            cat("Feature fingerprint\n")
            cat(" name = ", object@name, "\n")
            cat(" source = ", object@provider, "\n")
            feature.text <- paste(sapply(object@features, as.character), collapse=' ')
            cat(" features = ", feature.text, "\n")
          })
## Text form of a feature fingerprint: the as.character() form of each
## feature, joined by single spaces.
setMethod('as.character', 'featvec', function(x) {
  pieces <- sapply(x@features, as.character)
  paste(pieces, collapse=' ')
})
## The length of a feature fingerprint is the number of features it holds.
setMethod("length", "featvec", function(x) {
  feats <- x@features
  length(feats)
})
## featvec.to.binaryfp <- function(fps, bit.length = 256) {
## if (!all(sapply(fps, class) == 'featvec'))
## stop("Must supply a list of feature vector fingerprints")
## ## get all the features
## features <- sort(unique(unlist(lapply(fps, as.numeric))))
## nbit <- length(features)
## if (nbit %% 2 == 1) nbit <- nbit + 1
## ## based on the entire feature set, convert original fps to binary fps
## fps <- lapply(fps, function(x) {
## bitpos <- match(as.numeric(x), features)
## new("fingerprint", nbit=nbit, folded=FALSE, provider=x@provider,name=x@name, bits=bitpos)
## })
## return(fps)
## }
fingerprint/R/bitimp.R 0000644 0001762 0000144 00000000553 11447374434 014427 0 ustar ligges users bit.importance <- function(actives, background) {
  ## Per-bit divergence between two sets of fingerprints.
  ##
  ## actives, background: lists of fingerprints, as accepted by bit.spectrum().
  ## Each bit frequency is smoothed with the other set's frequency (weighted
  ## as one extra observation) and the two-outcome Kullback-Leibler term
  ##   pa*log(pa/pb) + (1-pa)*log((1-pa)/(1-pb))
  ## is returned for every bit position; NaN results are replaced by NA.
  spec.act <- bit.spectrum(actives)
  spec.bg <- bit.spectrum(background)
  n.act <- length(actives)
  n.bg <- length(background)

  p.act <- (n.act*spec.act+spec.bg)/(n.act+1)
  p.bg <- (n.bg*spec.bg+spec.act)/(n.bg+1)

  on.term <- p.act * log(p.act/p.bg)
  off.term <- (1-p.act) * log( (1-p.act)/(1-p.bg) )
  kl <- on.term + off.term
  kl[is.nan(kl)] <- NA
  kl
}
fingerprint/R/feature.R 0000644 0001762 0000144 00000003421 13003502273 014553 0 ustar ligges users ## Define a feature and its count
## Class 'feature': a feature string together with its count.
## Extends 'integer'; slots:
##   feature - character, the feature itself
##   count   - integer, must not be negative
setClass("feature",
         contains = 'integer',
         representation(feature='character',
                        count='integer'),
         validity=function(object) {
           feature.missing <- is.na(object@feature) || is.null(object@feature)
           if (feature.missing) return("feature must be a string")
           if (object@count < 0) return("count must be zero or a positive integer")
           TRUE
         },
         prototype=prototype(feature='', count=1L)
         )
## Print a feature as "<feature>:<count>".
setMethod('show', 'feature',
          function(object) {
            label <- sprintf('%s:%d', object@feature, object@count)
            cat(label, '\n')
          })
## Text form of a feature: "<feature>:<count>".
setMethod('as.character', signature(x='feature'),
          function(x) {
            sprintf("%s:%d", x@feature, x@count)
          })
## Combine features into a plain (unnamed) list. Every argument is rebuilt
## as a fresh 'feature' object from its 'feature' and 'count' slots, with
## the count coerced to integer. 'recursive' is accepted but not used.
setMethod('c', signature(x='feature'), function(x, ..., recursive=FALSE) {
  rebuild <- function(el) {
    new("feature", feature=el@feature, count=as.integer(el@count))
  }
  lapply(unname(list(x, ...)), rebuild)
})
## getters/setters
## Getter: feature(x) returns the 'feature' slot of a feature object.
setGeneric("feature", function(object) standardGeneric("feature"))
setMethod("feature", "feature",
          function(object) {
            object@feature
          })

## Setter: feature(x) <- value replaces the 'feature' slot with a
## character value and returns the modified object.
setGeneric("feature<-", function(this, value) standardGeneric("feature<-"))
setReplaceMethod("feature", signature=signature("feature", "character"),
                 function(this, value) {
                   this@feature <- value
                   return(this)
                 })
## Getter: count(x) returns the 'count' slot of a feature object.
setGeneric("count", function(object) standardGeneric("count"))
setMethod("count", "feature",
          function(object) {
            object@count
          })

## Setter: count(x) <- value stores a numeric value, coerced to integer,
## in the 'count' slot and returns the modified object.
setGeneric("count<-", function(this, value) standardGeneric("count<-"))
setReplaceMethod("count", signature=signature("feature", "numeric"),
                 function(this, value) {
                   new.count <- as.integer(value)
                   this@count <- new.count
                   return(this)
                 })
fingerprint/R/ops.R 0000644 0001762 0000144 00000003116 11447374434 013742 0 ustar ligges users setMethod("&", c("fingerprint", "fingerprint"),
          function(e1, e2) {
            ## Bitwise AND: the result holds the bit positions set in both
            ## operands. Both must have the same bit length.
            if (e1@nbit != e2@nbit) {
              stop("fp1 & fp2 must of the same bit length")
            }
            common <- intersect(e1@bits, e2@bits)
            new("fingerprint", bits=common, nbit=e1@nbit, provider="R")
          })
## Bitwise OR: the result holds the bit positions set in either operand.
## Both operands must have the same bit length.
setMethod("|", c("fingerprint", "fingerprint"),
          function(e1, e2) {
            if (e1@nbit != e2@nbit) {
              stop("fp1 & fp2 must of the same bit length")
            }
            either <- union(e1@bits, e2@bits)
            new("fingerprint", bits=either, nbit=e1@nbit, provider="R")
          })
## Bitwise NOT: the result has the same bit length as x and holds every
## position from 1 to nbit that is not set in x.
##
## seq_len() is used rather than 1:nbit so that a zero-length fingerprint
## yields an empty set of positions; 1:0 would produce c(1, 0) and the
## resulting object would fail the class validity check.
setMethod("!", c("fingerprint"),
          function(x) {
            all.positions <- seq_len(x@nbit)
            off.bits <- if (length(x@bits) > 0) {
              all.positions[ -x@bits ]
            } else {
              all.positions
            }
            new("fingerprint",
                bits=off.bits,
                nbit=x@nbit,
                provider="R")
          })
## Bitwise XOR: the result holds the positions set in exactly one of the
## two operands. Both operands must have the same bit length.
setMethod("xor", c("fingerprint", "fingerprint"),
          function(x,y) {
            if (x@nbit != y@nbit) {
              stop("e1 & e2 must of the same bit length")
            }
            ## expand both fingerprints to logical vectors
            in.x <- logical(x@nbit)
            in.y <- logical(y@nbit)
            in.x[x@bits] <- TRUE
            in.y[y@bits] <- TRUE
            new("fingerprint",
                bits=which(xor(in.x, in.y)),
                nbit=x@nbit,
                provider="R")
          })
fingerprint/R/fingerprint.R 0000644 0001762 0000144 00000004254 11571707721 015471 0 ustar ligges users setClass("fingerprint",
         ## Binary fingerprint stored as the positions of its set bits.
         ##   bits     - numeric, positions that are on
         ##   nbit     - numeric, total length of the fingerprint
         ##   folded   - logical flag
         ##   provider - character
         ##   name     - character
         ##   misc     - list
         representation(bits="numeric",
                        nbit="numeric",
                        folded="logical",
                        provider="character",
                        name="character",
                        misc="list"),
         ## no bit position may exceed the declared length
         validity=function(object) {
           if (any(object@bits > object@nbit)) {
             "Bit positions were greater than the specified bit length"
           } else {
             TRUE
           }
         },
         prototype=prototype(bits=c(),
                             nbit=0,
                             folded=FALSE,
                             provider="",
                             name="",
                             misc=list()))
#setGeneric("show", function(object) standardGeneric("show"))
## Print a summary of a fingerprint: name, bit length, folded flag,
## provider (labelled "source") and the sorted positions of the set bits.
setMethod("show", "fingerprint",
          function(object) {
            cat("Fingerprint object\n")
            cat(" name = ", object@name, "\n")
            cat(" length = ", object@nbit, "\n")
            cat(" folded = ", object@folded, "\n")
            cat(" source = ", object@provider, "\n")
            on.positions <- paste(sort(object@bits), collapse=' ')
            cat(" bits on = ", on.positions, "\n")
          })
## Text form of a fingerprint: a single string of nbit characters with
## "1" at every set position and "0" elsewhere.
setMethod('as.character', "fingerprint",
          function(x) {
            digits <- numeric(x@nbit)
            digits[x@bits] <- 1
            paste(digits, collapse='')
          })
## The length of a fingerprint is its bit length (slot 'nbit'),
## not the number of bits that are set.
setMethod("length", "fingerprint",
          function(x) {
            nbit <- x@nbit
            nbit
          })
## Split the deparsed text of a call into its arguments.
##
## obj: an object of class "call".
## Returns a list with one element per comma-separated argument: names are
## the text before the first "=", values the text between the first and
## second "=" (NA when there is no "="), both with spaces removed.
## The parsing is purely textual, so commas or "=" nested inside an
## argument are split as well.
parseCall <- function (obj)
{
  if (class(obj) != "call") {
    stop("Must supply a 'call' object")
  }

  ## one-line text of the call
  txt <- deparse(obj)
  if (length(txt) > 1) {
    txt <- paste(txt, sep = "", collapse = "")
  }

  ## text after "<function name>(" ...
  fname <- unlist(strsplit(txt, "\\("))[1]
  arg.text <- unlist(strsplit(txt, paste(fname, "\\(", sep = "")))[2]
  ## ... with the last character (the closing parenthesis) dropped
  chars <- unlist(strsplit(arg.text, ""))
  arg.text <- paste(chars[-length(chars)], sep = "", collapse = "")
  pieces <- unlist(strsplit(arg.text, ","))

  vals <- list()
  nms <- NULL
  for (i in seq_along(pieces)) {
    halves <- unlist(strsplit(pieces[i], "="))
    vals[[i]] <- gsub(" ", "", halves[2])
    nms[i] <- gsub(" ", "", halves[1])
  }
  names(vals) <- nms
  vals
}
fingerprint/R/bitspec.R 0000644 0001762 0000144 00000001316 11551603371 014561 0 ustar ligges users bit.spectrum <- function(fplist) {
  ## Fraction of fingerprints in which each bit position is set.
  ##
  ## fplist: a non-empty list of 'fingerprint' objects.
  ## Returns a numeric vector, as long as the first fingerprint, whose
  ## i-th element is the proportion of fingerprints with bit i on.
  ## Stops if fplist is not a list, is empty, or holds anything other
  ## than 'fingerprint' objects.
  ##
  ## is.list() is used instead of comparing class() with 'list' so that
  ## the test is a single TRUE/FALSE even for lists with extra classes;
  ## the empty list is rejected here rather than failing on fplist[[1]].
  if (!is.list(fplist) || length(fplist) == 0)
    stop("Must provide a list of fingerprint objects")
  if (any(unlist(lapply(fplist, class)) != 'fingerprint'))
    stop("Must provide a list of fingerprint objects")

  nbit <- length(fplist[[1]])
  spec <- numeric(nbit)
  for (i in seq_along(fplist)) {
    bits <- fplist[[i]]@bits
    spec[bits] <- spec[bits] + 1
  }
  spec / length(fplist)
}
## Sum of -p * log2(p) over the bit frequencies p returned by
## bit.spectrum(); positions that are never set (p == 0) are left out.
##
## fplist: a list of 'fingerprint' objects.
shannon <- function(fplist) {
  if (class(fplist) != 'list') {
    stop("Must provide a list of fingerprint objects")
  }
  element.classes <- unlist(lapply(fplist, class))
  if (any(element.classes != 'fingerprint')) {
    stop("Must provide a list of fingerprint objects")
  }
  freq <- bit.spectrum(fplist)
  freq <- freq[ freq != 0 ]
  -1 * sum( freq * log2(freq) )
}
fingerprint/R/misc.R 0000644 0001762 0000144 00000021100 12777037330 014063 0 ustar ligges users
## fold(fp): halve a fingerprint by OR-ing its first half with its second.
## The bit length must be even; the result has half the length and its
## 'folded' slot set to TRUE.
setGeneric("fold", function(fp) standardGeneric("fold"))
setMethod("fold", "fingerprint",
          function(fp) {
            nbit <- fp@nbit
            if (nbit %% 2 != 0) {
              stop('Need to supply a fingerprint of even numbered length')
            }
            ## expand to a logical vector so each half can be indexed
            is.on <- rep(FALSE, nbit)
            is.on[fp@bits] <- TRUE
            half <- nbit/2
            lower.bits <- which(is.on[1:half])
            upper.bits <- which(is.on[(half+1):nbit])
            lower <- new("fingerprint", nbit=half, bits=lower.bits, provider="R")
            upper <- new("fingerprint", nbit=half, bits=upper.bits, provider="R")
            result <- lower | upper
            result@folded <- TRUE
            result
          })
## euc.vector(fp): numeric vector as long as the fingerprint, with
## 1/sqrt(length) at every set bit position and 0 elsewhere.
setGeneric("euc.vector", function(fp) standardGeneric("euc.vector"))
setMethod("euc.vector", "fingerprint",
          function(fp) {
            n <- length(fp)
            vec <- rep(0, n)
            vec[fp@bits] <- 1.0 / sqrt(n)
            vec
          })
## distance(fp1, fp2, method, a, b): generic for comparing two
## fingerprints; the methods differ in which of method/a/b are supplied.
setGeneric("distance", function(fp1,fp2,method,a,b) standardGeneric("distance"))

## Feature fingerprints with no method given: use "tanimoto".
setMethod("distance", c("featvec", "featvec", "missing", "missing", "missing"),
          function(fp1, fp2) {
            default.method <- "tanimoto"
            distance(fp1, fp2, default.method)
          })
## Compare two feature fingerprints by their feature strings only
## (counts are ignored). With n1, n2 the numbers of features and n12 the
## number of distinct strings common to both:
##   tanimoto: n12 / (n1 + n2 - n12)
##   robust:   0.5 + 0.5 * n12^2 / (n1 * n2)
##   dice:     2 * n12 / (n1 + n2)
setMethod("distance", c("featvec", "featvec", "character", "missing", "missing"),
          function(fp1, fp2, method=c("tanimoto", "dice", "robust")) {
            method <- match.arg(method)

            ## extract the feature strings, ignoring counts for now
            strings1 <- sapply(fp1@features, function(x) x@feature)
            strings2 <- sapply(fp2@features, function(x) x@feature)

            n1 <- length(fp1)
            n2 <- length(fp2)
            n12 <- length(intersect(strings1, strings2))

            switch(method,
                   tanimoto = n12/(n1+n2-n12),
                   robust = 0.5 + 0.5 * n12 * n12 / (n1*n2),
                   dice = 2.0 * n12 / (n1+n2))
          })
## Binary fingerprints with no method given: use "tanimoto".
setMethod("distance", c("fingerprint", "fingerprint", "missing", "missing", "missing"),
          function(fp1,fp2) {
            default.method <- "tanimoto"
            distance(fp1, fp2, default.method)
          })
## Tversky index between two binary fingerprints:
##   |X & Y| / (a*|X| + b*|Y| + (1-a-b)*|X & Y|)
## where |.| is the number of set bits.
##
## fp1, fp2: fingerprints of the same bit length.
## method:   "tversky"; any other (non-NA) method name is delegated to
##           the method that takes no weights, and a and b are ignored.
## a, b:     non-negative weights.
##
## The delegated result is now returned; previously it was computed and
## discarded, so the Tversky value was returned whatever 'method' said.
setMethod("distance", c("fingerprint", "fingerprint", "character", "numeric", "numeric"),
          function(fp1, fp2, method="tversky", a, b) {
            if (!is.null(method) && !is.na(method) && method != "tversky")
              return(distance(fp1, fp2, method))
            if ( length(fp1) != length(fp2))
              stop("Fingerprints must of the same bit length")
            if (a < 0 || b < 0) stop("a and b must be positive")

            common <- fp1 & fp2
            xiy <- length(common@bits)
            x <- length(fp1@bits)
            y <- length(fp2@bits)
            return( xiy / (a*x + b*y + (1-a-b)*xiy ) )
          })
## Similarity / dissimilarity between two binary fingerprints of the same
## bit length, selected by 'method'.
##
## 'tanimoto' and 'euclidean' are computed by the native routine
## "fpdistance" on 0/1 vectors. All other metrics are computed from the
## four counts
##   c = bits on in both,      a = bits on in fp1 only,
##   b = bits on in fp2 only,  d = bits on in neither,
## with n the bit length.
## Asking for 'tversky' here is an error because that metric needs the
## weights a and b.
##
## The 'mcconnaughey' branch is now keyed on the same spelling that the
## argument list offers; it used to test for 'mcconaughey', so the metric
## could be selected but always returned NULL.
setMethod("distance", c("fingerprint", "fingerprint", "character", "missing", "missing"),
          function(fp1,fp2, method=c('tanimoto', 'euclidean', 'mt',
                                'simple', 'jaccard', 'dice',
                                'russelrao', 'rodgerstanimoto','cosine',
                                'achiai', 'carbo', 'baroniurbanibuser',
                                'kulczynski2',
                                'hamming', 'meanHamming', 'soergel',
                                'patternDifference', 'variance', 'size', 'shape',
                                'hamann', 'yule', 'pearson', 'dispersion',
                                'mcconnaughey', 'stiles',
                                'simpson', 'petke',
                                'stanimoto', 'seuclidean'
                                )) {
            if (method == 'tversky')
              stop("If Tversky metric is desired, must specify a and b")
            if ( length(fp1) != length(fp2))
              stop("Fingerprints must of the same bit length")

            method <- match.arg(method)
            n <- length(fp1)

            ## Metrics implemented in C: 1 = Tanimoto, 2 = Euclidean
            if (method == 'tanimoto' || method == 'euclidean') {
              metric.code <- if (method == 'tanimoto') 1 else 2
              f1 <- numeric(n)
              f2 <- numeric(n)
              f1[fp1@bits] <- 1
              f2[fp2@bits] <- 1
              sim <- 0.0
              ret <- .C("fpdistance", as.double(f1), as.double(f2),
                        as.integer(n), as.integer(metric.code),
                        as.double(sim),
                        PACKAGE="fingerprint")
              return (ret[[5]])
            }

            size <- n

            ## in A & B
            tmp <- fp1 & fp2
            c <- length(tmp@bits)

            ## in A not in B
            tmp <- (fp1 | fp2) & !fp2
            a <- length(tmp@bits)

            ## in B not in A
            tmp <- (fp1 | fp2) & !fp1
            b <- length(tmp@bits)

            ## not in A, not in B
            tmp <- !(fp1 | fp2)
            d <- length(tmp@bits)

            ## Only the selected alternative is evaluated.
            dist <- switch(method,
                           ## Similarity
                           stanimoto = c / (a+b+c),
                           seuclidean = sqrt((d+c) / (a+b+c+d)),
                           dice = c / (.5*a + .5*b + c),
                           mt = {
                             t1 <- c/(size-d)
                             t0 <- d/(size-c)
                             phat <- ((size-d) + c)/(2*size)
                             (2-phat)*t1/3 + (1+phat)*t0/3
                           },
                           simple = (c+d)/n,
                           jaccard = c/(a+b+c),
                           russelrao = c/size,
                           rodgerstanimoto = (c+d)/(2*a+2*b+c+d),
                           cosine = ,
                           achiai = ,
                           carbo = c/sqrt((a+c)*(b+c)),
                           baroniurbanibuser = (sqrt(c*d)+c)/(sqrt(c*d)+a+b+c),
                           kulczynski2 = .5*(c/(a+c)+c/(b+c)),

                           ## Dissimilarity
                           hamming = a+b,
                           meanHamming = (a+b)/(a+b+c+d),
                           soergel = (a+b)/(a+b+c),
                           patternDifference = (a*b)/(a+b+c+d)^2,
                           variance = (a+b)/(4*n),
                           size = (a-b)^2/n^2,
                           shape = (a+b)/n-((a-b)/(n))^2,

                           ## Composite
                           hamann = (c+d-a-b)/(a+b+c+d),
                           yule = (c*d-a*b)/(c*d+a*b),
                           pearson = (c*d-a*b)/sqrt((a+c)*(b+c)*(a+d)*(b+d)),
                           dispersion = (c*d-a*b)/n^2,
                           mcconnaughey = (c^2-a*b)/((a+c)*(b+c)),
                           stiles = log10(n*(abs(c*d-a*b)-n/2)^2/((a+c)*(b+c)*(a+d)*(b+d))),

                           ## Asymmetric
                           simpson = c/min((a+c),(b+c)),
                           petke = c/max((a+c),(b+c)))
            dist
          })
## random.fingerprint(nbit, on): fingerprint of length 'nbit' in which
## 'on' positions, sampled without replacement from 1:nbit, are set.
## Both arguments must be greater than zero.
setGeneric("random.fingerprint",
           function(nbit, on) standardGeneric("random.fingerprint"))
setMethod("random.fingerprint", c("numeric", "numeric"),
          function(nbit, on) {
            if (nbit <= 0) {
              stop("Bit length must be positive integer")
            }
            if (on <= 0) {
              stop("Number of bits to be set to 1 must be positive integer")
            }
            chosen <- sample(1:nbit, size=on)
            new("fingerprint", nbit=nbit, bits=chosen, provider="R", folded=FALSE)
          })
fingerprint/R/read.R 0000644 0001762 0000144 00000007146 13156525101 014047 0 ustar ligges users jchem.binary.lf <- function(line) {
  ## Line parser for tab-separated lines whose first field is the molecule
  ## id; the bit string is decoded by the native routine
  ## "parse_jchem_binary".
  ## Returns list(id, 1-based bit positions, list()), or NULL when the
  ## native routine returns NULL.
  fields <- strsplit(line, "\t")[[1]]
  molid <- fields[1]
  bitpos <- .Call("parse_jchem_binary", as.character(line), as.integer(nchar(line)) )
  if (is.null(bitpos)) {
    return(NULL)
  }
  ## we add 1, since C does bit positions from 0
  list(molid, bitpos+1, list())
}
## Line parser for whitespace-separated lines of the form
##   <hex fingerprint> <id> [further fields ...]
## The hex string is decoded by the native routine "parse_hex".
## Returns list(id, 1-based bit positions, misc) where misc is a one-element
## list holding the fields after the id (an empty list if there are none),
## or NULL when the hex string cannot be parsed.
fps.lf <- function(line) {
  toks <- strsplit(line, "\\s")[[1]]
  hex <- toks[1]
  bitpos <- .Call("parse_hex", as.character(hex), as.integer(nchar(hex)))
  if (is.null(bitpos)) {
    return(NULL)
  }
  misc <- if (length(toks) > 2) list(toks[-c(1,2)]) else list()
  ## we add 1, since C does bit positions from 0
  list(toks[2], bitpos+1, misc)
}
## Line parser for lines of the form
##   <id> {p1, p2, ...}
## Returns list(id with whitespace removed, numeric bit positions, list()).
cdk.lf <- function(line) {
  ## locate the "{...}" group and keep only what is between the braces
  hit <- regexpr("{([0-9,\\s]*)}", line, perl=TRUE)
  hit.len <- attr(hit, "match.length")
  inner <- substr(line, hit+1, hit+hit.len-2)
  ## drop the commas, then split the positions on single spaces
  inner <- gsub(',', '', inner)
  bits <- as.numeric(strsplit(inner, ' ')[[1]])
  ## the id is everything before the opening brace
  molid <- gsub("\\s+", "", strsplit(line, "\\{")[[1]][1])
  list(molid, bits, list())
}
## Line function for MOE fingerprint output, where the bit positions are a
## whitespace-separated list inside double quotes.
##   line - a single line of the input file
## Returns list(NA, bits, list()); this format yields no molecule name, so
## the first component is always NA. bits is numeric(0) when the quoted
## list is empty or absent.
moe.lf <- function(line) {
  ## locate the quoted bit list
  p <- regexpr("\"([0-9\\s]*)\"", line, perl=TRUE)
  ## strip the enclosing quotes
  inner <- substr(line, p+1, p+attr(p,"match.length")-2)
  ## The pattern above accepts any whitespace, so split on whitespace runs
  ## rather than a single blank (tabs / doubled spaces used to become NA)
  toks <- strsplit(inner, "\\s+", perl=TRUE)[[1]]
  toks <- toks[nzchar(toks)]
  list(NA, as.numeric(toks), list())
}
## Line function for BCI fingerprint output.
##   line - a single whitespace-separated record
## The first field is used as the name; the first field and the last two
## fields are discarded and the remaining fields are converted to numeric
## bit positions.
## Returns list(name, bits, list()).
bci.lf <- function(line) {
  fields <- strsplit(line, '\\s')[[1]]
  nfield <- length(fields)
  label <- fields[1]
  ## positions of the name and of the two trailing fields
  discard <- c(1, nfield, nfield-1)
  bits <- as.numeric(fields[-discard])
  list(label, bits, list())
}
## Line function for feature (e.g. circular / ECFP style) fingerprints.
##   line - a single whitespace-separated record: name, then the features
## Returns list(name, features, list()); the features are kept as a
## character vector and are not converted to numbers.
ecfp.lf <- function(line) {
  fields <- strsplit(line, '\\s')[[1]]
  list(fields[1], fields[-1], list())
}
## TODO we should be iterating over lines and not reading
## them all in
##
## Read fingerprints from a text file, one record per line.
##
##   f      - path of the file to read
##   size   - bit length given to every binary fingerprint. Ignored for FPS
##            input, where it comes from the '#num_bits' header line or is
##            inferred from the hex string of the first record (4 bits per
##            hex character)
##   lf     - line function: takes one line and returns
##            list(name, bits-or-features, misc), or NULL if it cannot
##            parse the line
##   header - if TRUE the first line is dropped (not applied to FPS input,
##            whose leading '#' block is always consumed)
##   binary - TRUE to build 'fingerprint' objects, FALSE to build 'featvec'
##            objects (forced to TRUE for FPS input)
##
## Returns a list of 'fingerprint' or 'featvec' objects. Lines for which lf
## returns NULL are skipped with a warning. Stops with "Invalid FPS format"
## when the first header line of an FPS file does not contain "#FPS1".
fp.read <- function(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE, binary=TRUE) {
  ## The format is recognised from the *expression* supplied for lf, so the
  ## FPS handling below only triggers when the caller writes lf=fps.lf.
  ## deparse() may return several strings (e.g. for an inline function);
  ## collapse them so the comparisons below always see a single string.
  lf.name <- paste(deparse(substitute(lf)), collapse=' ')
  provider <- lf.name

  fcon <- file(description=f, open='r')
  ## make sure the connection is released even if we stop() or lf fails
  on.exit(close(fcon), add=TRUE)
  lines <- readLines(fcon, n=-1)

  if (header && lf.name != 'fps.lf') lines <- lines[-1]

  if (lf.name == 'fps.lf') {
    binary <- TRUE
    size <- NULL

    ## process the header block: the leading run of lines starting with '#'
    nheaderline <- 0
    for (line in lines) {
      if (substr(line,1,1) != '#') break
      nheaderline <- nheaderline + 1
      if (nheaderline == 1 && length(grep("#FPS1", line)) != 1) stop("Invalid FPS format")
      if (length(grep("#num_bits", line)) == 1) size <- as.numeric(strsplit(line, '=')[[1]][2])
      if (length(grep("#software", line)) == 1) provider <- as.character(strsplit(line, '=')[[1]][2])
    }

    ## Drop the header. Guard the zero case (x[-integer(0)] would drop
    ## everything) and avoid (n+1):length(x), which counts backwards when
    ## the file consists of header lines only.
    if (nheaderline > 0) lines <- lines[-seq_len(nheaderline)]

    ## no '#num_bits' line: infer the size from the first record
    if (is.null(size) && length(lines) > 0) {
      size <- nchar(strsplit(lines[1], '\\s')[[1]][1]) * 4
    }
  }

  fplist <- list()
  idx <- 1
  for (line in lines) {
    dat <- lf(line)
    if (is.null(dat)) {
      warning(sprintf("Couldn't parse: %s", line))
      next
    }
    ## line functions that cannot supply a name return NA
    if (is.na(dat[[1]])) name <- ""
    else name <- dat[[1]]
    misc <- dat[[3]] ## usually empty
    if (binary) {
      fplist[[idx]] <- new("fingerprint",
                           nbit=size,
                           bits=as.numeric(dat[[2]]),
                           folded=FALSE,
                           provider=provider,
                           name=name,
                           misc=misc)
    } else {
      ## convert the features to 'feature' objects
      feats <- lapply(dat[[2]], function(x) new("feature", feature=x))
      fplist[[idx]] <- new("featvec",
                           features=feats,
                           provider=provider,
                           name=name,
                           misc=misc)
    }
    idx <- idx + 1
  }
  fplist
}
## Need to supply the length of the bit string since fp.read does
## not provide that information
##
## Read a fingerprint file and return it as a 0/1 matrix (one row per
## fingerprint, one column per bit). Arguments are as for fp.read.
fp.read.to.matrix <- function(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE) {
  ## fp.read identifies the file format (and the provider label) from the
  ## expression passed as its lf argument. Calling fp.read(f, size, lf, header)
  ## directly would always present the symbol `lf`, so FPS input would never
  ## be recognised. When the caller supplied a plain symbol, re-issue the
  ## call with that same symbol, bound to the already evaluated function in
  ## a private environment so the lookup cannot fail or pick up another object.
  lf.expr <- substitute(lf)
  if (is.name(lf.expr)) {
    env <- new.env(parent=environment())
    assign(as.character(lf.expr), lf, envir=env)
    ## f, size and header are embedded as values, so a line function that
    ## happens to share one of those names cannot shadow them
    fplist <- eval(as.call(list(as.name("fp.read"), f, size, lf.expr, header)), env)
  } else {
    fplist <- fp.read(f, size, lf, header)
  }
  fp.to.matrix(fplist)
}
fingerprint/R/zzz.R 0000644 0001762 0000144 00000000041 12224316213 013751 0 ustar ligges users .onLoad <- function(lib, pkg) {}
## .onLoad: package load hook with an empty body; it performs no work.
fingerprint/R/matrix.R 0000644 0001762 0000144 00000004170 13156240271 014434 0 ustar ligges users fp.sim.matrix <- function(fplist, fplist2=NULL, method='tanimoto') {
  ## Pairwise similarity matrix for one or two lists of fingerprints.
  ##   fplist  - list of fingerprint objects
  ##   fplist2 - optional second list; when given, the result is the
  ##             cross-similarity matrix with one row per element of fplist
  ##             and one column per element of fplist2
  ##   method  - metric name handed to distance(); 'dice' and 'tanimoto'
  ##             use the matrix-based helpers when only fplist is given
  ## Returns a numeric matrix. For a single list the matrix is square and
  ## symmetric and its diagonal is set to 1; the diagonal of a cross-similarity
  ## matrix is left as computed.
  if (!is.null(fplist2)) {
    sim <- do.call('rbind', lapply(fplist,
                   function(fp) unlist(lapply(fplist2,
                                function(x) distance(x,fp, method=method)))))
    ##diag(sim) <- 1.0
    return(sim)
  }

  n <- length(fplist)
  if (method == 'dice') {
    sim <- .dice.sim.mat(fplist)
  } else if (method == 'tanimoto') {
    sim <- .tanimoto.sim.mat(fplist)
  } else {
    sim <- matrix(0, nrow=n, ncol=n)
    ## Fill the upper triangle row by row and mirror it. seq_len() yields an
    ## empty loop for a single fingerprint, where 1:(n-1) would iterate
    ## over c(1, 0) and index past the end of the list.
    for (i in seq_len(max(n - 1, 0))) {
      rest <- (i+1):n
      v <- unlist(lapply( fplist[rest], distance, fp2=fplist[[i]], method=method))
      sim[i,rest] <- v
      sim[rest,i] <- v
    }
  }
  diag(sim) <- 1.0
  return(sim)
}
## Converts a list of N fingerprints of P bits each into an N x P matrix
## of 0/1 values. The number of columns is taken from the nbit slot of the
## first fingerprint; row k has a 1 in every column listed in the bits
## slot of the k-th fingerprint.
fp.to.matrix <- function( fplist ) {
  ncols <- fplist[[1]]@nbit
  out <- matrix(0, nrow=length(fplist), ncol=ncols)
  for (row in seq_along(fplist)) {
    out[row, fplist[[row]]@bits] <- 1
  }
  out
}
## Converts a list of fingerprints into a data.frame with one row per
## fingerprint and one column per bit position, each column being a factor
## with levels 0 and 1.
fp.factor.matrix <- function( fplist ) {
  m <- data.frame(fp.to.matrix(fplist))
  ## fix the levels so every column has both 0 and 1, even when a bit is
  ## constant across the whole list
  m[] <- lapply(m, factor, levels=0:1)
  m
}
## Dice similarity matrix for a list of binary fingerprints.
## Entry (i,j) is 2 * |bits common to i and j| / (|bits of i| + |bits of j|);
## the diagonal is set to 1. Returns an N x N numeric matrix.
.dice.sim.mat <- function(fplist) {
  m <- fp.to.matrix(fplist)
  ## m is 0/1, so m %*% t(m) counts the bits shared by each pair
  common <- m %*% t(m)
  ## since it is binary, just add the row values to get the on-bit counts
  on.count <- rowSums(m)
  ## Vectorised form of the pairwise formula. Unlike a loop over
  ## 1:(len-1), this also works for a single fingerprint.
  s <- (2 * common) / outer(on.count, on.count, "+")
  diag(s) <- 1.0
  return(s)
}
## Tanimoto similarity matrix for a list of binary fingerprints, with the
## pairwise loop delegated to the native routine "m_tanimoto".
## Returns an N x N numeric matrix.
.tanimoto.sim.mat <- function(fplist){
  bitmat <- fp.to.matrix(fplist)
  ## bitmat is 0/1, so this holds the shared-bit count of every pair
  shared <- bitmat %*% t(bitmat)
  n <- nrow(bitmat)
  out <- mat.or.vec(n, n)
  res <- .C("m_tanimoto", as.double(shared), as.integer(n), as.double(out),
            PACKAGE="fingerprint")
  ## the third argument carries the result back, laid out row by row
  return(matrix(res[[3]], nrow=n, ncol=n, byrow=TRUE))
  ## NOTE(review): the R loop this replaced computed, for every i and j,
  ##   shared[i,j] / (shared[i,i] + shared[j,j] - shared[i,j])
  ## presumably the native routine evaluates the same formula - confirm
  ## against src/fpdistance.c
}
fingerprint/MD5 0000644 0001762 0000144 00000004574 13224521352 013122 0 ustar ligges users aa63c800ee74199d6ec4219e2803a9da *ChangeLog
baf4c2adc631eea046d95eaa804c1ade *DESCRIPTION
273a1345a5199deee0497cc556812bde *INDEX
836321b1e72589b01e942d88414a1401 *NAMESPACE
f28cc7591c8d64e99d90d9f91383a1c6 *R/balance.R
2c642bc1f05561df93d2c76a9fae1748 *R/bitimp.R
0c94477e5334b69df55f95f0e5b86afc *R/bitspec.R
702410c34c11fa12b05cda292e710a5b *R/feature.R
c4e0d4b459b8de6a1a982b6cc873d964 *R/featurefp.R
b1ea2b3bb57e131aa33a2b58ebc8c85a *R/fingerprint.R
654f051f7cffd07f87bab08eef19dd14 *R/matrix.R
2ff890be163c9de10671cc8f11e0b868 *R/misc.R
8972e18553803e569bf2eb11d586f7af *R/ops.R
b57f70112f24a45b0b58464b880003c4 *R/read.R
fe6078d069c08a91d845aaf6fafc073f *R/zzz.R
334f0de6ed55dc79f59addf091097353 *inst/unitTests/Makefile
f22df9f3397f61d3e9679ad049e14c83 *inst/unitTests/bits.fps
23d32aef831e39a3c1dfe0f0c1a442b7 *inst/unitTests/report.html
5b24b494bcddc74dc9ec078e32306dc5 *inst/unitTests/report.txt
745f49fd8ffeedc56e89413c6487279d *inst/unitTests/reportSummary.txt
c7b11a520d31f1e0a0a38ccaee75b0f1 *inst/unitTests/runit.fp.R
c7fc4f4bf4a1061b4d609cf20c3b0f34 *inst/unitTests/test.ecfp
0efea50e6b9d5435b4b9704d4391cacd *man/balance.Rd
d76542452b5b085238f6708e4c24593c *man/bitimp.Rd
d839bab10da6f334946a447b19f4b559 *man/bitspec.Rd
b122ce42392d47066017465710d513f7 *man/c.Rd
9807f646ce5102d3e5ea14a9c7e77cbf *man/count-methods.Rd
6b353c421c70366b7eab59c1e7633338 *man/distance-methods.Rd
5c5c1ad2b1efbef8378736a3155f3ace *man/facmat.Rd
8ae5afaebabcc29868fa0d10e6fa0f88 *man/feature-methods.Rd
d8c5a144cb28883bc0ce058b79ace5f6 *man/feature.Rd
79473fa713f0131f2a8eeaf07a92a5cb *man/featvec.Rd
49b6393f52ba81a7d27388712fa5311a *man/fingerprint.Rd
6494638b1a4553d6535a5ffb7ce3bd17 *man/fold.Rd
628d41caa54adf763db1ab716b5733b8 *man/fplogical.Rd
37e3fca205a20eed21d28da853c0f3ef *man/length.Rd
f195b3813a235780f5de974030e1635d *man/linefunc.Rd
4ff20ca34f27226cf8eea5286100602f *man/mat.Rd
9c6c3ba8ac30ddb3b14803c93dce2a74 *man/read.Rd
ce65a09d4194e1d28a28c83a6bc77866 *man/rndfp.Rd
a0e79a3d465933089eed74a7f56ac5c4 *man/shannon.Rd
9f69d49ea6ad6a8b2c69b696a8c2715a *man/show.Rd
a0c7e4abd4c8f6c862240cc08764cd50 *man/sim.Rd
531d7aae2eb984e674968da4b856a60e *man/string.Rd
ca09bc634ca0b37f30dda125555410f6 *man/vec.Rd
7afe32c102f466fbc046191637bc6229 *src/fpdistance.c
3c7a939828404a253b652bf1191c9f60 *src/readfps.c
f0b1c7e2062664dbefc7d59abc6a91c4 *src/registerDynamicSymbol.c
7dae096c648f491093bf635c811d5658 *tests/doRUnit.R
fingerprint/DESCRIPTION 0000644 0001762 0000144 00000002251 13224521352 014306 0 ustar ligges users Package: fingerprint
Version: 3.5.7
Date: 2018-01-06
Title: Functions to Operate on Binary Fingerprint Data
Author: Rajarshi Guha
Maintainer: Rajarshi Guha
BugReports: https://github.com/rajarshi/cdkr/issues
Description: Functions to manipulate binary fingerprints
of arbitrary length. A fingerprint is represented by an object of S4 class 'fingerprint'
which is internally represented a vector of integers, such
that each element represents the position in the fingerprint that is set to 1.
The bitwise logical functions in R are overridden so that they can be used directly
with 'fingerprint' objects. A number of distance metrics are also
available (many contributed by Michael Fadock). Fingerprints
can be converted to Euclidean vectors (i.e., points on the unit hypersphere) and
can also be folded using OR. Arbitrary fingerprint formats can be handled via line
handlers. Currently handlers are provided for CDK, MOE and BCI fingerprint data.
License: GPL
Depends: methods
LazyLoad: yes
Suggests: RUnit
NeedsCompilation: yes
Packaged: 2018-01-07 00:11:57 UTC; guhar
Repository: CRAN
Date/Publication: 2018-01-07 22:44:58 UTC
fingerprint/ChangeLog 0000644 0001762 0000144 00000011052 12233504637 014357 0 ustar ligges users 2013-10-28 Guha
* src/fpdistance.c (m_tanimoto): Reduced the number of iterations
based on comments from John May
2013-10-27 Guha
* R/matrix.R (.tanimoto.sim.mat): Updated Tanimoto matrix code
form Abhik Seal to use C code for nested loops.
2013-10-22 Rajarshi Guha
* R/matrix.R (fp.sim.matrix): Updated to use the new matrix
multiplication based Tanimoto calculation for similarity matrices
contributed by Abhik Seal
2013-10-06 Rajarshi Guha
* R/zzz.R (.onLoad): Removed unecessary usage of require
* R/misc.R: Removed debug code
2013-10-05 Rajarshi Guha
* Added the 'feature' class to represent alphanumeric features
(usually substructures but could be arbitrary hashes) and
their counts
* Updated the 'featvec' fingerprint class to use 'feature'
objects
* Removed featvec.to.binary since the featvec fingerprint
type can also include non numeric features
2013-04-05 Rajarshi Guha
* Updated package to remove use of deprecated methods
2012-10-30 Rajarshi Guha
* R/matrix.R (fp.sim.matrix): Added code provided by Abhik Seal
to speed up pairwise similarity matrix calculation when the Dice
metric is specified.
2012-02-21 Rajarshi Guha
* R/read.R (jchem.binary.lf): Added a line parser for JChem binary
string formatted fingerprints. This is based on a C function to
parse the fingerprint portion of a line
2011-07-26 Rajarshi Guha
* man/sim.Rd: Updated man page for fp.sim.matrix to indicate the
use of two fingerprint lists
* R/matrix.R (fp.sim.matrix): Updated similarity matrix
calculation to support cross-similarity (ie, similarity matrix
from two (possibly different lengths) lists of fingerprints
2011-06-03 Rajarshi Guha
* src/fpdistance.c: Cleaned up uncessary headers and unused variables
* src/readfps.c: Cleaned up unecessary headers
2011-06-02 Rajarshi Guha
* R/read.R (fp.read): Updated line functions to return a third
component that can be used to return the remainder of a line if a
format allows other items than just a title and fingerprint.
(fps.lf): Updated FPS line function to actually return remaining
components of a fingerprint line. Updated main reader to set the
misc field of a fingerprint object to hold this list
2011-06-01 Rajarshi Guha
* src/readfps.c (parse_hex): Added a C function to parse
hex-encoded fingerprints from the FPS format and return the bit
positions that are set to 1
* R/read.R (fps.lf): Added a new line parser to handle the
fingerprint lines from the FPS format
(fp.read): Updated main fingerprint reader to handle the
multi-line header from FPS format fingerprint files
2011-04-14 Rajarshi Guha
* R/bitspec.R (shannon): Added a method to evaluate the Shannon
entropy for a list of fingerprints. Also added a man page
2010-11-07 Rajarshi Guha
* R/read.R: Updaetd the CDK line parser to extract the
molecule id and return it so that the fingerpint object
contains the molecule id
2010-10-20 Rajarshi Guha
* R/read.R (fp.read): Updated to support reading of feature
fingerprints. The user must now indicate whether a binary or a
feature fingerprint is being read. Also added a new line parsing
function to process generic feature fingerprints.
* R/misc.R: Added similarity metrics for feature
fingerprints. Currently tanimoto, dice and robust metrics are
supported.
* R/featurefp.R: Added a class to support fingerprints that are
represented as lists of numeric or string features such as
circular fingerprints. Also added a method to convert a collection
of feature fingerprints to a fixed-length binary string
representation.
* man/featvec.Rd: Added man page for te featvec class, used to
represent fingerprints characterized as numeric or character
features
2010-06-02 Rajarshi Guha
* R/balance.R (balance): Added a method to generate balanced fingerprints, which have
50% bit density, but are 2x the size of the input fingerprints.
* R/bitimp.R (bit.importance): Added a function to evaluate the importance of each bit
in a binary fingerprint in terms of the Kullback Liebler divergence between a set of
actives and a background collection.
fingerprint/man/ 0000755 0001762 0000144 00000000000 12224277277 013370 5 ustar ligges users fingerprint/man/vec.Rd 0000644 0001762 0000144 00000001574 11447374434 014442 0 ustar ligges users \name{euc.vector}
\alias{euc.vector}
\title{
Euclidean Representation of Binary Fingerprints
}
\description{
Ordinarily, a binary fingerprint can be considered to represent a
corner of a nD hypercube. However in many cases using such a representation
can lead to a very sparse space. Consequently one approach is to convert
the fingerprint so that it represents points on a nD unit hypersphere.
The resultant fingerprint is then a nD coordinate.
}
\usage{
euc.vector(fp)
}
\arguments{
\item{fp}{
An object of class \code{fingerprint}.
}
}
\value{
A numeric of length equal to the bit length of the fingerprint. The
result corresponds to a unit vector for a point
on the nD hypersphere
}
\examples{
# make a fingerprint vector
fp <- new("fingerprint", nbit=8, bits=c(1,3,4,5,7))
vec <- euc.vector(fp)
}
\keyword{logic}
\author{Rajarshi Guha \email{rguha@indiana.edu}}
fingerprint/man/show.Rd 0000644 0001762 0000144 00000001021 12224147737 014626 0 ustar ligges users \name{show}
\alias{show,fingerprint-method}
\alias{show,featvec-method}
\alias{show,feature-method}
\title{
String Representation of a Fingerprint or Feature
}
\description{
Simply summarize the fingerprint or feature
}
\usage{
\S4method{show}{fingerprint}(object)
\S4method{show}{featvec}(object)
\S4method{show}{feature}(object)
}
\arguments{
\item{object}{
An object of class \code{fingerprint}, \code{featvec} or \code{feature}
}
}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
\keyword{logic}
fingerprint/man/linefunc.Rd 0000644 0001762 0000144 00000004074 11721013501 015442 0 ustar ligges users \name{cdk.lf, moe.lf, bci.lf}
\alias{cdk.lf}
\alias{moe.lf}
\alias{bci.lf}
\alias{ecfp.lf}
\alias{fps.lf}
\alias{jchem.binary.lf}
\title{
Functions to parse lines from fingerprint files
}
\description{
These functions take a single line and parses it to produce
a vector of integers which represents the position of the 'on' bits in
a fingerprint. This allows the user to use \code{fp.read} with arbitrary fingerprint
files. A new file format can be handled by defining a new line parser function.
Currently the first three functions process fingerprint files obtained from the
CDK (\url{http://cdk.sourceforge.net}), MOE (\url{http://chemcomp.com}), BCI
(\url{http://www.digitalchemistry.co.uk/}) and the FPS format
(\url{http://code.google.com/p/chem-fingerprints/wiki/FPS}). The last function can be used
for any fingerprint that generates hashed features (such as ECFPs or other
circular fingerprints). For these cases, it is assumed that features are unsigned
integers, so string features are not handled.
Note that when the \code{fps.lf} function is specified, items such as the number of bits
or the header flag do not need to be specified, as the format requires a header block
containing some of these items.
}
\usage{
cdk.lf(line)
moe.lf(line)
bci.lf(line)
ecfp.lf(line)
fps.lf(line)
jchem.binary.lf(line)
}
\arguments{
\item{line}{
The line to parse
}
}
\value{
A list with three components - the name associated with the fingerprint (if available)
and a vector of integers representing bits set to 1 (for the case of the first three
methods) or a vector of characters representing hashed features (characteristic of
circular fingerprints) or more generally, any string feature. The third component is a
(possibly empty) list, which contains the remaining components of a line, when the format
allows items other than a title and the fingerprint (such as the FPS format). The content
of the third component is dependent on the line function that is being used.
}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
\keyword{logic}
fingerprint/man/distance-methods.Rd 0000644 0001762 0000144 00000011571 12020462607 017102 0 ustar ligges users \name{distance-methods}
\docType{methods}
\alias{distance}
\alias{distance-methods}
\alias{distance,featvec,featvec,character,missing,missing-method}
\alias{distance,featvec,featvec,missing,missing,missing-method}
\alias{distance,fingerprint,fingerprint,character,missing,missing-method}
\alias{distance,fingerprint,fingerprint,character,numeric,numeric-method}
\alias{distance,fingerprint,fingerprint,missing,missing,missing-method}
\title{Calculates the Similarity or Dissimilarity Between Two Fingerprints}
\description{
A number of distance metrics can be calculated for binary
fingerprints. Some of these are actually similarity metrics and
thus represent the reverse of a distance metric.
The following are distance (dissimilarity) metrics
\itemize{
\item Hamming
\item Mean Hamming
\item Soergel
\item Pattern Difference
\item Variance
\item Size
\item Shape
}
The following metrics are similarity metrics and so the distance can
be obtained by subtracting the value from 1.0
\itemize{
\item Tanimoto
\item Dice
\item Modified Tanimoto
\item Simple
\item Jaccard
\item Russel-Rao
\item Rodgers Tanimoto
\item Cosine
\item Achiai
\item Carbo
\item Baroniurbanibuser
\item Kulczynski2
\item Robust
}
Finally the method also provides a set of composite and asymmetric
distance metrics
\itemize{
\item Hamann
\item Yule
\item Pearson
\item Dispersion
\item McConnaughey
\item Stiles
\item Simpson
\item Petke
\item Tversky
}
The default metric is the Tanimoto coefficient.
}
\section{Methods}{
\describe{
\item{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "character", a = "missing", b = "missing")}}{
Similarity method for feature vector type fingerprints, supporting \code{tanimoto}, \code{robust} and \code{dice}
metrics.
}
\item{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "missing", a = "missing", b = "missing")}}{
Evaluate Tanimoto similarity between two feature vector fingerprints
}
\item{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "character", a = "missing", b = "missing")}}{
Evaluate similarity (or dissimilarity) between two binary fingerprints. See below for a list of possible
similarity (or dissimilarity) metrics
}
\item{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "character", a = "numeric", b = "numeric")}}{
Evaluate Tversky similarity between two binary fingerprints.
}
\item{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "missing", a = "missing", b = "missing")}}{
Evaluate Tanimoto similarity between two binary fingerprints
}
}}
\usage{
distance(fp1, fp2, method, a, b)
}
\arguments{
\item{fp1}{
An object of class \code{fingerprint} or \code{featvec}
}
\item{fp2}{
An object of class \code{fingerprint} or \code{featvec}
}
\item{a}{Parameter for the Tversky index}
\item{b}{Parameter for the Tversky index}
\item{method}{
The type of distance metric desired. Partial matching is
supported and the default is \code{tanimoto}. Alternative values are
\itemize{
\item \code{euclidean}
\item \code{hamming}
\item \code{meanHamming}
\item \code{soergel}
\item \code{patternDifference}
\item \code{variance}
\item \code{size}
\item \code{shape}
\item \code{jaccard}
\item \code{dice}
\item \code{mt}
\item \code{simple}
\item \code{russelrao}
\item \code{rodgerstanimoto}
\item \code{cosine}
\item \code{achiai}
\item \code{carbo}
\item \code{baroniurbanibuser}
\item \code{kulczynski2}
\item \code{robust}
\item \code{hamann}
\item \code{yule}
\item \code{pearson}
\item \code{mcconnaughey}
\item \code{stiles}
\item \code{simpson}
\item \code{petke}
\item \code{tversky}
}
If the two fingerprints are of class \code{featvec} then the following methods
may be specified: \code{tanimoto}, \code{robust} and \code{dice}.
}
}
\value{
Numeric value representing the distance in the specified metric between the
supplied fingerprint objects
}
\examples{
# make two fingerprint vectors
fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6))
fp2 <- new("fingerprint", nbit=6, bits=c(1,2,5,6))
# calculate the tanimoto coefficient
distance(fp1,fp2) # should be 1
# Invert the second fingerprint
fp3 <- !fp2
distance(fp1,fp3) # should be 0
}
\references{Fligner, M.A.; Verducci, J.S.; Blower, P.E.;
A Modification of the Jaccard-Tanimoto Similarity Index for
Diverse Selection of Chemical Compounds Using Binary Strings,
\emph{Technometrics}, 2002, \emph{44}(2), 110-119
Monve, V.; Introduction to Similarity Searching in
Chemistry, \emph{MATCH - Comm. Math. Comp. Chem.}, 2004, \emph{51}, 7-38
}
\keyword{logic}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
fingerprint/man/shannon.Rd 0000644 0001762 0000144 00000001427 11551601031 015304 0 ustar ligges users \name{shannon}
\alias{shannon}
\alias{entropy}
\title{
Evaluate Shannon Entropy for a Set of Fingerprints
}
\description{
This method evaluates the Shannon entropy for a set of fingerprints
and utilizes the \code{\link{bit.spectrum}} method to obtain the relative
frequencies of individual bits
}
\usage{
shannon(fplist)
}
\arguments{
\item{fplist}{
A list structure with each element being an object of class
\code{fingerprint}. These can be constructed by hand or
read from disk via \code{\link{fp.read}}.
All fingerprints in the list should be of the same length.
}
}
\value{
The Shannon entropy for the set of fingerprints
}
\seealso{
\code{\link{bit.spectrum}}, \code{\link{fp.read}}
}
\keyword{programming}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
fingerprint/man/count-methods.Rd 0000644 0001762 0000144 00000002260 12224277233 016440 0 ustar ligges users \name{count-methods}
\docType{methods}
\alias{count-methods}
\alias{count}
\alias{count,feature-method}
\alias{count<--methods}
\alias{count<-}
\alias{count<-,feature,numeric-method}
\title{Get or Set Count of Occurence of a Feature}
\description{
Get or set the count of occurence associated with a
\code{\link{feature-class}} object. The default value for the getter
(as defined in the prototype) is 1.
}
\section{Methods}{
\describe{
\item{\code{signature(object = "feature")}}{Return the count associated with the
feature object}
\item{\code{signature(x = "feature", value = "numeric")}}{Set the count associated with the
feature object}
}
}
\usage{
\S4method{count}{feature}(object)
\S4method{count}{feature,numeric}(x) <- value
}
\arguments{
\item{object}{
An object of class \code{\link{feature-class}}
}
\item{x}{
An object of class \code{\link{feature-class}}
}
\item{value}{
A numeric (which will be coerced to \code{integer}) indicating the
count associated with the feature
}
}
\value{
An integer representing count of occurence of the feature
}
\keyword{programming}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
fingerprint/man/mat.Rd 0000644 0001762 0000144 00000002416 11447374434 014442 0 ustar ligges users \name{fp.to.matrix}
\alias{fp.to.matrix}
\title{
Converts a List of Fingerprints to a Matrix
}
\description{
In general, fingerprint data is read from a file or obtained via
calls to an external generator and the return value is a list of fingerprints.
This function takes the list and returns a matrix having number of rows equal to
the number of fingerprints and the number of columns equal to the length of
the fingerprint. Each element is 1 or 0 (1's being specified by the positions
in each fingerprint vector)
}
\usage{
fp.to.matrix(fplist)
}
\arguments{
\item{fplist}{
A list structure with each element being an object of class
\code{fingerprint}. These can be constructed by hand or
read from disk via \code{\link{fp.read}}
}
}
\value{
A matrix with dimensions equal to \code{(length(fplist), bit length)}
where bit length is a property of the fingerprint objects in the list.
}
\seealso{
\code{\link{distance}}, \code{\link{fp.read}}
}
\examples{
# make fingerprint objects
fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6))
fp2 <- new("fingerprint", nbit=6, bits=c(1,4,5,6))
fp3 <- new("fingerprint", nbit=6, bits=c(2,3,4,5,6))
fp.to.matrix( list(fp1,fp2,fp3) )
}
\keyword{logic}
\author{Rajarshi Guha \email{rguha@indiana.edu}}
fingerprint/man/length.Rd 0000644 0001762 0000144 00000000772 11447374434 015145 0 ustar ligges users \name{length}
\alias{length}
\alias{length,fingerprint-method}
\title{
Fingerprint Bit Length
}
\description{
Returns the length of the fingerprint. That is, this is the length of
the entire bit string and not simply the number of bits that are on.
}
\usage{
\S4method{length}{fingerprint}(x)
}
\arguments{
\item{x}{
An object of class \code{fingerprint}
}
}
\value{
The length of the bit string
}
\keyword{logic}
\keyword{methods}
\author{Rajarshi Guha \email{rguha@indiana.edu}}
fingerprint/man/string.Rd 0000644 0001762 0000144 00000001667 12224147550 015165 0 ustar ligges users \name{as.character}
\alias{as.character}
\alias{as.character,fingerprint-method}
\alias{as.character,featvec-method}
\alias{as.character,feature-method}
\title{
Generates a String Representation of a Fingerprint
}
\description{
The function returns a string of 1's and 0's or a character vector of
features depending on the nature of the fingerprint supplied.
}
\usage{
\S4method{as.character}{fingerprint}(x)
\S4method{as.character}{featvec}(x)
\S4method{as.character}{feature}(x)
}
\arguments{
\item{x}{
An object of class \code{fingerprint}, \code{featvec} or \code{feature}
}
}
\value{
A string of 1's and 0's or else a character vector of features (with their counts)
}
\examples{
# make a fingerprint vector
fp <- new("fingerprint", nbit=32, bits=sample(1:32, 20))
# print out the string representation
as.character(fp)
}
\keyword{logic}
\keyword{methods}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
fingerprint/man/facmat.Rd 0000644 0001762 0000144 00000001665 11447374434 015121 0 ustar ligges users \name{fp.factor.matrix}
\alias{fp.factor.matrix}
\title{
Converts a List of Fingerprints to a data.frame of Factors
}
\description{
This function will convert a \code{list} of fingerprint objects
to a \code{data.frame} of factors with levels 1 and 0.
}
\usage{
fp.factor.matrix(fplist)
}
\arguments{
\item{fplist}{
A list structure with each element being an object of class
\code{fingerprint}. These can be constructed by hand or
read from disk via \code{\link{fp.read}}
}
}
\value{
A \code{data.frame} with \code{length(fplist)} rows and one column per bit position, each column being a factor with levels 0 and 1
}
\seealso{
\code{\link{distance}}, \code{\link{fp.read}}
}
\examples{
# make fingerprint objects
fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6))
fp2 <- new("fingerprint", nbit=6, bits=c(1,4,5,6))
fp3 <- new("fingerprint", nbit=6, bits=c(2,3,4,5,6))
fp.factor.matrix( list(fp1,fp2,fp3) )
}
\keyword{logic}
\author{Rajarshi Guha \email{rguha@indiana.edu}}
fingerprint/man/c.Rd 0000644 0001762 0000144 00000000760 12224151362 014066 0 ustar ligges users \name{c}
\alias{c,feature-method}
\title{
Combine Multiple Features to Give a List of Features
}
\description{
Combine multiple \code{feature} objects to give a list of feature objects
}
\usage{
\S4method{c}{feature}(x, ..., recursive = FALSE)
}
\arguments{
\item{x}{
An object of class \code{feature}
}
\item{...}{
One or more \code{feature} objects
}
\item{recursive}{ Ignored }
}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
\keyword{logic}
fingerprint/man/feature-methods.Rd 0000644 0001762 0000144 00000002275 12224277277 016761 0 ustar ligges users \name{feature-methods}
\docType{methods}
\alias{feature}
\alias{feature-methods}
\alias{feature,feature-method}
\alias{feature<--methods}
\alias{feature<-}
\alias{feature<-,feature,character-method}
\title{Get or Set the Character String Representing the Feature}
\description{
Get or set the character string representing a feature of a
\code{\link{feature-class}} object. The default value for the getter
(as defined in the prototype) is the empty string.
}
\section{Methods}{
\describe{
\item{\code{signature(object = "feature")}}{Return the feature associated with the
feature object}
\item{\code{signature(x = "feature", value = "character")}}{Set the feature associated with the
feature object}
}
}
\usage{
\S4method{feature}{feature}(object)
\S4method{feature}{feature,character}(x) <- value
}
\arguments{
\item{object}{
An object of class \code{\link{feature-class}}
}
\item{x}{
An object of class \code{\link{feature-class}}
}
\item{value}{
The character string to replace the current feature string with
}
}
\value{
An character string representing the feature
}
\keyword{programming}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
fingerprint/man/fplogical.Rd 0000644 0001762 0000144 00000001470 11447374434 015620 0 ustar ligges users \name{fplogical}
\alias{!}
\alias{|}
\alias{&}
\alias{xor}
\alias{|,fingerprint,fingerprint-method}
\alias{&,fingerprint,fingerprint-method}
\alias{xor,fingerprint,fingerprint-method}
\alias{!,fingerprint-method}
\title{
Logical Operators for Fingerprints
}
\description{
These functions perform logical operations (AND, OR, NOT, XOR) on the supplied
binary fingerprints. Thus for two fingerprints A and B we have
\describe{
\item{\code{&}}{Logical AND}
\item{\code{|}}{Logical OR}
\item{\code{xor}}{Logical XOR}
\item{\code{!}}{Logical NOT (negation)}
}
}
\arguments{
\item{e1}{
An object of class \code{fingerprint}
}
\item{e2}{
An object of class \code{fingerprint}
}
}
\value{
A fingerprint object
}
\keyword{logic}
\keyword{methods}
\author{Rajarshi Guha \email{rguha@indiana.edu}}
fingerprint/man/bitspec.Rd 0000644 0001762 0000144 00000003070 11551601023 015266 0 ustar ligges users \name{bit.spectrum}
\alias{bit.spectrum}
\title{
Generate a Bit Spectrum from a List of Fingerprints
}
\description{
The idea of comparing datasets using fingerprints was described in
Guha \& Schurer (2008). The idea is that one can summarize the dataset
by counting the frequency of occurrence of each bit position. The
frequency is normalized by the number of fingerprints considered. Thus a
collection of N fingerprints can be converted to a single vector of
numbers highlighting the most frequent bits with respect to a given
dataset. A plot of this vector looks like a traditional spectrum and
hence the name.
The bit spectra for two datasets (assuming that the same types of
fingerprints have been used) allows one to compare the similarity of
the datasets, without having to do a full pairwise similarity
calculation. The difference between the structural features of the
datasets can be quantified by evaluating the distance between the two
bit spectra.
}
\usage{
bit.spectrum(fplist)
}
\arguments{
\item{fplist}{
A list structure with each element being an object of class
\code{fingerprint}. These can be constructed by hand or
read from disk via \code{\link{fp.read}}.
All fingerprints in the list should be of the same length.
}
}
\value{
A numeric vector of length equal to the size of the fingerprints.
}
\seealso{
\code{\link{distance}}, \code{\link{fp.read}}
}
\references{
Guha, R.; Schurer, S.; \emph{J. Comp. Aid. Molec. Des.}, \bold{2008},
\emph{22}, 367-384.
}
\keyword{programming}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
fingerprint/man/feature.Rd 0000644 0001762 0000144 00000002344 12224270024 015274 0 ustar ligges users \name{feature-class}
\docType{class}
\alias{feature-class}
\title{Class "feature"}
\description{This class represents features - arbitrary alphanumeric sequences
that are used to characterize molecular substructures (though there is no real
restriction to molecules). A feature is associated with an integer count,
indicating the occurence of that feature in a molecule. The default value is 1.
}
\section{Objects from the Class}{
Objects can be created by calls of the form \code{new("feature", ...)}.
}
\section{Slots}{
\describe{
\item{\code{feature}:}{Object of class \code{"character"} ~ The string representation of a feature }
\item{\code{count}:}{Object of class \code{"integer"} ~ The occurence of the feature. Default is 1}
\item{\code{.Data}:}{???}
}
}
\section{Methods}{
\describe{
\item{count}{\code{signature(object = "feature")}: Return the count associated with the
feature}
}
}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
\seealso{
\code{\link{featvec-class}}
}
\examples{
## create a new feature
f <- new("feature", feature='ABCD', count=as.integer(1))
## modify the feature string and the count
feature(f) <- 'UXYZ'
count(f) <- 10
}
\keyword{classes}
\keyword{logic} fingerprint/man/read.Rd 0000644 0001762 0000144 00000003770 11571331512 014564 0 ustar ligges users \name{fp.read, fp.read.to.matrix}
\alias{fp.read}
\alias{fp.read.to.matrix}
\title{
Functions to Read Fingerprints From Files
}
\description{
\code{fp.read} reads in a set of fingerprints from a file. Fingerprint
output from the CDK, MOE and BCI can be handled.
Each fingerprint is represented as a \code{fingerprint} object.
\code{fp.read} returns a \code{list} structure, each element being a
\code{fingerprint} or \code{featvec} object, depending on the value
of the \code{binary} argument.
\code{fp.read.to.matrix} is a utility function that reads the fingerprints directly to
matrix form (columns are the bit positions and the rows are the objects whose fingerprints
have been evaluated). Note that this method does not currently work with feature vector
fingerprints.
}
\usage{
fp.read(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE, binary=TRUE)
fp.read.to.matrix(f='fingerprint.txt', size=1024, lf=cdk.lf, header=FALSE)
}
\arguments{
\item{f}{
File containing the fingerprints
}
\item{size}{
The bit length of the fingerprints being considered
}
\item{lf}{
A line reading function that parses a single line from
a fingerprint file. A number of functions are provided
that parse the fingerprints from the output of the CDK, MOE and the
BCI toolkit. In addition, support is now available for the FPS format
from the chemfp project (\url{http://code.google.com/p/chem-fingerprints}).
}
\item{header}{
Indicates whether the first line of the fingerprint file is
a header line
}
\item{binary}{
If \code{TRUE} indicates that a binary fingerprint will be read in. Otherwise indicates
that a feature vector style fingerprint (such as from a circular fingerprint) is being
read in
}
}
\seealso{
\code{\link{cdk.lf}},
\code{\link{moe.lf}},
\code{\link{bci.lf}},
\code{\link{ecfp.lf}},
\code{\link{fps.lf}}
}
\value{
A \code{list} or \code{matrix} of fingerprints
}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
\keyword{logic}
fingerprint/man/fold.Rd 0000644 0001762 0000144 00000001706 11447374434 014606 0 ustar ligges users
\name{fold}
\alias{fold}
\title{
Fold a fingerprint
}
\description{
In many situations a fingerprint is generated using a large length (such as 1024 bits or more).
As a result of this, the fingerprints for a dataset can be very sparse. One approach to increasing
bit density of such fingerprints is to fold them. This is performed by dividing the original
fingerprint bitstring into two substrings of equal length and then performing an OR on
the two substrings.
It should be noted that many fingerprint generating routines will perform this internally.
}
\usage{
fold(fp)
}
\arguments{
\item{fp}{
The fingerprint to fold. Should be of class \code{fingerprint}.
}
}
\value{
An object of class \code{fingerprint} representing the folded fingerprint.
}
\examples{
# make a fingerprint vector
fp <- new("fingerprint", nbit=64, bits=sample(1:64, 30))
fold(fp)
}
\keyword{logic}
\author{Rajarshi Guha \email{rguha@indiana.edu}}
fingerprint/man/bitimp.Rd 0000644 0001762 0000144 00000002212 11447374434 015137 0 ustar ligges users \name{bit.importance}
\alias{bit.importance}
\title{
Evaluate the Discriminatory Power of Individual Bits in a Binary Fingerprint
}
\description{
This method evaluates the Kullback-Leibler (KL) divergence to rank the
individual bits in a binary fingerprint in their ability to discriminate
between database and active compounds. This method is implemented based on
Nisius and Bajorath and includes an m-estimate correction.
}
\usage{
bit.importance(actives, background)
}
\arguments{
\item{actives}{A list of fingerprints for the actives}
\item{background}{A list of fingerprints representing the background collection}
}
\value{
A numeric vector of length equal to the size of the fingerprints. Each element
of the vector is the KL divergence for the corresponding bit. If a bit position
is never set to 1 in any of the compounds from the actives and the background, then
the KL divergence for that position is undefined and \code{NA} is returned.
}
\seealso{
\code{\link{bit.spectrum}}
}
\references{
Nisius, B.; Bajorath, J.; \emph{ChemMedChem}, \bold{2010},
\emph{5}, 859-868.
}
\keyword{programming}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
fingerprint/man/featvec.Rd 0000644 0001762 0000144 00000004454 12224262341 015265 0 ustar ligges users \name{featvec-class}
\docType{class}
\alias{featvec-class}
\alias{distance,featvec,featvec,missing-method}
\alias{distance,featvec,featvec,character-method}
\alias{length,featvec-method}
\title{Class "featvec"}
\description{This class represents feature vector style fingerprints, where, rather than
a bit string, the fingerprint is represented as a sequence of (signed) integers or strings.
Each element of the collection is a representation of a structural feature. For cases where the
features are integers, this usually corresponds to a hash of the original feature string.
}
\section{Objects from the Class}{
Objects can be created by calls of the form \code{new("featvec", ...)}.
In contrast to traditional binary fingerprints, operations on feature vectors
are slightly different and essentially correspond to operations on sets. Thus
the logical and (&) would correspond to the intersection of the two feature vectors.
}
\section{Slots}{
\describe{
\item{\code{features}:}{Object of class \code{"character"} ~~ A vector
containing the numeric or character features. Numeric features are treated
as character strings }
\item{\code{provider}:}{Object of class \code{"character"} ~~
Indicates the source of the fingerprint. Can be useful to keep
track of what software generated the fingerprint.}
\item{\code{name}:}{Object of class \code{"character"} ~~
The name associated with the fingerprint. If no name is available
this gets set to an empty string}
\item{\code{misc}:}{A list to hold arbitrary items associated with a fingerprint (such as
extra fields from a fingerprint file)}
}
}
\section{Methods}{
\describe{
\item{distance}{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "missing")}: ... }
\item{distance}{\code{signature(fp1 = "featvec", fp2 = "featvec", method = "character")}: ... }
\item{as.character}{\code{signature(fp = "featvec")}: ... }
\item{length}{\code{signature(fp = "featvec")}: ... }
\item{show}{\code{signature(fp = "featvec")}: ... }
}
}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
\seealso{
\code{\link{fp.read}}, \code{\link{fp.read.to.matrix}}
\code{\link{fp.sim.matrix}}, \code{\link{fp.to.matrix}},
\code{\link{fp.factor.matrix}}
\code{\link{random.fingerprint}}
}
\keyword{classes}
\keyword{logic} fingerprint/man/balance.Rd 0000644 0001762 0000144 00000002346 11447374434 015250 0 ustar ligges users \name{balance}
\alias{balance}
\title{
Generate a Balanced Code Fingerprint
}
\description{
It has been noted that the bit density in a fingerprint can affect its ability to
retrieve similar compounds from a database primarily due to complexity effects. One
approach to alleviating these effects is to generate fingerprints that have a bit
density of 50\% (i.e., half the bits are set to 1). This method implements the
balanced code approach described by Nisius and Bajorath to convert an ordinary
binary fingerprint (whose bit density is not 50\%) to one that has a bit density of
50\%. This is achieved by appending the complement of the input fingerprint to itself
(resulting in a fingerprint twice the size of the original).
}
\usage{
balance(fplist)
}
\arguments{
\item{fplist}{A single fingerprint or a list of fingerprints}
}
\value{
A single fingerprint object or a list of fingerprint objects that are "balanced",
in that they have a bit density of 50\%. Their size is 2x the size of the input
fingerprints.
}
\seealso{
\code{\link{bit.spectrum}}, \code{\link{bit.importance}}
}
\references{
Nisius, B.; Bajorath, J.; \emph{ChemMedChem}, \bold{2010},
\emph{5}, 859-868.
}
\keyword{programming}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
fingerprint/man/fingerprint.Rd 0000644 0001762 0000144 00000006276 12020461306 016177 0 ustar ligges users \name{fingerprint-class}
\docType{class}
\alias{fingerprint-class}
\alias{euc.vector,fingerprint-method}
\alias{fold,fingerprint-method}
\alias{random.fingerprint,numeric,numeric-method}
\title{Class "fingerprint"}
\description{This class represents binary fingerprints, usually
generated by a variety of cheminformatics software, but not
restricted to such
}
\section{Objects from the Class}{
Objects can be created by calls of the form \code{new("fingerprint", ...)}.
Fingerprints can traditionally be thought of as a vector of 1's and
0's. However for large fingerprints this is inefficient and
instead we simply store the positions of the bits that are
on. Certain operations also need to know the length of the
original bit string and this length is stored in the object at
construction. Even though we store extra information along with
the bit positions, conceptually we still consider the objects as
simple bit strings. Thus the usual bitwise logical operations
(&, |, !, xor) can be applied to objects of this class.
}
\section{Slots}{
\describe{
\item{\code{bits}:}{Object of class \code{"numeric"} ~~ A vector
indicating the bit positions that are on. }
\item{\code{nbit}:}{Object of class \code{"numeric"} ~~ Indicates the length of the original bit string.}
\item{\code{folded}:}{Object of class \code{"logical"} ~~ Indicates
whether the fingerprint has been folded.}
\item{\code{provider}:}{Object of class \code{"character"} ~~
Indicates the source of the fingerprint. Can be useful to keep
track of what software generated the fingerprint.}
\item{\code{name}:}{Object of class \code{"character"} ~~
The name associated with the fingerprint. If no name is available
this gets set to an empty string}
\item{\code{misc}:}{Object of class \code{"list"} ~~
A holder for arbitrary items that may have been stored along with the fingerprint. Only
certain formats allow extra items to be stored with the fingerprint, so in many cases
this field is just an empty list}
}
}
\section{Methods}{
\describe{
\item{distance}{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "missing", a = "missing", b = "missing")}: ... }
\item{distance}{\code{signature(fp1 = "fingerprint", fp2 = "fingerprint", method = "character", a = "missing", b = "missing")}: ... }
\item{euc.vector}{\code{signature(fp = "fingerprint")}: ... }
\item{fold}{\code{signature(fp = "fingerprint")}: ... }
\item{random.fingerprint}{\code{signature(nbit = "numeric", on = "numeric")}: ... }
}
}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
\seealso{
\code{\link{fp.read}}, \code{\link{fp.read.to.matrix}}
\code{\link{fp.sim.matrix}}, \code{\link{fp.to.matrix}},
\code{\link{fp.factor.matrix}}
\code{\link{random.fingerprint}}
}
\examples{
## make fingerprints
x <- new("fingerprint", nbit=128, bits=sample(1:128, 100))
y <- x
distance(x,y) # should be 1
x <- new("fingerprint", nbit=128, bits=sample(1:128, 100))
distance(x,y)
folded <- fold(x)
## binary operations on fingerprints
x <- new("fingerprint", nbit=8, bits=c(1,2,3,6,8))
y <- new("fingerprint", nbit=8, bits=c(1,2,4,5,7,8))
x & y
x | y
!x
}
\keyword{classes}
\keyword{logic} fingerprint/man/sim.Rd 0000644 0001762 0000144 00000003635 11613543531 014444 0 ustar ligges users \name{fp.sim.matrix}
\alias{fp.sim.matrix}
\title{
Calculates a Similarity Matrix for a Set of Fingerprints
}
\description{
Given a set of fingerprints, a pairwise similarity can be calculated using the
various distance metrics defined for binary strings. This function calculates
the pairwise similarity matrix for a set of \code{fingerprint} or
\code{featvec} objects supplied in a \code{list}
structure. Any of the distance metrics provided by \code{\link{distance}} can be used and the
default is the Tanimoto metric.
Note that if the Euclidean distance is specified then the resultant matrix is a
distance matrix and not a similarity matrix
}
\usage{
fp.sim.matrix(fplist, fplist2=NULL, method='tanimoto')
}
\arguments{
\item{fplist}{
A list structure with each element being an object of class
\code{fingerprint} or \code{featvec}. These can be constructed by hand or
read from disk via \code{\link{fp.read}}
}
\item{fplist2}{A list structure with each element being an object of class
\code{fingerprint} or \code{featvec}. If \code{NULL} then traditional pairwise
similarity is calculated with each member in \code{fplist}, otherwise the
resultant N x M matrix is derived from the similarity between each member of
\code{fplist} and \code{fplist2}}
\item{method}{
The type of distance metric to use. The default is \code{tanimoto}. Partial
matching is supported.
}
}
\value{
A matrix with dimensions equal to \code{(length(fplist), length(fplist))} if
\code{fplist2} is NULL, otherwise \code{(length(fplist), length(fplist2))}
}
\seealso{
\code{\link{distance}}, \code{\link{fp.read}}
}
\examples{
# make fingerprint objects
fp1 <- new("fingerprint", nbit=6, bits=c(1,2,5,6))
fp2 <- new("fingerprint", nbit=6, bits=c(1,4,5,6))
fp3 <- new("fingerprint", nbit=6, bits=c(2,3,4,5,6))
fp.sim.matrix( list(fp1,fp2,fp3) )
}
\keyword{logic}
\author{Rajarshi Guha \email{rajarshi.guha@gmail.com}}
fingerprint/man/rndfp.Rd 0000644 0001762 0000144 00000001413 11447374434 014766 0 ustar ligges users \name{random.fingerprint}
\alias{random.fingerprint}
\title{
Generate Randomized Fingerprints
}
\description{
A utility function that can be used to generate binary fingerprints
of a specified length with a specified number of bit positions
(selected randomly) set to 1. Currently bit positions are selected uniformly
}
\usage{
random.fingerprint(nbit,on)
}
\arguments{
\item{nbit}{
The length of the fingerprint, that is, the total number of bits.
Must be a positive integer.
}
\item{on}{
How many positions should be set to 1
}
}
\value{
An object of class \code{fingerprint}
}
\examples{
# make a fingerprint vector
fp <- random.fingerprint(32, 16)
as.character(fp)
}
\keyword{logic}
\author{Rajarshi Guha \email{rguha@indiana.edu}}
fingerprint/INDEX 0000644 0001762 0000144 00000002704 11457725752 013415 0 ustar ligges users ! Logical Operators for Fingerprints
as.character Generates a String Representation of a
Fingerprint
balance Generate a Balanced Code Fingerprint
bit.importance Evaluate the Discriminatory Power of Individual
Bits in a Binary Fingerprint
bit.spectrum Generate a Bit Spectrum from a List of
Fingerprints
cdk.lf Functions to parse lines from fingerprint files
distance Calculates the Similarity or Dissimilarity
Between Two Fingerprints
euc.vector Euclidean Representation of Binary Fingerprints
featvec-class Class "featvec"
featvec.to.binaryfp Convert a Set of Feature Fingerprints to Binary
Fingerprints
fingerprint-class Class "fingerprint"
fold Fold a fingerprint
fp.factor.matrix Converts a List of Fingerprints to a data.frame
of Factors
fp.read Functions to Read Fingerprints From Files
fp.sim.matrix Calculates a Similarity Matrix for a Set of
Fingerprints
fp.to.matrix Converts a List of Fingerprints to a Matrix
length Fingerprint Bit Length
random.fingerprint Generate Randomized Fingerprints
show,fingerprint-method
String Representation of a Fingerprint