kaos/0000755000176200001440000000000013543346477011227 5ustar liggesuserskaos/NAMESPACE0000644000176200001440000000044313543337633012441 0ustar liggesusers# Generated by roxygen2: do not edit by hand export(cgr) export(cgr.plot) export(cgr.res) export(vectorize) import(ggplot2) import(reshape2) importFrom(grDevices,xy.coords) importFrom(graphics,plot.new) importFrom(graphics,plot.window) importFrom(graphics,points) importFrom(graphics,text) kaos/data/0000755000176200001440000000000013441446010012115 5ustar liggesuserskaos/data/HIV.rda0000644000176200001440000001051413437225174013247 0ustar liggesusers͙=%IFtX p1и1e+6!tΉU*3##~Ͽ>ӷ7˷w?LJï_7_l%sy.˨(cϯƞI?/y\ȿ%=Vmc46 .Tg-A`N` 4r_cVwCB v_kRhkt\%Y$M2 EPk| $pq7ȨE $r %][eQ%*O`AYR "3 qP2ZGgp3d<^MBě"Z4xdt4"e蚳#L}q#%S(J.tneKمa%玑!)Du'M#IaML([jQro`9ގ!(&reP*e\Q'6XRqN Wŕ*G ,77`P8)$nx. B͗X$nkfkMJB)kiAeKVKMJ7"i*LPciZ"31T\J6H[crɓǘjQcFO 3U(ĉ`8/Uq#kDܱr٤눢&UgN (7*&|d+-JR- ! =?:jT:>0a42na҄/H5J5)*R>-H\\ni6G!RP*H9"{(u4*!E\ezRb'+h]UkFZ\RsȏTL-}HHJYEdHHBK1b(P_",$RY#)BT8%w^-;*W;65B J$xKt,Q#84qYITcEdCr) Y3ms)7ARb*U RCThi5fdr.^59U+ BQM*d@T|4-Q8i6$!jȰt^MӶ% aX(5FٜfՇV]#)PszXUL2iW;ޛjDPB]n%WL[ R,Lr)o=#@./TM)F3G}xNRSJ6O_kq I4N!LL2*lB42U9 $kVQ*AQBTX=1jԦfoIRAs?[)=O$z i!!Ho4~رqtw.*'2LQd-:PӇd*$(Ufu,&qv|Pj(8ɼ%5TآLR#SI^I+L1".nZt\J@TC\sҰGTچ }). ;ُQu; \.{ȫ18rF?t1dW$\6~EcFש+PqzF QEd@SWIB*j P .ԛYz!؇`ɑT E@aOd ]mTCUIIyFE`(\sA5-YK RñwƒD)U@㠜yoPJ^IqP8-]!p 1Y*F4=;yJ}s5"E PlNSe;$PR`R-A԰c.l!lXj)jS$EID')Bu2Ԟ2W@Ah*<-Dʒ*hS¤b/T5~ ͷ*(Q#XOyM:JG'E4ROUFU MNA+%iT3nY37HZ~&#2nw10AM.ilՇD+!8EoG1F)򎌀jR=jHچ(|B@#2 L62ini"+i4?S2."jGF#gWo+% |5I`G0YZ\iEHl^!Ǟ (7z$z廿_8r>d?W;$ԫ֌r (eɏ~Tob"Hq{CѱUbH!"Ηh[љ@5CGM!xDxe??$^aSdWFYԻI ELw"mc\ʿPy_4)Kc߄JE:dRJ5ء cCuEDL =4[JҢrPv\q!(% s$4Kvd1GH|c|Ry$o!,CXDټ H8N OU1-!`ཥG$97u)*8 (+ ]B$F{ 1Ȋ!ߌ$ fE[Z*SXBK5RP[GHo #(C7@OEJTr#cAj4 SM}EW Eu)ꀍariEA(x(GE w7#?V0x1UK6V" I!w+Bʫ5MȨ͓HpX]17e7M4!D&F5H(٬ i6b4mhc$ %#Ԧ:S >vHLlNVC1j=FR!$Q*F R!FRyZШjlHt1}mq>f<緾Rֿ4qq.S 4'E3Xtq*jQ1ۤIA$ Z*0!5 JMH]q*a \p4;%6 ~JPEИ)aW'-d J*L2BAS I-r;w."T@. A #rIY+5D%({(s!Qq.Wc5ܪ1ή(RB@*%|uT6mLʢ3yн [q,pJ%Ih\EJ'(8EQdx1B!UFMZ`y/d6&׭}cP̈́ȔLkcR~4-Kj9ڥq%59$"2 bj$:\AASjxE}%&Ȍqˤ3(yR1RZϷ>l*E)OQ#*A[]pLArI!!Xre\(AՒ 𥼃ŒĀ{KG>$0!?LZ&5)deP24\'7@0Bkaos/man/0000755000176200001440000000000013522243413011761 5ustar liggesuserskaos/man/cgr.plot.Rd0000644000176200001440000000116613441464325014013 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cgr.plot.R \name{cgr.plot} \alias{cgr.plot} \title{Plot from a CGR object} \usage{ cgr.plot(data, mode, corners = F, labels = F) } \arguments{ \item{data}{CGR object} \item{mode}{character string \itemize{ \item "points": CGR plot \item "matrix": FCGR plot }} \item{corners}{if true, the corners are added as red dots} \item{labels}{if true, the symbol associated with the corner is added} } \description{ Plot from a CGR object } \details{ This function plots the chaos game reprasentation as points or as frequency matrix representation } kaos/man/vectorize.Rd0000644000176200001440000000054213441464325014272 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vectorize.R \name{vectorize} \alias{vectorize} \title{Vectorizes the FCGR} \usage{ vectorize(data) } \arguments{ \item{data}{CGR object from cgr.R} } \value{ Vector with FCGR-encoding } \description{ Vectorizes the FCGR } \details{ This function returns the FCGR as vector. } kaos/man/cgr.res.Rd0000644000176200001440000000132313441464325013621 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cgr.res.R \name{cgr.res} \alias{cgr.res} \title{Produces a new matrix with new resolution} \usage{ cgr.res(data, res) } \arguments{ \item{data}{CGR object} \item{res}{new resolution} } \description{ Produces a new matrix with new resolution } \details{ This function adjust the resolution of the FCGR of a CGR object. \itemize{ \item matrix: frequency matrix with new given resolution \item x: x-coordinates for the CGR \item y: y-coordinates for the CGR \item sf: applied scaling factor for the CGR \item res: applied resolution to calculate the FCGR \item base.seq: chars or letters to build the edges of the CGR } } kaos/man/cgr.Rd0000644000176200001440000000402013522243413013017 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/cgr.R \name{cgr} \alias{cgr} \title{Chaos Game Representation Object} \usage{ cgr(data, seq.base = row.names(table(data)), sf = F, res = 100) } \arguments{ \item{data}{Input as list/vector of characters from fasta file or similar} \item{seq.base}{By default the included unique elements in data will be used in alphabetical order. It is also possible to define the alphabet explicitly.\cr Predefined alphabets can be used as well:\cr \itemize{ \item "digits": numbers from 0 to 9 \item "AMINO": alphabetical order of the amino acids in capital letters \item "amino": alphabetical order of the amino acids in lowercase letters \item "DNA": the four bases of DNA ("A","G","T","C") in capital letters \item "dna": the four bases of DNA ("a","g","t","c") in lowercase letters \item "LETTERS": The alphabetical order of capital letters from A to Z \item "letters": The alphabetical order of lowercase Letters from a to z }} \item{sf}{By default, the scaling factor for fractal polygons is used; the scaling factor can also explicitly set to values between 0 and 1.} \item{res}{resolution of the frequency matrix} } \value{ CGR object as list of: \itemize{ \item matrix: frequency matrix with given resolution \item x: x-coordinates for the CGR \item y: y-coordinates for the CGR \item sf: applied scaling factor for the CGR \item res: applied resolution to calculate the FCGR \item base.seq: chars or letters to build the edges of the CGR } } \description{ Chaos Game Representation Object } \details{ This function produces a chaos game representation (CGR) object from a sequence (data) } \examples{ ###HIV data data("HIV") ### encoding the sequence HIV.cgr = cgr(HIV, res = 100) ###plot the sequence cgr.plot(HIV.cgr, mode = "points") ###plot the FCGR cgr.plot(HIV.cgr, mode = "matrix") ###change the resolution of matrix from 100x100 to 200x200 cgr.res(HIV.cgr, 200) ### get the FCGR encoded vector vectorize(HIV.cgr) } kaos/man/HIV.Rd0000644000176200001440000000047413441464325012712 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/HIV.R \name{HIV} \alias{HIV} \title{Complete HIV 1 genome} \format{A vector containing the genome of HIV-1 as single characters} \source{ NC_001802.1 Human immunodeficiency virus 1, complete genome } \description{ Complete HIV 1 genome } kaos/DESCRIPTION0000644000176200001440000000114213543346477012733 0ustar liggesusersPackage: kaos Type: Package Title: Encoding of Sequences Based on Frequency Matrix Chaos Game Representation Version: 0.1.2 Author: Dominik Eger and Hannah Franziska Löchel Maintainer: Hannah Franziska Löchel Description: Sequences encoding by using the chaos game representation. Löchel et al. (2019) . License: GPL (>= 2) Encoding: UTF-8 LazyData: true RoxygenNote: 6.1.1 Imports: ggplot2, reshape2 NeedsCompilation: no Packaged: 2019-09-27 07:51:55 UTC; franzi Repository: CRAN Date/Publication: 2019-09-27 08:50:07 UTC kaos/R/0000755000176200001440000000000013522243410011404 5ustar liggesuserskaos/R/HIV.R0000644000176200001440000000030113441463016012155 0ustar liggesusers#' Complete HIV 1 genome #' #'@name HIV #' #' @format A vector containing the genome of HIV-1 as single characters #' #'@source NC_001802.1 Human immunodeficiency virus 1, complete genome NULL kaos/R/distr.pts.R0000644000176200001440000000057213441451232013470 0ustar liggesusers distr.pts = function(n, r, plot = F){ #get coordinates for a regular polygon x = vector("double", n) y = vector("double", n) for (i in 1:n){ x[i] = r*sinpi((2*i+1)/n) y[i] = r*cospi((2*i+1)/n) } #generates a plot if required if (plot) {plot(x, y, pch = 20)} #return coordinates return(xy.coords(x, y)) } kaos/R/cgr.plot.R0000644000176200001440000000425313441463014013267 0ustar liggesusers#'Plot from a CGR object #' #'@name cgr.plot #'@param data CGR object #'@param mode character string #'\itemize{ #' \item "points": CGR plot #' \item "matrix": FCGR plot #'} #'@param corners if true, the corners are added as red dots #'@param labels if true, the symbol associated with the corner is added #' #'@details This function plots the chaos game reprasentation as points or as #'frequency matrix representation #' #' #'@export #' #' @import ggplot2 #' @import reshape2 #' @importFrom grDevices xy.coords #' @importFrom graphics plot.new plot.window points text cgr.plot = function(data, mode, corners = F, labels = F) { if (mode == "matrix") { Var1<-Var2<-value<-x<-y<-NULL matrixplot = ggplot(melt(data$matrix), aes(x = Var1, y = Var2)) + geom_raster(aes(fill = value)) + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line=element_blank(), axis.text.x=element_blank(), axis.text.y=element_blank(), axis.ticks=element_blank(), axis.title.x=element_blank(), axis.title.y=element_blank(), legend.position="none")+ scale_fill_gradient(low = "white", high="black") + labs(x = "", y = "", title = "") if (corners){ matrixplot = matrixplot + geom_point(data = data.frame( x = (data$base$x + 0.5)*data$resolution,y = ( data$base$y + 0.5)*data$resolution), mapping = aes(x, y), colour = "red", shape = 20) } if (labels){ matrixplot = matrixplot + geom_text(data = data.frame(x = ( data$base$x*1.1 + 0.5)*data$resolution,y = ( data$base$y*1.1 + 0.5)*data$resolution), mapping = aes(x, y), label = rownames(data$base)) } matrixplot } else if (mode == "points") { r = 0.5 plot.new() plot.window(xlim = c(-1.1, 1.1), ylim = c(-1.1, 1.1)) points(data$y ~ data$x, pch = 46) if (corners){ points(data$base$y ~ data$base$x, col = "red", pch = 20, cex = 0.7) } if (labels){ text(x = data$base$x*1.1, y = data$base$y*1.1, labels = rownames( data$base)) } } } kaos/R/cgr.res.R0000644000176200001440000000214513441463023013100 0ustar liggesusers#'Produces a new matrix with new resolution #' #'@name cgr.res #'@param data CGR object #'@param res new resolution #' #'@details This function adjust the resolution of the FCGR of a CGR object. #' #@return CGR object as list of: #'\itemize{ #' \item matrix: frequency matrix with new given resolution #' \item x: x-coordinates for the CGR #' \item y: y-coordinates for the CGR #' \item sf: applied scaling factor for the CGR #' \item res: applied resolution to calculate the FCGR #' \item base.seq: chars or letters to build the edges of the CGR #'} #' #'@export #' #' #' cgr.res = function (data, res) { r = 1 A = matrix(data = 0, ncol = res, nrow = res) for (i in 1:length(data$x)) { x.matrix = ceiling((data$x[i]+r ) * res/(2*r)) y.matrix = ceiling((data$y[i]+r ) * res/(2*r)) A[x.matrix, y.matrix] = A[x.matrix, y.matrix] + 1 } #return matrix, coordinates, kissing number, resolution return(list(matrix = A, x = data$x, y = data$y, scaling_factor = data$sf, resolution = res, base = data$base)) } kaos/R/cgr.R0000644000176200001440000001104713522243355012315 0ustar liggesusers#'Chaos Game Representation Object #' #'@param data Input as list/vector of characters from fasta file or similar #'@param seq.base By default the included unique elements in data will be used #' in alphabetical order. It is also possible to define the alphabet #' explicitly.\cr #' Predefined alphabets can be used as well:\cr #'\itemize{ #' \item "digits": numbers from 0 to 9 #' \item "AMINO": alphabetical order of the amino acids in capital letters #' \item "amino": alphabetical order of the amino acids in lowercase letters #' \item "DNA": the four bases of DNA ("A","G","T","C") in capital letters #' \item "dna": the four bases of DNA ("a","g","t","c") in lowercase letters #' \item "LETTERS": The alphabetical order of capital letters from A to Z #' \item "letters": The alphabetical order of lowercase Letters from a to z #'} #'@param sf By default, the scaling factor for fractal polygons is used; the #' scaling factor can also explicitly set to values between 0 and 1. #'@param res resolution of the frequency matrix #' #'@details This function produces a chaos game representation (CGR) #' object from a sequence (data) #' #'@return CGR object as list of: #'\itemize{ #' \item matrix: frequency matrix with given resolution #' \item x: x-coordinates for the CGR #' \item y: y-coordinates for the CGR #' \item sf: applied scaling factor for the CGR #' \item res: applied resolution to calculate the FCGR #' \item base.seq: chars or letters to build the edges of the CGR #'} #' #'@export #' #'@examples #'###HIV data #'data("HIV") #' #'### encoding the sequence #'HIV.cgr = cgr(HIV, res = 100) #' #'###plot the sequence #'cgr.plot(HIV.cgr, mode = "points") #' #'###plot the FCGR #'cgr.plot(HIV.cgr, mode = "matrix") #' #'###change the resolution of matrix from 100x100 to 200x200 #'cgr.res(HIV.cgr, 200) #' #'### get the FCGR encoded vector #'vectorize(HIV.cgr) #' #' cgr = function(data, seq.base = row.names(table(data)), sf = F, res = 100) { r = 1 if(is.character(seq.base)&&length(seq.base)==1){ if(seq.base == "digits"){ seq.base =c(0:9) } else if(seq.base == "AMINO"){ seq.base=c("A","C","D","E","F","G","H","I","K","L","M","N","P","Q","R", "S","T","V","W","Y") } else if(seq.base == "amino"){ seq.base=c("a","c","d","e","f","g","h","i","k","l","m","n","p","q","r", "s","t","v","w","y") } else if (seq.base == "DNA"){ seq.base= c("A","G","T","C") } else if (seq.base == "dna"){ seq.base= c("a","g","t","c") } else if (seq.base == "LETTERS"){ seq.base=LETTERS } else if (seq.base == "letters"){ seq.base=letters } } #check for input errors stopifnot( length(seq.base) >= length(table(data)), all(row.names(table(data)) %in% seq.base), sf <= 1, sf >= 0, res >= 1) #get the number of bases base.num = length(seq.base) if(base.num==4){ x=c(1,-1,-1,1) y=c(-1,-1,1,1) base.coord = xy.coords(x, y) } #calculate corner coordinates for the base else{ base.coord = distr.pts(base.num, r) } #calculate the "scaling factor" depending on the number of bases. #the scaling factor is the ratio of the radius of the circle that #passes through the centers of the touching subpolygons to the #radius of a circle that circumscribes the parent polygon. if (!sf) {sf = 1- (sinpi(1/base.num)/ (sinpi(1 / base.num) + sinpi ( 1/base.num + 2 * (floor (base.num/4) /base.num))))} #data frame for easy access base = data.frame(x = base.coord$x, y = base.coord$y, row.names = seq.base) #get the length of data data.length = length(data) #cgr algorithm: #start at point (0,0) #1. check next character #2. go a fraction of the way to the corresponding base, according to the #scaling factor #3. save coordinates of the point #repeat x = vector("double", data.length) y = vector("double", data.length) A = matrix(data = 0, ncol = res, nrow = res) pt = vector("double", 2) for (i in 1:data.length) { pt = pt + (unlist(base[data[i],]) - pt) * sf x[i] = pt[1] y[i] = pt[2] x.matrix = ceiling((x[i]+r ) * res/(2*r)) y.matrix = ceiling((y[i]+r ) * res/(2*r)) A[x.matrix, y.matrix] = A[x.matrix, y.matrix] + 1 } #return matrix, coordinates, scaling factor, resolution return(list(matrix = A, x = x, y = y, scaling_factor = sf, resolution = res, base = base)) } kaos/R/vectorize.R0000644000176200001440000000040113441463012013536 0ustar liggesusers#'Vectorizes the FCGR #' #'@name vectorize #'@param data CGR object from cgr.R #' #'@details This function returns the FCGR as vector. #' #'@return Vector with FCGR-encoding #' #'@export #' #' #' vectorize = function (data) { as.vector(data$matrix) } kaos/R/Message.R0000644000176200001440000000031413543337327013127 0ustar liggesuserspackageStartupMessage("Please cite: Loechel HF, Eger D, Sperlea T, Heider D: Deep Learning on Chaos Game Representation for Proteins. Bioinformatics 2019, in press. .") kaos/MD50000644000176200001440000000135013543346477011536 0ustar liggesusers911f9e0bc85974dc6c27ba4ee3bf7c55 *DESCRIPTION e25f634d9f82faf7dfd3b60ce5a6f05b *NAMESPACE 8ab46ecd737fd83d74583ad3d64c5038 *R/HIV.R 19cdaf1a6ca7da83f36783802e0b2580 *R/Message.R 7da5d595042b5d9b51c8dce2e2aa2e8a *R/cgr.R 9518262f9d2784a4a1ce06897c707c4b *R/cgr.plot.R 2e26e29d733631af51a99af6f8f969f7 *R/cgr.res.R 70c721ddb446d340e98280c345bd0469 *R/distr.pts.R fb7d94db843f24c21b3ac9853e188abb *R/vectorize.R 283a3de0efc4f974bb10d2ee7c563843 *data/HIV.rda 1783c900bf02e5c1071340af99ba20bf *inst/CITATION 82ee44b05a81833f60e4752a6a48bda9 *man/HIV.Rd 7c61d984dc490fb6abf560b3b986c2f2 *man/cgr.Rd 2ecda6e1b838b775da256e666902e75e *man/cgr.plot.Rd 8777388e1f4ce05296dd399edf2cf8be *man/cgr.res.Rd 98ca36cdef74f324969abfd4f4fecc2e *man/vectorize.Rd kaos/inst/0000755000176200001440000000000013522255314012166 5ustar liggesuserskaos/inst/CITATION0000644000176200001440000000065613522263533013334 0ustar liggesusers bibentry(bibtype = "article", title = "Deep learning on chaos game representation for proteins", author =c(person("Hannah F", "Löchel"), person("Dominic", "Eger"), person("Theodor","Sperlea"), person("Dominik","Heider")), journal = "Bioinformatics", year = {2019}, month = {06}, doi = "10.1093/bioinformatics/btz493",)