sparsesvd/0000755000176200001440000000000013513113513012260 5ustar liggesuserssparsesvd/tests/0000755000176200001440000000000013512663660013436 5ustar liggesuserssparsesvd/tests/iris.R0000644000176200001440000000170013512706726014526 0ustar liggesusers## compute PCA of iris data set with svd() and sparsesvd() library(sparsesvd) library(Matrix) data(iris) M <- scale(as.matrix(iris[, 1:4]), scale=FALSE) Ms <- Matrix(M) # not sparse, but a dMatrix res1 <- svd(M) res2 <- sparsesvd(Ms) ## check that eigenvalues are the same print(res2$d, digits=3) stopifnot(all.equal(res1$d, res2$d, tolerance=1e-12)) ## these should be diagonal unit matrices UtU <- abs(crossprod(res2$u, res1$u)) # diagonal entries may be 1 or -1 VtV <- abs(crossprod(res2$v, res1$v)) # (because sign of eigenvectors is arbitrary) I1 <- diag(1, length(res1$d)) print(round(UtU, 12)) stopifnot(all.equal(UtU, I1, tolerance=1e-12)) print(round(VtV, 12)) stopifnot(all.equal(VtV, I1, tolerance=1e-12)) ## check that SVD is reproducible ## (this is guaranteed by a deterministic RNG built into the SVDLIBC code) for (i in 1:20) { res <- sparsesvd(Ms) if (!isTRUE(all.equal(res, res2))) stop("SVD not reproducible on iteration #", i) } sparsesvd/tests/loss_of_orthogonality.R0000644000176200001440000003762413512675413020222 0ustar liggesusers## SVDLIBC seems to have a bug that leads to incorrect results and infinite loops with ## highly sparse matrices with invariant subspaces, requiring frequent restarts of the Lanczos iteration. ## Here we test these issues with several sample matrices (should be fixed in sparsesvd v0.2) library(Matrix) library(sparsesvd) check.svd <- function (M, tol=1e-2) { cat(sprintf("Checking %d x %d matrix %s ...\n", nrow(M), ncol(M), deparse(substitute(M)))) M.ref <- as.matrix(M) res <- sparsesvd(M) M.new <- res$u %*% diag(res$d, length(res$d)) %*% t(res$v) if (!all(dim(M.new) == dim(M.ref))) stop(sprintf("SVD approximation has wrong dimensions %d x %d", nrow(M.new), ncol(M.new))) d <- sqrt(sum((M.new - M.ref)^2)) cat(sprintf(" --> approximation error ||M.new - M.ref||_2 = %g\n", d)) if (d > tol) { print(round(M.new, 2)) print(round(M.ref, 2)) stop("approximation error exceeds tolerance limit") } } Zero <- as(matrix(0, 3, 5), "dgCMatrix") # all-zero matrix check.svd(Zero) D1 <- new("dgCMatrix", i = 0:1, p = c(0L, 2L), Dim = 2:1, Dimnames = list( NULL, NULL), x = c(1, 1), factors = list()) check.svd(D1) D2 <- new("dgCMatrix", i = 0:3, p = c(0L, 2L, 4L), Dim = c(4L, 2L), Dimnames = list(NULL, NULL), x = c(1, 1, 1, 1), factors = list()) check.svd(D2) D5 <- new("dgCMatrix", i = 0:4, p = 0:5, Dim = c(5L, 5L), Dimnames = list( NULL, NULL), x = c(1, 1, 1, 1, 1), factors = list()) check.svd(D5) D21 <- new("dgCMatrix", i = 0:21, p = c(0L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L), Dim = 22:21, Dimnames = list(NULL, NULL), x = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ), factors = list()) check.svd(D21) TermDoc <- new("dgCMatrix", i = c(0L, 14L, 15L, 13L, 17L, 16L, 4L, 1L, 13L, 9L, 16L, 6L, 7L, 10L, 9L, 11L, 8L, 14L, 1L, 5L, 16L, 3L, 8L, 12L, 0L, 14L, 15L, 4L, 3L, 2L, 5L, 2L, 17L, 9L, 3L, 14L, 12L, 11L, 6L, 7L, 10L, 9L, 3L, 12L), p = c(0L, 3L, 4L, 5L, 6L, 7L, 9L, 11L, 14L, 16L, 18L, 19L, 21L, 24L, 27L, 28L, 29L, 30L, 31L, 33L, 34L, 36L, 37L, 38L, 41L, 42L, 44L), Dim = c(18L, 26L), Dimnames = list( NULL, NULL), x = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), factors = list()) check.svd(TermDoc) M <- new("dgTMatrix", i = c(13L, 46L, 377L, 243L, 298L, 345L, 155L, 422L, 441L, 77L, 167L, 282L, 375L, 389L, 395L, 28L, 30L, 202L, 250L, 297L, 69L, 84L, 214L, 234L, 317L, 68L, 135L, 196L, 380L, 414L, 459L, 114L, 229L, 343L, 406L, 70L, 188L, 223L, 322L, 334L, 429L, 1L, 53L, 96L, 176L, 239L, 296L, 320L, 346L, 371L, 432L, 27L, 58L, 74L, 198L, 237L, 253L, 256L, 287L, 440L, 97L, 157L, 185L, 257L, 93L, 116L, 458L, 99L, 110L, 180L, 186L, 333L, 403L, 219L, 311L, 382L, 49L, 64L, 123L, 171L, 200L, 264L, 335L, 339L, 357L, 364L, 366L, 448L, 79L, 281L, 417L, 87L, 105L, 128L, 319L, 398L, 194L, 249L, 276L, 8L, 59L, 72L, 81L, 242L, 267L, 386L, 454L, 182L, 207L, 226L, 230L, 431L, 24L, 181L, 294L, 305L, 383L, 410L, 442L, 22L, 103L, 238L, 306L, 336L, 370L, 2L, 23L, 35L, 119L, 175L, 266L, 350L, 0L, 50L, 149L, 228L, 293L, 397L, 451L, 17L, 48L, 83L, 215L, 340L, 420L, 456L, 15L, 142L, 277L, 57L, 150L, 435L, 11L, 62L, 82L, 85L, 164L, 189L, 279L, 246L, 436L, 75L, 100L, 163L, 165L, 166L, 170L, 247L, 290L, 407L, 433L, 33L, 131L, 225L, 3L, 19L, 179L, 418L, 4L, 160L, 338L, 355L, 445L, 452L, 71L, 158L, 183L, 184L, 203L, 331L, 437L, 156L, 199L, 323L, 94L, 146L, 208L, 424L, 292L, 394L, 425L, 65L, 125L, 205L, 365L, 396L, 443L, 52L, 136L, 310L, 76L, 204L, 206L, 212L, 299L, 342L, 356L, 446L, 73L, 95L, 111L, 187L, 341L, 20L, 309L, 325L, 10L, 31L, 91L, 178L, 252L, 447L, 121L, 197L, 314L, 25L, 169L, 218L, 221L, 255L, 295L, 378L, 411L, 444L, 44L, 118L, 271L, 173L, 232L, 374L, 18L, 159L, 210L, 133L, 244L, 369L, 385L, 162L, 312L, 327L, 363L, 439L, 36L, 313L, 404L, 423L, 153L, 245L, 251L, 303L, 29L, 248L, 285L, 145L, 261L, 450L, 7L, 45L, 405L, 430L, 14L, 126L, 286L, 328L, 388L, 434L, 90L, 332L, 354L, 6L, 61L, 152L, 115L, 134L, 137L, 154L, 174L, 213L, 258L, 324L, 348L, 391L, 415L, 117L, 138L, 141L, 231L, 259L, 291L, 315L, 438L, 39L, 112L, 321L, 122L, 260L, 427L, 26L, 40L, 88L, 192L, 268L, 300L, 280L, 283L, 393L, 413L, 108L, 120L, 151L, 419L, 21L, 211L, 351L, 161L, 209L, 216L, 161L, 209L, 216L, 98L, 352L, 401L, 104L, 372L, 412L, 358L, 408L, 409L, 263L, 362L, 449L, 233L, 361L, 402L, 107L, 140L, 222L, 367L, 9L, 51L, 113L, 132L, 190L, 191L, 195L, 301L, 307L, 330L, 359L, 376L, 421L, 426L, 86L, 106L, 127L, 42L, 47L, 60L, 262L, 273L, 304L, 326L, 344L, 32L, 43L, 329L, 272L, 390L, 392L, 240L, 428L, 460L, 38L, 102L, 109L, 217L, 274L, 349L, 56L, 241L, 453L, 12L, 37L, 130L, 254L, 80L, 269L, 288L, 384L, 455L, 92L, 172L, 353L, 101L, 224L, 302L, 318L, 5L, 78L, 89L, 54L, 139L, 177L, 16L, 34L, 373L, 124L, 284L, 360L, 63L, 227L, 235L, 270L, 308L, 66L, 129L, 144L, 147L, 337L, 387L, 143L, 220L, 368L, 201L, 316L, 381L, 55L, 193L, 399L, 67L, 265L, 379L, 400L, 416L, 236L, 275L, 278L, 148L, 168L, 289L, 41L, 347L, 457L), j = c(0L, 0L, 0L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 29L, 29L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 31L, 31L, 31L, 32L, 32L, 32L, 32L, 33L, 33L, 33L, 33L, 33L, 33L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 35L, 35L, 35L, 36L, 36L, 36L, 36L, 37L, 37L, 37L, 38L, 38L, 38L, 39L, 39L, 39L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 42L, 42L, 42L, 42L, 42L, 43L, 43L, 43L, 44L, 44L, 44L, 44L, 44L, 44L, 45L, 45L, 45L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 47L, 47L, 47L, 48L, 48L, 48L, 49L, 49L, 49L, 50L, 50L, 50L, 50L, 51L, 51L, 51L, 51L, 51L, 52L, 52L, 52L, 52L, 53L, 53L, 53L, 53L, 54L, 54L, 54L, 55L, 55L, 55L, 56L, 56L, 56L, 56L, 57L, 57L, 57L, 57L, 57L, 57L, 58L, 58L, 58L, 59L, 59L, 59L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 61L, 61L, 61L, 61L, 61L, 61L, 61L, 61L, 62L, 62L, 62L, 63L, 63L, 63L, 64L, 64L, 64L, 64L, 64L, 64L, 65L, 65L, 65L, 65L, 66L, 66L, 66L, 66L, 67L, 67L, 67L, 68L, 68L, 68L, 69L, 69L, 69L, 70L, 70L, 70L, 71L, 71L, 71L, 72L, 72L, 72L, 73L, 73L, 73L, 74L, 74L, 74L, 75L, 75L, 75L, 75L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 77L, 77L, 77L, 78L, 78L, 78L, 78L, 78L, 78L, 78L, 78L, 79L, 79L, 79L, 80L, 80L, 80L, 81L, 81L, 81L, 82L, 82L, 82L, 82L, 82L, 82L, 83L, 83L, 83L, 84L, 84L, 84L, 84L, 85L, 85L, 85L, 85L, 85L, 86L, 86L, 86L, 87L, 87L, 87L, 87L, 88L, 88L, 88L, 89L, 89L, 89L, 90L, 90L, 90L, 91L, 91L, 91L, 92L, 92L, 92L, 92L, 92L, 93L, 93L, 93L, 93L, 93L, 93L, 94L, 94L, 94L, 95L, 95L, 95L, 96L, 96L, 96L, 97L, 97L, 97L, 97L, 97L, 98L, 98L, 98L, 99L, 99L, 99L, 100L, 100L, 100L), Dim = c(461L, 101L), Dimnames = list( NULL, NULL), x = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), factors = list()) check.svd(M) A <- new("dgTMatrix", i = c(0L, 586L, 1958L, 18724L, 10404L, 13340L, 16234L, 6536L, 21785L, 23539L, 3256L, 7242L, 12660L, 18483L, 19388L, 19824L, 1238L, 1264L, 8787L, 10708L, 13334L, 2867L, 3512L, 9360L, 10146L, 14287L, 2854L, 5693L, 8646L, 18887L, 21277L, 27422L, 4632L, 9888L, 16123L, 20780L, 2960L, 8291L, 9771L, 14585L, 15294L, 22360L, 26L, 2274L, 3930L, 7690L, 10278L, 13269L, 14444L, 16327L, 17875L, 22552L, 1207L, 2362L, 3174L, 8653L, 10220L, 10754L, 10857L, 12861L, 23415L, 3957L, 6679L, 8183L, 10870L, 3773L, 4719L, 26559L, 4030L, 4486L, 7926L, 8245L, 15243L, 20489L, 9551L, 13960L, 18997L, 2088L, 2785L, 4965L, 7428L, 8762L, 11435L, 15325L, 15451L, 16967L, 17323L, 17463L, 25040L, 3368L, 12604L, 21330L, 3615L, 4307L, 5179L, 14429L, 19969L, 8536L, 10618L, 12325L, 366L, 2394L, 3097L, 3390L, 10377L, 11606L, 19305L, 25620L, 8051L, 9066L, 9831L, 9923L, 22547L, 1051L, 8000L, 13148L, 13652L, 19134L, 21014L, 23716L, 974L, 4140L, 10244L, 13758L, 15363L, 17744L, 110L, 1042L, 1435L, 4802L, 7663L, 11599L, 16531L, 12L, 2158L, 6172L, 9866L, 13125L, 19941L, 25208L, 833L, 2072L, 3476L, 9408L, 15581L, 21608L, 25886L, 749L, 5960L, 12348L, 2342L, 6249L, 23087L, 571L, 2737L, 3420L, 3576L, 7017L, 8423L, 12401L, 10484L, 23110L, 3212L, 4053L, 7013L, 7037L, 7194L, 7317L, 10532L, 12917L, 20800L, 22803L, 1374L, 5427L, 9798L, 178L, 870L, 7869L, 21366L, 181L, 6742L, 15435L, 16834L, 24213L, 25261L, 3080L, 6701L, 8100L, 8146L, 8845L, 15076L, 23227L, 6650L, 8667L, 14607L, 3860L, 6059L, 9131L, 21934L, 13039L, 19810L, 21992L, 2797L, 4977L, 8988L, 17456L, 19865L, 23892L, 2225L, 5696L, 13948L, 3231L, 8849L, 9013L, 9270L, 13351L, 16093L, 16881L, 24362L, 3151L, 3911L, 4534L, 8259L, 15656L, 872L, 13889L, 14709L, 512L, 1303L, 3741L, 7837L, 10750L, 24978L, 4937L, 8651L, 14102L, 1116L, 7309L, 9485L, 9640L, 10828L, 13251L, 18787L, 21060L, 23901L, 1879L, 4794L, 11817L, 7440L, 10074L, 18425L, 843L, 6710L, 9218L, 5651L, 10410L, 17733L, 19290L, 7005L, 13985L, 14964L, 17289L, 23410L, 1548L, 14100L, 20576L, 21875L, 6356L, 10475L, 10712L, 13594L, 1261L, 10574L, 12727L, 6047L, 11238L, 25105L, 297L, 1903L, 20711L, 22391L, 605L, 5002L, 12782L, 14991L, 19368L, 23012L, 3699L, 15108L, 16802L, 268L, 2562L, 6333L, 4668L, 5682L, 5713L, 6416L, 7499L, 9318L, 10948L, 14649L, 16450L, 19481L, 21282L, 4788L, 5758L, 5939L, 9983L, 10954L, 13000L, 14198L, 23381L, 1678L, 4591L, 14577L, 4958L, 11141L, 22168L, 1128L, 1722L, 3619L, 8484L, 11661L, 13422L, 12412L, 12688L, 19778L, 21264L, 4379L, 4891L, 6302L, 21571L, 931L, 9252L, 16600L, 6782L, 9204L, 9435L, 6782L, 9204L, 9435L, 3959L, 16655L, 20200L, 4153L, 18020L, 21108L, 17079L, 20885L, 20922L, 11384L, 17273L, 25071L, 10100L, 17218L, 20241L, 4331L, 5925L, 9736L, 17639L, 446L, 2200L, 4603L, 5442L, 8431L, 8464L, 8566L, 13425L, 13761L, 15026L, 17106L, 18624L, 21649L, 22001L, 3603L, 4316L, 5073L, 1775L, 1984L, 2414L, 11242L, 12045L, 13619L, 14769L, 16229L, 1337L, 1844L, 14997L, 11948L, 19464L, 19777L, 10280L, 22259L, 27458L, 1666L, 4138L, 4406L, 9461L, 12200L, 16461L, 2339L, 10325L, 25264L, 580L, 1646L, 5412L, 10793L, 3385L, 11791L, 12875L, 19187L, 25846L, 3749L, 7436L, 16780L, 4107L, 9793L, 13445L, 14305L, 255L, 3257L, 3674L, 2279L, 5912L, 7777L, 761L, 1404L, 18184L, 4970L, 12711L, 17107L, 2760L, 9858L, 10187L, 11794L, 13807L, 2813L, 5403L, 5986L, 6112L, 15382L, 19313L, 5979L, 9579L, 17654L, 8784L, 14210L, 18958L, 2315L, 8532L, 19996L, 2826L, 11510L, 18848L, 20043L, 21313L, 10212L, 12304L, 12367L, 6160L, 7248L, 12897L, 1764L, 16440L, 26230L), j = c(0L, 0L, 0L, 0L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 20L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 22L, 22L, 22L, 22L, 22L, 22L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 29L, 29L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 31L, 31L, 31L, 32L, 32L, 32L, 32L, 33L, 33L, 33L, 33L, 33L, 33L, 34L, 34L, 34L, 34L, 34L, 34L, 34L, 35L, 35L, 35L, 36L, 36L, 36L, 36L, 37L, 37L, 37L, 38L, 38L, 38L, 39L, 39L, 39L, 40L, 40L, 40L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 41L, 42L, 42L, 42L, 42L, 42L, 43L, 43L, 43L, 44L, 44L, 44L, 44L, 44L, 44L, 45L, 45L, 45L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 46L, 47L, 47L, 47L, 48L, 48L, 48L, 49L, 49L, 49L, 50L, 50L, 50L, 50L, 51L, 51L, 51L, 51L, 51L, 52L, 52L, 52L, 52L, 53L, 53L, 53L, 53L, 54L, 54L, 54L, 55L, 55L, 55L, 56L, 56L, 56L, 56L, 57L, 57L, 57L, 57L, 57L, 57L, 58L, 58L, 58L, 59L, 59L, 59L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 60L, 61L, 61L, 61L, 61L, 61L, 61L, 61L, 61L, 62L, 62L, 62L, 63L, 63L, 63L, 64L, 64L, 64L, 64L, 64L, 64L, 65L, 65L, 65L, 65L, 66L, 66L, 66L, 66L, 67L, 67L, 67L, 68L, 68L, 68L, 69L, 69L, 69L, 70L, 70L, 70L, 71L, 71L, 71L, 72L, 72L, 72L, 73L, 73L, 73L, 74L, 74L, 74L, 75L, 75L, 75L, 75L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 76L, 77L, 77L, 77L, 78L, 78L, 78L, 78L, 78L, 78L, 78L, 78L, 79L, 79L, 79L, 80L, 80L, 80L, 81L, 81L, 81L, 82L, 82L, 82L, 82L, 82L, 82L, 83L, 83L, 83L, 84L, 84L, 84L, 84L, 85L, 85L, 85L, 85L, 85L, 86L, 86L, 86L, 87L, 87L, 87L, 87L, 88L, 88L, 88L, 89L, 89L, 89L, 90L, 90L, 90L, 91L, 91L, 91L, 92L, 92L, 92L, 92L, 92L, 93L, 93L, 93L, 93L, 93L, 93L, 94L, 94L, 94L, 95L, 95L, 95L, 96L, 96L, 96L, 97L, 97L, 97L, 97L, 97L, 98L, 98L, 98L, 99L, 99L, 99L, 100L, 100L, 100L), Dim = c(95474L, 101L), Dimnames = list(NULL, NULL), x = c(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), factors = list()) check.svd(A) sparsesvd/tests/stress_test.R0000644000176200001440000000613712711444713016146 0ustar liggesuserslibrary(Matrix, quietly=TRUE) library(sparsesvd) ## Stress test: ## - apply sparsesvd() to a large number of random sparse nonnegative matrices ## - check that original matrix M can be reconstructed with reasonable accuracy ## - compare truncated SVD against first components of full SVD ## Run interactively with: ## R --slave -f stress_test.R --args ## (all arguments are optional with default values set below) argv <- commandArgs(trailingOnly=TRUE) argc <- length(argv) ## arguments and their default values n.run <- if (argc >= 1) as.integer(argv[1]) else 100 # number of runs k <- if (argc >= 2) as.integer(argv[2]) else 500 # k = number of rows of M n <- if (argc >= 3) as.integer(argv[3]) else 100 # n = number of columns of M r <- if (argc >= 4) as.integer(argv[4]) else 10 # r = rank of truncated SVD torture <- if (argc >= 5) as.logical(argv[5]) else FALSE # run with gctorture()? fillrate <- if (argc >= 6) as.numeric(argv[6]) else .10 # fill rate of sparse matrix (10%) tol <- if (argc >= 7) as.numeric(argv[7]) else 1e-6 # acceptable approx. error per cell cat("sparsesvd() Stress Test:\n") cat(sprintf(" - %d runs testing full and rank-%d sparse SVD\n", n.run, r)) cat(sprintf(" - based on %d x %d nonnegative matrix with %.1f%% nonzero cells\n", k, n, 100 * fillrate)) cat(sprintf(" - reconstruction accuracy < %g, gctorture() is %s\n", tol, if (torture) "ON" else "OFF")) ## approximate matrix comparison assert.mat.equal <- function (x, y, tol=1e-6, msg="") { if (!all(dim(x) == dim(y))) stop(sprintf("error: matrix dimensions differ %s", msg)) err <- max(abs(x - y)) if (err > tol) stop(sprintf("approximation error %g > tolerance %g %s", err, tol, msg)) } ## now perform n.run iterations of the test procedure gctorture(torture) pb <- txtProgressBar(min=0, max=n.run, style=3) for (i in 1:n.run) { setTxtProgressBar(pb, i) x <- ifelse(runif(k * n) < fillrate, runif(k * n), 0) M <- matrix(x, k, n) # random non-negative matrix with specified fill rate Ms <- Matrix(M, sparse=TRUE) ## complete sparse SVD svdM <- sparsesvd(Ms) R <- with(svdM, u %*% diag(d) %*% t(v)) # reconstructed matrix assert.mat.equal(R, M, tol=tol, msg=sprintf("(full SVD at iteration #%d)", i)) ## complete sparse SVD of transposed matrix svdMt <- sparsesvd(t(Ms)) R <- with(svdMt, v %*% diag(d) %*% t(u)) # reconstructed matrix assert.mat.equal(R, M, tol=tol, msg=sprintf("(full transposed SVD at iteration #%d)", i)) Mr <- with(svdM, u[, 1:r] %*% diag(d[1:r]) %*% t(v[, 1:r])) # r-dimensional approximation ## truncated sparse SVD svdMr <- sparsesvd(Ms, rank=r) Rr <- with(svdMr, u %*% diag(d) %*% t(v)) # reconstructed r-dim approximation assert.mat.equal(Rr, Mr, tol=tol, msg=sprintf("(%d-rank truncated SVD at iteration #%d)", r, i)) ## truncated sparse SVD of transposed matrix svdMrt <- sparsesvd(t(Ms), rank=r) Rr <- with(svdMrt, v %*% diag(d) %*% t(u)) # reconstructed r-dim approximation assert.mat.equal(Rr, Mr, tol=tol, msg=sprintf("(%d-rank truncated transposed SVD at iteration #%d)", r, i)) } close(pb) sparsesvd/tests/formats.R0000644000176200001440000000244012676701510015231 0ustar liggesusers## any sparse matrix format that inherits from dMatrix should work library(sparsesvd) library(Matrix) M <- rbind( c(20, 10, 15, 0, 2), c(10, 5, 8, 1, 0), c( 0, 1, 2, 6, 3), c( 1, 0, 0, 10, 5)) res1 <- sparsesvd(as(M, "dgCMatrix")) # standard format (column-compressed) res2 <- sparsesvd(as(M, "dgeMatrix")) # dense matrix res3 <- sparsesvd(as(M, "dgTMatrix")) # triple format ## -- row-compressed form cannot be converted to dgCMatrix ## res4 <- sparsesvd(as(M, "dgRMatrix")) # row-compressed ## check that eigenvalues are the same stopifnot(all.equal(res1$d, res2$d, tolerance=1e-12)) stopifnot(all.equal(res1$d, res3$d, tolerance=1e-12)) ## stopifnot(all.equal(res1$d, res4$d, tolerance=1e-12)) ## special classes for symmetric matrices A <- crossprod(M) res1a <- sparsesvd(as(A, "dgCMatrix")) # standard format (column-compressed) ## -- symmetric matrix can only be converted if already in row-compressed form ## res2a <- sparsesvd(as(A, "dsTMatrix")) # symmetric triplet format res3a <- sparsesvd(as(A, "dsCMatrix")) # symmetric column-compressed ## check that eigenvalues are the same and consistent with M stopifnot(all.equal(res1a$d, (res1$d)^2, tolerance=1e-12)) ## stopifnot(all.equal(res1a$d, res2a$d, tolerance=1e-12)) stopifnot(all.equal(res1a$d, res3a$d, tolerance=1e-12)) sparsesvd/src/0000755000176200001440000000000013241241645013055 5ustar liggesuserssparsesvd/src/main.c0000644000176200001440000000617113153306432014150 0ustar liggesusers#include #include #include #include #include #include "svdlib.h" SEXP svdLAS2_(SEXP dim, SEXP i, SEXP p, SEXP x, SEXP dimensions, SEXP exclude, SEXP kappa) { struct smat M; SVDRec svd; SEXP res, res_d, res_u, res_v, res_names; double *u_dbl, *v_dbl, *mark, *point; int *i_int = INTEGER(i); int *p_int = INTEGER(p); int nR = INTEGER(dim)[0]; int nC = INTEGER(dim)[1]; int n_cells = length(x); int rank, k, j, n_row, n_col; /* copy M to SMat structure (column-compressed format) */ M.rows = nR; M.cols = nC; M.vals = n_cells; M.value = REAL(x); /* need to make copy of i and p because of different data type (long vs. int) */ M.pointr = (long *) R_alloc(nC + 1, sizeof(long)); for (k = 0; k <= nC; k++) M.pointr[k] = p_int[k]; M.rowind = (long *) R_alloc(n_cells, sizeof(long)); for (k = 0; k < n_cells; k++) M.rowind[k] = i_int[k]; /* execute sparse SVD */ SVDVerbosity = 0; svd = svdLAS2A(&M, INTEGER(dimensions)[0]); rank = svd->d; /* check matrix dimensions */ n_row = svd->Ut->cols; /* Ut is the transposed matrix, hence swap row/col counts */ n_col = svd->Ut->rows; if ((n_col < rank) || (n_row != nR)) { svdFreeSVDRec(svd); error("internal error (U is %d x %d matrix, expected %d x %d)", n_row, n_col, nR, rank); } n_row = svd->Vt->cols; /* same for Vt */ n_col = svd->Vt->rows; if ((n_col < rank) || n_row != nC) { svdFreeSVDRec(svd); error("internal error (V is %d x %d matrix, expected %d x %d)", n_row, n_col, nC, rank); } /* note that Ut and Vt may contain more eigenvectors than there are significant eigenvalues; this is expected if some singular values are culled because of the kappa criterion */ /* extract singular values and matrices of singluar vectors into R objects */ res_d = PROTECT(allocVector(REALSXP, rank)); for (k = 0; k < rank; k++) REAL(res_d)[k] = svd->S[k]; res_u = PROTECT(allocMatrix(REALSXP, nR, rank)); u_dbl = REAL(res_u); for (k = 0; k < rank; k++) { mark = svd->Ut->value[k]; point = u_dbl + k * nR; for (j = 0; j < nR; j++) *point++ = *mark++; } res_v = PROTECT(allocMatrix(REALSXP, nC, rank)); v_dbl = REAL(res_v); for (k = 0; k < rank; k++) { mark = svd->Vt->value[k]; point = v_dbl + k * nC; for (j = 0; j < nC; j++) *point++ = *mark++; } /* free SVDRec after copying to R objects */ svdFreeSVDRec(svd); /* construct result list */ res = PROTECT(allocVector(VECSXP, 3)); SET_VECTOR_ELT(res, 0, res_d); SET_VECTOR_ELT(res, 1, res_u); SET_VECTOR_ELT(res, 2, res_v); res_names = PROTECT(allocVector(STRSXP, 3)); SET_STRING_ELT(res_names, 0, mkChar("d")); SET_STRING_ELT(res_names, 1, mkChar("u")); SET_STRING_ELT(res_names, 2, mkChar("v")); setAttrib(res, R_NamesSymbol, res_names); UNPROTECT(5); return res; } static const R_CallMethodDef sparsesvd_methods[] = { {"svdLAS2_", (DL_FUNC) &svdLAS2_, 7}, {NULL, NULL, 0} }; void R_init_sparsesvd(DllInfo *dll) { R_registerRoutines(dll, NULL, sparsesvd_methods, NULL, NULL); R_useDynamicSymbols(dll, FALSE); R_forceSymbols(dll, TRUE); } sparsesvd/src/svdlib.c0000644000176200001440000004064312704210031014477 0ustar liggesusers/* Copyright © 2002, University of Tennessee Research Foundation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Tennessee nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include "svdlib.h" #include "svdutil.h" char *SVDVersion = "1.4"; long SVDVerbosity = 1; long SVDCount[SVD_COUNTERS]; void svdResetCounters(void) { int i; for (i = 0; i < SVD_COUNTERS; i++) SVDCount[i] = 0; } /********************************* Allocation ********************************/ /* Row major order. Rows are vectors that are consecutive in memory. Matrix is initialized to empty. */ DMat svdNewDMat(int rows, int cols) { int i; DMat D = (DMat) malloc(sizeof(struct dmat)); if (!D) {perror("svdNewDMat"); return NULL;} D->rows = rows; D->cols = cols; D->value = (double **) malloc(rows * sizeof(double *)); if (!D->value) {SAFE_FREE(D); return NULL;} D->value[0] = (double *) calloc(rows * cols, sizeof(double)); if (!D->value[0]) {SAFE_FREE(D->value); SAFE_FREE(D); return NULL;} for (i = 1; i < rows; i++) D->value[i] = D->value[i-1] + cols; return D; } void svdFreeDMat(DMat D) { if (!D) return; SAFE_FREE(D->value[0]); SAFE_FREE(D->value); free(D); } SMat svdNewSMat(int rows, int cols, int vals) { SMat S = (SMat) calloc(1, sizeof(struct smat)); if (!S) {perror("svdNewSMat"); return NULL;} S->rows = rows; S->cols = cols; S->vals = vals; S->pointr = svd_longArray(cols + 1, TRUE, "svdNewSMat: pointr"); if (!S->pointr) {svdFreeSMat(S); return NULL;} S->rowind = svd_longArray(vals, FALSE, "svdNewSMat: rowind"); if (!S->rowind) {svdFreeSMat(S); return NULL;} S->value = svd_doubleArray(vals, FALSE, "svdNewSMat: value"); if (!S->value) {svdFreeSMat(S); return NULL;} return S; } void svdFreeSMat(SMat S) { if (!S) return; SAFE_FREE(S->pointr); SAFE_FREE(S->rowind); SAFE_FREE(S->value); free(S); } /* Creates an empty SVD record */ SVDRec svdNewSVDRec(void) { SVDRec R = (SVDRec) calloc(1, sizeof(struct svdrec)); if (!R) {perror("svdNewSVDRec"); return NULL;} return R; } /* Frees an svd rec and all its contents. */ void svdFreeSVDRec(SVDRec R) { if (!R) return; if (R->Ut) svdFreeDMat(R->Ut); if (R->S) SAFE_FREE(R->S); if (R->Vt) svdFreeDMat(R->Vt); free(R); } /**************************** Conversion *************************************/ /* Converts a sparse matrix to a dense one (without affecting the former) */ DMat svdConvertStoD(SMat S) { int i, c; DMat D = svdNewDMat(S->rows, S->cols); if (!D) { svd_error("svdConvertStoD: failed to allocate D"); return NULL; } for (i = 0, c = 0; i < S->vals; i++) { while (S->pointr[c + 1] <= i) c++; D->value[S->rowind[i]][c] = S->value[i]; } return D; } /* Converts a dense matrix to a sparse one (without affecting the dense one) */ SMat svdConvertDtoS(DMat D) { SMat S; int i, j, n; for (i = 0, n = 0; i < D->rows; i++) for (j = 0; j < D->cols; j++) if (D->value[i][j] != 0) n++; S = svdNewSMat(D->rows, D->cols, n); if (!S) { svd_error("svdConvertDtoS: failed to allocate S"); return NULL; } for (j = 0, n = 0; j < D->cols; j++) { S->pointr[j] = n; for (i = 0; i < D->rows; i++) if (D->value[i][j] != 0) { S->rowind[n] = i; S->value[n] = D->value[i][j]; n++; } } S->pointr[S->cols] = S->vals; return S; } /* Transposes a dense matrix. */ DMat svdTransposeD(DMat D) { int r, c; DMat N = svdNewDMat(D->cols, D->rows); for (r = 0; r < D->rows; r++) for (c = 0; c < D->cols; c++) N->value[c][r] = D->value[r][c]; return N; } /* Efficiently transposes a sparse matrix. */ SMat svdTransposeS(SMat S) { int r, c, i, j; SMat N = svdNewSMat(S->cols, S->rows, S->vals); /* Count number nz in each row. */ for (i = 0; i < S->vals; i++) N->pointr[S->rowind[i]]++; /* Fill each cell with the starting point of the previous row. */ N->pointr[S->rows] = S->vals - N->pointr[S->rows - 1]; for (r = S->rows - 1; r > 0; r--) N->pointr[r] = N->pointr[r+1] - N->pointr[r-1]; N->pointr[0] = 0; /* Assign the new columns and values. */ for (c = 0, i = 0; c < S->cols; c++) { for (; i < S->pointr[c+1]; i++) { r = S->rowind[i]; j = N->pointr[r+1]++; N->rowind[j] = c; N->value[j] = S->value[i]; } } return N; } /**************************** Input/Output ***********************************/ void svdWriteDenseArray(double *a, int n, char *filename, char binary) { int i; FILE *file = svd_writeFile(filename, FALSE); if (!file) { svd_error("svdWriteDenseArray: failed to write %s", filename); return; } if (binary) { svd_writeBinInt(file, n); for (i = 0; i < n; i++) svd_writeBinFloat(file, (float) a[i]); } else { fprintf(file, "%d\n", n); for (i = 0; i < n; i++) fprintf(file, "%g\n", a[i]); } svd_closeFile(file); } double *svdLoadDenseArray(char *filename, int *np, char binary) { int i, n; double *a; FILE *file = svd_readFile(filename); if (!file) { svd_error("svdLoadDenseArray: failed to read %s", filename); return NULL; } if (binary) { svd_readBinInt(file, np); } else if (fscanf(file, " %d", np) != 1) { svd_error("svdLoadDenseArray: error reading %s", filename); svd_closeFile(file); return NULL; } n = *np; a = svd_doubleArray(n, FALSE, "svdLoadDenseArray: a"); if (!a) return NULL; if (binary) { float f; for (i = 0; i < n; i++) { svd_readBinFloat(file, &f); a[i] = f; } } else { for (i = 0; i < n; i++) { if (fscanf(file, " %lf\n", a + i) != 1) { svd_error("svdLoadDenseArray: error reading %s", filename); break; } } } svd_closeFile(file); return a; } /* File format has a funny header, then first entry index per column, then the row for each entry, then the value for each entry. Indices count from 1. Assumes A is initialized. */ static SMat svdLoadSparseTextHBFile(FILE *file) { char line[128]; long i, x, rows, cols, vals, num_mat; SMat S; /* Skip the header line: */ if (!fgets(line, 128, file)) ; /* Skip the line giving the number of lines in this file: */ if (!fgets(line, 128, file)) ; /* Read the line with useful dimensions: */ if (fscanf(file, "%*s%ld%ld%ld%ld\n", &rows, &cols, &vals, &num_mat) != 4) { svd_error("svdLoadSparseTextHBFile: bad file format on line 3"); return NULL; } if (num_mat != 0) { svd_error("svdLoadSparseTextHBFile: I don't know how to handle a file " "with elemental matrices (last entry on header line 3)"); return NULL; } /* Skip the line giving the formats: */ if (!fgets(line, 128, file)) ; S = svdNewSMat(rows, cols, vals); if (!S) return NULL; /* Read column pointers. */ for (i = 0; i <= S->cols; i++) { if (fscanf(file, " %ld", &x) != 1) { svd_error("svdLoadSparseTextHBFile: error reading pointr %d", i); return NULL; } S->pointr[i] = x - 1; } S->pointr[S->cols] = S->vals; /* Read row indices. */ for (i = 0; i < S->vals; i++) { if (fscanf(file, " %ld", &x) != 1) { svd_error("svdLoadSparseTextHBFile: error reading rowind %d", i); return NULL; } S->rowind[i] = x - 1; } for (i = 0; i < S->vals; i++) if (fscanf(file, " %lf", S->value + i) != 1) { svd_error("svdLoadSparseTextHBFile: error reading value %d", i); return NULL; } return S; } static void svdWriteSparseTextHBFile(SMat S, FILE *file) { int i; long col_lines = ((S->cols + 1) / 8) + (((S->cols + 1) % 8) ? 1 : 0); long row_lines = (S->vals / 8) + ((S->vals % 8) ? 1 : 0); long total_lines = col_lines + 2 * row_lines; char title[32]; sprintf(title, "SVDLIBC v. %s", SVDVersion); fprintf(file, "%-72s%-8s\n", title, ""); fprintf(file, "%14ld%14ld%14ld%14ld%14d\n", total_lines, col_lines, row_lines, row_lines, 0); fprintf(file, "%-14s%14ld%14ld%14ld%14d\n", "rra", S->rows, S->cols, S->vals, 0); fprintf(file, "%16s%16s%16s%16s\n", "(8i)", "(8i)", "(8e)", "(8e)"); for (i = 0; i <= S->cols; i++) fprintf(file, "%ld%s", S->pointr[i] + 1, (((i+1) % 8) == 0) ? "\n" : " "); fprintf(file, "\n"); for (i = 0; i < S->vals; i++) fprintf(file, "%ld%s", S->rowind[i] + 1, (((i+1) % 8) == 0) ? "\n" : " "); fprintf(file, "\n"); for (i = 0; i < S->vals; i++) fprintf(file, "%g%s", S->value[i], (((i+1) % 8) == 0) ? "\n" : " "); fprintf(file, "\n"); } static SMat svdLoadSparseTextFile(FILE *file) { long c, i, n, v, rows, cols, vals; SMat S; if (fscanf(file, " %ld %ld %ld", &rows, &cols, &vals) != 3) { svd_error("svdLoadSparseTextFile: bad file format"); return NULL; } S = svdNewSMat(rows, cols, vals); if (!S) return NULL; for (c = 0, v = 0; c < cols; c++) { if (fscanf(file, " %ld", &n) != 1) { svd_error("svdLoadSparseTextFile: bad file format"); return NULL; } S->pointr[c] = v; for (i = 0; i < n; i++, v++) { if (fscanf(file, " %ld %lf", S->rowind + v, S->value + v) != 2) { svd_error("svdLoadSparseTextFile: bad file format"); return NULL; } } } S->pointr[cols] = vals; return S; } static void svdWriteSparseTextFile(SMat S, FILE *file) { int c, v; fprintf(file, "%ld %ld %ld\n", S->rows, S->cols, S->vals); for (c = 0, v = 0; c < S->cols; c++) { fprintf(file, "%ld\n", S->pointr[c + 1] - S->pointr[c]); for (; v < S->pointr[c+1]; v++) fprintf(file, "%ld %g\n", S->rowind[v], S->value[v]); } } static SMat svdLoadSparseBinaryFile(FILE *file) { int rows, cols, vals, n, c, i, v, r, e = 0; float f; SMat S; e += svd_readBinInt(file, &rows); e += svd_readBinInt(file, &cols); e += svd_readBinInt(file, &vals); if (e) { svd_error("svdLoadSparseBinaryFile: bad file format"); return NULL; } S = svdNewSMat(rows, cols, vals); if (!S) return NULL; for (c = 0, v = 0; c < cols; c++) { if (svd_readBinInt(file, &n)) { svd_error("svdLoadSparseBinaryFile: bad file format"); return NULL; } S->pointr[c] = v; for (i = 0; i < n; i++, v++) { e += svd_readBinInt(file, &r); e += svd_readBinFloat(file, &f); if (e) { svd_error("svdLoadSparseBinaryFile: bad file format"); return NULL; } S->rowind[v] = r; S->value[v] = f; } } S->pointr[cols] = vals; return S; } static void svdWriteSparseBinaryFile(SMat S, FILE *file) { int c, v; svd_writeBinInt(file, (int) S->rows); svd_writeBinInt(file, (int) S->cols); svd_writeBinInt(file, (int) S->vals); for (c = 0, v = 0; c < S->cols; c++) { svd_writeBinInt(file, (int) (S->pointr[c + 1] - S->pointr[c])); for (; v < S->pointr[c+1]; v++) { svd_writeBinInt(file, (int) S->rowind[v]); svd_writeBinFloat(file, (float) S->value[v]); } } } static DMat svdLoadDenseTextFile(FILE *file) { long rows, cols, i, j; DMat D; if (fscanf(file, " %ld %ld", &rows, &cols) != 2) { svd_error("svdLoadDenseTextFile: bad file format"); return NULL; } D = svdNewDMat(rows, cols); if (!D) return NULL; for (i = 0; i < rows; i++) for (j = 0; j < cols; j++) { if (fscanf(file, " %lf", &(D->value[i][j])) != 1) { svd_error("svdLoadDenseTextFile: bad file format"); return NULL; } } return D; } static void svdWriteDenseTextFile(DMat D, FILE *file) { int i, j; fprintf(file, "%ld %ld\n", D->rows, D->cols); for (i = 0; i < D->rows; i++) for (j = 0; j < D->cols; j++) fprintf(file, "%g%c", D->value[i][j], (j == D->cols - 1) ? '\n' : ' '); } static DMat svdLoadDenseBinaryFile(FILE *file) { int rows, cols, i, j, e = 0; float f; DMat D; e += svd_readBinInt(file, &rows); e += svd_readBinInt(file, &cols); if (e) { svd_error("svdLoadDenseBinaryFile: bad file format"); return NULL; } D = svdNewDMat(rows, cols); if (!D) return NULL; for (i = 0; i < rows; i++) for (j = 0; j < cols; j++) { if (svd_readBinFloat(file, &f)) { svd_error("svdLoadDenseBinaryFile: bad file format"); return NULL; } D->value[i][j] = f; } return D; } static void svdWriteDenseBinaryFile(DMat D, FILE *file) { int i, j; svd_writeBinInt(file, (int) D->rows); svd_writeBinInt(file, (int) D->cols); for (i = 0; i < D->rows; i++) for (j = 0; j < D->cols; j++) svd_writeBinFloat(file, (float) D->value[i][j]); } SMat svdLoadSparseMatrix(char *filename, int format) { SMat S = NULL; DMat D = NULL; FILE *file = svd_fatalReadFile(filename); switch (format) { case SVD_F_STH: S = svdLoadSparseTextHBFile(file); break; case SVD_F_ST: S = svdLoadSparseTextFile(file); break; case SVD_F_SB: S = svdLoadSparseBinaryFile(file); break; case SVD_F_DT: D = svdLoadDenseTextFile(file); break; case SVD_F_DB: D = svdLoadDenseBinaryFile(file); break; default: svd_error("svdLoadSparseMatrix: unknown format %d", format); } svd_closeFile(file); if (D) { S = svdConvertDtoS(D); svdFreeDMat(D); } return S; } DMat svdLoadDenseMatrix(char *filename, int format) { SMat S = NULL; DMat D = NULL; FILE *file = svd_fatalReadFile(filename); switch (format) { case SVD_F_STH: S = svdLoadSparseTextHBFile(file); break; case SVD_F_ST: S = svdLoadSparseTextFile(file); break; case SVD_F_SB: S = svdLoadSparseBinaryFile(file); break; case SVD_F_DT: D = svdLoadDenseTextFile(file); break; case SVD_F_DB: D = svdLoadDenseBinaryFile(file); break; default: svd_error("svdLoadSparseMatrix: unknown format %d", format); } svd_closeFile(file); if (S) { D = svdConvertStoD(S); svdFreeSMat(S); } return D; } void svdWriteSparseMatrix(SMat S, char *filename, int format) { DMat D = NULL; FILE *file = svd_writeFile(filename, FALSE); if (!file) { svd_error("svdWriteSparseMatrix: failed to write file %s\n", filename); return; } switch (format) { case SVD_F_STH: svdWriteSparseTextHBFile(S, file); break; case SVD_F_ST: svdWriteSparseTextFile(S, file); break; case SVD_F_SB: svdWriteSparseBinaryFile(S, file); break; case SVD_F_DT: D = svdConvertStoD(S); svdWriteDenseTextFile(D, file); break; case SVD_F_DB: D = svdConvertStoD(S); svdWriteDenseBinaryFile(D, file); break; default: svd_error("svdLoadSparseMatrix: unknown format %d", format); } svd_closeFile(file); if (D) svdFreeDMat(D); } void svdWriteDenseMatrix(DMat D, char *filename, int format) { SMat S = NULL; FILE *file = svd_writeFile(filename, FALSE); if (!file) { svd_error("svdWriteDenseMatrix: failed to write file %s\n", filename); return; } switch (format) { case SVD_F_STH: S = svdConvertDtoS(D); svdWriteSparseTextHBFile(S, file); break; case SVD_F_ST: S = svdConvertDtoS(D); svdWriteSparseTextFile(S, file); break; case SVD_F_SB: S = svdConvertDtoS(D); svdWriteSparseBinaryFile(S, file); break; case SVD_F_DT: svdWriteDenseTextFile(D, file); break; case SVD_F_DB: svdWriteDenseBinaryFile(D, file); break; default: svd_error("svdLoadSparseMatrix: unknown format %d", format); } svd_closeFile(file); if (S) svdFreeSMat(S); } sparsesvd/src/svdlib.h0000644000176200001440000001242512676312360014521 0ustar liggesusers/* Copyright © 2002, University of Tennessee Research Foundation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Tennessee nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef SVDLIB_H #define SVDLIB_H #ifndef FALSE # define FALSE 0 #endif #ifndef TRUE # define TRUE 1 #endif /******************************** Structures *********************************/ typedef struct smat *SMat; typedef struct dmat *DMat; typedef struct svdrec *SVDRec; /* Harwell-Boeing sparse matrix. */ struct smat { long rows; long cols; long vals; /* Total non-zero entries. */ long *pointr; /* For each col (plus 1), index of first non-zero entry. */ long *rowind; /* For each nz entry, the row index. */ double *value; /* For each nz entry, the value. */ }; /* Row-major dense matrix. Rows are consecutive vectors. */ struct dmat { long rows; long cols; double **value; /* Accessed by [row][col]. Free value[0] and value to free.*/ }; struct svdrec { int d; /* Dimensionality (rank) */ DMat Ut; /* Transpose of left singular vectors. (d by m) The vectors are the rows of Ut. */ double *S; /* Array of singular values. (length d) */ DMat Vt; /* Transpose of right singular vectors. (d by n) The vectors are the rows of Vt. */ }; /******************************** Variables **********************************/ /* Version info */ extern char *SVDVersion; /* How verbose is the package: 0, 1 (default), 2 */ extern long SVDVerbosity; /* Counter(s) used to track how much work is done in computing the SVD. */ enum svdCounters {SVD_MXV, SVD_COUNTERS}; extern long SVDCount[SVD_COUNTERS]; extern void svdResetCounters(void); enum svdFileFormats {SVD_F_STH, SVD_F_ST, SVD_F_SB, SVD_F_DT, SVD_F_DB}; /* File formats: SVD_F_STH: sparse text, SVDPACK-style SVD_F_ST: sparse text, SVDLIB-style SVD_F_DT: dense text SVD_F_SB: sparse binary SVD_F_DB: dense binary */ /* True if a file format is sparse: */ #define SVD_IS_SPARSE(format) ((format >= SVD_F_STH) && (format <= SVD_F_SB)) /******************************** Functions **********************************/ /* Creates an empty dense matrix. */ extern DMat svdNewDMat(int rows, int cols); /* Frees a dense matrix. */ extern void svdFreeDMat(DMat D); /* Creates an empty sparse matrix. */ SMat svdNewSMat(int rows, int cols, int vals); /* Frees a sparse matrix. */ void svdFreeSMat(SMat S); /* Creates an empty SVD record. */ SVDRec svdNewSVDRec(void); /* Frees an svd rec and all its contents. */ void svdFreeSVDRec(SVDRec R); /* Converts a sparse matrix to a dense one (without affecting former) */ DMat svdConvertStoD(SMat S); /* Converts a dense matrix to a sparse one (without affecting former) */ SMat svdConvertDtoS(DMat D); /* Transposes a dense matrix (returning a new one) */ DMat svdTransposeD(DMat D); /* Transposes a sparse matrix (returning a new one) */ SMat svdTransposeS(SMat S); /* Writes an array to a file. */ extern void svdWriteDenseArray(double *a, int n, char *filename, char binary); /* Reads an array from a file, storing its size in *np. */ extern double *svdLoadDenseArray(char *filename, int *np, char binary); /* Loads a matrix file (in various formats) into a sparse matrix. */ extern SMat svdLoadSparseMatrix(char *filename, int format); /* Loads a matrix file (in various formats) into a dense matrix. */ extern DMat svdLoadDenseMatrix(char *filename, int format); /* Writes a dense matrix to a file in a given format. */ extern void svdWriteDenseMatrix(DMat A, char *filename, int format); /* Writes a sparse matrix to a file in a given format. */ extern void svdWriteSparseMatrix(SMat A, char *filename, int format); /* Performs the las2 SVD algorithm and returns the resulting Ut, S, and Vt. */ extern SVDRec svdLAS2(SMat A, long dimensions, long iterations, double end[2], double kappa); /* Chooses default parameter values. Set dimensions to 0 for all dimensions: */ extern SVDRec svdLAS2A(SMat A, long dimensions); #endif /* SVDLIB_H */ sparsesvd/src/svdutil.c0000644000176200001440000005050013513075402014711 0ustar liggesusers/* Copyright © 2002, University of Tennessee Research Foundation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Tennessee nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) /* EDIT: POSIX functions not available on Windows (and not needed anyway) */ #include #endif #include "svdlib.h" #include "svdutil.h" #include #include #define BUNZIP2 "bzip2 -d" #define BZIP2 "bzip2 -1" #define UNZIP "gzip -d" #define ZIP "gzip -1" #define COMPRESS "compress" #define MAX_FILENAME 512 #define MAX_PIPES 64 static FILE *Pipe[MAX_PIPES]; static int numPipes = 0; long *svd_longArray(long size, char empty, char *name) { long *a; if (empty) a = (long *) calloc(size, sizeof(long)); else a = (long *) malloc(size * sizeof(long)); if (!a) { perror(name); /* exit(errno); */ } return a; } double *svd_doubleArray(long size, char empty, char *name) { double *a; if (empty) a = (double *) calloc(size, sizeof(double)); else a = (double *) malloc(size * sizeof(double)); if (!a) { perror(name); /* exit(errno); */ } return a; } void svd_beep(void) { /* fputc('\a', stderr); */ /* fflush(stderr); */ REprintf("DING!\n"); } void svd_debug(char *fmt, ...) { va_list args; va_start(args, fmt); REvprintf(fmt, args); va_end(args); } void svd_error(char *fmt, ...) { va_list args; va_start(args, fmt); svd_beep(); REprintf("ERROR: "); REvprintf(fmt, args); REprintf("\n"); va_end(args); } void svd_fatalError(char *fmt, ...) { va_list args; va_start(args, fmt); svd_beep(); REprintf( "ERROR: "); REvprintf(fmt, args); REprintf("\n"); va_end(args); error("error in SVDLIBC code"); } static void registerPipe(FILE *p) { if (numPipes >= MAX_PIPES) svd_error("Too many pipes open"); Pipe[numPipes++] = p; } static char isPipe(FILE *p) { int i; for (i = 0; i < numPipes && Pipe[i] != p; i++); if (i == numPipes) return FALSE; Pipe[i] = Pipe[--numPipes]; return TRUE; } static FILE *openPipe(char *pipeName, char *mode) { FILE *pipe; /* fflush(stdout); */ if ((pipe = popen(pipeName, mode))) registerPipe(pipe); return pipe; } static FILE *readZippedFile(char *command, char *fileName) { char buf[MAX_FILENAME]; sprintf(buf, "%s < %s 2>/dev/null", command, fileName); return openPipe(buf, "r"); } FILE *svd_fatalReadFile(char *filename) { FILE *file; if (!(file = svd_readFile(filename))) svd_fatalError("couldn't read the file %s", filename); return file; } static int stringEndsIn(char *s, char *t) { int ls = strlen(s); int lt = strlen(t); if (ls < lt) return FALSE; return (strcmp(s + ls - lt, t)) ? FALSE : TRUE; } /* Will silently return NULL if file couldn't be opened */ FILE *svd_readFile(char *fileName) { char fileBuf[MAX_FILENAME]; struct stat statbuf; /* Special file name */ if (!strcmp(fileName, "-")) /* return stdin; */ svd_fatalError("library code is not allowed to read from STDIN"); /* If it is a pipe */ if (fileName[0] == '|') return openPipe(fileName + 1, "r"); /* Check if already ends in .gz or .Z and assume compressed */ if (stringEndsIn(fileName, ".gz") || stringEndsIn(fileName, ".Z")) { if (!stat(fileName, &statbuf)) return readZippedFile(UNZIP, fileName); return NULL; } /* Check if already ends in .bz or .bz2 and assume compressed */ if (stringEndsIn(fileName, ".bz") || stringEndsIn(fileName, ".bz2")) { if (!stat(fileName, &statbuf)) return readZippedFile(BUNZIP2, fileName); return NULL; } /* Try just opening normally */ if (!stat(fileName, &statbuf)) return fopen(fileName, "r"); /* Try adding .gz */ sprintf(fileBuf, "%s.gz", fileName); if (!stat(fileBuf, &statbuf)) return readZippedFile(UNZIP, fileBuf); /* Try adding .Z */ sprintf(fileBuf, "%s.Z", fileName); if (!stat(fileBuf, &statbuf)) return readZippedFile(UNZIP, fileBuf); /* Try adding .bz2 */ sprintf(fileBuf, "%s.bz2", fileName); if (!stat(fileBuf, &statbuf)) return readZippedFile(BUNZIP2, fileBuf); /* Try adding .bz */ sprintf(fileBuf, "%s.bz", fileName); if (!stat(fileBuf, &statbuf)) return readZippedFile(BUNZIP2, fileBuf); return NULL; } static FILE *writeZippedFile(char *fileName, char append) { char buf[MAX_FILENAME]; const char *op = (append) ? ">>" : ">"; if (stringEndsIn(fileName, ".bz2") || stringEndsIn(fileName, ".bz")) sprintf(buf, "%s %s \"%s\"", BZIP2, op, fileName); else if (stringEndsIn(fileName, ".Z")) sprintf(buf, "%s %s \"%s\"", COMPRESS, op, fileName); else sprintf(buf, "%s %s \"%s\"", ZIP, op, fileName); return openPipe(buf, "w"); } FILE *svd_writeFile(char *fileName, char append) { /* Special file name */ if (!strcmp(fileName, "-")) /* return stdout; */ svd_fatalError("library code is not allowed to write to STDOUT"); /* If it is a pipe */ if (fileName[0] == '|') return openPipe(fileName + 1, "w"); /* Check if ends in .gz, .Z, .bz, .bz2 */ if (stringEndsIn(fileName, ".gz") || stringEndsIn(fileName, ".Z") || stringEndsIn(fileName, ".bz") || stringEndsIn(fileName, ".bz2")) return writeZippedFile(fileName, append); return (append) ? fopen(fileName, "a") : fopen(fileName, "w"); } /* Could be a file or a stream. */ void svd_closeFile(FILE *file) { /* if (file == stdin || file == stdout) return; */ if (isPipe(file)) pclose(file); else fclose(file); } char svd_readBinInt(FILE *file, int *val) { #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) /* EDIT: POSIX functions not available on Windows (and not needed anyway) */ int x; if (fread(&x, sizeof(int), 1, file) == 1) { *val = ntohl(x); return FALSE; } return TRUE; #else error("binary I/O not available (not a POSIX platform)"); #endif } /* This reads a float in network order and converts to a real in host order. */ char svd_readBinFloat(FILE *file, float *val) { #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) /* EDIT: POSIX functions not available on Windows (and not needed anyway) */ int x; float y; if (fread(&x, sizeof(int), 1, file) == 1) { x = ntohl(x); y = *((float *) &x); *val = y; return FALSE; } return TRUE; #else error("binary I/O not available (not a POSIX platform)"); #endif } char svd_writeBinInt(FILE *file, int x) { #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) /* EDIT: POSIX functions not available on Windows (and not needed anyway) */ int y = htonl(x); if (fwrite(&y, sizeof(int), 1, file) != 1) return TRUE; return FALSE; #else error("binary I/O not available (not a POSIX platform)"); #endif } /* This takes a real in host order and writes a float in network order. */ char svd_writeBinFloat(FILE *file, float r) { #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) /* EDIT: POSIX functions not available on Windows (and not needed anyway) */ int y = htonl(*((int *) &r)); if (fwrite(&y, sizeof(int), 1, file) != 1) return TRUE; return FALSE; #else error("binary I/O not available (not a POSIX platform)"); #endif } /************************************************************** * returns |a| if b is positive; else fsign returns -|a| * **************************************************************/ double svd_fsign(double a, double b) { if ((a>=0.0 && b>=0.0) || (a<0.0 && b<0.0))return(a); else return -a; } /************************************************************** * returns the larger of two double precision numbers * **************************************************************/ double svd_dmax(double a, double b) { return (a > b) ? a : b; } /************************************************************** * returns the smaller of two double precision numbers * **************************************************************/ double svd_dmin(double a, double b) { return (a < b) ? a : b; } /************************************************************** * returns the larger of two integers * **************************************************************/ long svd_imax(long a, long b) { return (a > b) ? a : b; } /************************************************************** * returns the smaller of two integers * **************************************************************/ long svd_imin(long a, long b) { return (a < b) ? a : b; } /************************************************************** * Function scales a vector by a constant. * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ void svd_dscal(long n, double da, double *dx, long incx) { long i; if (n <= 0 || incx == 0) return; if (incx < 0) dx += (-n+1) * incx; for (i=0; i < n; i++) { *dx *= da; dx += incx; } return; } /************************************************************** * function scales a vector by a constant. * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ void svd_datx(long n, double da, double *dx, long incx, double *dy, long incy) { long i; if (n <= 0 || incx == 0 || incy == 0 || da == 0.0) return; if (incx == 1 && incy == 1) for (i=0; i < n; i++) *dy++ = da * (*dx++); else { if (incx < 0) dx += (-n+1) * incx; if (incy < 0) dy += (-n+1) * incy; for (i=0; i < n; i++) { *dy = da * (*dx); dx += incx; dy += incy; } } return; } /************************************************************** * Function copies a vector x to a vector y * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ void svd_dcopy(long n, double *dx, long incx, double *dy, long incy) { long i; if (n <= 0 || incx == 0 || incy == 0) return; if (incx == 1 && incy == 1) for (i=0; i < n; i++) *dy++ = *dx++; else { if (incx < 0) dx += (-n+1) * incx; if (incy < 0) dy += (-n+1) * incy; for (i=0; i < n; i++) { *dy = *dx; dx += incx; dy += incy; } } return; } /************************************************************** * Function forms the dot product of two vectors. * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ double svd_ddot(long n, double *dx, long incx, double *dy, long incy) { long i; double dot_product; if (n <= 0 || incx == 0 || incy == 0) return(0.0); dot_product = 0.0; if (incx == 1 && incy == 1) for (i=0; i < n; i++) dot_product += (*dx++) * (*dy++); else { if (incx < 0) dx += (-n+1) * incx; if (incy < 0) dy += (-n+1) * incy; for (i=0; i < n; i++) { dot_product += (*dx) * (*dy); dx += incx; dy += incy; } } return(dot_product); } /************************************************************** * Constant times a vector plus a vector * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ void svd_daxpy (long n, double da, double *dx, long incx, double *dy, long incy) { long i; if (n <= 0 || incx == 0 || incy == 0 || da == 0.0) return; if (incx == 1 && incy == 1) for (i=0; i < n; i++) { *dy += da * (*dx++); dy++; } else { if (incx < 0) dx += (-n+1) * incx; if (incy < 0) dy += (-n+1) * incy; for (i=0; i < n; i++) { *dy += da * (*dx); dx += incx; dy += incy; } } return; } /********************************************************************* * Function sorts array1 and array2 into increasing order for array1 * *********************************************************************/ void svd_dsort2(long igap, long n, double *array1, double *array2) { double temp; long i, j, index; if (!igap) return; else { for (i = igap; i < n; i++) { j = i - igap; index = i; while (j >= 0 && array1[j] > array1[index]) { temp = array1[j]; array1[j] = array1[index]; array1[index] = temp; temp = array2[j]; array2[j] = array2[index]; array2[index] = temp; j -= igap; index = j + igap; } } } svd_dsort2(igap/2,n,array1,array2); } /************************************************************** * Function interchanges two vectors * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ void svd_dswap(long n, double *dx, long incx, double *dy, long incy) { long i; double dtemp; if (n <= 0 || incx == 0 || incy == 0) return; if (incx == 1 && incy == 1) { for (i=0; i < n; i++) { dtemp = *dy; *dy++ = *dx; *dx++ = dtemp; } } else { if (incx < 0) dx += (-n+1) * incx; if (incy < 0) dy += (-n+1) * incy; for (i=0; i < n; i++) { dtemp = *dy; *dy = *dx; *dx = dtemp; dx += incx; dy += incy; } } } /***************************************************************** * Function finds the index of element having max. absolute value* * based on FORTRAN 77 routine from Linpack by J. Dongarra * *****************************************************************/ long svd_idamax(long n, double *dx, long incx) { long ix,i,imax; double dtemp, dmax; if (n < 1) return(-1); if (n == 1) return(0); if (incx == 0) return(-1); if (incx < 0) ix = (-n+1) * incx; else ix = 0; imax = ix; dx += ix; dmax = fabs(*dx); for (i=1; i < n; i++) { ix += incx; dx += incx; dtemp = fabs(*dx); if (dtemp > dmax) { dmax = dtemp; imax = ix; } } return(imax); } /************************************************************** * multiplication of matrix B by vector x, where B = A'A, * * and A is nrow by ncol (nrow >> ncol). Hence, B is of order * * n = ncol (y stores product vector). * **************************************************************/ void svd_opb(SMat A, double *x, double *y, double *temp) { long i, j, end; long *pointr = A->pointr, *rowind = A->rowind; double *value = A->value; long n = A->cols; SVDCount[SVD_MXV] += 2; memset(y, 0, n * sizeof(double)); for (i = 0; i < A->rows; i++) temp[i] = 0.0; for (i = 0; i < A->cols; i++) { end = pointr[i+1]; for (j = pointr[i]; j < end; j++) temp[rowind[j]] += value[j] * (*x); x++; } for (i = 0; i < A->cols; i++) { end = pointr[i+1]; for (j = pointr[i]; j < end; j++) *y += value[j] * temp[rowind[j]]; y++; } return; } /*********************************************************** * multiplication of matrix A by vector x, where A is * * nrow by ncol (nrow >> ncol). y stores product vector. * ***********************************************************/ void svd_opa(SMat A, double *x, double *y) { long end, i, j; long *pointr = A->pointr, *rowind = A->rowind; double *value = A->value; SVDCount[SVD_MXV]++; memset(y, 0, A->rows * sizeof(double)); for (i = 0; i < A->cols; i++) { end = pointr[i+1]; for (j = pointr[i]; j < end; j++) y[rowind[j]] += value[j] * x[i]; } return; } /*********************************************************************** * * * random() * * (double precision) * ***********************************************************************/ /*********************************************************************** Description ----------- This is a translation of a Fortran-77 uniform random number generator. The code is based on theory and suggestions given in D. E. Knuth (1969), vol 2. The argument to the function should be initialized to an arbitrary integer prior to the first call to random. The calling program should not alter the value of the argument between subsequent calls to random. Random returns values within the interval (0,1). Arguments --------- (input) iy an integer seed whose value must not be altered by the caller between subsequent calls (output) random a double precision random number between (0,1) ***********************************************************************/ /* BUGFIX -- 14 July 2019 (Stefan Evert): * This random number generator was designed for signed integers with wrap-around on overflow * but applied to long ints, which are 64-bit on modern platforms. This resulted in very large * positive and negative values instead of the inteded random numbers in the range (0, 1). * Moreover, singed integer overflow is undefined behaviour in C, and wrap-around is not guaranteed. * The bugfix changes the RNG to unsigned long computation, using the full range mapped to (0, 1). */ double svd_random2(unsigned long *iy) { static unsigned long m2 = 0; static unsigned long ia, ic, mic; static double halfm, s; /* If first entry, compute (max unsigned long) / 2 = m2 = halfm */ if (!m2) { m2 = 1; /* make sure that shift below is performed on a long int */ m2 <<= (8 * sizeof(long) - 1); halfm = m2; /* compute multiplier and increment for linear congruential * method */ ia = 8 * (long)(halfm * atan(1.0) / 8.0) + 5; ic = 2 * (long)(halfm * (0.5 - sqrt(3.0)/6.0)) + 1; mic = (m2-ic) + m2; /* s is the scale factor for converting to floating point */ s = 0.5 / halfm; } /* compute next random number */ *iy = *iy * ia; /* for computers which do not allow integer overflow on addition */ /* if (*iy > mic) *iy = (*iy - m2) - m2; */ *iy = *iy + ic; /* for computers whose word length for addition is greater than * for multiplication */ /* if (*iy / 2 > m2) *iy = (*iy - m2) - m2; */ /* for computers whose integer overflow affects the sign bit */ /* if (*iy < 0) *iy = (*iy + m2) + m2; */ return((double)(*iy) * s); } /************************************************************** * * * Function finds sqrt(a^2 + b^2) without overflow or * * destructive underflow. * * * **************************************************************/ /************************************************************** Funtions used ------------- UTILITY dmax, dmin **************************************************************/ double svd_pythag(double a, double b) { double p, r, s, t, u, temp; p = svd_dmax(fabs(a), fabs(b)); if (p != 0.0) { temp = svd_dmin(fabs(a), fabs(b)) / p; r = temp * temp; t = 4.0 + r; while (t != 4.0) { s = r / t; u = 1.0 + 2.0 * s; p *= u; temp = s / u; r *= temp * temp; t = 4.0 + r; } } return(p); } sparsesvd/src/svdutil.h0000644000176200001440000001661013512711433014722 0ustar liggesusers/* Copyright © 2002, University of Tennessee Research Foundation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Tennessee nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef SVDUTIL_H #define SVDUTIL_H #include "svdlib.h" #define SAFE_FREE(a) {if (a) {free(a); a = NULL;}} /* Allocates an array of longs. */ extern long *svd_longArray(long size, char empty, char *name); /* Allocates an array of doubles. */ extern double *svd_doubleArray(long size, char empty, char *name); extern void svd_debug(char *fmt, ...); extern void svd_error(char *fmt, ...); extern void svd_fatalError(char *fmt, ...); extern FILE *svd_fatalReadFile(char *filename); extern FILE *svd_readFile(char *fileName); extern FILE *svd_writeFile(char *fileName, char append); extern void svd_closeFile(FILE *file); extern char svd_readBinInt(FILE *file, int *val); extern char svd_readBinFloat(FILE *file, float *val); extern char svd_writeBinInt(FILE *file, int x); extern char svd_writeBinFloat(FILE *file, float r); /************************************************************** * returns |a| if b is positive; else fsign returns -|a| * **************************************************************/ extern double svd_fsign(double a, double b); /************************************************************** * returns the larger of two double precision numbers * **************************************************************/ extern double svd_dmax(double a, double b); /************************************************************** * returns the smaller of two double precision numbers * **************************************************************/ extern double svd_dmin(double a, double b); /************************************************************** * returns the larger of two integers * **************************************************************/ extern long svd_imax(long a, long b); /************************************************************** * returns the smaller of two integers * **************************************************************/ extern long svd_imin(long a, long b); /************************************************************** * Function scales a vector by a constant. * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ extern void svd_dscal(long n, double da, double *dx, long incx); /************************************************************** * function scales a vector by a constant. * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ extern void svd_datx(long n, double da, double *dx, long incx, double *dy, long incy); /************************************************************** * Function copies a vector x to a vector y * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ extern void svd_dcopy(long n, double *dx, long incx, double *dy, long incy); /************************************************************** * Function forms the dot product of two vectors. * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ extern double svd_ddot(long n, double *dx, long incx, double *dy, long incy); /************************************************************** * Constant times a vector plus a vector * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ extern void svd_daxpy (long n, double da, double *dx, long incx, double *dy, long incy); /********************************************************************* * Function sorts array1 and array2 into increasing order for array1 * *********************************************************************/ extern void svd_dsort2(long igap, long n, double *array1, double *array2); /************************************************************** * Function interchanges two vectors * * Based on Fortran-77 routine from Linpack by J. Dongarra * **************************************************************/ extern void svd_dswap(long n, double *dx, long incx, double *dy, long incy); /***************************************************************** * Function finds the index of element having max. absolute value* * based on FORTRAN 77 routine from Linpack by J. Dongarra * *****************************************************************/ extern long svd_idamax(long n, double *dx, long incx); /************************************************************** * multiplication of matrix B by vector x, where B = A'A, * * and A is nrow by ncol (nrow >> ncol). Hence, B is of order * * n = ncol (y stores product vector). * **************************************************************/ extern void svd_opb(SMat A, double *x, double *y, double *temp); /*********************************************************** * multiplication of matrix A by vector x, where A is * * nrow by ncol (nrow >> ncol). y stores product vector. * ***********************************************************/ extern void svd_opa(SMat A, double *x, double *y); /*********************************************************************** * * * random2() * * (double precision) * ***********************************************************************/ extern double svd_random2(unsigned long *iy); /************************************************************** * * * Function finds sqrt(a^2 + b^2) without overflow or * * destructive underflow. * * * **************************************************************/ extern double svd_pythag(double a, double b); #endif /* SVDUTIL_H */ sparsesvd/src/las2.c0000644000176200001440000017451213512711544014075 0ustar liggesusers/* Copyright © 2002, University of Tennessee Research Foundation. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Tennessee nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include "svdlib.h" #include "svdutil.h" #include #include #define MAXLL 2 #define LMTNW 100000000 /* max. size of working area allowed */ enum storeVals {STORQ = 1, RETRQ, STORP, RETRP}; static char *error_msg[] = { /* error messages used by function * * check_parameters */ NULL, "", "ENDL MUST BE LESS THAN ENDR", "REQUESTED DIMENSIONS CANNOT EXCEED NUM ITERATIONS", "ONE OF YOUR DIMENSIONS IS LESS THAN OR EQUAL TO ZERO", "NUM ITERATIONS (NUMBER OF LANCZOS STEPS) IS INVALID", "REQUESTED DIMENSIONS (NUMBER OF EIGENPAIRS DESIRED) IS INVALID", "6*N+4*ITERATIONS+1 + ITERATIONS*ITERATIONS CANNOT EXCEED NW", "6*N+4*ITERATIONS+1 CANNOT EXCEED NW", NULL}; double **LanStore, *OPBTemp; double eps, eps1, reps, eps34; long ierr; /* double rnm, anorm, tol; FILE *fp_out1, *fp_out2; */ void purge(long n, long ll, double *r, double *q, double *ra, double *qa, double *wrk, double *eta, double *oldeta, long step, double *rnmp, double tol); void ortbnd(double *alf, double *eta, double *oldeta, double *bet, long step, double rnm); double startv(SMat A, double *wptr[], long step, long n); void store_vec(long, long, long, double *); void imtql2(long, long, double *, double *, double *); void imtqlb(long n, double d[], double e[], double bnd[]); void write_header(long, long, double, double, long, double, long, long, long); long check_parameters(SMat A, long dimensions, long iterations, double endl, double endr, long vectors); int lanso(SMat A, long iterations, long dimensions, double endl, double endr, double *ritz, double *bnd, double *wptr[], long *neigp, long n); long ritvec(long n, SMat A, SVDRec R, double kappa, double *ritz, double *bnd, double *alf, double *bet, double *w2, long steps, long neig); long lanczos_step(SMat A, long first, long last, double *wptr[], double *alf, double *eta, double *oldeta, double *bet, long *ll, long *enough, double *rnmp, double *tolp, long n); void stpone(SMat A, double *wrkptr[], double *rnmp, double *tolp, long n); long error_bound(long *, double, double, double *, double *, long step, double tol); void machar(long *ibeta, long *it, long *irnd, long *machep, long *negep); /*********************************************************************** * * * main() * * Sparse SVD(A) via Eigensystem of A'A symmetric Matrix * * (double precision) * * * ***********************************************************************/ /*********************************************************************** Description ----------- This sample program uses landr to compute singular triplets of A via the equivalent symmetric eigenvalue problem B x = lambda x, where x' = (u',v'), lambda = sigma**2, where sigma is a singular value of A, B = A'A , and A is m (nrow) by n (ncol) (nrow >> ncol), so that {u,sqrt(lambda),v} is a singular triplet of A. (A' = transpose of A) User supplied routines: svd_opa, opb, store_vec, timer svd_opa( x,y) takes an n-vector x and returns A*x in y. svd_opb(ncol,x,y) takes an n-vector x and returns B*x in y. Based on operation flag isw, store_vec(n,isw,j,s) stores/retrieves to/from storage a vector of length n in s. User should edit timer() with an appropriate call to an intrinsic timing routine that returns elapsed user time. External parameters ------------------- Defined and documented in las2.h Local parameters ---------------- (input) endl left end of interval containing unwanted eigenvalues of B endr right end of interval containing unwanted eigenvalues of B kappa relative accuracy of ritz values acceptable as eigenvalues of B vectors is not equal to 1 r work array n dimension of the eigenproblem for matrix B (ncol) dimensions upper limit of desired number of singular triplets of A iterations upper limit of desired number of Lanczos steps nnzero number of nonzeros in A vectors 1 indicates both singular values and singular vectors are wanted and they can be found in output file lav2; 0 indicates only singular values are wanted (output) ritz array of ritz values bnd array of error bounds d array of singular values memory total memory allocated in bytes to solve the B-eigenproblem Functions used -------------- BLAS svd_daxpy, svd_dscal, svd_ddot USER svd_opa, svd_opb, timer MISC write_header, check_parameters LAS2 landr Precision --------- All floating-point calculations are done in double precision; variables are declared as long and double. LAS2 development ---------------- LAS2 is a C translation of the Fortran-77 LAS2 from the SVDPACK library written by Michael W. Berry, University of Tennessee, Dept. of Computer Science, 107 Ayres Hall, Knoxville, TN, 37996-1301 31 Jan 1992: Date written Theresa H. Do University of Tennessee Dept. of Computer Science 107 Ayres Hall Knoxville, TN, 37996-1301 internet: tdo@cs.utk.edu ***********************************************************************/ /*********************************************************************** * * * check_parameters() * * * ***********************************************************************/ /*********************************************************************** Description ----------- Function validates input parameters and returns error code (long) Parameters ---------- (input) dimensions upper limit of desired number of eigenpairs of B iterations upper limit of desired number of lanczos steps n dimension of the eigenproblem for matrix B endl left end of interval containing unwanted eigenvalues of B endr right end of interval containing unwanted eigenvalues of B vectors 1 indicates both eigenvalues and eigenvectors are wanted and they can be found in lav2; 0 indicates eigenvalues only nnzero number of nonzero elements in input matrix (matrix A) ***********************************************************************/ long check_parameters(SMat A, long dimensions, long iterations, double endl, double endr, long vectors) { long error_index; error_index = 0; if (endl >/*=*/ endr) error_index = 2; else if (dimensions > iterations) error_index = 3; else if (A->cols <= 0 || A->rows <= 0) error_index = 4; /*else if (n > A->cols || n > A->rows) error_index = 1;*/ else if (iterations <= 0 || iterations > A->cols || iterations > A->rows) error_index = 5; else if (dimensions <= 0 || dimensions > iterations) error_index = 6; if (error_index) svd_error("svdLAS2 parameter error: %s\n", error_msg[error_index]); return(error_index); } /*********************************************************************** * * * write_header() * * Function writes out header of output file containing ritz values * * * ***********************************************************************/ void write_header(long iterations, long dimensions, double endl, double endr, long vectors, double kappa, long nrow, long ncol, long vals) { Rprintf("SOLVING THE [A^TA] EIGENPROBLEM\n"); Rprintf("NO. OF ROWS = %6ld\n", nrow); Rprintf("NO. OF COLUMNS = %6ld\n", ncol); Rprintf("NO. OF NON-ZERO VALUES = %6ld\n", vals); Rprintf("MATRIX DENSITY = %6.2f%%\n", ((float) vals / nrow) * 100 / ncol); /* Rprintf("ORDER OF MATRIX A = %5ld\n", n); */ Rprintf("MAX. NO. OF LANCZOS STEPS = %6ld\n", iterations); Rprintf("MAX. NO. OF EIGENPAIRS = %6ld\n", dimensions); Rprintf("LEFT END OF THE INTERVAL = %9.2E\n", endl); Rprintf("RIGHT END OF THE INTERVAL = %9.2E\n", endr); Rprintf("KAPPA = %9.2E\n", kappa); /* Rprintf("WANT S-VECTORS? [T/F] = %c\n", (vectors) ? 'T' : 'F'); */ Rprintf("\n"); return; } /*********************************************************************** * * * landr() * * Lanczos algorithm with selective orthogonalization * * Using Simon's Recurrence * * (double precision) * * * ***********************************************************************/ /*********************************************************************** Description ----------- landr() is the LAS2 driver routine that, upon entry, (1) checks for the validity of input parameters of the B-eigenproblem (2) determines several machine constants (3) makes a Lanczos run (4) calculates B-eigenvectors (singular vectors of A) if requested by user arguments --------- (input) n dimension of the eigenproblem for A'A iterations upper limit of desired number of Lanczos steps dimensions upper limit of desired number of eigenpairs nnzero number of nonzeros in matrix A endl left end of interval containing unwanted eigenvalues of B endr right end of interval containing unwanted eigenvalues of B vectors 1 indicates both eigenvalues and eigenvectors are wanted and they can be found in output file lav2; 0 indicates only eigenvalues are wanted kappa relative accuracy of ritz values acceptable as eigenvalues of B (singular values of A) r work array (output) j number of Lanczos steps actually taken neig number of ritz values stabilized ritz array to hold the ritz values bnd array to hold the error bounds External parameters ------------------- Defined and documented in las2.h local parameters ------------------- ibeta radix for the floating-point representation it number of base ibeta digits in the floating-point significand irnd floating-point addition rounded or chopped machep machine relative precision or round-off error negeps largest negative integer wptr array of pointers each pointing to a work space Functions used -------------- MISC svd_dmax, machar, check_parameters LAS2 ritvec, lanso ***********************************************************************/ SVDRec svdLAS2A(SMat A, long dimensions) { double end[2] = {-1.0e-30, 1.0e-30}; double kappa = 1e-6; if (!A) { svd_error("svdLAS2A called with NULL array\n"); return NULL; } return svdLAS2(A, dimensions, 0, end, kappa); } SVDRec svdLAS2(SMat A, long dimensions, long iterations, double end[2], double kappa) { char transpose = FALSE; long ibeta, it, irnd, machep, negep, n, i, steps, nsig, neig, m; double *wptr[10], *ritz, *bnd; SVDRec R = NULL; ierr = 0; // reset the global error flag svdResetCounters(); m = svd_imin(A->rows, A->cols); if (dimensions <= 0 || dimensions > m) dimensions = m; if (iterations <= 0 || iterations > m) iterations = m; if (iterations < dimensions) iterations = dimensions; /* Write output header */ if (SVDVerbosity > 0) write_header(iterations, dimensions, end[0], end[1], TRUE, kappa, A->rows, A->cols, A->vals); /* Check parameters */ if (check_parameters(A, dimensions, iterations, end[0], end[1], TRUE)) return NULL; /* If A is wide, the SVD is computed on its transpose for speed. */ if (A->cols >= A->rows * 1.2) { if (SVDVerbosity > 0) Rprintf("TRANSPOSING THE MATRIX FOR SPEED\n"); transpose = TRUE; A = svdTransposeS(A); } n = A->cols; /* Compute machine precision */ machar(&ibeta, &it, &irnd, &machep, &negep); eps1 = eps * sqrt((double) n); reps = sqrt(eps); eps34 = reps * sqrt(reps); /* Allocate temporary space. */ if (!(wptr[0] = svd_doubleArray(n, TRUE, "las2: wptr[0]"))) goto abort; if (!(wptr[1] = svd_doubleArray(n, FALSE, "las2: wptr[1]"))) goto abort; if (!(wptr[2] = svd_doubleArray(n, FALSE, "las2: wptr[2]"))) goto abort; if (!(wptr[3] = svd_doubleArray(n, FALSE, "las2: wptr[3]"))) goto abort; if (!(wptr[4] = svd_doubleArray(n, FALSE, "las2: wptr[4]"))) goto abort; if (!(wptr[5] = svd_doubleArray(n, FALSE, "las2: wptr[5]"))) goto abort; if (!(wptr[6] = svd_doubleArray(iterations, FALSE, "las2: wptr[6]"))) goto abort; if (!(wptr[7] = svd_doubleArray(iterations, FALSE, "las2: wptr[7]"))) goto abort; if (!(wptr[8] = svd_doubleArray(iterations, FALSE, "las2: wptr[8]"))) goto abort; if (!(wptr[9] = svd_doubleArray(iterations + 1, FALSE, "las2: wptr[9]"))) goto abort; /* Calloc may be unnecessary: */ if (!(ritz = svd_doubleArray(iterations + 1, TRUE, "las2: ritz"))) goto abort; /* Calloc may be unnecessary: */ if (!(bnd = svd_doubleArray(iterations + 1, TRUE, "las2: bnd"))) goto abort; memset(bnd, 127, (iterations + 1) * sizeof(double)); if (!(LanStore = (double **) calloc(iterations + MAXLL, sizeof(double *)))) goto abort; if (!(OPBTemp = svd_doubleArray(A->rows, FALSE, "las2: OPBTemp"))) goto abort; /* Actually run the lanczos thing: */ steps = lanso(A, iterations, dimensions, end[0], end[1], ritz, bnd, wptr, &neig, n); /* Print some stuff. */ if (SVDVerbosity > 0) { Rprintf("NUMBER OF LANCZOS STEPS = %6ld\n" "RITZ VALUES STABILIZED = %6ld\n", steps + 1, neig); } if (SVDVerbosity > 2) { Rprintf("\nCOMPUTED RITZ VALUES (ERROR BNDS)\n"); for (i = 0; i <= steps; i++) Rprintf("%3ld %22.14E (%11.2E)\n", i + 1, ritz[i], bnd[i]); } SAFE_FREE(wptr[0]); SAFE_FREE(wptr[1]); SAFE_FREE(wptr[2]); SAFE_FREE(wptr[3]); SAFE_FREE(wptr[4]); SAFE_FREE(wptr[7]); SAFE_FREE(wptr[8]); /* Compute eigenvectors */ kappa = svd_dmax(fabs(kappa), eps34); R = svdNewSVDRec(); if (!R) { svd_error("svdLAS2: allocation of R failed"); goto cleanup; } R->d = dimensions; /* svd_imin(neig, dimensions); would seem to make more sense, but ritvec() expects to have a sufficiently large buffer that is resized/rotated in the function */ R->Ut = svdNewDMat(R->d, A->rows); R->S = svd_doubleArray(R->d, TRUE, "las2: R->s"); R->Vt = svdNewDMat(R->d, A->cols); if (!R->Ut || !R->S || !R->Vt) { svd_error("svdLAS2: allocation of R failed"); goto cleanup; } nsig = ritvec(n, A, R, kappa, ritz, bnd, wptr[6], wptr[9], wptr[5], steps, neig); if (SVDVerbosity > 1) { Rprintf("\nSINGULAR VALUES: "); svdWriteDenseArray(R->S, R->d, "-", FALSE); if (SVDVerbosity > 2) { Rprintf("\nLEFT SINGULAR VECTORS (transpose of U): "); svdWriteDenseMatrix(R->Ut, "-", SVD_F_DT); Rprintf("\nRIGHT SINGULAR VECTORS (transpose of V): "); svdWriteDenseMatrix(R->Vt, "-", SVD_F_DT); } } if (SVDVerbosity > 0) { Rprintf("SINGULAR VALUES FOUND = %6d\n" "SIGNIFICANT VALUES = %6ld\n", R->d, nsig); } cleanup: for (i = 0; i <= 9; i++) SAFE_FREE(wptr[i]); SAFE_FREE(ritz); SAFE_FREE(bnd); if (LanStore) { for (i = 0; i < iterations + MAXLL; i++) SAFE_FREE(LanStore[i]); SAFE_FREE(LanStore); } SAFE_FREE(OPBTemp); /* This swaps and transposes the singular matrices if A was transposed. */ if (R && transpose) { DMat T; svdFreeSMat(A); T = R->Ut; R->Ut = R->Vt; R->Vt = T; } return R; abort: svd_error("svdLAS2: fatal error, aborting"); return NULL; } /*********************************************************************** * * * ritvec() * * Function computes the singular vectors of matrix A * * * ***********************************************************************/ /*********************************************************************** Description ----------- This function is invoked by landr() only if eigenvectors of the A'A eigenproblem are desired. When called, ritvec() computes the singular vectors of A and writes the result to an unformatted file. Parameters ---------- (input) nrow number of rows of A steps number of Lanczos iterations performed fp_out2 pointer to unformatted output file n dimension of matrix A kappa relative accuracy of ritz values acceptable as eigenvalues of A'A ritz array of ritz values bnd array of error bounds alf array of diagonal elements of the tridiagonal matrix T bet array of off-diagonal elements of T w1, w2 work space (output) xv1 array of eigenvectors of A'A (right singular vectors of A) ierr error code 0 for normal return from imtql2() k if convergence did not occur for k-th eigenvalue in imtql2() nsig number of accepted ritz values based on kappa (local) s work array which is initialized to the identity matrix of order (j + 1) upon calling imtql2(). After the call, s contains the orthonormal eigenvectors of the symmetric tridiagonal matrix T Functions used -------------- BLAS svd_dscal, svd_dcopy, svd_daxpy USER store_vec imtql2 ***********************************************************************/ void rotateArray(double *a, int size, int x) { int i, j, n, start; double t1, t2; if (x == 0) return; j = start = 0; t1 = a[0]; for (i = 0; i < size; i++) { n = (j >= x) ? j - x : j + size - x; t2 = a[n]; a[n] = t1; t1 = t2; j = n; if (j == start) { start = ++j; t1 = a[j]; } } } long ritvec(long n, SMat A, SVDRec R, double kappa, double *ritz, double *bnd, double *alf, double *bet, double *w2, long steps, long neig) { long js, jsq, i, k, /*size,*/ id2, tmp, nsig, x; double *s, *xv2, tmp0, tmp1, xnorm, *w1 = R->Vt->value[0]; js = steps + 1; jsq = js * js; /*size = sizeof(double) * n;*/ s = svd_doubleArray(jsq, TRUE, "ritvec: s"); xv2 = svd_doubleArray(n, FALSE, "ritvec: xv2"); /* initialize s to an identity matrix */ for (i = 0; i < jsq; i+= (js+1)) s[i] = 1.0; svd_dcopy(js, alf, 1, w1, -1); svd_dcopy(steps, &bet[1], 1, &w2[1], -1); /* on return from imtql2(), w1 contains eigenvalues in ascending * order and s contains the corresponding eigenvectors */ imtql2(js, js, w1, w2, s); /*fwrite((char *)&n, sizeof(n), 1, fp_out2); fwrite((char *)&js, sizeof(js), 1, fp_out2); fwrite((char *)&kappa, sizeof(kappa), 1, fp_out2);*/ /*id = 0;*/ nsig = 0; if (ierr) { R->d = 0; } else { x = 0; id2 = jsq - js; for (k = 0; k < js; k++) { tmp = id2; if (bnd[k] <= kappa * fabs(ritz[k]) && k > js-neig-1) { if (--x < 0) x = R->d - 1; w1 = R->Vt->value[x]; for (i = 0; i < n; i++) w1[i] = 0.0; for (i = 0; i < js; i++) { store_vec(n, RETRQ, i, w2); svd_daxpy(n, s[tmp], w2, 1, w1, 1); tmp -= js; } /*fwrite((char *)w1, size, 1, fp_out2);*/ /* store the w1 vector row-wise in array xv1; * size of xv1 is (steps+1) * (nrow+ncol) elements * and each vector, even though only ncol long, * will have (nrow+ncol) elements in xv1. * It is as if xv1 is a 2-d array (steps+1) by * (nrow+ncol) and each vector occupies a row */ /* j is the index in the R arrays, which are sorted by high to low singular values. */ /*for (i = 0; i < n; i++) R->Vt->value[x]xv1[id++] = w1[i];*/ /*id += nrow;*/ nsig++; } id2++; } SAFE_FREE(s); /* Rotate the singular vectors and values. */ /* x is now the location of the highest singular value. */ rotateArray(R->Vt->value[0], R->Vt->rows * R->Vt->cols, x * R->Vt->cols); R->d = svd_imin(R->d, nsig); for (x = 0; x < R->d; x++) { /* multiply by matrix B first */ svd_opb(A, R->Vt->value[x], xv2, OPBTemp); tmp0 = svd_ddot(n, R->Vt->value[x], 1, xv2, 1); svd_daxpy(n, -tmp0, R->Vt->value[x], 1, xv2, 1); tmp0 = sqrt(tmp0); xnorm = sqrt(svd_ddot(n, xv2, 1, xv2, 1)); /* multiply by matrix A to get (scaled) left s-vector */ svd_opa(A, R->Vt->value[x], R->Ut->value[x]); tmp1 = 1.0 / tmp0; svd_dscal(A->rows, tmp1, R->Ut->value[x], 1); xnorm *= tmp1; bnd[i] = xnorm; R->S[x] = tmp0; } } SAFE_FREE(s); SAFE_FREE(xv2); return nsig; } /*********************************************************************** * * * lanso() * * * ***********************************************************************/ /*********************************************************************** Description ----------- Function determines when the restart of the Lanczos algorithm should occur and when it should terminate. Arguments --------- (input) n dimension of the eigenproblem for matrix B iterations upper limit of desired number of lanczos steps dimensions upper limit of desired number of eigenpairs endl left end of interval containing unwanted eigenvalues endr right end of interval containing unwanted eigenvalues ritz array to hold the ritz values bnd array to hold the error bounds wptr array of pointers that point to work space: wptr[0]-wptr[5] six vectors of length n wptr[6] array to hold diagonal of the tridiagonal matrix T wptr[9] array to hold off-diagonal of T wptr[7] orthogonality estimate of Lanczos vectors at step j wptr[8] orthogonality estimate of Lanczos vectors at step j-1 (output) j number of Lanczos steps actually taken neig number of ritz values stabilized ritz array to hold the ritz values bnd array to hold the error bounds ierr (globally declared) error flag ierr = 8192 if stpone() fails to find a starting vector ierr = k if convergence did not occur for k-th eigenvalue in imtqlb() ierr = 0 otherwise Functions used -------------- LAS stpone, error_bound, lanczos_step MISC svd_dsort2 UTILITY svd_imin, svd_imax ***********************************************************************/ int lanso(SMat A, long iterations, long dimensions, double endl, double endr, double *ritz, double *bnd, double *wptr[], long *neigp, long n) { double *alf, *eta, *oldeta, *bet, *wrk, rnm, tol; long ll, first, last, ENOUGH, id2, id3, i, l, neig, j = 0, intro = 0; alf = wptr[6]; eta = wptr[7]; oldeta = wptr[8]; bet = wptr[9]; wrk = wptr[5]; /* take the first step */ stpone(A, wptr, &rnm, &tol, n); /* BUGFIX -- 14 July 2019 (Stefan Evert): * rnm == 0.0 is valid if the starting vector hits an invariant subspace, * so we must only abort if an error is signaled. */ if (/* !rnm || */ ierr) return 0; eta[0] = eps1; oldeta[0] = eps1; ll = 0; first = 1; last = svd_imin(dimensions + svd_imax(8, dimensions), iterations); ENOUGH = FALSE; /*id1 = 0;*/ while (/*id1 < dimensions && */!ENOUGH) { if (rnm <= tol) rnm = 0.0; /* the actual lanczos loop */ j = lanczos_step(A, first, last, wptr, alf, eta, oldeta, bet, &ll, &ENOUGH, &rnm, &tol, n); if (ENOUGH) j = j - 1; else j = last - 1; first = j + 1; bet[j+1] = rnm; /* analyze T */ l = 0; /* BUGFIX -- 14 July 2019 (Stefan Evert): * There is a one-off bug for j in the loop below, presumably due to the transition * from 1-based indexing in Fortran to 0-based indexing in C, which has been implemented * in an inconsistent way. Allowing the loop to run for one extra iteration seems to * solve the issue. */ /* for (id2 = 0; id2 < j; id2++) { */ for (id2 = 0; id2 <= j; id2++) { if (l > j) break; for (i = l; i <= j; i++) if (!bet[i+1]) break; if (i > j) i = j; /* now i is at the end of an unreduced submatrix */ svd_dcopy(i-l+1, &alf[l], 1, &ritz[l], -1); svd_dcopy(i-l, &bet[l+1], 1, &wrk[l+1], -1); imtqlb(i-l+1, &ritz[l], &wrk[l], &bnd[l]); if (ierr) { svd_error("svdLAS2: imtqlb failed to converge (ierr = %ld)\n", ierr); svd_error(" l = %ld i = %ld\n", l, i); for (id3 = l; id3 <= i; id3++) svd_error(" %ld %lg %lg %lg\n", id3, ritz[id3], wrk[id3], bnd[id3]); } for (id3 = l; id3 <= i; id3++) bnd[id3] = rnm * fabs(bnd[id3]); l = i + 1; } /* sort eigenvalues into increasing order */ svd_dsort2((j+1) / 2, j + 1, ritz, bnd); /* for (i = 0; i < iterations; i++) Rprintf("%f ", ritz[i]); Rprintf("\n"); */ /* massage error bounds for very close ritz values */ neig = error_bound(&ENOUGH, endl, endr, ritz, bnd, j, tol); *neigp = neig; /* should we stop? */ if (neig < dimensions) { if (!neig) { last = first + 9; intro = first; } else last = first + svd_imax(3, 1 + ((j - intro) * (dimensions-neig)) / neig); last = svd_imin(last, iterations); } else ENOUGH = TRUE; ENOUGH = ENOUGH || first >= iterations; /* id1++; */ /* Rprintf("id1=%d dimen=%d first=%d\n", id1, dimensions, first); */ } store_vec(n, STORQ, j, wptr[1]); return j; } /*********************************************************************** * * * lanczos_step() * * * ***********************************************************************/ /*********************************************************************** Description ----------- Function embodies a single Lanczos step Arguments --------- (input) n dimension of the eigenproblem for matrix B first start of index through loop last end of index through loop wptr array of pointers pointing to work space alf array to hold diagonal of the tridiagonal matrix T eta orthogonality estimate of Lanczos vectors at step j oldeta orthogonality estimate of Lanczos vectors at step j-1 bet array to hold off-diagonal of T ll number of intitial Lanczos vectors in local orthog. (has value of 0, 1 or 2) enough stop flag Functions used -------------- BLAS svd_ddot, svd_dscal, svd_daxpy, svd_datx, svd_dcopy USER store_vec LAS purge, ortbnd, startv UTILITY svd_imin, svd_imax ***********************************************************************/ long lanczos_step(SMat A, long first, long last, double *wptr[], double *alf, double *eta, double *oldeta, double *bet, long *ll, long *enough, double *rnmp, double *tolp, long n) { double t, *mid, rnm = *rnmp, tol = *tolp, anorm; long i, j; for (j=first; j 4.0 * fabs(alf[j]))) *ll = j; for (i=0; i < svd_imin(*ll, j-1); i++) { store_vec(n, RETRP, i, wptr[5]); t = svd_ddot(n, wptr[5], 1, wptr[0], 1); store_vec(n, RETRQ, i, wptr[5]); svd_daxpy(n, -t, wptr[5], 1, wptr[0], 1); eta[i] = eps1; oldeta[i] = eps1; } /* extended local reorthogonalization */ t = svd_ddot(n, wptr[0], 1, wptr[4], 1); svd_daxpy(n, -t, wptr[2], 1, wptr[0], 1); if (bet[j] > 0.0) bet[j] = bet[j] + t; t = svd_ddot(n, wptr[0], 1, wptr[3], 1); svd_daxpy(n, -t, wptr[1], 1, wptr[0], 1); alf[j] = alf[j] + t; svd_dcopy(n, wptr[0], 1, wptr[4], 1); rnm = sqrt(svd_ddot(n, wptr[0], 1, wptr[4], 1)); anorm = bet[j] + fabs(alf[j]) + rnm; tol = reps * anorm; /* update the orthogonality bounds */ ortbnd(alf, eta, oldeta, bet, j, rnm); /* restore the orthogonality state when needed */ purge(n, *ll, wptr[0], wptr[1], wptr[4], wptr[3], wptr[5], eta, oldeta, j, &rnm, tol); if (rnm <= tol) rnm = 0.0; } *rnmp = rnm; *tolp = tol; return j; } /*********************************************************************** * * * ortbnd() * * * ***********************************************************************/ /*********************************************************************** Description ----------- Funtion updates the eta recurrence Arguments --------- (input) alf array to hold diagonal of the tridiagonal matrix T eta orthogonality estimate of Lanczos vectors at step j oldeta orthogonality estimate of Lanczos vectors at step j-1 bet array to hold off-diagonal of T n dimension of the eigenproblem for matrix B j dimension of T rnm norm of the next residual vector eps1 roundoff estimate for dot product of two unit vectors (output) eta orthogonality estimate of Lanczos vectors at step j+1 oldeta orthogonality estimate of Lanczos vectors at step j Functions used -------------- BLAS svd_dswap ***********************************************************************/ void ortbnd(double *alf, double *eta, double *oldeta, double *bet, long step, double rnm) { long i; if (step < 1) return; if (rnm) { if (step > 1) { oldeta[0] = (bet[1] * eta[1] + (alf[0]-alf[step]) * eta[0] - bet[step] * oldeta[0]) / rnm + eps1; } for (i=1; i<=step-2; i++) oldeta[i] = (bet[i+1] * eta[i+1] + (alf[i]-alf[step]) * eta[i] + bet[i] * eta[i-1] - bet[step] * oldeta[i])/rnm + eps1; } oldeta[step-1] = eps1; svd_dswap(step, oldeta, 1, eta, 1); eta[step] = eps1; return; } /*********************************************************************** * * * purge() * * * ***********************************************************************/ /*********************************************************************** Description ----------- Function examines the state of orthogonality between the new Lanczos vector and the previous ones to decide whether re-orthogonalization should be performed Arguments --------- (input) n dimension of the eigenproblem for matrix B ll number of intitial Lanczos vectors in local orthog. r residual vector to become next Lanczos vector q current Lanczos vector ra previous Lanczos vector qa previous Lanczos vector wrk temporary vector to hold the previous Lanczos vector eta state of orthogonality between r and prev. Lanczos vectors oldeta state of orthogonality between q and prev. Lanczos vectors j current Lanczos step (output) r residual vector orthogonalized against previous Lanczos vectors q current Lanczos vector orthogonalized against previous ones Functions used -------------- BLAS svd_daxpy, svd_dcopy, svd_idamax, svd_ddot USER store_vec ***********************************************************************/ void purge(long n, long ll, double *r, double *q, double *ra, double *qa, double *wrk, double *eta, double *oldeta, long step, double *rnmp, double tol) { double t, tq, tr, reps1, rnm = *rnmp; long k, iteration, flag, i; if (step < ll+2) return; k = svd_idamax(step - (ll+1), &eta[ll], 1) + ll; /* BUGFIX -- 14 July 2019 (Stefan Evert): * Orthogonality seems to be lost much faster than the estimates in eta[] suggest, * therefore force re-orthogonalization after each Lanczos step. Despite quadratic * complexity in the number of Lanczos vectors, this shouldn't cost too much performance * unless the vectors are stored in external memory (cf. original mainframe implementation). */ /* if (fabs(eta[k]) > reps) { */ if (TRUE) { reps1 = eps1 / reps; iteration = 0; flag = TRUE; while (iteration < 2 && flag) { if (rnm > tol) { /* bring in a lanczos vector t and orthogonalize both * r and q against it */ tq = 0.0; tr = 0.0; for (i = ll; i < step; i++) { store_vec(n, RETRQ, i, wrk); t = -svd_ddot(n, qa, 1, wrk, 1); tq += fabs(t); svd_daxpy(n, t, wrk, 1, q, 1); t = -svd_ddot(n, ra, 1, wrk, 1); tr += fabs(t); svd_daxpy(n, t, wrk, 1, r, 1); } svd_dcopy(n, q, 1, qa, 1); t = -svd_ddot(n, r, 1, qa, 1); tr += fabs(t); svd_daxpy(n, t, q, 1, r, 1); svd_dcopy(n, r, 1, ra, 1); rnm = sqrt(svd_ddot(n, ra, 1, r, 1)); if (tq <= reps1 && tr <= reps1 * rnm) flag = FALSE; } iteration++; } for (i = ll; i <= step; i++) { eta[i] = eps1; oldeta[i] = eps1; } } *rnmp = rnm; return; } /*********************************************************************** * * * stpone() * * * ***********************************************************************/ /*********************************************************************** Description ----------- Function performs the first step of the Lanczos algorithm. It also does a step of extended local re-orthogonalization. Arguments --------- (input) n dimension of the eigenproblem for matrix B (output) ierr error flag wptr array of pointers that point to work space that contains wptr[0] r[j] wptr[1] q[j] wptr[2] q[j-1] wptr[3] p wptr[4] p[j-1] wptr[6] diagonal elements of matrix T Functions used -------------- BLAS svd_daxpy, svd_datx, svd_dcopy, svd_ddot, svd_dscal USER store_vec, opb LAS startv ***********************************************************************/ void stpone(SMat A, double *wrkptr[], double *rnmp, double *tolp, long n) { double t, *alf, rnm, anorm; alf = wrkptr[6]; /* get initial vector; default is random */ rnm = startv(A, wrkptr, 0, n); if (rnm == 0.0 || ierr != 0) return; /* normalize starting vector */ t = 1.0 / rnm; svd_datx(n, t, wrkptr[0], 1, wrkptr[1], 1); svd_dscal(n, t, wrkptr[3], 1); /* take the first step */ svd_opb(A, wrkptr[3], wrkptr[0], OPBTemp); alf[0] = svd_ddot(n, wrkptr[0], 1, wrkptr[3], 1); svd_daxpy(n, -alf[0], wrkptr[1], 1, wrkptr[0], 1); t = svd_ddot(n, wrkptr[0], 1, wrkptr[3], 1); svd_daxpy(n, -t, wrkptr[1], 1, wrkptr[0], 1); alf[0] += t; svd_dcopy(n, wrkptr[0], 1, wrkptr[4], 1); rnm = sqrt(svd_ddot(n, wrkptr[0], 1, wrkptr[4], 1)); anorm = rnm + fabs(alf[0]); *rnmp = rnm; *tolp = reps * anorm; return; } /*********************************************************************** * * * startv() * * * ***********************************************************************/ /*********************************************************************** Description ----------- Function delivers a starting vector in r and returns |r|; it returns zero if the range is spanned, and ierr is non-zero if no starting vector within range of operator can be found. Parameters --------- (input) n dimension of the eigenproblem matrix B wptr array of pointers that point to work space j starting index for a Lanczos run eps machine epsilon (relative precision) (output) wptr array of pointers that point to work space that contains r[j], q[j], q[j-1], p[j], p[j-1] ierr error flag (nonzero if no starting vector can be found) Functions used -------------- BLAS svd_ddot, svd_dcopy, svd_daxpy USER svd_opb, store_vec MISC random ***********************************************************************/ double startv(SMat A, double *wptr[], long step, long n) { double rnm2, *r, t; unsigned long irand; long id, i; /* BUGFIX -- 14 July 2019 (Stefan Evert): * This function is designed to either take a user-supplied starting vector in wptr[0] * (only in the first Lanczos step and if wptr[0] != 0) or to generate a random vector * and orthogonalize it wrt. previous Lanczos vectors. If the vector isn't usable, * several up to 2 additional random vectors are tried (in the loop). * * The original code fails to re-try if the start vector doesn't have a sufficiently * large orthogonal component to the previous Lanczos vectors. It will then incorrectly * assume that all non-zero eigenvalues have been found. * * The bug fix is to include the orthogonalization in the retry loop (with ), * making sure to also re-try the initial random generation separately. If this first * phase fails, an error is signalled and the algorithm is terminated immediately; if * only the orthogonalization fails, we have found all non-zero eigenvalues. * We also allow more re-tries (up to 5) to be on the safe side. * * It would be desirable to replace the customRNG svd_random2(), which generates very * large values and is more likely to trigger boundary cases, with R's built-in Gaussian RNG. * However, this means that repeated runs of the same SVD might give different results * (signs, and rotations for singular values with mutliplicity > 1), whereas the built-in * svd_random2() is fully deterministic. */ /* get initial vector; default is random */ rnm2 = svd_ddot(n, wptr[0], 1, wptr[0], 1); irand = 918273 + step; r = wptr[0]; id = 0; while (id < 5) { while (id < 5) { if (id > 0 || step > 0 || rnm2 == 0) { /* -- switch to this code when we know how to deal with non-determinism GetRNGstate(); for (i = 0; i < n; i++) r[i] = norm_rand(); PutRNGstate(); */ for (i = 0; i < n; i++) r[i] = svd_random2(&irand); } svd_dcopy(n, wptr[0], 1, wptr[3], 1); /* apply operator to put r in range (essential if m singular) */ svd_opb(A, wptr[3], wptr[0], OPBTemp); svd_dcopy(n, wptr[0], 1, wptr[3], 1); rnm2 = svd_ddot(n, wptr[0], 1, wptr[3], 1); if (rnm2 >= eps) break; /* otherwise try another random start vector */ id++; } /* fatal error: this means that the matrix A is close to zero */ if (rnm2 < eps) { ierr = 8192; return(-1); } /* if this isn't the first Lanczos step, orthogonalize wrt. previous vectors */ if (step > 0) { for (i = 0; i < step; i++) { store_vec(n, RETRQ, i, wptr[5]); t = -svd_ddot(n, wptr[3], 1, wptr[5], 1); svd_daxpy(n, t, wptr[5], 1, wptr[0], 1); } /* make sure q[step] is orthogonal to q[step-1] */ t = svd_ddot(n, wptr[4], 1, wptr[0], 1); svd_daxpy(n, -t, wptr[2], 1, wptr[0], 1); svd_dcopy(n, wptr[0], 1, wptr[3], 1); t = svd_ddot(n, wptr[3], 1, wptr[0], 1); if (t <= eps * rnm2) t = 0.0; rnm2 = t; } if (rnm2 > 0.0) break; /* this means we have found a suitable starting vector */ id++; } return(sqrt(rnm2)); } /* this is the buggy original version, which is no longer used */ double startv_orig(SMat A, double *wptr[], long step, long n) { double rnm2, *r, t; unsigned long irand; /* added unsigned to avoid spurious warning with modified svd_random2() */ long id, i; /* get initial vector; default is random */ rnm2 = svd_ddot(n, wptr[0], 1, wptr[0], 1); irand = 918273 + step; r = wptr[0]; for (id = 0; id < 5; id++) { if (id > 0 || step > 0 || rnm2 == 0) for (i = 0; i < n; i++) r[i] = svd_random2(&irand); svd_dcopy(n, wptr[0], 1, wptr[3], 1); /* apply operator to put r in range (essential if m singular) */ svd_opb(A, wptr[3], wptr[0], OPBTemp); svd_dcopy(n, wptr[0], 1, wptr[3], 1); rnm2 = svd_ddot(n, wptr[0], 1, wptr[3], 1); if (rnm2 > eps) break; /* try another random vector */ } /* fatal error */ if (rnm2 <= 0.0) { ierr = 8192; return(-1); } if (step > 0) { for (i = 0; i < step; i++) { store_vec(n, RETRQ, i, wptr[5]); t = -svd_ddot(n, wptr[3], 1, wptr[5], 1); svd_daxpy(n, t, wptr[5], 1, wptr[0], 1); } /* make sure q[step] is orthogonal to q[step-1] */ t = svd_ddot(n, wptr[4], 1, wptr[0], 1); svd_daxpy(n, -t, wptr[2], 1, wptr[0], 1); svd_dcopy(n, wptr[0], 1, wptr[3], 1); t = svd_ddot(n, wptr[3], 1, wptr[0], 1); if (t <= eps * rnm2) t = 0.0; rnm2 = t; } return(sqrt(rnm2)); } /*********************************************************************** * * * error_bound() * * * ***********************************************************************/ /*********************************************************************** Description ----------- Function massages error bounds for very close ritz values by placing a gap between them. The error bounds are then refined to reflect this. Arguments --------- (input) endl left end of interval containing unwanted eigenvalues endr right end of interval containing unwanted eigenvalues ritz array to store the ritz values bnd array to store the error bounds enough stop flag Functions used -------------- BLAS svd_idamax UTILITY svd_dmin ***********************************************************************/ long error_bound(long *enough, double endl, double endr, double *ritz, double *bnd, long step, double tol) { long mid, i, neig; double gapl, gap; /* massage error bounds for very close ritz values */ mid = svd_idamax(step + 1, bnd, 1); for (i=((step+1) + (step-1)) / 2; i >= mid + 1; i -= 1) if (fabs(ritz[i-1] - ritz[i]) < eps34 * fabs(ritz[i])) if (bnd[i] > tol && bnd[i-1] > tol) { bnd[i-1] = sqrt(bnd[i] * bnd[i] + bnd[i-1] * bnd[i-1]); bnd[i] = 0.0; } for (i=((step+1) - (step-1)) / 2; i <= mid - 1; i +=1 ) if (fabs(ritz[i+1] - ritz[i]) < eps34 * fabs(ritz[i])) if (bnd[i] > tol && bnd[i+1] > tol) { bnd[i+1] = sqrt(bnd[i] * bnd[i] + bnd[i+1] * bnd[i+1]); bnd[i] = 0.0; } /* refine the error bounds */ neig = 0; gapl = ritz[step] - ritz[0]; for (i = 0; i <= step; i++) { gap = gapl; if (i < step) gapl = ritz[i+1] - ritz[i]; gap = svd_dmin(gap, gapl); if (gap > bnd[i]) bnd[i] = bnd[i] * (bnd[i] / gap); if (bnd[i] <= 16.0 * eps * fabs(ritz[i])) { neig++; if (!*enough) *enough = endl < ritz[i] && ritz[i] < endr; } } return neig; } /*********************************************************************** * * * imtqlb() * * * ***********************************************************************/ /*********************************************************************** Description ----------- imtqlb() is a translation of a Fortran version of the Algol procedure IMTQL1, Num. Math. 12, 377-383(1968) by Martin and Wilkinson, as modified in Num. Math. 15, 450(1970) by Dubrulle. Handbook for Auto. Comp., vol.II-Linear Algebra, 241-248(1971). See also B. T. Smith et al, Eispack Guide, Lecture Notes in Computer Science, Springer-Verlag, (1976). The function finds the eigenvalues of a symmetric tridiagonal matrix by the implicit QL method. Arguments --------- (input) n order of the symmetric tridiagonal matrix d contains the diagonal elements of the input matrix e contains the subdiagonal elements of the input matrix in its last n-1 positions. e[0] is arbitrary (output) d contains the eigenvalues in ascending order. if an error exit is made, the eigenvalues are correct and ordered for indices 0,1,...ierr, but may not be the smallest eigenvalues. e has been destroyed. ierr set to zero for normal return, j if the j-th eigenvalue has not been determined after 30 iterations. Functions used -------------- UTILITY svd_fsign MISC svd_pythag ***********************************************************************/ void imtqlb(long n, double d[], double e[], double bnd[]) { long last, l, m, i, iteration; /* various flags */ long exchange, convergence, underflow; double b, test, g, r, s, c, p, f; if (n == 1) return; ierr = 0; bnd[0] = 1.0; last = n - 1; for (i = 1; i < n; i++) { bnd[i] = 0.0; e[i-1] = e[i]; } e[last] = 0.0; for (l = 0; l < n; l++) { iteration = 0; while (iteration <= 30) { for (m = l; m < n; m++) { convergence = FALSE; if (m == last) break; else { test = fabs(d[m]) + fabs(d[m+1]); if (test + fabs(e[m]) == test) convergence = TRUE; } if (convergence) break; } p = d[l]; f = bnd[l]; if (m != l) { if (iteration == 30) { ierr = l; return; } iteration += 1; /*........ form shift ........*/ g = (d[l+1] - p) / (2.0 * e[l]); r = svd_pythag(g, 1.0); g = d[m] - p + e[l] / (g + svd_fsign(r, g)); s = 1.0; c = 1.0; p = 0.0; underflow = FALSE; i = m - 1; while (underflow == FALSE && i >= l) { f = s * e[i]; b = c * e[i]; r = svd_pythag(f, g); e[i+1] = r; if (r == 0.0) underflow = TRUE; else { s = f / r; c = g / r; g = d[i+1] - p; r = (d[i] - g) * s + 2.0 * c * b; p = s * r; d[i+1] = g + p; g = c * r - b; f = bnd[i+1]; bnd[i+1] = s * bnd[i] + c * f; bnd[i] = c * bnd[i] - s * f; i--; } } /* end while (underflow != FALSE && i >= l) */ /*........ recover from underflow .........*/ if (underflow) { d[i+1] -= p; e[m] = 0.0; } else { d[l] -= p; e[l] = g; e[m] = 0.0; } } /* end if (m != l) */ else { /* order the eigenvalues */ exchange = TRUE; if (l != 0) { i = l; while (i >= 1 && exchange == TRUE) { if (p < d[i-1]) { d[i] = d[i-1]; bnd[i] = bnd[i-1]; i--; } else exchange = FALSE; } } if (exchange) i = 0; d[i] = p; bnd[i] = f; iteration = 31; } } /* end while (iteration <= 30) */ } /* end for (l=0; l= l) { f = s * e[i]; b = c * e[i]; r = svd_pythag(f, g); e[i+1] = r; if (r == 0.0) underflow = TRUE; else { s = f / r; c = g / r; g = d[i+1] - p; r = (d[i] - g) * s + 2.0 * c * b; p = s * r; d[i+1] = g + p; g = c * r - b; /* form vector */ for (k = 0; k < nnm; k += n) { index = k + i; f = z[index+1]; z[index+1] = s * z[index] + c * f; z[index] = c * z[index] - s * f; } i--; } } /* end while (underflow != FALSE && i >= l) */ /*........ recover from underflow .........*/ if (underflow) { d[i+1] -= p; e[m] = 0.0; } else { d[l] -= p; e[l] = g; e[m] = 0.0; } } else break; } /*...... end while (iteration <= 30) .........*/ } /*...... end for (l=0; l= MAXLL) { svd_error("svdLAS2: store_vec (STORP) called with j >= MAXLL"); break; } if (!LanStore[j]) { if (!(LanStore[j] = svd_doubleArray(n, FALSE, "LanStore[j]"))) svd_fatalError("svdLAS2: failed to allocate LanStore[%d]", j); } svd_dcopy(n, s, 1, LanStore[j], 1); break; case RETRP: if (j >= MAXLL) { svd_error("svdLAS2: store_vec (RETRP) called with j >= MAXLL"); break; } if (!LanStore[j]) svd_fatalError("svdLAS2: store_vec (RETRP) called on index %d (not allocated)", j); svd_dcopy(n, LanStore[j], 1, s, 1); break; } return; } sparsesvd/NAMESPACE0000644000176200001440000000017313153306054013504 0ustar liggesusersimportFrom(Matrix, sparseMatrix) importFrom(methods, as, is) useDynLib(sparsesvd, .registration = TRUE) export(sparsesvd) sparsesvd/NEWS0000644000176200001440000000240213513102414012753 0ustar liggesusersVersion 0.2: - this release fixes several bugs in the underlying 3rd-party SVDLIBC code, which produce incorrect results or get stuck in infinite loops for highly sparse matrices that require multiple restarts of the Lanczos iteration; various corner cases including small diagonal matrices were also affected - a faulty RNG implementation in SVDLIBC contributed to triggering these bugs (but was not the root cause); the RNG has also been fixed - original bug report from Abdelmoneim Desouki Version 0.1-4: - for some inexplicable reason, R on Oracle Solaris 10 (i386) doesn't like C functions named "store", leading to a segmentation fault and test failures on CRAN - function store() has been renamed store_vec() in the 3rd-party C code included in the package Version 0.1-3: - added native routine registration to comply with new CRAN requirements Version 0.1-2: - stress tests with large number of random sparse nonnegative matrices - fix some typos in documentation Version 0.1-1: - fix ISO C incompatibilities that prevented compilation on some platforms - more detailed copyright information in DESCRIPTION and LICENSE Version 0.1: - initial release - based on SVDLIBC version 1.4 - modified to avoid I/O on STDOUT and STDERR sparsesvd/R/0000755000176200001440000000000013512663533012474 5ustar liggesuserssparsesvd/R/sparsesvd.R0000644000176200001440000000063613153306335014631 0ustar liggesuserssparsesvd <- function (M, rank=0L, tol=1e-15, kappa=1e-6) { if (is.matrix(M)) stop("argument must be a sparse real matrix") if (!is(M, "dMatrix")) stop("only sparse real dMatrix format (from Matrix package) is currently supported") if (!is(M, "dgCMatrix")) M <- as(M, "dgCMatrix") .Call(svdLAS2_, dim(M), M@i, M@p, M@x, as.integer(rank), as.double(tol * c(-1, 1)), as.double(kappa), PACKAGE="sparsesvd") } sparsesvd/MD50000644000176200001440000000137513513113513012576 0ustar liggesusers588d64216b67b496b46207b57f69821f *DESCRIPTION 5aaa0fb3a42fa53f037cc3f5495a95de *LICENSE d52da7481ff5dd294cc0ec7e3b4074ec *NAMESPACE 2b57f5b6e93b9bb3b58cfd68be6c03ed *NEWS 447cedbb32d104f816f4c26ab2b12108 *R/sparsesvd.R 81bfbbc7b8c1a9641a0179bc52642f47 *man/sparsesvd.Rd 33e5fff661a481178dac12dc2f012dd3 *src/las2.c 7062ac538af314886612d9843d2af9a6 *src/main.c 71340db1b232c013f25249b954c29d9d *src/svdlib.c 411e816f799b44c0373d1d421929337c *src/svdlib.h a7950e658766ae0b87dad08da4f0c1f0 *src/svdutil.c 01ba31e23e6061429f1d018cff8c0d36 *src/svdutil.h 9a52d60978e4b52d1b8cbd67866fe9fe *tests/formats.R e766228e0eea708051707ec4ab041411 *tests/iris.R a6ccb8640bbf1f03a68bea9a887cc1d1 *tests/loss_of_orthogonality.R 74f715b954a26845c3c95287f3bf95b5 *tests/stress_test.R sparsesvd/DESCRIPTION0000644000176200001440000000323013513113513013764 0ustar liggesusersPackage: sparsesvd Title: Sparse Truncated Singular Value Decomposition (from 'SVDLIBC') Version: 0.2 Date: 2019-07-15 Authors@R: c(person("Doug", "Rohde", email="dr+svd@tedlab.mit.edu", role="aut"), person("Michael", "Berry", role="aut"), person("Theresa", "Do", role="aut"), person("Gavin", "O'Brien", role="aut"), person("Vijay", "Krishna", role="aut"), person("Sowmini", "Varadhan", role="aut"), person("University of Tennessee Research Foundation", role = "cph", comment = "files src/las2.c, src/svdlib.[ch], src/svdutil.[ch]"), person("Stefan", "Evert", email="stefan.evert@fau.de", role=c("cre", "aut", "cph"), comment="copyright holder for files src/main.c, R/*, man/*, tests/*")) Description: Wrapper around the 'SVDLIBC' library for (truncated) singular value decomposition of a sparse matrix. Currently, only sparse real matrices in Matrix package format are supported. Depends: R (>= 3.0) Imports: Matrix, methods License: BSD_3_clause + file LICENSE URL: http://tedlab.mit.edu/~dr/SVDLIBC/, http://wordspace.r-forge.r-project.org/ NeedsCompilation: yes LazyData: true Packaged: 2019-07-15 13:52:52 UTC; evert Author: Doug Rohde [aut], Michael Berry [aut], Theresa Do [aut], Gavin O'Brien [aut], Vijay Krishna [aut], Sowmini Varadhan [aut], University of Tennessee Research Foundation [cph] (files src/las2.c, src/svdlib.[ch], src/svdutil.[ch]), Stefan Evert [cre, aut, cph] (copyright holder for files src/main.c, R/*, man/*, tests/*) Maintainer: Stefan Evert Repository: CRAN Date/Publication: 2019-07-15 15:10:03 UTC sparsesvd/man/0000755000176200001440000000000012676575236013062 5ustar liggesuserssparsesvd/man/sparsesvd.Rd0000644000176200001440000000426712711203104015340 0ustar liggesusers\name{sparsesvd} \alias{sparsesvd} \title{Singular Value Decomposition of a Sparse Matrix.} \usage{ sparsesvd(M, rank=0L, tol=1e-15, kappa=1e-6) } \arguments{ \item{M}{a sparse real matrix in \bold{Matrix} package format. The preferred format is a \code{\link[=dgCMatrix-class]{dgCMatrix}} and other storage formats will automatically be converted if possible. } \item{rank}{an integer specifying the desired number of singular components, i.e. the rank of the truncated SVD. Specify 0 to return all singular values of magnitude larger than \code{tol} (default). } \item{tol}{exclude singular values whose magnitude is smaller than \code{tol}} \item{kappa}{accuracy parameter \eqn{\kappa} of the SVD algorithm (with SVDLIBC default)} } \value{ The truncated SVD decomposition \deqn{ M_r = U_r D V_r^T } where \eqn{M_r} is the optimal rank \eqn{r} approximation of \eqn{M}. Note that \eqn{r} may be smaller than the requested number \code{rank} of singular components. The returned value is a list with components \item{d}{ a vector containing the first \eqn{r} singular values of \code{M} } \item{u}{ a column matrix of the first \eqn{r} left singular vectors of \code{M} } \item{v}{ a column matrix of the first \eqn{r} right singular vectors of \code{M} } } \description{ Compute the (usually truncated) singular value decomposition (SVD) of a sparse real matrix. This function is a shallow wrapper around the SVDLIBC implementation of Berry's (1992) single Lanczos algorithm. } \references{ \url{http://tedlab.mit.edu/~dr/SVDLIBC/} Berry, Michael~W. (1992). Large scale sparse singular value computations. \emph{International Journal of Supercomputer Applications}, \bold{6}, 13--49. } \seealso{ \code{\link{svd}}, \code{\link{sparseMatrix}} } \examples{ M <- rbind( c(20, 10, 15, 0, 2), c(10, 5, 8, 1, 0), c( 0, 1, 2, 6, 3), c( 1, 0, 0, 10, 5)) M <- Matrix::Matrix(M, sparse=TRUE) print(M) res <- sparsesvd(M, rank=2L) # compute first 2 singular components print(res, digits=3) M2 <- res$u \%*\% diag(res$d) \%*\% t(res$v) # rank-2 approximation print(M2, digits=1) print(as.matrix(M) - M2, digits=2) # approximation error } sparsesvd/LICENSE0000644000176200001440000000033512706741216013300 0ustar liggesusersYEAR: 2002 COPYRIGHT HOLDER: University of Tennessee Research Foundation ORGANIZATION: University of Tennessee YEAR: 2016 COPYRIGHT HOLDER: Stefan Evert ORGANIZATION: Friedrich-Alexander-Universitaet Erlangen-Nuernberg