lexRankr/ 0000755 0001762 0000144 00000000000 13443532523 012044 5 ustar ligges users lexRankr/inst/ 0000755 0001762 0000144 00000000000 13443530263 013020 5 ustar ligges users lexRankr/inst/doc/ 0000755 0001762 0000144 00000000000 13443530263 013565 5 ustar ligges users lexRankr/inst/doc/Analyzing_Twitter_with_LexRankr.html.asis 0000644 0001762 0000144 00000000213 13233415326 023723 0 ustar ligges users %\VignetteIndexEntry{Analyzing Twitter with LexRankr}
%\VignetteEngine{R.rsp::asis}
%\VignetteKeyword{twitter}
%\VignetteKeyword{lexrankr}
lexRankr/inst/doc/Analyzing_Twitter_with_LexRankr.html 0000644 0001762 0000144 00000053474 13443530263 023007 0 ustar ligges users
Packages Used
library(lexRankr)
library(tidyverse)
library(stringr)
library(httr)
library(jsonlite)
In this document we get tweets from Twitter using the Twitter API and then analyze the tweets using lexRankr in order to find a user’s most representative tweets. If you don’t care about interacting with the Twitter API, you can jump to the lexrank analysis.
Lexrank Analysis
We now have a dataframe that contains a column of tweets. This column of tweets will be the subject of the rest of the analysis. With the data in this format, we only need to call the bind_lexrank function to apply the lexrank algorithm to the tweets.
The function will add a column of lexrank scores. The higher the lexrank score, the more representative the tweet is of the tweets that we downloaded.
Note: typically one would parse documents into sentences before applying lexrank (see ?unnest_sentences);
however, we will equate tweets to sentences for this analysis.
tweets_df %>%
bind_lexrank(text, id, level="sentences") %>%
arrange(desc(lexrank)) %>%
head(n=5) %>%
select(text, lexrank) %>%
knitr::kable(caption = "Most Representative @realDonaldTrump Tweets")
Most Representative @realDonaldTrump Tweets
MAKE AMERICA GREAT AGAIN! |
0.0087551 |
Well, the New Year begins. We will, together, MAKE AMERICA GREAT AGAIN! |
0.0085258 |
HAPPY PRESIDENTS DAY - MAKE AMERICA GREAT AGAIN! |
0.0082361 |
Happy Thanksgiving to everyone. We will, together, MAKE AMERICA GREAT AGAIN! |
0.0060486 |
Hopefully, all supporters, and those who want to MAKE AMERICA GREAT AGAIN, will go to D.C. on January 20th. It will be a GREAT SHOW! |
0.0059713 |
lexRankr/tests/ 0000755 0001762 0000144 00000000000 13443530264 013206 5 ustar ligges users lexRankr/tests/testthat.R 0000644 0001762 0000144 00000000074 13177136432 015175 0 ustar ligges users library(testthat)
library(lexRankr)
test_check("lexRankr")
lexRankr/tests/testthat/ 0000755 0001762 0000144 00000000000 13443532523 015046 5 ustar ligges users lexRankr/tests/testthat/test-unnest_sentences.R 0000644 0001762 0000144 00000006437 13213603250 021532 0 ustar ligges users context("unnest_sentences")
# test output str --------------------------------------------------------
test_that("correct ouput class and str", {
  # Three documents; the expectations below require four output rows.
  docs <- data.frame(
    doc_id = 1:3,
    text = c(
      "Testing the system. Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    stringsAsFactors = FALSE
  )

  # Default call: the result is expected to carry doc_id, sent_id and the
  # new output column only.
  without_input <- unnest_sentences(docs, out, text)
  expect_equal(dim(without_input), c(4, 3))
  expect_true(is.data.frame(without_input))
  expect_equal(names(without_input), c("doc_id", "sent_id", "out"))

  # drop = FALSE: the original text column is expected to be retained.
  with_input <- unnest_sentences(docs, out, text, drop = FALSE)
  expect_equal(dim(with_input), c(4, 4))
  expect_equal(names(with_input), c("doc_id", "text", "sent_id", "out"))
})
# test bad input -------------------------------------------------------
test_that("test input checking", {
  docs <- data.frame(
    doc_id = 1:3,
    text = c(
      "Testing the system. Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    stringsAsFactors = FALSE
  )

  # Input column that is not present in the data
  expect_error(unnest_sentences(docs, out, fake))
  # No data frame at all
  expect_error(unnest_sentences(NULL, out, text))
  # drop must not be NULL
  expect_error(unnest_sentences(docs, out, text, drop = NULL))
  # doc_id column that is not present in the data
  expect_error(unnest_sentences(docs, out, text, doc_id = fake))
})
# test output val ------------------------------------------------------
test_that("output value", {
  raw_docs <- c(
    "Testing the system. Second sentence for you.",
    "System testing the tidy documents df.",
    "Documents will be parsed and lexranked."
  )
  # The four sentences expected in the output column, in row order.
  parsed <- c(
    "Testing the system.",
    "Second sentence for you.",
    "System testing the tidy documents df.",
    "Documents will be parsed and lexranked."
  )

  # Case 1: no doc_id argument supplied; one distinct id per input row.
  docs <- data.frame(doc_id = 1:3, text = raw_docs, stringsAsFactors = FALSE)
  actual <- unnest_sentences(docs, out, text)
  wanted <- data.frame(
    doc_id = c(1L, 1L, 2L, 3L),
    sent_id = c(1L, 2L, 1L, 1L),
    out = parsed,
    stringsAsFactors = FALSE
  )
  expect_equal(actual, wanted)

  # Case 2: explicit doc_id column with a repeated id; the expected sent_id
  # keeps counting (1, 2, 3) across the rows that share doc_id 1.
  docs <- data.frame(doc_id = c(1, 1, 3), text = raw_docs, stringsAsFactors = FALSE)
  actual <- unnest_sentences(docs, out, text, doc_id = doc_id)
  wanted <- data.frame(
    doc_id = c(1L, 1L, 1L, 3L),
    sent_id = c(1L, 2L, 3L, 1L),
    out = parsed,
    stringsAsFactors = FALSE
  )
  expect_equal(actual, wanted)
})
lexRankr/tests/testthat/test-lexRankFromSimil.R 0000644 0001762 0000144 00000006214 13213603250 021366 0 ustar ligges users context("lexRankFromSimil")
# test object out str and class ---------------------------------------
test_that("object out str and class", {
  docs <- c(
    "Testing 1, 2, 3.",
    "Is everything working as expected in my test?",
    "Is it working?"
  )

  # Build the pairwise similarity table that lexRankFromSimil consumes.
  token_df <- sentenceTokenParse(docs)$tokens
  simil_df <- sentenceSimil(
    sentenceId = token_df$sentenceId,
    token = token_df$token,
    docId = token_df$docId
  )

  ranked <- lexRankFromSimil(simil_df$sent1, simil_df$sent2, simil_df$similVal)

  expect_equal(class(ranked), "data.frame")
  expect_equal(names(ranked), c("sentenceId", "value"))
  expect_true(is.character(ranked$sentenceId))
  expect_true(is.numeric(ranked$value))
})
# test bad inputs ---------------------------------------
test_that("bad inputs", {
  docs <- c(
    "Testing 1, 2, 3.",
    "Is everything working as expected in my test?",
    "Is it working?"
  )
  token_df <- sentenceTokenParse(docs)$tokens
  simil_df <- sentenceSimil(
    sentenceId = token_df$sentenceId,
    token = token_df$token,
    docId = token_df$docId
  )
  first <- simil_df$sent1
  second <- simil_df$sent2
  simil <- simil_df$similVal

  # Invalid first sentence-id vector: NULL, then a numeric vector
  expect_error(lexRankFromSimil(NULL, second, simil))
  expect_error(lexRankFromSimil(c(1, 2), second, simil))
  # Similarity values given as character
  expect_error(lexRankFromSimil(first, second, c("a", "b", "c")))
  # NULL tuning parameters
  expect_error(lexRankFromSimil(first, second, simil, threshold = NULL))
  expect_error(lexRankFromSimil(first, second, simil, damping = NULL))
})
# test object out value
test_that("object out value", {
  docs <- c(
    "Testing 1, 2, 3.",
    "Is everything working as expected in my test?",
    "Is it working?"
  )
  token_df <- sentenceTokenParse(docs)$tokens
  simil_df <- sentenceSimil(
    sentenceId = token_df$sentenceId,
    token = token_df$token,
    docId = token_df$docId
  )

  # Default arguments
  ranked <- lexRankFromSimil(simil_df$sent1, simil_df$sent2, simil_df$similVal)
  ranked$value <- round(ranked$value, 5)
  wanted <- data.frame(
    sentenceId = c("1_1", "2_1", "3_1"),
    value = c(0.25676, 0.48649, 0.25676),
    stringsAsFactors = FALSE
  )
  expect_identical(ranked, wanted)

  # continuous = TRUE; the expected values are the same as for the default call
  ranked <- lexRankFromSimil(simil_df$sent1, simil_df$sent2, simil_df$similVal,
                             continuous = TRUE)
  ranked$value <- round(ranked$value, 5)
  wanted <- data.frame(
    sentenceId = c("1_1", "2_1", "3_1"),
    value = c(0.25676, 0.48649, 0.25676),
    stringsAsFactors = FALSE
  )
  expect_identical(ranked, wanted)

  # usePageRank = FALSE; expected values are whole numbers and the expected
  # row order differs from the calls above
  ranked <- lexRankFromSimil(simil_df$sent1, simil_df$sent2, simil_df$similVal,
                             usePageRank = FALSE)
  ranked$value <- round(ranked$value, 5)
  wanted <- data.frame(
    sentenceId = c("2_1", "1_1", "3_1"),
    value = c(2, 1, 1),
    stringsAsFactors = FALSE
  )
  expect_identical(ranked, wanted)
})
lexRankr/tests/testthat/test-idfCosine.R 0000644 0001762 0000144 00000002664 13177136432 020064 0 ustar ligges users context("lexRankr:::idfCosineSimil")
# test bad inputs ---------------------------------------
test_that("bad inputs to idf cosine", {
  # NULL instead of a matrix
  expect_error(lexRankr:::idfCosineSimil(NULL))

  # Character matrix instead of a numeric one
  char_mat <- matrix(c("a", "b", "c", "d"), nrow = 2)
  expect_error(lexRankr:::idfCosineSimil(char_mat))
})
# test object out str and class ---------------------------------------
test_that("object out str and class", {
  # 3 x 3 matrix of random strictly positive values
  rand_mat <- matrix(runif(9, min = .01, max = 1), nrow = 3)
  simil <- lexRankr:::idfCosineSimil(rand_mat)

  # Three rows are expected to give three pairwise values
  expect_equal(class(simil), "numeric")
  expect_equal(length(simil), 3)
})
# test object out value
test_that("object out value", {
  # Identity matrix: every pair of rows is expected to score 0
  mat <- matrix(c(1, 0, 0, 0, 1, 0, 0, 0, 1), nrow = 3)
  expect_equal(lexRankr:::idfCosineSimil(mat), c(0, 0, 0))

  # All-zero matrix: the expected result is NaN for every pair
  mat <- matrix(c(0, 0, 0, 0, 0, 0, 0, 0, 0), nrow = 3)
  expect_equal(lexRankr:::idfCosineSimil(mat), c(NaN, NaN, NaN))

  # All-ones matrix: every pair of rows is expected to score 1
  mat <- matrix(c(1, 1, 1, 1, 1, 1, 1, 1, 1), nrow = 3)
  expect_equal(lexRankr:::idfCosineSimil(mat), c(1, 1, 1))

  # Random matrix: compare against a pure R cosine similarity
  mat <- matrix(runif(9, min = .01, max = 1), nrow = 3)
  compiled <- round(lexRankr:::idfCosineSimil(mat), 10)

  # Reference implementation, rounded to the same 10 digits
  cosine_ref <- function(a, b) {
    round(sum(a * b) / (sqrt(sum(a^2)) * sqrt(sum(b^2))), 10)
  }

  # Row pairs in the order (1,2), (1,3), (2,3)
  reference <- c(
    cosine_ref(mat[1, ], mat[2, ]),
    cosine_ref(mat[1, ], mat[3, ]),
    cosine_ref(mat[2, ], mat[3, ])
  )
  expect_equal(compiled, reference)
})
lexRankr/tests/testthat/test-sentenceTokenParse.R 0000644 0001762 0000144 00000003050 13213610270 021733 0 ustar ligges users context("sentenceTokenParse")
# test output classes ----------------------------------------
test_that("object class and structure check", {
  testDocs <- c("12345", "Testing 1, 2, 3.", "Is everything working as expected Mr. Wickham?")
  testResult <- sentenceTokenParse(testDocs)

  # The result is a plain list whose elements are all data frames.
  expect_equal(class(testResult), "list")
  expect_equal(unique(vapply(testResult, class, character(1))), "data.frame")

  # The tokens data frame has exactly these three columns.
  expect_equal(names(testResult$tokens), c("docId", "sentenceId", "token"))
  expect_true(is.numeric(testResult$tokens$docId))
  expect_true(is.character(testResult$tokens$sentenceId))
  # The tokens table has no `sentence` column. `$sentence` only worked because
  # `$` partially matches it to `sentenceId`, which left the `token` column
  # unchecked. Check `token` explicitly.
  expect_true(is.character(testResult$tokens$token))
})
# test output value -------------------------------------------
test_that("All clean options TRUE", {
  docs <- c("Testing 1, 2, 3.", "Is everything working as expected Mr. Wickham?")

  parsed <- sentenceTokenParse(
    docs,
    docId = "create",
    removePunc = TRUE,
    removeNum = TRUE,
    toLower = TRUE,
    stemWords = TRUE,
    rmStopWords = TRUE
  )

  # The sentences element should match what sentenceParse produces directly.
  wanted_sentences <- sentenceParse(docs)

  # The token column should match the flattened tokenize() output with any
  # NA entries removed.
  flat_tokens <- unlist(lexRankr::tokenize(docs))
  wanted_tokens <- flat_tokens[!is.na(flat_tokens)]

  expect_equal(parsed$sentences, wanted_sentences)
  expect_equal(parsed$tokens$token, wanted_tokens)
  expect_equal(class(parsed), "list")
})
lexRankr/tests/testthat/test-sentenceSimil.R 0000644 0001762 0000144 00000010045 13213603250 020737 0 ustar ligges users context("sentenceSimil")
# test object out str and class ---------------------------------------
test_that("testing result str and class", {
  testDocs <- c("Testing 1, 2, 3.",
                "Is everything working as expected in my test?",
                "Is it working?")
  tokenDf <- sentenceTokenParse(testDocs)$tokens

  # The output structure must be the same for both values of sentencesAsDocs.
  # The original repeated the sentencesAsDocs = TRUE stanza verbatim; each
  # setting is now checked once.
  for (asDocs in c(FALSE, TRUE)) {
    testResult <- sentenceSimil(sentenceId = tokenDf$sentenceId,
                                token = tokenDf$token,
                                docId = tokenDf$docId,
                                sentencesAsDocs = asDocs)

    expect_equal(class(testResult), "data.frame")
    expect_equal(names(testResult), c("sent1", "sent2", "similVal"))
    expect_true(is.character(testResult$sent1))
    expect_true(is.character(testResult$sent2))
    expect_true(is.numeric(testResult$similVal))
  }
})
test_that("bad input", {
  # Argument vectors of differing lengths
  expect_error(
    sentenceSimil(
      sentenceId = c("1_1"),
      token = c("word", "word2"),
      docId = c(1, 2)
    )
  )

  # Numeric tokens
  expect_error(
    sentenceSimil(
      sentenceId = c("1_1", "2_1"),
      token = c(1, 2),
      docId = c(1, 2)
    )
  )

  #was relevant when using idf calc w/o bounding at 1
  # testDocs <- c("test","test")
  # tokenDf <- sentenceTokenParse(testDocs)$tokens
  #
  # expect_error(sentenceSimil(sentenceId = tokenDf$sentenceId,
  #                            token = tokenDf$token,
  #                            docId = tokenDf$docId))

  # Documents made up of a single digit each, parsed with default options
  digit_docs <- c("1", "2")
  token_df <- sentenceTokenParse(digit_docs)$tokens
  expect_error(
    sentenceSimil(
      sentenceId = token_df$sentenceId,
      token = token_df$token,
      docId = token_df$docId
    )
  )
})
# test output value ---------------------------------------
test_that("output value check", {
  docs <- c(
    "Testing 1, 2, 3.",
    "Is everything working as expected in my test?",
    "Is it working?"
  )
  token_df <- sentenceTokenParse(docs)$tokens

  # The same similarity table is expected with sentencesAsDocs FALSE and TRUE,
  # checked in that order.
  for (as_docs in c(FALSE, TRUE)) {
    simil <- sentenceSimil(
      sentenceId = token_df$sentenceId,
      token = token_df$token,
      docId = token_df$docId,
      sentencesAsDocs = as_docs
    )
    simil$similVal <- round(simil$similVal, 5)

    wanted <- data.frame(
      sent1 = c("1_1", "1_1", "2_1"),
      sent2 = c("2_1", "3_1", "3_1"),
      similVal = c(0.48624, 0, 0.48624),
      stringsAsFactors = FALSE
    )
    expect_equal(simil, wanted)
  }
})
lexRankr/tests/testthat/test-unnest_sentences_.R 0000644 0001762 0000144 00000006637 13213603250 021673 0 ustar ligges users context("unnest_sentences_")
# test output str --------------------------------------------------------
test_that("correct ouput class and str", {
  docs <- data.frame(
    doc_id = 1:3,
    text = c(
      "Testing the system. Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    stringsAsFactors = FALSE
  )

  # Default call: doc_id, sent_id and the new output column are expected.
  without_input <- unnest_sentences_(docs, "out", "text")
  expect_equal(dim(without_input), c(4, 3))
  expect_true(is.data.frame(without_input))
  expect_equal(names(without_input), c("doc_id", "sent_id", "out"))

  # drop = FALSE: the original text column is expected to be retained.
  with_input <- unnest_sentences_(docs, "out", "text", drop = FALSE)
  expect_equal(dim(with_input), c(4, 4))
  expect_equal(names(with_input), c("doc_id", "text", "sent_id", "out"))
})
# test bad input -------------------------------------------------------
test_that("test input checking", {
  df <- data.frame(doc_id = 1:3,
                   text = c("Testing the system. Second sentence for you.",
                            "System testing the tidy documents df.",
                            "Documents will be parsed and lexranked."),
                   stringsAsFactors = FALSE)

  # Unknown input column
  expect_error(unnest_sentences_(df, "out", "fake"))
  # Missing data frame
  expect_error(unnest_sentences_(NULL, "out", "text"))
  # drop must not be NULL
  expect_error(unnest_sentences_(df, "out", "text", drop = NULL))
  # Unknown doc_id column. The original called the NSE function
  # unnest_sentences() with string arguments, so the standard-evaluation
  # function under test never had its doc_id validation exercised.
  expect_error(unnest_sentences_(df, "out", "text", doc_id = "fake"))
  # More than one output_id name is expected to raise a warning
  expect_warning(unnest_sentences_(df, "out", "text", output_id = c("test", "test2")))
})
# test output val ------------------------------------------------------
test_that("output value", {
  raw_docs <- c(
    "Testing the system. Second sentence for you.",
    "System testing the tidy documents df.",
    "Documents will be parsed and lexranked."
  )
  # The four sentences expected in the output column, in row order.
  parsed <- c(
    "Testing the system.",
    "Second sentence for you.",
    "System testing the tidy documents df.",
    "Documents will be parsed and lexranked."
  )

  # Case 1: no doc_id argument supplied.
  docs <- data.frame(doc_id = 1:3, text = raw_docs, stringsAsFactors = FALSE)
  actual <- unnest_sentences_(docs, "out", "text")
  wanted <- data.frame(
    doc_id = c(1L, 1L, 2L, 3L),
    sent_id = c(1L, 2L, 1L, 1L),
    out = parsed,
    stringsAsFactors = FALSE
  )
  expect_equal(actual, wanted)

  # Case 2: explicit doc_id column with a repeated id; the expected sent_id
  # keeps counting (1, 2, 3) across the rows that share doc_id 1.
  docs <- data.frame(doc_id = c(1, 1, 3), text = raw_docs, stringsAsFactors = FALSE)
  actual <- unnest_sentences_(docs, "out", "text", doc_id = "doc_id")
  wanted <- data.frame(
    doc_id = c(1L, 1L, 1L, 3L),
    sent_id = c(1L, 2L, 3L, 1L),
    out = parsed,
    stringsAsFactors = FALSE
  )
  expect_equal(actual, wanted)
})
lexRankr/tests/testthat/test-tokenize.R 0000644 0001762 0000144 00000011637 13177136432 020011 0 ustar ligges users context("tokenize")
# test tokenize output classes ----------------------------------------
# The original description, "All clean options TRUE", duplicated the name of
# the option test further down and did not describe this check. Renamed so
# that failure reports identify the right test.
test_that("tokenize output class and structure check", {
  testDocs <- c("12345", "Testing 1, 2, 3.", "Is everything working as expected Mr. Wickham?")
  testResult <- tokenize(testDocs)

  # A list with one character vector per input document
  expect_equal(class(testResult), "list")
  expect_equal(unique(vapply(testResult, class, character(1))), "character")
})
# test bad input -------------------------------------------------------
test_that("test input checking", {
  # Invalid text argument: NULL, then a data frame
  expect_error(tokenize(NULL))
  expect_error(tokenize(data.frame(badInput = "test")))

  # Each cleaning flag set to NULL in turn
  expect_error(tokenize("test", removePunc = NULL))
  expect_error(tokenize("test", removeNum = NULL))
  expect_error(tokenize("test", toLower = NULL))
  expect_error(tokenize("test", stemWords = NULL))
  expect_error(tokenize("test", rmStopWords = NULL))
})
# test tokenize and arg option variations ------------------------------
test_that("All clean options TRUE", {
  docs <- c("Testing 1, 2, 3.", "Is everything working as expected Mr. Wickham?")

  tokens <- tokenize(
    docs,
    removePunc = TRUE,
    removeNum = TRUE,
    toLower = TRUE,
    stemWords = TRUE,
    rmStopWords = TRUE
  )

  # Expected: lower-cased stems with no punctuation, digits or stop words
  wanted <- list("test", c("work", "expect", "mr", "wickham"))

  expect_equal(tokens, wanted)
  expect_equal(class(tokens), "list")
})
test_that("All clean options FALSE", {
  docs <- c("Testing 1, 2, 3", "Is everything working as expected Mr. Wickham?")

  tokens <- tokenize(
    docs,
    removePunc = FALSE,
    removeNum = FALSE,
    toLower = FALSE,
    stemWords = FALSE,
    rmStopWords = FALSE
  )

  # Expected: words, digits and punctuation marks as separate tokens, with
  # the original casing and spelling
  wanted <- list(
    c("Testing", "1", ",", "2", ",", "3"),
    c("Is", "everything", "working", "as", "expected", "Mr", ".", "Wickham", "?")
  )

  expect_equal(tokens, wanted)
  expect_equal(class(tokens), "list")
})
test_that("Single option tests: removePunc = FALSE", {
  docs <- c("Testing 1, 2, 3.", "Is everything working as expected Mr. Wickham?")

  tokens <- tokenize(
    docs,
    removePunc = FALSE,
    removeNum = TRUE,
    toLower = TRUE,
    stemWords = TRUE,
    rmStopWords = TRUE
  )

  # Expected: punctuation marks kept as tokens, everything else cleaned
  wanted <- list(
    c("test", ",", ",", "."),
    c("work", "expect", "mr", ".", "wickham", "?")
  )

  expect_equal(tokens, wanted)
  expect_equal(class(tokens), "list")
})
test_that("Single option tests: removeNum = FALSE", {
  docs <- c("Testing 1, 2, 3", "Is everything working as expected Mr. Wickham?")

  tokens <- tokenize(
    docs,
    removePunc = TRUE,
    removeNum = FALSE,
    toLower = TRUE,
    stemWords = TRUE,
    rmStopWords = TRUE
  )

  # Expected: digits kept as tokens, everything else cleaned
  wanted <- list(
    c("test", "1", "2", "3"),
    c("work", "expect", "mr", "wickham")
  )

  expect_equal(tokens, wanted)
  expect_equal(class(tokens), "list")
})
test_that("Single option tests: toLower = FALSE", {
  docs <- c("Testing 1, 2, 3", "Is everything working as expected Mr. Wickham?")

  tokens <- tokenize(
    docs,
    removePunc = TRUE,
    removeNum = TRUE,
    toLower = FALSE,
    stemWords = TRUE,
    rmStopWords = TRUE
  )

  # Expected: original capitalisation kept, everything else cleaned
  wanted <- list(
    c("Test"),
    c("work", "expect", "Mr", "Wickham")
  )

  expect_equal(tokens, wanted)
  expect_equal(class(tokens), "list")
})
test_that("Single option tests: stemWords = FALSE", {
  docs <- c("Testing 1, 2, 3", "Is everything working as expected Mr. Wickham?")

  tokens <- tokenize(
    docs,
    removePunc = TRUE,
    removeNum = TRUE,
    toLower = TRUE,
    stemWords = FALSE,
    rmStopWords = TRUE
  )

  # Expected: full word forms kept, everything else cleaned
  wanted <- list(
    c("testing"),
    c("working", "expected", "mr", "wickham")
  )

  expect_equal(tokens, wanted)
  expect_equal(class(tokens), "list")
})
test_that("Single option tests: rmStopWords = FALSE", {
  docs <- c("Testing 1, 2, 3", "Is everything working as expected Mr. Wickham?")

  tokens <- tokenize(
    docs,
    removePunc = TRUE,
    removeNum = TRUE,
    toLower = TRUE,
    stemWords = TRUE,
    rmStopWords = FALSE
  )

  # Expected: "is", "everything" and "as" are kept and appear in stemmed form
  wanted <- list(
    c("test"),
    c("i", "everyth", "work", "a", "expect", "mr", "wickham")
  )

  expect_equal(tokens, wanted)
  expect_equal(class(tokens), "list")
})
lexRankr/tests/testthat/test-sentenceParse.R 0000644 0001762 0000144 00000003242 13177136432 020751 0 ustar ligges users context("sentenceParse")
# test sentence object structure-----------------------------------------------
test_that("sentenceParse output class and structure check", {
  doc <- "Testing one, two, three. Is everything working as expected Mr. Wickham?"
  parsed <- sentenceParse(doc)

  # A data frame with exactly these three columns
  expect_equal(class(parsed), "data.frame")
  expect_equal(names(parsed), c("docId", "sentenceId", "sentence"))

  # Column types
  expect_true(is.numeric(parsed$docId))
  expect_true(is.character(parsed$sentenceId))
  expect_true(is.character(parsed$sentence))
})
# test bad input -------------------------------------------------------
test_that("test input checking", {
  # Invalid text argument: NULL, then a data frame
  expect_error(sentenceParse(NULL))
  expect_error(sentenceParse(data.frame(badInput = "test")))

  # docId whose length does not match the number of documents
  expect_error(sentenceParse("test", docId = c("fake", "fake2")))
  expect_error(sentenceParse(c("test", "test2"), docId = "fake"))

  # docId set to NULL
  expect_error(sentenceParse(c("test", "test2"), docId = NULL))
})
# test sentence output value -----------------------------------------------
test_that("Example doc parses sentences as expected", {
  testDoc <- "Testing one, two, three. Is everything working as expected Mr. Wickham?"
  testResult <- sentenceParse(testDoc)

  # "Mr." must not end a sentence: exactly two sentences are expected.
  expectedResult <- data.frame(docId = c(1L, 1L),
                               sentenceId = c("1_1", "1_2"),
                               sentence = c("Testing one, two, three.",
                                            "Is everything working as expected Mr. Wickham?"),
                               stringsAsFactors = FALSE)

  expect_equal(testResult, expectedResult)
  expect_equal(class(testResult), "data.frame")
  # A dangling bare `expect_equal` symbol with no call was removed here; it
  # evaluated to the function object and asserted nothing.
}) lexRankr/tests/testthat/test-lexRank.R 0000644 0001762 0000144 00000003024 13213603250 017540 0 ustar ligges users context("lexRank")
# test object out str and class ---------------------------------------
test_that("object out str and class", {
  docs <- c(
    "Testing 1, 2, 3.",
    "Is everything working as expected in my test?",
    "Is it working?"
  )
  ranked <- lexRank(docs, Verbose = FALSE)

  # A data frame with exactly these four columns
  expect_equal(class(ranked), "data.frame")
  expect_equal(names(ranked), c("docId", "sentenceId", "sentence", "value"))

  # Column types
  expect_true(is.character(ranked$sentenceId))
  expect_true(is.character(ranked$sentence))
  expect_true(is.numeric(ranked$value))
})
# test bad inputs ---------------------------------------
test_that("bad inputs", {
  # A logical value instead of text
  expect_error(lexRank(FALSE, Verbose = FALSE))
  # NULL instead of text
  expect_error(lexRank(NULL, Verbose = FALSE))
})
# test object out value
test_that("object out value", {
  docs <- c(
    "Testing 1, 2, 3.",
    "Is everything working as expected in my test?",
    "Is it working?"
  )

  ranked <- lexRank(docs, Verbose = FALSE)
  ranked$value <- round(ranked$value, 5)

  # The expected rows are listed with the highest value first.
  wanted <- data.frame(
    docId = c(2L, 1L, 3L),
    sentenceId = c("2_1", "1_1", "3_1"),
    sentence = c(
      "Is everything working as expected in my test?",
      "Testing 1, 2, 3.",
      "Is it working?"
    ),
    value = c(0.48649, 0.25676, 0.25676),
    stringsAsFactors = FALSE
  )

  expect_identical(ranked, wanted)
})
lexRankr/tests/testthat/test-bind_lexrank_.R 0000644 0001762 0000144 00000021755 13213611117 020747 0 ustar ligges users context("bind_lexrank_")
# test output str --------------------------------------------------------
test_that("correct ouput class and str", {
  docs <- data.frame(
    doc_id = 1:3,
    text = c(
      "Testing the system. Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    stringsAsFactors = FALSE
  )

  # Sentence level, input column dropped by unnest_sentences
  ranked <- unnest_sentences(docs, sents, text)
  ranked <- bind_lexrank_(ranked, "sents", "doc_id", level = 'sentences')
  expect_equal(dim(ranked), c(4, 4))
  expect_true(is.data.frame(ranked))
  expect_equal(names(ranked), c("doc_id", "sent_id", "sents", "lexrank"))

  # Sentence level, input column kept by unnest_sentences
  ranked <- unnest_sentences(docs, sents, text, drop = FALSE)
  ranked <- bind_lexrank_(ranked, "sents", "doc_id", level = 'sentences')
  expect_equal(dim(ranked), c(4, 5))
  expect_equal(names(ranked), c("doc_id", "text", "sent_id", "sents", "lexrank"))

  # Token level: one row per token (19 rows), with the owning sentence text
  # repeated on each of its token rows.
  token_df <- data.frame(
    doc_id = rep(1:3, times = c(7L, 6L, 6L)),
    sent_id = rep(c(1L, 2L, 1L), times = c(3L, 4L, 12L)),
    sents = rep(
      c(
        "Testing the system.",
        "Second sentence for you.",
        "System testing the tidy documents df.",
        "Documents will be parsed and lexranked."
      ),
      times = c(3L, 4L, 6L, 6L)
    ),
    tokens = c(
      "testing", "the", "system",
      "second", "sentence", "for", "you",
      "system", "testing", "the", "tidy", "documents", "df",
      "documents", "will", "be", "parsed", "and", "lexranked"
    ),
    stringsAsFactors = FALSE
  )

  ranked <- bind_lexrank_(token_df, "tokens", "doc_id", "sent_id", "tokens")
  expect_equal(dim(ranked), c(19, 5))
  expect_equal(names(ranked), c("doc_id", "sent_id", "sents", "tokens", "lexrank"))
})
# test bad input -------------------------------------------------------
test_that("test input checking", {
  docs <- data.frame(
    doc_id = 1:3,
    text = c(
      "Testing the system. Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    stringsAsFactors = FALSE
  )
  sentence_df <- unnest_sentences(docs, sents, text)

  # Unknown doc_id column, missing data frame, unknown level
  expect_error(bind_lexrank_(sentence_df, "sents", "fake"))
  expect_error(bind_lexrank_(NULL, "sents", "doc_id"))
  expect_error(bind_lexrank_(sentence_df, "sents", "doc_id", level = "fake"))
  # expect_warning(bind_lexrank_(df, "sents", "doc_id", level=c("sentences","tokens")))

  # Token-level fixture: one row per token (19 rows).
  token_df <- data.frame(
    doc_id = rep(1:3, times = c(7L, 6L, 6L)),
    sent_id = rep(c(1L, 2L, 1L), times = c(3L, 4L, 12L)),
    sents = rep(
      c(
        "Testing the system.",
        "Second sentence for you.",
        "System testing the tidy documents df.",
        "Documents will be parsed and lexranked."
      ),
      times = c(3L, 4L, 6L, 6L)
    ),
    tokens = c(
      "testing", "the", "system",
      "second", "sentence", "for", "you",
      "system", "testing", "the", "tidy", "documents", "df",
      "documents", "will", "be", "parsed", "and", "lexranked"
    ),
    stringsAsFactors = FALSE
  )

  # Token level with an unknown sentence-id column, then with none supplied
  expect_error(bind_lexrank_(token_df, "tokens", "doc_id", "fake", level = "tokens"))
  expect_error(bind_lexrank_(token_df, "tokens", "doc_id", level = "tokens"))
  # expect_warning(bind_lexrank_(df, "tokens", "doc_id", "sent_id", level=c("tokens","sentences")))
})
# test output val ------------------------------------------------------
test_that("output value", {
  docs <- data.frame(
    doc_id = 1:3,
    text = c(
      "Testing the system. Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    stringsAsFactors = FALSE
  )
  sentence_df <- unnest_sentences(docs, sents, text)

  # Sentence-level input: two of the four sentences are expected to score
  # 0.5 and the other two NA.
  actual <- bind_lexrank_(sentence_df, "sents", "doc_id", level = "sentences")
  wanted <- data.frame(
    doc_id = c(1L, 1L, 2L, 3L),
    sent_id = c(1L, 2L, 1L, 1L),
    sents = c(
      "Testing the system.",
      "Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    lexrank = c(0.5, NA, 0.5, NA),
    stringsAsFactors = FALSE
  )
  expect_equal(actual, wanted)

  # Token-per-row fixture (19 rows); the identifier and text columns are
  # shared by the input and the expected output.
  row_doc_id <- rep(1:3, times = c(7L, 6L, 6L))
  row_sent_id <- rep(c(1L, 2L, 1L), times = c(3L, 4L, 12L))
  row_sents <- rep(
    c(
      "Testing the system.",
      "Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    times = c(3L, 4L, 6L, 6L)
  )
  row_tokens <- c(
    "testing", "the", "system",
    "second", "sentence", "for", "you",
    "system", "testing", "the", "tidy", "documents", "df",
    "documents", "will", "be", "parsed", "and", "lexranked"
  )

  token_df <- data.frame(
    doc_id = row_doc_id,
    sent_id = row_sent_id,
    sents = row_sents,
    tokens = row_tokens,
    stringsAsFactors = FALSE
  )

  # Note the call ranks the tokens column with level = "sentences".
  actual <- bind_lexrank_(token_df, "tokens", "doc_id", "sent_id", level = "sentences")
  actual$lexrank <- round(actual$lexrank, 5)

  wanted <- data.frame(
    doc_id = row_doc_id,
    sent_id = row_sent_id,
    sents = row_sents,
    tokens = row_tokens,
    lexrank = c(0.16667, NA, 0.16667, NA, NA, NA, NA, 0.16667, 0.16667, NA,
                NA, 0.16667, NA, 0.16667, NA, NA, NA, NA, NA),
    stringsAsFactors = FALSE
  )
  expect_equal(actual, wanted)
})
lexRankr/tests/testthat/test-bind_lexrank.R 0000644 0001762 0000144 00000021647 13213610767 020622 0 ustar ligges users context("bind_lexrank")
# test output str --------------------------------------------------------
test_that("correct ouput class and str", {
  docs <- data.frame(
    doc_id = 1:3,
    text = c(
      "Testing the system. Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    stringsAsFactors = FALSE
  )

  # Sentence level, input column dropped by unnest_sentences
  ranked <- unnest_sentences(docs, sents, text)
  ranked <- bind_lexrank(ranked, sents, doc_id, level = 'sentences')
  expect_equal(dim(ranked), c(4, 4))
  expect_true(is.data.frame(ranked))
  expect_equal(names(ranked), c("doc_id", "sent_id", "sents", "lexrank"))

  # Sentence level, input column kept by unnest_sentences
  ranked <- unnest_sentences(docs, sents, text, drop = FALSE)
  ranked <- bind_lexrank(ranked, sents, doc_id, level = 'sentences')
  expect_equal(dim(ranked), c(4, 5))
  expect_equal(names(ranked), c("doc_id", "text", "sent_id", "sents", "lexrank"))

  # Token level: one row per token (19 rows), with the owning sentence text
  # repeated on each of its token rows.
  token_df <- data.frame(
    doc_id = rep(1:3, times = c(7L, 6L, 6L)),
    sent_id = rep(c(1L, 2L, 1L), times = c(3L, 4L, 12L)),
    sents = rep(
      c(
        "Testing the system.",
        "Second sentence for you.",
        "System testing the tidy documents df.",
        "Documents will be parsed and lexranked."
      ),
      times = c(3L, 4L, 6L, 6L)
    ),
    tokens = c(
      "testing", "the", "system",
      "second", "sentence", "for", "you",
      "system", "testing", "the", "tidy", "documents", "df",
      "documents", "will", "be", "parsed", "and", "lexranked"
    ),
    stringsAsFactors = FALSE
  )

  ranked <- bind_lexrank(token_df, tokens, doc_id, sent_id, "tokens")
  expect_equal(dim(ranked), c(19, 5))
  expect_equal(names(ranked), c("doc_id", "sent_id", "sents", "tokens", "lexrank"))
})
# test bad input -------------------------------------------------------
test_that("test input checking", {
  # Sentence-level fixture built from three raw documents.
  docs <- data.frame(
    doc_id = 1:3,
    text = c(
      "Testing the system. Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    stringsAsFactors = FALSE
  )
  sent_tbl <- unnest_sentences(docs, sents, text)

  # A doc_id column that does not exist in the table must be rejected.
  expect_error(bind_lexrank(sent_tbl, sents, fake))
  # A NULL table must be rejected.
  expect_error(bind_lexrank(NULL, sents, doc_id))
  # A level other than the supported ones must be rejected.
  expect_error(bind_lexrank(sent_tbl, sents, doc_id, level = "fake"))
  # Disabled check, kept for reference:
  # expect_warning(bind_lexrank(df, sents, doc_id, level=c("sentences","tokens")))

  # Token-level fixture: one row per token (19 rows), each row repeating the
  # ids and the sentence the token belongs to.
  sentence_text <- c(
    "Testing the system.",
    "Second sentence for you.",
    "System testing the tidy documents df.",
    "Documents will be parsed and lexranked."
  )
  token_tbl <- data.frame(
    doc_id = rep(1:3, times = c(7L, 6L, 6L)),
    sent_id = rep(c(1L, 2L, 1L), times = c(3L, 4L, 12L)),
    sents = rep(sentence_text, times = c(3L, 4L, 6L, 6L)),
    tokens = c(
      "testing", "the", "system",
      "second", "sentence", "for", "you",
      "system", "testing", "the", "tidy", "documents", "df",
      "documents", "will", "be", "parsed", "and", "lexranked"
    ),
    stringsAsFactors = FALSE
  )

  # At token level, a sent_id column that does not exist must be rejected ...
  expect_error(bind_lexrank(token_tbl, tokens, doc_id, fake, level = "tokens"))
  # ... and so must a call that omits sent_id altogether.
  expect_error(bind_lexrank(token_tbl, tokens, doc_id, level = "tokens"))
  # Disabled check, kept for reference:
  # expect_warning(bind_lexrank(df, tokens, doc_id, sent_id, level=c("tokens","sentences")))
})
# test output val ------------------------------------------------------
test_that("output value", {
  # --- sentence column ranked at sentence level ---------------------------
  docs <- data.frame(
    doc_id = 1:3,
    text = c(
      "Testing the system. Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    stringsAsFactors = FALSE
  )
  sent_tbl <- unnest_sentences(docs, sents, text)
  actual <- bind_lexrank(sent_tbl, sents, doc_id, level = "sentences")

  # Expected: four sentence rows; two of them receive a score of 0.5 and the
  # other two are NA.
  expected <- data.frame(
    doc_id = c(1L, 1L, 2L, 3L),
    sent_id = c(1L, 2L, 1L, 1L),
    sents = c(
      "Testing the system.",
      "Second sentence for you.",
      "System testing the tidy documents df.",
      "Documents will be parsed and lexranked."
    ),
    lexrank = c(0.5, NA, 0.5, NA),
    stringsAsFactors = FALSE
  )
  expect_equal(actual, expected)

  # --- token column ranked with level = "sentences" -----------------------
  # Token fixture: one row per token (19 rows), each row repeating the ids
  # and the sentence the token belongs to.
  sentence_text <- c(
    "Testing the system.",
    "Second sentence for you.",
    "System testing the tidy documents df.",
    "Documents will be parsed and lexranked."
  )
  token_tbl <- data.frame(
    doc_id = rep(1:3, times = c(7L, 6L, 6L)),
    sent_id = rep(c(1L, 2L, 1L), times = c(3L, 4L, 12L)),
    sents = rep(sentence_text, times = c(3L, 4L, 6L, 6L)),
    tokens = c(
      "testing", "the", "system",
      "second", "sentence", "for", "you",
      "system", "testing", "the", "tidy", "documents", "df",
      "documents", "will", "be", "parsed", "and", "lexranked"
    ),
    stringsAsFactors = FALSE
  )

  # Note: sent_id is supplied but level is "sentences". Scores are rounded to
  # five decimals before comparison so they can be matched against literals.
  actual <- bind_lexrank(token_tbl, tokens, doc_id, sent_id, level = "sentences")
  actual$lexrank <- round(actual$lexrank, 5)

  # Expected: the input columns unchanged plus a score column in which only
  # rows 1, 3, 8, 9, 12 and 14 are scored (0.16667 each); the rest are NA.
  expected_scores <- rep(NA_real_, 19L)
  expected_scores[c(1L, 3L, 8L, 9L, 12L, 14L)] <- 0.16667
  expected <- token_tbl
  expected$lexrank <- expected_scores

  expect_equal(actual, expected)
})
lexRankr/src/ 0000755 0001762 0000144 00000000000 13443530264 012633 5 ustar ligges users lexRankr/src/idfCosineSimil.cpp 0000644 0001762 0000144 00000001473 13443530264 016245 0 ustar ligges users #include