rvest/ 0000755 0001762 0000144 00000000000 13561372712 011424 5 ustar ligges users rvest/NAMESPACE 0000644 0001762 0000144 00000002530 13561331745 012644 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method(cookies,session)
S3method(format,button)
S3method(format,input)
S3method(format,select)
S3method(format,textarea)
S3method(headers,session)
S3method(html_form,session)
S3method(html_form,xml_document)
S3method(html_form,xml_node)
S3method(html_form,xml_nodeset)
S3method(html_node,default)
S3method(html_node,session)
S3method(html_nodes,default)
S3method(html_nodes,session)
S3method(html_table,session)
S3method(html_table,xml_document)
S3method(html_table,xml_node)
S3method(html_table,xml_nodeset)
S3method(print,fields)
S3method(print,form)
S3method(print,history)
S3method(print,session)
S3method(read_xml,session)
S3method(status_code,session)
export("%>%")
export(back)
export(follow_link)
export(google_form)
export(guess_encoding)
export(html)
export(html_attr)
export(html_attrs)
export(html_children)
export(html_form)
export(html_name)
export(html_node)
export(html_nodes)
export(html_session)
export(html_table)
export(html_tag)
export(html_text)
export(is.session)
export(jump_to)
export(minimal_html)
export(pluck)
export(repair_encoding)
export(session_history)
export(set_values)
export(submit_form)
export(xml)
export(xml_node)
export(xml_nodes)
export(xml_tag)
importFrom(httr,cookies)
importFrom(httr,headers)
importFrom(httr,status_code)
importFrom(magrittr,"%>%")
importFrom(xml2,read_xml)
rvest/demo/ 0000755 0001762 0000144 00000000000 12424711116 012340 5 ustar ligges users rvest/demo/united.R 0000644 0001762 0000144 00000000703 12730351624 013757 0 ustar ligges users # Scrape miles from united site
library(rvest)

# The account password is deliberately not stored in this demo. If the caller
# has already defined a character `password`, it is used unchanged. Otherwise
# fall back to the UNITED_PASSWORD environment variable and stop early with a
# clear message, instead of failing later with "object 'password' not found".
if (!exists("password", mode = "character")) {
  password <- Sys.getenv("UNITED_PASSWORD")
  if (!nzchar(password)) {
    stop(
      "Define `password` or set the UNITED_PASSWORD environment variable ",
      "before running this demo.",
      call. = FALSE
    )
  }
}

# Open a browsing session on the United home page.
united <- html_session("http://www.united.com/")

# Select the login form, parse it, and fill in the credential fields.
login <- united %>%
  html_node("form[name=LoginForm]") %>%
  html_form() %>%
  set_values(
    MpNumber = "GY797363",
    Password = password
  )

# Submit the filled-in form within the same session.
logged_in <- united %>% submit_form(login)

# Follow the "View account" link, then parse the number contained in the
# text of the selected element.
logged_in %>%
  follow_link("View account") %>%
  html_node("#ctl00_ContentInfo_AccountSummary_spanEliteMilesNew") %>%
  html_text() %>%
  readr::parse_number()
rvest/demo/zillow.R 0000644 0001762 0000144 00000001500 12730352077 014006 0 ustar ligges users # Inspired by https://github.com/notesofdabbler
library(rvest)
library(tidyr)

# Download and parse the search-results page.
page <- read_html("http://www.zillow.com/homes/for_sale/Greenwood-IN/fsba,fsbo,fore,cmsn_lt/house_type/52333_rid/39.638414,-86.011362,39.550714,-86.179419_rect/12_zm/0_mmm/")

# One node per listing card; every vector below has one entry per card.
houses <- page %>%
  html_nodes(".photo-cards li article")

z_id <- houses %>% html_attr("id")

address <- houses %>%
  html_node(".zsg-photo-card-address") %>%
  html_text()

price <- houses %>%
  html_node(".zsg-photo-card-price") %>%
  html_text() %>%
  readr::parse_number()

# Split each card's info text on the middle dot ("\u00b7"), giving one
# character vector of pieces per card.
params <- houses %>%
  html_node(".zsg-photo-card-info") %>%
  html_text() %>%
  strsplit("\u00b7")

# Pieces are read by position: 1 = beds, 2 = baths, 3 = area.
# `.default` makes a card with missing info text, or with fewer than three
# pieces, yield NA instead of aborting the whole script inside map_chr().
beds <- params %>%
  purrr::map_chr(1, .default = NA_character_) %>%
  readr::parse_number()
baths <- params %>%
  purrr::map_chr(2, .default = NA_character_) %>%
  readr::parse_number()
house_area <- params %>%
  purrr::map_chr(3, .default = NA_character_) %>%
  readr::parse_number()
rvest/demo/00Index 0000644 0001762 0000144 00000000224 12424711116 013470 0 ustar ligges users united Scrape mileage details from united.com
tripadvisor Scrape review data from tripadvisor
zillow Scrape housing info from zillow
rvest/demo/tripadvisor.R 0000644 0001762 0000144 00000001537 12513505636 015046 0 ustar ligges users # Inspired by
# http://notesofdabbler.github.io/201408_hotelReview/scrapeTripAdvisor.html
library(rvest)

url <- "http://www.tripadvisor.com/Hotel_Review-g37209-d1762915-Reviews-JW_Marriott_Indianapolis-Indianapolis_Indiana.html"

# Parse the page and keep one node per review bubble; every vector below has
# one entry per review.
reviews <- html_nodes(read_html(url), "#REVIEWS .innerBubble")

# Identifier and headline of each review.
id <- html_attr(html_node(reviews, ".quote a"), "id")
quote <- html_text(html_node(reviews, ".quote span"))

# The rating is read from the `alt` attribute; the " of 5 stars" suffix is
# removed before converting to integer.
rating <- html_attr(html_node(reviews, ".rating .rating_s_fill"), "alt")
rating <- as.integer(gsub(" of 5 stars", "", rating))

# The review date is read from the `title` attribute and parsed with the
# "%b %d, %Y" format.
date <- html_attr(html_node(reviews, ".rating .ratingDate"), "title")
date <- as.POSIXct(strptime(date, "%b %d, %Y"))

# Text of the review entry.
review <- html_text(html_node(reviews, ".entry .partial_entry"))

# Combine everything into a data frame and open it in the data viewer.
data.frame(id, quote, rating, date, review, stringsAsFactors = FALSE) %>% View()
rvest/README.md 0000644 0001762 0000144 00000007217 13561331136 012705 0 ustar ligges users
# rvest
[CRAN status](https://cran.r-project.org/package=rvest)
[Build status](https://travis-ci.org/tidyverse/rvest)
[Coverage status](https://codecov.io/gh/tidyverse/rvest?branch=master)
## Overview
rvest helps you scrape information from web pages. It is designed to
work with [magrittr](https://github.com/smbache/magrittr) to make it
easy to express common web scraping tasks, inspired by libraries like
[beautiful soup](https://www.crummy.com/software/BeautifulSoup/).
``` r
library(rvest)
lego_movie <- read_html("http://www.imdb.com/title/tt1490017/")
rating <- lego_movie %>%
html_nodes("strong span") %>%
html_text() %>%
as.numeric()
rating
#> [1] 7.8
cast <- lego_movie %>%
html_nodes("#titleCast .primary_photo img") %>%
html_attr("alt")
cast
#> [1] "Will Arnett" "Elizabeth Banks" "Craig Berry"
#> [4] "Alison Brie" "David Burrows" "Anthony Daniels"
#> [7] "Charlie Day" "Amanda Farinos" "Keith Ferguson"
#> [10] "Will Ferrell" "Will Forte" "Dave Franco"
#> [13] "Morgan Freeman" "Todd Hansen" "Jonah Hill"
poster <- lego_movie %>%
html_nodes(".poster img") %>%
html_attr("src")
poster
#> [1] "https://m.media-amazon.com/images/M/MV5BMTg4MDk1ODExN15BMl5BanBnXkFtZTgwNzIyNjg3MDE@._V1_UX182_CR0,0,182,268_AL_.jpg"
```
## Installation
Install the release version from CRAN:
``` r
install.packages("rvest")
```
Or the development version from GitHub
``` r
# install.packages("devtools")
devtools::install_github("tidyverse/rvest")
```
## Key functions
The most important functions in rvest are:
- Create an html document from a url, a file on disk or a string
containing html with `read_html()`.
- Select parts of a document using CSS selectors: `html_nodes(doc,
"table td")` (or if you’re a glutton for punishment, use XPath
selectors with `html_nodes(doc, xpath = "//table//td")`). If you
haven’t heard of [selectorgadget](http://selectorgadget.com/), make
sure to read `vignette("selectorgadget")` to learn about it.
- Extract components with `html_name()` (the name of the tag),
`html_text()` (all text inside the tag), `html_attr()` (contents of
a single attribute) and `html_attrs()` (all attributes).
- (You can also use rvest with XML files: parse with `xml()`, then
extract components using `xml_node()`, `xml_attr()`, `xml_attrs()`,
`xml_text()` and `xml_name()`.)
- Parse tables into data frames with `html_table()`.
- Extract, modify and submit forms with `html_form()`, `set_values()`
and `submit_form()`.
- Detect and repair encoding problems with `guess_encoding()` and
`repair_encoding()`.
- Navigate around a website as if you’re in a browser with
`html_session()`, `jump_to()`, `follow_link()`, `back()`,
`forward()`, `submit_form()` and so on. (This is still a work in
progress, so I’d love your feedback.)
To see examples of these functions in use, check out the demos.
## Inspirations
- Python:
[RoboBrowser](http://robobrowser.readthedocs.org/en/latest/readme.html),
[Beautiful Soup](https://www.crummy.com/software/BeautifulSoup/).
## Code of Conduct
Please note that the rvest project is released with a [Contributor Code of Conduct](https://rvest.tidyverse.org/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms.
rvest/man/ 0000755 0001762 0000144 00000000000 13467052014 012172 5 ustar ligges users rvest/man/set_values.Rd 0000644 0001762 0000144 00000001034 13447431221 014630 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/form.R
\name{set_values}
\alias{set_values}
\title{Set values in a form.}
\usage{
set_values(form, ...)
}
\arguments{
\item{form}{Form to modify}
\item{...}{Name-value pairs giving fields to modify}
}
\value{
An updated form object
}
\description{
Set values in a form.
}
\examples{
search <- html_form(read_html("http://www.google.com"))[[1]]
set_values(search, q = "My little pony")
set_values(search, hl = "fr")
\dontrun{set_values(search, btnI = "blah")}
}
rvest/man/html_nodes.Rd 0000644 0001762 0000144 00000007265 13561331745 014635 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/selectors.R
\name{html_nodes}
\alias{html_nodes}
\alias{html_node}
\title{Select nodes from an HTML document}
\usage{
html_nodes(x, css, xpath)
html_node(x, css, xpath)
}
\arguments{
\item{x}{Either a document, a node set or a single node.}
\item{css, xpath}{Nodes to select. Supply one of \code{css} or \code{xpath}
depending on whether you want to use a CSS or XPath 1.0 selector.}
}
\description{
More easily extract pieces out of HTML documents using XPath and CSS
selectors. CSS selectors are particularly useful in conjunction with
\url{http://selectorgadget.com/}: it makes it easy to find exactly
which selector you should be using. If you haven't used CSS selectors
before, work your way through the fun tutorial at
\url{http://flukeout.github.io/}
}
\section{\code{html_node} vs \code{html_nodes}}{
\code{html_node} is like \code{[[}: it always extracts exactly one
element. When given a list of nodes, \code{html_node} will always return
a list of the same length, whereas the result of \code{html_nodes} might be
longer or shorter.
}
\section{CSS selector support}{
CSS selectors are translated to XPath selectors by the \pkg{selectr}
package, which is a port of the python \pkg{cssselect} library,
\url{https://pythonhosted.org/cssselect/}.
It implements the majority of CSS3 selectors, as described in
\url{http://www.w3.org/TR/2011/REC-css3-selectors-20110929/}. The
exceptions are listed below:
\itemize{
\item Pseudo selectors that require interactivity are ignored:
\code{:hover}, \code{:active}, \code{:focus}, \code{:target},
\code{:visited}
\item The following pseudo classes don't work with the wild card element, *:
\code{*:first-of-type}, \code{*:last-of-type}, \code{*:nth-of-type},
\code{*:nth-last-of-type}, \code{*:only-of-type}
\item It supports \code{:contains(text)}
\item You can use !=, \code{[foo!=bar]} is the same as \code{:not([foo=bar])}
\item \code{:not()} accepts a sequence of simple selectors, not just a single
simple selector.
}
}
\examples{
# CSS selectors ----------------------------------------------
url <- paste0(
"https://web.archive.org/web/20190202054736/",
"https://www.boxofficemojo.com/movies/?id=ateam.htm"
)
ateam <- read_html(url)
html_nodes(ateam, "center")
html_nodes(ateam, "center font")
html_nodes(ateam, "center font b")
# But html_node is best used in conjunction with \%>\% from magrittr
# You can chain subsetting:
ateam \%>\% html_nodes("center") \%>\% html_nodes("td")
ateam \%>\% html_nodes("center") \%>\% html_nodes("font")
td <- ateam \%>\% html_nodes("center") \%>\% html_nodes("td")
td
# When applied to a list of nodes, html_nodes() returns all nodes,
# collapsing results into a new nodelist.
td \%>\% html_nodes("font")
# html_node() returns the first matching node. If there are no matching
# nodes, it returns a "missing" node
if (utils::packageVersion("xml2") > "0.1.2") {
td \%>\% html_node("font")
}
# To pick out an element at specified position, use magrittr::extract2
# which is an alias for [[
library(magrittr)
ateam \%>\% html_nodes("table") \%>\% extract2(1) \%>\% html_nodes("img")
ateam \%>\% html_nodes("table") \%>\% `[[`(1) \%>\% html_nodes("img")
# Find all images contained in the first two tables
ateam \%>\% html_nodes("table") \%>\% `[`(1:2) \%>\% html_nodes("img")
ateam \%>\% html_nodes("table") \%>\% extract(1:2) \%>\% html_nodes("img")
# XPath selectors ---------------------------------------------
# chaining with XPath is a little trickier - you may need to vary
# the prefix you're using - // always selects from the root node
# regardless of where you currently are in the doc
ateam \%>\%
html_nodes(xpath = "//center//font//b") \%>\%
html_nodes(xpath = "//b")
}
rvest/man/html_text.Rd 0000644 0001762 0000144 00000002004 13447431221 014464 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/html.R
\name{html_text}
\alias{html_text}
\alias{html_name}
\alias{html_children}
\alias{html_attrs}
\alias{html_attr}
\title{Extract attributes, text and tag name from html.}
\usage{
html_text(x, trim = FALSE)
html_name(x)
html_children(x)
html_attrs(x)
html_attr(x, name, default = NA_character_)
}
\arguments{
\item{x}{A document, node, or node set.}
\item{trim}{If \code{TRUE} will trim leading and trailing spaces.}
\item{name}{Name of attribute to retrieve.}
\item{default}{A string used as a default value when the attribute does
not exist in every node.}
}
\value{
\code{html_attr}, \code{html_name} and \code{html_text}, a character
vector; \code{html_attrs}, a list.
}
\description{
Extract attributes, text and tag name from html.
}
\examples{
movie <- read_html("http://www.imdb.com/title/tt1490017/")
cast <- html_nodes(movie, "#titleCast span.itemprop")
html_text(cast)
html_name(cast)
html_attrs(cast)
html_attr(cast, "class")
}
rvest/man/google_form.Rd 0000644 0001762 0000144 00000000555 13447431221 014764 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/form.R
\name{google_form}
\alias{google_form}
\title{Make link to google form given id}
\usage{
google_form(x)
}
\arguments{
\item{x}{Unique identifier for form}
}
\description{
Make link to google form given id
}
\examples{
google_form("1M9B8DsYNFyDjpwSK6ur_bZf8Rv_04ma3rmaaBiveoUI")
}
rvest/man/html_form.Rd 0000644 0001762 0000144 00000001242 13447431221 014446 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/form.R
\name{html_form}
\alias{html_form}
\title{Parse forms in a page.}
\usage{
html_form(x)
}
\arguments{
\item{x}{A node, node set or document.}
}
\description{
Parse forms in a page.
}
\examples{
\donttest{
html_form(read_html("https://hadley.wufoo.com/forms/libraryrequire-quiz/"))
html_form(read_html("https://hadley.wufoo.com/forms/r-journal-submission/"))
box_office <- read_html("http://www.boxofficemojo.com/movies/?id=ateam.htm")
box_office \%>\% html_node("form") \%>\% html_form()
}
}
\seealso{
HTML 4.01 form specification:
\url{http://www.w3.org/TR/html401/interact/forms.html}
}
rvest/man/xml.Rd 0000644 0001762 0000144 00000002075 13447431221 013264 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/xml.R
\name{xml}
\alias{xml}
\alias{xml_tag}
\alias{xml_node}
\alias{xml_nodes}
\title{Work with xml.}
\usage{
xml(x, ..., encoding = "")
xml_tag(x)
xml_node(x, css, xpath)
xml_nodes(x, css, xpath)
}
\arguments{
\item{x}{A url, a local path, a string containing html, or a response from
an httr request.}
\item{...}{If \code{x} is a URL, additional arguments are passed on to
\code{\link[httr:GET]{httr::GET()}}.}
\item{encoding}{Specify encoding of document. See \code{\link[=iconvlist]{iconvlist()}}
for complete list. If you have problems determining the correct encoding,
try \code{\link[stringi:stri_enc_detect]{stringi::stri_enc_detect()}}}
\item{css}{Nodes to select. Supply one of \code{css} or \code{xpath}
depending on whether you want to use a CSS or XPath 1.0 selector.}
\item{xpath}{Nodes to select. Supply one of \code{css} or \code{xpath}
depending on whether you want to use a CSS or XPath 1.0 selector.}
}
\description{
Deprecated. Please use just xml2 directly
}
\keyword{internal}
rvest/man/html_session.Rd 0000644 0001762 0000144 00000002133 13447431221 015166 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/session.R
\name{html_session}
\alias{html_session}
\alias{is.session}
\title{Simulate a session in an html browser.}
\usage{
html_session(url, ...)
is.session(x)
}
\arguments{
\item{url}{Location to start session}
\item{...}{Any additional httr config to use throughout session.}
\item{x}{An object to test to see if it's a session.}
}
\description{
Simulate a session in an html browser.
}
\section{Methods}{
A session object responds to a combination of httr and html methods:
use \code{\link[httr:cookies]{httr::cookies()}}, \code{\link[httr:headers]{httr::headers()}},
and \code{\link[httr:status_code]{httr::status_code()}} to access properties of the request;
and \code{\link[=html_nodes]{html_nodes()}} to access the html.
}
\examples{
# http://stackoverflow.com/questions/15853204
s <- html_session("http://hadley.nz")
s \%>\% jump_to("hadley-wickham.jpg") \%>\% jump_to("/") \%>\% session_history()
s \%>\% jump_to("hadley-wickham.jpg") \%>\% back() \%>\% session_history()
\donttest{
s \%>\% follow_link(css = "p a")
}
}
rvest/man/minimal_html.Rd 0000644 0001762 0000144 00000000703 13447431221 015132 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/minimal-html.R
\name{minimal_html}
\alias{minimal_html}
\title{Generate a minimal html5 page.}
\usage{
minimal_html(title, html = "")
}
\arguments{
\item{title}{Page title}
\item{html}{Other html to insert into page.}
}
\description{
See \url{http://www.brucelawson.co.uk/2010/a-minimal-html5-document/} for
details.
}
\examples{
minimal_html("test")
}
\keyword{internal}
rvest/man/html_tag.Rd 0000644 0001762 0000144 00000000367 13447431221 014265 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/deprecated.R
\name{html_tag}
\alias{html_tag}
\title{html_tag}
\usage{
html_tag(x)
}
\description{
Deprecated: please use \code{html_name} instead.
}
\keyword{internal}
rvest/man/pluck.Rd 0000644 0001762 0000144 00000000533 13447431221 013577 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{pluck}
\alias{pluck}
\title{Extract elements of a list by position.}
\usage{
pluck(x, i, type)
}
\arguments{
\item{x}{A list}
\item{i}{A string or integer.}
\item{type}{Type of output, if known}
}
\description{
Extract elements of a list by position.
}
rvest/man/jump_to.Rd 0000644 0001762 0000144 00000002205 13447431221 014134 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/session.R
\name{jump_to}
\alias{jump_to}
\alias{follow_link}
\title{Navigate to a new url.}
\usage{
jump_to(x, url, ...)
follow_link(x, i, css, xpath, ...)
}
\arguments{
\item{x}{A session.}
\item{url}{A URL, either relative or absolute, to navigate to.}
\item{...}{Any additional httr configs to apply to this request.}
\item{i}{You can select with: \describe{
\item{an integer}{selects the ith link}
\item{a string}{first link containing that text (case sensitive)}
}}
\item{css}{Nodes to select. Supply one of \code{css} or \code{xpath}
depending on whether you want to use a CSS or XPath 1.0 selector.}
\item{xpath}{Nodes to select. Supply one of \code{css} or \code{xpath}
depending on whether you want to use a CSS or XPath 1.0 selector.}
}
\description{
\code{jump_to()} takes a url (either relative or absolute);
\code{follow_link} takes an expression that refers to a link (an \code{<a>}
tag) on the current page.
}
\examples{
\donttest{
s <- html_session("http://hadley.nz")
s <- s \%>\% follow_link("github")
s <- s \%>\% back()
s \%>\% follow_link("readr")
}
}
rvest/man/html_table.Rd 0000644 0001762 0000144 00000003432 13447432300 014574 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/table.R
\name{html_table}
\alias{html_table}
\title{Parse an html table into a data frame.}
\usage{
html_table(x, header = NA, trim = TRUE, fill = FALSE, dec = ".")
}
\arguments{
\item{x}{A node, node set or document.}
\item{header}{Use first row as header? If \code{NA}, will use first row
if it consists of \code{<th>} tags.}
\item{trim}{Remove leading and trailing whitespace within each cell?}
\item{fill}{If \code{TRUE}, automatically fill rows with fewer than
the maximum number of columns with \code{NA}s.}
\item{dec}{The character used as decimal mark.}
}
\description{
Parse an html table into a data frame.
}
\section{Assumptions}{
\code{html_table} currently makes a few assumptions:
\itemize{
\item No cells span multiple rows
\item Headers are in the first row
}
}
\examples{
sample1 <- minimal_html("")
sample1 \%>\%
html_node("table") \%>\%
html_table()
# Values in merged cells will be duplicated
sample2 <- minimal_html("")
sample2 \%>\%
html_node("table") \%>\%
html_table()
# If the table is badly formed, and has different number of columns
# in each row, use `fill = TRUE` to fill in the missing values
sample3 <- minimal_html("")
sample3 \%>\%
html_node("table") \%>\%
html_table(fill = TRUE)
}
rvest/man/submit_form.Rd 0000644 0001762 0000144 00000001712 13447431221 015007 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/form.R
\name{submit_form}
\alias{submit_form}
\title{Submit a form back to the server.}
\usage{
submit_form(session, form, submit = NULL, ...)
}
\arguments{
\item{session}{Session to submit form to.}
\item{form}{Form to submit}
\item{submit}{Name of submit button to use. If not supplied, defaults to
first submission button on the form (with a message).}
\item{...}{Additional arguments passed on to \code{\link[httr:GET]{httr::GET()}}
or \code{\link[httr:POST]{httr::POST()}}}
}
\value{
If successful, the parsed html response. Throws an error if http
request fails. To access other elements of response, construct it yourself
using the elements returned by \code{submit_request}.
}
\description{
Submit a form back to the server.
}
\examples{
test <- google_form("1M9B8DsYNFyDjpwSK6ur_bZf8Rv_04ma3rmaaBiveoUI")
f0 <- html_form(test)[[1]]
f1 <- set_values(f0, entry.564397473 = "abc")
}
rvest/man/encoding.Rd 0000644 0001762 0000144 00000002456 13447431221 014255 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/encoding.R
\name{encoding}
\alias{encoding}
\alias{guess_encoding}
\alias{repair_encoding}
\title{Guess and repair faulty character encoding.}
\usage{
guess_encoding(x)
repair_encoding(x, from = NULL)
}
\arguments{
\item{x}{A character vector.}
\item{from}{The encoding that the string is actually in. If \code{NULL},
\code{guess_encoding} will be used.}
}
\description{
These functions help you respond to web pages that declare incorrect
encodings. You can use \code{guess_encoding} to figure out what
the real encoding is (and then supply that to the \code{encoding} argument of
html), or use \code{repair_encoding} to fix character vectors after the
fact.
}
\section{stringi}{
These functions are wrappers around tools from the fantastic stringi
package, so you'll need to make sure to have that installed.
}
\examples{
# A file with bad encoding included in the package
path <- system.file("html-ex", "bad-encoding.html", package = "rvest")
x <- read_html(path)
x \%>\% html_nodes("p") \%>\% html_text()
guess_encoding(x)
# Two valid encodings, only one of which is correct
read_html(path, encoding = "ISO-8859-1") \%>\% html_nodes("p") \%>\% html_text()
read_html(path, encoding = "ISO-8859-2") \%>\% html_nodes("p") \%>\% html_text()
}
rvest/man/rvest-package.Rd 0000644 0001762 0000144 00000001372 13447432743 015231 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rvest-package.R
\docType{package}
\name{rvest-package}
\alias{rvest}
\alias{rvest-package}
\title{rvest: Easily Harvest (Scrape) Web Pages}
\description{
\if{html}{\figure{logo.png}{options: align='right'}}
Wrappers around the 'xml2' and 'httr' packages to
make it easy to download, then manipulate, HTML and XML.
}
\seealso{
Useful links:
\itemize{
\item \url{http://rvest.tidyverse.org/}
\item \url{https://github.com/tidyverse/rvest}
\item Report bugs at \url{https://github.com/tidyverse/rvest/issues}
}
}
\author{
\strong{Maintainer}: Hadley Wickham \email{hadley@rstudio.com}
Other contributors:
\itemize{
\item RStudio [copyright holder]
}
}
\keyword{internal}
rvest/man/figures/ 0000755 0001762 0000144 00000000000 13447431221 013635 5 ustar ligges users rvest/man/figures/logo.png 0000644 0001762 0000144 00000075230 13447431221 015312 0 ustar ligges users PNG
IHDR X? gAMA a cHRM z& |