rvest/ 0000755 0001762 0000144 00000000000 14300007360 011406 5 ustar ligges users rvest/NAMESPACE 0000644 0001762 0000144 00000003707 14210001152 012625 0 ustar ligges users # Generated by roxygen2: do not edit by hand
S3method(cookies,rvest_session)
S3method(format,rvest_field)
S3method(headers,rvest_session)
S3method(html_element,default)
S3method(html_element,rvest_session)
S3method(html_elements,default)
S3method(html_elements,rvest_session)
S3method(html_form,rvest_session)
S3method(html_form,xml_document)
S3method(html_form,xml_node)
S3method(html_form,xml_nodeset)
S3method(html_table,rvest_session)
S3method(html_table,xml_document)
S3method(html_table,xml_node)
S3method(html_table,xml_nodeset)
S3method(html_text2,xml_document)
S3method(html_text2,xml_missing)
S3method(html_text2,xml_node)
S3method(html_text2,xml_nodeset)
S3method(print,rvest_field)
S3method(print,rvest_form)
S3method(print,rvest_session)
S3method(read_html,rvest_session)
S3method(status_code,rvest_session)
export("%>%")
export(back)
export(follow_link)
export(forward)
export(google_form)
export(guess_encoding)
export(html_attr)
export(html_attrs)
export(html_children)
export(html_element)
export(html_elements)
export(html_encoding_guess)
export(html_form)
export(html_form_set)
export(html_form_submit)
export(html_name)
export(html_node)
export(html_nodes)
export(html_session)
export(html_table)
export(html_text)
export(html_text2)
export(is.session)
export(jump_to)
export(minimal_html)
export(read_html)
export(repair_encoding)
export(session)
export(session_back)
export(session_follow_link)
export(session_forward)
export(session_history)
export(session_jump_to)
export(session_submit)
export(set_values)
export(submit_form)
export(url_absolute)
export(xml_node)
export(xml_nodes)
export(xml_tag)
import(rlang)
importFrom(glue,glue)
importFrom(httr,cookies)
importFrom(httr,headers)
importFrom(httr,status_code)
importFrom(lifecycle,deprecated)
importFrom(magrittr,"%>%")
importFrom(xml2,read_html)
importFrom(xml2,url_absolute)
importFrom(xml2,xml_attr)
importFrom(xml2,xml_attrs)
importFrom(xml2,xml_children)
importFrom(xml2,xml_name)
importFrom(xml2,xml_text)
rvest/demo/ 0000755 0001762 0000144 00000000000 13775436633 012362 5 ustar ligges users rvest/demo/united.R 0000644 0001762 0000144 00000000712 13775436633 013775 0 ustar ligges users # Scrape miles from united site
library(rvest)
# Open a browsing session on the United home page.
united <- session("http://www.united.com/")

# Locate the login form and fill in the account number and password.
# NOTE: `password` is not defined anywhere in this script; assign it
# (e.g. `password <- "..."`) before running the demo.
login <- united %>%
  html_element("form[name=LoginForm]") %>%
  html_form() %>%
  html_form_set(
    MpNumber = "GY797363",
    Password = password
  )

# Submit the filled-in form within the session.
logged_in <- united %>% session_submit(login)

# Follow the "View account" link, pull the elite-miles element and parse
# its text as a number. session_follow_link() is the rvest 1.0.0 name for
# the deprecated follow_link(), matching the other session_*() calls here.
logged_in %>%
  session_follow_link("View account") %>%
  html_element("#ctl00_ContentInfo_AccountSummary_spanEliteMilesNew") %>%
  html_text() %>%
  readr::parse_number()
rvest/demo/zillow.R 0000644 0001762 0000144 00000001514 13775423476 014027 0 ustar ligges users # Inspired by https://github.com/notesofdabbler
library(rvest)
library(tidyr)
# Download the Zillow search-results page for Greenwood, IN.
page <- read_html("http://www.zillow.com/homes/for_sale/Greenwood-IN/fsba,fsbo,fore,cmsn_lt/house_type/52333_rid/39.638414,-86.011362,39.550714,-86.179419_rect/12_zm/0_mmm/")

# One <article> per listing inside the photo-card list.
houses <- html_elements(page, ".photo-cards li article")

# The listing id is stored in each article's `id` attribute.
z_id <- html_attr(houses, "id")

# Street address text of every card.
address <- html_text(html_element(houses, ".zsg-photo-card-address"))

# Asking price, parsed from the price text into a number.
price <- readr::parse_number(
  html_text(html_element(houses, ".zsg-photo-card-price"))
)

# The info line is split on the middle dot (U+00B7); the pieces are then
# read positionally as beds, baths and area.
params <- strsplit(
  html_text(html_element(houses, ".zsg-photo-card-info")),
  "\u00b7"
)

beds <- readr::parse_number(purrr::map_chr(params, 1))
baths <- readr::parse_number(purrr::map_chr(params, 2))
house_area <- readr::parse_number(purrr::map_chr(params, 3))
rvest/demo/00Index 0000644 0001762 0000144 00000000217 13767413737 013515 0 ustar ligges users united Scrape mileage details from united.com
tripadvisor Scrape review data from tripadvisor
zillow Scrape housing info from zillow
rvest/demo/tripadvisor.R 0000644 0001762 0000144 00000001561 13775423515 015051 0 ustar ligges users # Inspired by
# http://notesofdabbler.github.io/201408_hotelReview/scrapeTripAdvisor.html
library(rvest)
url <- "http://www.tripadvisor.com/Hotel_Review-g37209-d1762915-Reviews-JW_Marriott_Indianapolis-Indianapolis_Indiana.html"

# Select every review bubble inside the #REVIEWS container.
reviews <- read_html(url) %>%
  html_elements("#REVIEWS .innerBubble")

# Review id: the `id` attribute of the link inside the quote.
id <- html_attr(html_element(reviews, ".quote a"), "id")

# Headline quote text of each review.
quote <- html_text(html_element(reviews, ".quote span"))

# Rating: strip the " of 5 stars" suffix from the alt text and convert
# what is left to an integer.
rating <- as.integer(
  gsub(
    " of 5 stars", "",
    html_attr(html_element(reviews, ".rating .rating_s_fill"), "alt")
  )
)

# Review date: parse the `title` attribute with the "%b %d, %Y" format
# and store it as POSIXct so it can live in a data frame.
date <- as.POSIXct(
  strptime(
    html_attr(html_element(reviews, ".rating .ratingDate"), "title"),
    "%b %d, %Y"
  )
)

# Body text of each review.
review <- html_text(html_element(reviews, ".entry .partial_entry"))

# Assemble the columns and open the result in the data viewer.
data.frame(id, quote, rating, date, review, stringsAsFactors = FALSE) %>% View()
rvest/LICENSE 0000644 0001762 0000144 00000000053 13767413737 012442 0 ustar ligges users YEAR: 2020
COPYRIGHT HOLDER: rvest authors
rvest/README.md 0000644 0001762 0000144 00000010542 14210001042 012656 0 ustar ligges users
# rvest
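[![CRAN status](https://www.r-pkg.org/badges/version/rvest)](https://cran.r-project.org/package=rvest)
[![R-CMD-check](https://github.com/tidyverse/rvest/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/tidyverse/rvest/actions/workflows/R-CMD-check.yaml)
[![Codecov test coverage](https://codecov.io/gh/tidyverse/rvest/branch/main/graph/badge.svg)](https://app.codecov.io/gh/tidyverse/rvest?branch=main)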
## Overview
rvest helps you scrape (or harvest) data from web pages. It is designed
to work with [magrittr](https://github.com/tidyverse/magrittr) to make
it easy to express common web scraping tasks, inspired by libraries like
[beautiful soup](https://www.crummy.com/software/BeautifulSoup/) and
[RoboBrowser](http://robobrowser.readthedocs.io/en/latest/readme.html).
If you’re scraping multiple pages, I highly recommend using rvest in
concert with [polite](https://dmi3kno.github.io/polite/). The polite
package ensures that you’re respecting the
[robots.txt](https://en.wikipedia.org/wiki/Robots_exclusion_standard)
and not hammering the site with too many requests.
## Installation
``` r
# The easiest way to get rvest is to install the whole tidyverse:
install.packages("tidyverse")
# Alternatively, install just rvest:
install.packages("rvest")
```
## Usage
``` r
library(rvest)
# Start by reading a HTML page with read_html():
starwars <- read_html("https://rvest.tidyverse.org/articles/starwars.html")
# Then find elements that match a css selector or XPath expression
# using html_elements(). In this example, each <section> corresponds
# to a different film
films <- starwars %>% html_elements("section")
films
#> {xml_nodeset (7)}
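#> [1] <section><h2 data-id="1">\nThe Phantom Menace\n</h2>\n<p>\nReleased: 1999 ...
#> [2] <section><h2 data-id="2">\nAttack of the Clones\n</h2>\n<p>\nReleased: 20 ...
#> [3] <section><h2 data-id="3">\nRevenge of the Sith\n</h2>\n<p>\nReleased: 200 ...
#> [4] <section><h2 data-id="4">\nA New Hope\n</h2>\n<p>\nReleased: 1977-05-25\n ...
#> [5] <section><h2 data-id="5">\nThe Empire Strikes Back\n</h2>\n<p>\nReleased: ...
#> [6] <section><h2 data-id="6">\nReturn of the Jedi\n</h2>\n<p>\nReleased: 1983 ...
#> [7] <section><h2 data-id="7">\nThe Force Awakens\n</h2>\n<p>\nReleased: 2015- ...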
# Then use html_element() to extract one element per film. Here
# the title is given by the text inside <h2>
title <- films %>%
html_element("h2") %>%
html_text2()
title
#> [1] "The Phantom Menace" "Attack of the Clones"
#> [3] "Revenge of the Sith" "A New Hope"
#> [5] "The Empire Strikes Back" "Return of the Jedi"
#> [7] "The Force Awakens"
# Or use html_attr() to get data out of attributes. html_attr() always
# returns a string so we convert it to an integer using a readr function
episode <- films %>%
html_element("h2") %>%
html_attr("data-id") %>%
readr::parse_integer()
episode
#> [1] 1 2 3 4 5 6 7
```
If the page contains tabular data you can convert it directly to a data
frame with `html_table()`:
``` r
html <- read_html("https://en.wikipedia.org/w/index.php?title=The_Lego_Movie&oldid=998422565")
html %>%
html_element(".tracklist") %>%
html_table()
#> # A tibble: 29 × 4
#> No. Title `Performer(s)` Length
#> <chr> <chr> <chr> <chr>
#> 1 1. "\"Everything Is Awesome\"" "Tegan and Sara featuring The Lonel… 2:43
#> 2 2. "\"Prologue\"" "" 2:28
#> 3 3. "\"Emmett's Morning\"" "" 2:00
#> 4 4. "\"Emmett Falls in Love\"" "" 1:11
#> 5 5. "\"Escape\"" "" 3:26
#> 6 6. "\"Into the Old West\"" "" 1:00
#> 7 7. "\"Wyldstyle Explains\"" "" 1:21
#> 8 8. "\"Emmett's Mind\"" "" 2:17
#> 9 9. "\"The Transformation\"" "" 1:46
#> 10 10. "\"Saloons and Wagons\"" "" 3:38
#> # … with 19 more rows
```
rvest/man/ 0000755 0001762 0000144 00000000000 14210000313 012150 5 ustar ligges users rvest/man/html_text.Rd 0000644 0001762 0000144 00000005046 13775651250 014507 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/text.R
\name{html_text}
\alias{html_text}
\alias{html_text2}
\title{Get element text}
\usage{
html_text(x, trim = FALSE)
html_text2(x, preserve_nbsp = FALSE)
}
\arguments{
\item{x}{A document, node, or node set.}
\item{trim}{If \code{TRUE} will trim leading and trailing spaces.}
\item{preserve_nbsp}{Should non-breaking spaces be preserved? By default,
\code{html_text2()} converts to ordinary spaces to ease further computation.
When \code{preserve_nbsp} is \code{TRUE}, \verb{&nbsp;} will appear in strings as
\code{"\\ua0"}. This often causes confusion because it prints the same way as
\code{" "}.}
}
\value{
A character vector the same length as \code{x}
}
\description{
There are two ways to retrieve text from an element: \code{html_text()} and
\code{html_text2()}. \code{html_text()} is a thin wrapper around \code{\link[xml2:xml_text]{xml2::xml_text()}}
which returns just the raw underlying text. \code{html_text2()} simulates how
text looks in a browser, using an approach inspired by JavaScript's
\href{https://developer.mozilla.org/en-US/docs/Web/API/HTMLElement/innerText}{innerText()}.
Roughly speaking, it converts \verb{<br />} to \code{"\\n"}, adds blank lines
around \verb{<p>} tags, and lightly formats tabular data.
\code{html_text2()} is usually what you want, but it is much slower than
\code{html_text()} so for simple applications where performance is important
you may want to use \code{html_text()} instead.
}
\examples{
# To understand the difference between html_text() and html_text2()
# take the following html:
html <- minimal_html(
  "<p>This is a paragraph.
    This another sentence.<br>This should start on a new line"
)
# html_text() returns the raw underlying text, which includes whitespace
# that would be ignored by a browser, and ignores the <br>
html \%>\% html_element("p") \%>\% html_text() \%>\% writeLines()
# html_text2() simulates what a browser would display. Non-significant
# whitespace is collapsed, and <br> is turned into a line break
html \%>\% html_element("p") \%>\% html_text2() \%>\% writeLines()
# By default, html_text2() also converts non-breaking spaces to regular
# spaces:
html <- minimal_html("<p>x&nbsp;y</p>")
x1 <- html \%>\% html_element("p") \%>\% html_text()
x2 <- html \%>\% html_element("p") \%>\% html_text2()
# When printed, non-breaking spaces look exactly like regular spaces
x1
x2
# But aren't actually the same:
x1 == x2
# Which you can confirm by looking at their underlying binary
# representation:
charToRaw(x1)
charToRaw(x2)
}
rvest/man/google_form.Rd 0000644 0001762 0000144 00000000470 14101012310 014736 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{google_form}
\alias{google_form}
\title{Make link to google form given id}
\usage{
google_form(x)
}
\arguments{
\item{x}{Unique identifier for form}
}
\description{
Make link to google form given id
}
\keyword{internal}
rvest/man/html_form.Rd 0000644 0001762 0000144 00000004261 14277722126 014463 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/form.R
\name{html_form}
\alias{html_form}
\alias{html_form_set}
\alias{html_form_submit}
\title{Parse forms and set values}
\usage{
html_form(x, base_url = NULL)
html_form_set(form, ...)
html_form_submit(form, submit = NULL)
}
\arguments{
\item{x}{A document (from \code{\link[=read_html]{read_html()}}), node set (from \code{\link[=html_elements]{html_elements()}}),
node (from \code{\link[=html_element]{html_element()}}), or session (from \code{\link[=session]{session()}}).}
\item{base_url}{Base url of underlying HTML document. The default, \code{NULL},
uses the url of the HTML document underlying \code{x}.}
\item{form}{A form}
\item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Name-value pairs giving
fields to modify.
Provide a character vector to set multiple checkboxes in a set or
select multiple values from a multi-select.}
\item{submit}{Which button should be used to submit the form?
\itemize{
\item \code{NULL}, the default, uses the first button.
\item A string selects a button by its name.
\item A number selects a button using its relative position.
}}
}
\value{
\itemize{
\item \code{html_form()} returns an S3 object with class \code{rvest_form} when applied
to a single element. It returns a list of \code{rvest_form} objects when
applied to multiple elements or a document.
\item \code{html_form_set()} returns an \code{rvest_form} object.
\item \code{html_form_submit()} submits the form, returning an httr response which
can be parsed with \code{\link[=read_html]{read_html()}}.
}
}
\description{
Use \code{html_form()} to extract a form, set values with \code{html_form_set()},
and submit it with \code{html_form_submit()}.
}
\examples{
html <- read_html("http://www.google.com")
search <- html_form(html)[[1]]
search <- search \%>\% html_form_set(q = "My little pony", hl = "fr")
# Or if you have a list of values, use !!!
vals <- list(q = "web scraping", hl = "en")
search <- search \%>\% html_form_set(!!!vals)
# To submit and get result:
\dontrun{
resp <- html_form_submit(search)
read_html(resp)
}
}
\seealso{
HTML 4.01 form specification:
\url{https://www.w3.org/TR/html401/interact/forms.html}
}
rvest/man/rename.Rd 0000644 0001762 0000144 00000003256 14014035320 013724 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rename.R
\name{rename}
\alias{set_values}
\alias{submit_form}
\alias{xml_tag}
\alias{xml_node}
\alias{xml_nodes}
\alias{html_nodes}
\alias{html_node}
\alias{back}
\alias{forward}
\alias{jump_to}
\alias{follow_link}
\alias{html_session}
\title{Functions renamed in rvest 1.0.0}
\usage{
set_values(form, ...)
submit_form(session, form, submit = NULL, ...)
xml_tag(x)
xml_node(...)
xml_nodes(...)
html_nodes(...)
html_node(...)
back(x)
forward(x)
jump_to(x, url, ...)
follow_link(x, ...)
html_session(url, ...)
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
rvest 1.0.0 renamed a number of functions to ensure that every function
has a common prefix, matching tidyverse conventions that emerged since
rvest was first created.
\itemize{
\item \code{set_values()} -> \code{html_form_set()}
\item \code{submit_form()} -> \code{session_submit()}
\item \code{xml_tag()} -> \code{html_name()}
\item \code{xml_node()} & \code{html_node()} -> \code{html_element()}
\item \code{xml_nodes()} & \code{html_nodes()} -> \code{html_elements()}
}
(\code{html_node()} and \code{html_nodes()} are only superseded because they're
so widely used.)
Additionally all session related functions gained a common prefix:
\itemize{
\item \code{html_session()} -> \code{session()}
\item \code{forward()} -> \code{session_forward()}
\item \code{back()} -> \code{session_back()}
\item \code{jump_to()} -> \code{session_jump_to()}
\item \code{follow_link()} -> \code{session_follow_link()}
}
}
\keyword{internal}
rvest/man/minimal_html.Rd 0000644 0001762 0000144 00000000663 13775437157 015161 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{minimal_html}
\alias{minimal_html}
\title{Create an HTML document from inline HTML}
\usage{
minimal_html(html, title = "")
}
\arguments{
\item{html}{HTML contents of page.}
\item{title}{Page title (required by HTML spec).}
}
\description{
Create an HTML document from inline HTML
}
\examples{
minimal_html("<p>test</p>")
}
\keyword{internal}
rvest/man/html_name.Rd 0000644 0001762 0000144 00000001256 13776122153 014436 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/html.R
\name{html_name}
\alias{html_name}
\title{Get element name}
\usage{
html_name(x)
}
\arguments{
\item{x}{A document (from \code{\link[=read_html]{read_html()}}), node set (from \code{\link[=html_elements]{html_elements()}}),
node (from \code{\link[=html_element]{html_element()}}), or session (from \code{\link[=session]{session()}}).}
}
\value{
A character vector the same length as \code{x}
}
\description{
Get element name
}
\examples{
url <- "https://rvest.tidyverse.org/articles/starwars.html"
html <- read_html(url)
html \%>\%
html_element("div") \%>\%
html_children() \%>\%
html_name()
}
rvest/man/html_attr.Rd 0000644 0001762 0000144 00000002423 14132341320 014447 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/html.R
\name{html_attr}
\alias{html_attr}
\alias{html_attrs}
\title{Get element attributes}
\usage{
html_attr(x, name, default = NA_character_)
html_attrs(x)
}
\arguments{
\item{x}{A document (from \code{\link[=read_html]{read_html()}}), node set (from \code{\link[=html_elements]{html_elements()}}),
node (from \code{\link[=html_element]{html_element()}}), or session (from \code{\link[=session]{session()}}).}
\item{name}{Name of attribute to retrieve.}
\item{default}{A string used as a default value when the attribute does
not exist in every element.}
}
\value{
A character vector (for \code{html_attr()}) or list (\code{html_attrs()})
the same length as \code{x}.
}
\description{
\code{html_attr()} gets a single attribute; \code{html_attrs()} gets all attributes.
}
\examples{
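html <- minimal_html('<ul>
  <li><a href="https://a.com" class="important">a</a></li>
  <li class="active"><a href="https://c.com">b</a></li>
  <li><a href="https://c.com">b</a></li>
  </ul>')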
html \%>\% html_elements("a") \%>\% html_attrs()
html \%>\% html_elements("a") \%>\% html_attr("href")
html \%>\% html_elements("li") \%>\% html_attr("class")
html \%>\% html_elements("li") \%>\% html_attr("class", default = "inactive")
}
rvest/man/html_table.Rd 0000644 0001762 0000144 00000004665 14007274024 014605 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/table.R
\name{html_table}
\alias{html_table}
\title{Parse an html table into a data frame}
\usage{
html_table(
x,
header = NA,
trim = TRUE,
fill = deprecated(),
dec = ".",
na.strings = "NA",
convert = TRUE
)
}
\arguments{
\item{x}{A document (from \code{\link[=read_html]{read_html()}}), node set (from \code{\link[=html_elements]{html_elements()}}),
node (from \code{\link[=html_element]{html_element()}}), or session (from \code{\link[=session]{session()}}).}
\item{header}{Use first row as header? If \code{NA}, will use first row
if it consists of \verb{<th>} tags.
If \code{TRUE}, column names are left exactly as they are in the source
document, which may require post-processing to generate a valid data
frame.}
\item{trim}{Remove leading and trailing whitespace within each cell?}
\item{fill}{Deprecated - missing cells in tables are now always
automatically filled with \code{NA}.}
\item{dec}{The character used as decimal place marker.}
\item{na.strings}{Character vector of values that will be converted to \code{NA}
if \code{convert} is \code{TRUE}.}
\item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} to interpret texts as
integer, double, or \code{NA}.}
}
\value{
When applied to a single element, \code{html_table()} returns a single tibble.
When applied to multiple elements or a document, \code{html_table()} returns
a list of tibbles.
}
\description{
The algorithm mimics what a browser does, but repeats the values of merged
cells in every cell that they cover.
}
\examples{
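sample1 <- minimal_html("<table>
  <tr><th>Col A</th><th>Col B</th></tr>
  <tr><td>1</td><td>x</td></tr>
  <tr><td>4</td><td>y</td></tr>
  <tr><td>10</td><td>z</td></tr>
</table>")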
sample1 \%>\%
html_element("table") \%>\%
html_table()
# Values in merged cells will be duplicated
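sample2 <- minimal_html("<table>
  <tr><th>A</th><th>B</th><th>C</th></tr>
  <tr><td>1</td><td>2</td><td>3</td></tr>
  <tr><td colspan='2'>4</td><td>5</td></tr>
  <tr><td>6</td><td colspan='2'>7</td></tr>
</table>")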
sample2 \%>\%
html_element("table") \%>\%
html_table()
# If a row is missing cells, they'll be filled with NAs
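sample3 <- minimal_html("<table>
  <tr><th>A</th><th>B</th><th>C</th></tr>
  <tr><td>1</td><td>2</td><td>3</td></tr>
  <tr><td>4</td></tr>
</table>")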
sample3 \%>\%
html_element("table") \%>\%
html_table()
}
rvest/man/html_element.Rd 0000644 0001762 0000144 00000006072 14277722126 015153 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/selectors.R
\name{html_element}
\alias{html_element}
\alias{html_elements}
\title{Select elements from an HTML document}
\usage{
html_element(x, css, xpath)
html_elements(x, css, xpath)
}
\arguments{
\item{x}{Either a document, a node set or a single node.}
\item{css, xpath}{Elements to select. Supply one of \code{css} or \code{xpath}
depending on whether you want to use a CSS selector or XPath 1.0
expression.}
}
\value{
\code{html_element()} returns a nodeset the same length as the input.
\code{html_elements()} flattens the output so there's no direct way to map
the output to the input.
}
\description{
\code{html_element()} and \code{html_elements()} find HTML elements using CSS selectors
or XPath expressions. CSS selectors are particularly useful in conjunction
with \url{https://selectorgadget.com/}, which makes it very easy to discover the
selector you need.
}
\section{CSS selector support}{
CSS selectors are translated to XPath selectors by the \pkg{selectr}
package, which is a port of the python \pkg{cssselect} library,
\url{https://pythonhosted.org/cssselect/}.
It implements the majority of CSS3 selectors, as described in
\url{https://www.w3.org/TR/2011/REC-css3-selectors-20110929/}. The
exceptions are listed below:
\itemize{
\item Pseudo selectors that require interactivity are ignored:
\verb{:hover}, \verb{:active}, \verb{:focus}, \verb{:target}, \verb{:visited}.
\item The following pseudo classes don't work with the wild card element, *:
\verb{*:first-of-type}, \verb{*:last-of-type}, \verb{*:nth-of-type},
\verb{*:nth-last-of-type}, \verb{*:only-of-type}
\item It supports \verb{:contains(text)}
\item You can use !=, \verb{[foo!=bar]} is the same as \verb{:not([foo=bar])}
\item \verb{:not()} accepts a sequence of simple selectors, not just a single
simple selector.
}
}
\examples{
html <- minimal_html("
  <h1>This is a heading</h1>
  <p id='first'>This is a paragraph</p>
  <p class='important'>This is an important paragraph</p>
")
html \%>\% html_element("h1")
html \%>\% html_elements("p")
html \%>\% html_elements(".important")
html \%>\% html_elements("#first")
# html_element() vs html_elements() --------------------------------------
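html <- minimal_html("
  <ul>
    <li><b>C-3PO</b> is a <i>droid</i> that weighs <span class='weight'>167 kg</span></li>
    <li><b>R2-D2</b> is a <i>droid</i> that weighs <span class='weight'>96 kg</span></li>
    <li><b>Yoda</b> weighs <span class='weight'>66 kg</span></li>
    <li><b>R4-P17</b> is a <i>droid</i></li>
  </ul>
")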
li <- html \%>\% html_elements("li")
# When applied to a node set, html_elements() returns all matching elements
# beneath any of the inputs, flattening results into a new node set.
li \%>\% html_elements("i")
# When applied to a node set, html_element() always returns a vector the
# same length as the input, using a "missing" element where needed.
li \%>\% html_element("i")
# and html_text() and html_attr() will return NA
li \%>\% html_element("i") \%>\% html_text2()
li \%>\% html_element("span") \%>\% html_attr("class")
}
rvest/man/repair_encoding.Rd 0000644 0001762 0000144 00000001231 14014035320 015574 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/encoding.R
\name{repair_encoding}
\alias{repair_encoding}
\title{Repair faulty encoding}
\usage{
repair_encoding(x, from = NULL)
}
\arguments{
\item{from}{The encoding that the string is actually in. If \code{NULL},
\code{guess_encoding} will be used.}
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}
This function has been deprecated because it doesn't work. Instead
re-read the HTML file with correct \code{encoding} argument.
}
\keyword{internal}
rvest/man/rvest-package.Rd 0000644 0001762 0000144 00000001435 14277721661 015233 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rvest-package.R
\docType{package}
\name{rvest-package}
\alias{rvest}
\alias{rvest-package}
\title{rvest: Easily Harvest (Scrape) Web Pages}
\description{
\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
Wrappers around the 'xml2' and 'httr' packages to make it easy to download, then manipulate, HTML and XML.
}
\seealso{
Useful links:
\itemize{
\item \url{https://rvest.tidyverse.org/}
\item \url{https://github.com/tidyverse/rvest}
\item Report bugs at \url{https://github.com/tidyverse/rvest/issues}
}
}
\author{
\strong{Maintainer}: Hadley Wickham \email{hadley@rstudio.com}
Other contributors:
\itemize{
\item RStudio [copyright holder, funder]
}
}
\keyword{internal}
rvest/man/figures/ 0000755 0001762 0000144 00000000000 13767413737 013656 5 ustar ligges users rvest/man/figures/lifecycle-defunct.svg 0000644 0001762 0000144 00000001704 14210000474 017734 0 ustar ligges users