dimRed/NAMESPACE
# Generated by roxygen2: do not edit by hand
export(AUC_lnK_R_NX)
export(AutoEncoder)
export(DRR)
export(DiffusionMaps)
export(DrL)
export(FastICA)
export(FruchtermanReingold)
export(HLLE)
export(Isomap)
export(KamadaKawai)
export(LCMC)
export(LLE)
export(LaplacianEigenmaps)
export(MDS)
export(NNMF)
export(PCA)
export(PCA_L1)
export(Q_NX)
export(Q_global)
export(Q_local)
export(R_NX)
export(UMAP)
export(dataSetList)
export(dimRedData)
export(dimRedMethodList)
export(dimRedQualityList)
export(dimRedResult)
export(distance_correlation)
export(embed)
export(getRotationMatrix)
export(installSuggests)
export(inverse)
export(kPCA)
export(loadDataSet)
export(mean_R_NX)
export(mixColor1Ramps)
export(mixColor2Ramps)
export(mixColor3Ramps)
export(mixColorRamps)
export(nMDS)
export(plot)
export(plot_R_NX)
export(predict)
export(quality)
export(reconstruction_error)
export(reconstruction_rmse)
export(tSNE)
export(total_correlation)
exportClasses(AutoEncoder)
exportClasses(DRR)
exportClasses(DiffusionMaps)
exportClasses(DrL)
exportClasses(FastICA)
exportClasses(FruchtermanReingold)
exportClasses(HLLE)
exportClasses(Isomap)
exportClasses(KamadaKawai)
exportClasses(LLE)
exportClasses(LaplacianEigenmaps)
exportClasses(MDS)
exportClasses(NNMF)
exportClasses(PCA)
exportClasses(PCA_L1)
exportClasses(UMAP)
exportClasses(dimRedData)
exportClasses(dimRedMethod)
exportClasses(dimRedResult)
exportClasses(kPCA)
exportClasses(nMDS)
exportClasses(tSNE)
exportMethods("[")
exportMethods(AUC_lnK_R_NX)
exportMethods(LCMC)
exportMethods(Q_NX)
exportMethods(Q_global)
exportMethods(Q_local)
exportMethods(R_NX)
exportMethods(as.data.frame)
exportMethods(as.dimRedData)
exportMethods(cophenetic_correlation)
exportMethods(distance_correlation)
exportMethods(embed)
exportMethods(getData)
exportMethods(getDimRedData)
exportMethods(getMeta)
exportMethods(getNDim)
exportMethods(getOrgData)
exportMethods(getOtherData)
exportMethods(getPars)
exportMethods(inverse)
exportMethods(maximize_correlation)
exportMethods(mean_R_NX)
exportMethods(ndims)
exportMethods(nrow)
exportMethods(plot)
exportMethods(predict)
exportMethods(print)
exportMethods(quality)
exportMethods(reconstruction_error)
exportMethods(reconstruction_rmse)
exportMethods(total_correlation)
import(DRR)
import(methods)
import(utils)
importFrom(grDevices,colorRamp)
importFrom(grDevices,rgb)
importFrom(graphics,plot)
importFrom(magrittr,"%>%")
importFrom(stats,predict)
dimRed/LICENSE

GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright © 2007 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.

Preamble
The GNU General Public License is a free, copyleft license for
software and other kinds of works.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
the GNU General Public License is intended to guarantee your freedom
to share and change all versions of a program--to make sure it remains
free software for all its users. We, the Free Software Foundation, use
the GNU General Public License for most of our software; it applies
also to any other work released this way by its authors. You can apply
it to your programs, too.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you
these rights or asking you to surrender the rights. Therefore, you
have certain responsibilities if you distribute copies of the
software, or if you modify it: responsibilities to respect the freedom
of others.
For example, if you distribute copies of such a program, whether
gratis or for a fee, you must pass on to the recipients the same
freedoms that you received. You must make sure that they, too, receive
or can get the source code. And you must show them these terms so they
know their rights.
Developers that use the GNU GPL protect your rights with two steps:
(1) assert copyright on the software, and (2) offer you this License
giving you legal permission to copy, distribute and/or modify it.
For the developers' and authors' protection, the GPL clearly explains
that there is no warranty for this free software. For both users' and
authors' sake, the GPL requires that modified versions be marked as
changed, so that their problems will not be attributed erroneously to
authors of previous versions.
Some devices are designed to deny users access to install or run
modified versions of the software inside them, although the
manufacturer can do so. This is fundamentally incompatible with the
aim of protecting users' freedom to change the software. The
systematic pattern of such abuse occurs in the area of products for
individuals to use, which is precisely where it is most
unacceptable. Therefore, we have designed this version of the GPL to
prohibit the practice for those products. If such problems arise
substantially in other domains, we stand ready to extend this
provision to those domains in future versions of the GPL, as needed to
protect the freedom of users.
Finally, every program is threatened constantly by software
patents. States should not allow patents to restrict development and
use of software on general-purpose computers, but in those that do, we
wish to avoid the special danger that patents applied to a free
program could make it effectively proprietary. To prevent this, the
GPL assures that patents cannot be used to render the program
non-free.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS

0. Definitions.
“This License” refers to version 3 of the GNU General Public License.
“Copyright” also means copyright-like laws that apply to other kinds
of works, such as semiconductor masks.
“The Program” refers to any copyrightable work licensed under this
License. Each licensee is addressed as “you”. “Licensees” and
“recipients” may be individuals or organizations.
To “modify” a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of
an exact copy. The resulting work is called a “modified version” of
the earlier work or a work “based on” the earlier work.
A “covered work” means either the unmodified Program or a work based
on the Program.
To “propagate” a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To “convey” a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user
through a computer network, with no transfer of a copy, is not
conveying.
An interactive user interface displays “Appropriate Legal Notices” to
the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The “source code” for a work means the preferred form of the work for
making modifications to it. “Object code” means any non-source form of
a work.
A “Standard Interface” means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The “System Libraries” of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
“Major Component”, in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The “Corresponding Source” for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users can
regenerate automatically from other parts of the Corresponding Source.
The Corresponding Source for a work in source code form is that same
work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not convey,
without conditions so long as your license otherwise remains in
force. You may convey covered works to others for the sole purpose of
having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under the
conditions stated below. Sublicensing is not allowed; section 10 makes
it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such
circumvention is effected by exercising rights under this License with
respect to the covered work, and you disclaim any intention to limit
operation or modification of the work as a means of enforcing, against
the work's users, your or third parties' legal rights to forbid
circumvention of technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these
conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to “keep
intact all notices”.
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
“aggregate” if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms of
sections 4 and 5, provided that you also convey the machine-readable
Corresponding Source under the terms of this License, in one of these
ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the Corresponding
Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission,
provided you inform other peers where the object code and
Corresponding Source of the work are being offered to the general
public at no charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A “User Product” is either (1) a “consumer product”, which means any
tangible personal property which is normally used for personal,
family, or household purposes, or (2) anything designed or sold for
incorporation into a dwelling. In determining whether a product is a
consumer product, doubtful cases shall be resolved in favor of
coverage. For a particular product received by a particular user,
“normally used” refers to a typical or common use of that class of
product, regardless of the status of the particular user or of the way
in which the particular user actually uses, or expects or is expected
to use, the product. A product is a consumer product regardless of
whether the product has substantial commercial, industrial or
non-consumer uses, unless such uses represent the only significant
mode of use of the product.
“Installation Information” for a User Product means any methods,
procedures, authorization keys, or other information required to
install and execute modified versions of a covered work in that User
Product from a modified version of its Corresponding Source. The
information must suffice to ensure that the continued functioning of
the modified object code is in no case prevented or interfered with
solely because modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or
updates for a work that has been modified or installed by the
recipient, or for the User Product in which it has been modified or
installed. Access to a network may be denied when the modification
itself materially and adversely affects the operation of the network
or violates the rules and protocols for communication across the
network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
“Additional permissions” are terms that supplement the terms of this
License by making exceptions from one or more of its
conditions. Additional permissions that are applicable to the entire
Program shall be treated as though they were included in this License,
to the extent that they are valid under applicable law. If additional
permissions apply only to part of the Program, that part may be used
separately under those permissions, but the entire Program remains
governed by this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders
of that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material,
or requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors
or authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions
of it) with contractual assumptions of liability to the recipient,
for any liability that these contractual assumptions directly
impose on those licensors and authors.
All other non-permissive additional terms are considered “further
restrictions” within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions; the
above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your license
from a particular copyright holder is reinstated (a) provisionally,
unless and until the copyright holder explicitly and finally
terminates your license, and (b) permanently, if the copyright holder
fails to notify you of the violation by some reasonable means prior to
60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or run
a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An “entity transaction” is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A “contributor” is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's “contributor version”.
A contributor's “essential patent claims” are all patent claims owned
or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, “control” includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a “patent license” is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To “grant” such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. “Knowingly relying” means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is “discriminatory” if it does not include within the
scope of its coverage, prohibits the exercise of, or is conditioned on
the non-exercise of one or more of the rights that are specifically
granted under this License. You may not convey a covered work if you
are a party to an arrangement with a third party that is in the
business of distributing software, under which you make payment to the
third party based on the extent of your activity of conveying the
work, and under which the third party grants, to any of the parties
who would receive the covered work from you, a discriminatory patent
license (a) in connection with copies of the covered work conveyed by
you (or copies made from those copies), or (b) primarily for and in
connection with specific products or compilations that contain the
covered work, unless you entered into that arrangement, or that patent
license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under
this License and any other pertinent obligations, then as a
consequence you may not convey it at all. For example, if you agree to
terms that obligate you to collect a royalty for further conveying
from those to whom you convey the Program, the only way you could
satisfy both those terms and this License would be to refrain entirely
from conveying the Program.
13. Use with the GNU Affero General Public License.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU Affero General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the special requirements of the GNU Affero General Public License,
section 13, concerning interaction through a network will apply to the
combination as such.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions
of the GNU General Public License from time to time. Such new versions
will be similar in spirit to the present version, but may differ in
detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Program
specifies that a certain numbered version of the GNU General Public
License “or any later version” applies to it, you have the option of
following the terms and conditions either of that numbered version or
of any later version published by the Free Software Foundation. If the
Program does not specify a version number of the GNU General Public
License, you may choose any version ever published by the Free
Software Foundation.
If the Program specifies that a proxy can decide which future versions
of the GNU General Public License can be used, that proxy's public
statement of acceptance of a version permanently authorizes you to
choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT
WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND
PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR
CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT
NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR
LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM
TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER
PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

dimRed/man/R_NX-dimRedResult-method.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{R_NX,dimRedResult-method}
\alias{R_NX,dimRedResult-method}
\alias{R_NX}
\title{Method R_NX}
\usage{
\S4method{R_NX}{dimRedResult}(object, ndim = getNDim(object))
}
\arguments{
\item{object}{of class dimRedResult}
\item{ndim}{the number of dimensions to take from the embedded data.}
}
\description{
Calculate the R_NX score from Lee et al. (2013), which shows the
neighborhood preservation for the K-ary neighborhoods, corrected for
random point distributions and scaled to the range [0, 1].
}
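\examples{
## A minimal sketch; the quality functions may need suggested
## packages, see installSuggests(). Data set and method are
## illustrative choices.
\dontrun{
dat <- loadDataSet("3D S Curve", n = 300)
emb <- embed(dat, "PCA")
r_nx <- R_NX(emb)
plot(r_nx, type = "l", xlab = "K", ylab = "R_NX")
}
}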
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/Isomap-class.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/isomap.R
\docType{class}
\name{Isomap-class}
\alias{Isomap-class}
\alias{Isomap}
\title{Isomap embedding}
\description{
An S4 Class implementing the Isomap Algorithm
}
\details{
The Isomap algorithm approximates a manifold using geodesic
distances on a k nearest neighbor graph. Then classical scaling is
performed on the resulting distance matrix.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the \code{embed} function and
parameters can be given via \code{...}, in which case
missing parameters will be replaced by the ones in
\code{@stdpars}; see the end of the examples for a sketch of the
direct approach.
}
\section{Parameters}{
Isomap can take the following parameters:
\describe{
\item{knn}{The number of nearest neighbors in the graph. Defaults to 50.}
\item{ndim}{The number of embedding dimensions, defaults to 2.}
\item{get_geod}{Should the geodesic distance matrix be kept?
If \code{TRUE}, access it as \code{getOtherData(x)$geod}.}
}
}
\section{Implementation}{
The dimRed package uses its own implementation of Isomap which also
comes with an out-of-sample extension (known as landmark
Isomap). The classical Isomap algorithm does not scale well
computationally; the implementation here uses \code{\link[RANN]{nn2}} for
a faster search of the nearest neighbors. If the data are too large, it
may be useful to fit a subsample of the data and use the
out-of-sample extension for the remaining points.
}
\examples{
dat <- loadDataSet("3D S Curve", n = 500)
emb <- embed(dat, "Isomap", knn = 10)
plot(emb)
## out-of-sample extension: fit a subsample, then predict the rest:
samp <- sample(nrow(dat), size = 200)
emb2 <- embed(dat[samp], "Isomap", .mute = NULL, knn = 10)
emb3 <- predict(emb2, dat[-samp])
plot(emb2, type = "2vars")
plot(emb3, type = "2vars")
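## direct use of the S4 class, as described under "General usage"
## (a sketch; pars must contain every parameter listed above):
ism <- Isomap()
emb4 <- ism@fun(dat, pars = list(knn = 10, ndim = 2, get_geod = FALSE),
                keep.org.data = TRUE)
plot(emb4, type = "2vars")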
}
\references{
Tenenbaum, J.B., Silva, V. de, Langford, J.C., 2000. A Global Geometric
Framework for Nonlinear Dimensionality Reduction. Science 290, 2319-2323.
https://doi.org/10.1126/science.290.5500.2319
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/AutoEncoder-class.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/autoencoder.R
\docType{class}
\name{AutoEncoder-class}
\alias{AutoEncoder-class}
\alias{AutoEncoder}
\title{AutoEncoder}
\description{
An S4 Class implementing an Autoencoder
}
\details{
Autoencoders are neural networks that try to reproduce their input. Consider
this method unstable, as the internals may still be changed.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the \code{embed} function and
parameters can be given via \code{...}, in which case
missing parameters will be replaced by the ones in
\code{@stdpars}.
}
\section{Parameters}{
Autoencoder can take the following parameters:
\describe{
\item{ndim}{The number of dimensions for reduction.}
\item{n_hidden}{The number of neurons in the hidden
layers; the length specifies the number of layers,
the length must be odd, and the middle number must
be the same as ndim.}
\item{activation}{The activation functions for the layers,
one of "tanh", "sigmoid", "relu", "elu"; everything
else will silently be ignored and there will be no
activation function for the layer.}
\item{weight_decay}{The coefficient for weight decay;
set to 0 if no weight decay is desired.}
\item{learning_rate}{The learning rate for gradient descent.}
\item{graph}{Optional: A list of bits and pieces that define the
autoencoder in tensorflow, see details.}
\item{keras_graph}{Optional: A list of keras layers that define
the encoder and decoder; specifying this will ignore all
other topology-related variables, see details.}
\item{batchsize}{If \code{NA}, all data will be used for training,
else only a random subset of size batchsize will be used.}
\item{n_steps}{The number of training steps.}
}
}
\section{Details}{
There are several ways to specify an autoencoder; the simplest is to pass the
number of neurons per layer in \code{n_hidden}. This must be a vector of
integers of odd length, it must be symmetric, and the middle number must
be equal to \code{ndim}. For every layer an activation function can be
specified with \code{activation}.
For regularization, weight decay can be specified by setting
\code{weight_decay} > 0.
Currently only a gradient descent optimizer is used; the learning rate can be
specified by setting \code{learning_rate}.
The learner can operate on batches if \code{batchsize} is not \code{NA}.
The number of steps the learner uses is specified using \code{n_steps}.
}
\section{Further training a model}{
If the model did not converge in the first training phase or training with
different data is desired, the \code{\link{dimRedResult}} object may be
passed as the \code{autoencoder} parameter; in this case all topology-related
parameters will be ignored.
}
\section{Using Keras layers}{
The encoder and decoder part can be specified using a list of \pkg{keras}
layers. This requires a list with two entries: \code{encoder} should contain
a list of keras layers \emph{without} the \code{\link[keras]{layer_input}}
that will be concatenated in order to form the encoder part;
\code{decoder} should be defined accordingly, and the output of
\code{decoder} must have the same number of dimensions as the input
data. A hypothetical sketch is included at the end of the examples.
}
\section{Using Tensorflow}{
The model can be entirely defined in \pkg{tensorflow}; it must be a
list with the following entries:
\describe{
\item{encoder}{A tensor that defines the encoder.}
\item{decoder}{A tensor that defines the decoder.}
\item{network}{A tensor that defines the reconstruction (encoder + decoder).}
\item{loss}{A tensor that calculates the loss (network + loss function).}
\item{in_data}{A \code{placeholder} that points to the data input of
the network and of the encoder.}
\item{in_decoder}{A \code{placeholder} that points to the input of
the decoder.}
\item{session}{A \pkg{tensorflow} \code{Session} object that holds
the values of the tensors.}
}
}
\section{Implementation}{
Uses \pkg{tensorflow} as a backend; for details and problems relating
to tensorflow, see \url{https://tensorflow.rstudio.com}.
}
\examples{
\dontrun{
dat <- loadDataSet("3D S Curve")
emb <- embed(dat, "AutoEncoder")
# predicting is possible:
samp <- sample(floor(nrow(dat) / 10))
emb2 <- embed(dat[samp], "AutoEncoder")
emb3 <- predict(emb2, dat[-samp])
plot(emb, type = "2vars")
plot(emb2, type = "2vars")
points(getData(emb3))
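
## a hypothetical sketch of the keras_graph parameter described in
## the "Using Keras layers" section; layer sizes are illustrative:
emb4 <- embed(dat, "AutoEncoder",
              keras_graph = list(
                encoder = list(keras::layer_dense(units = 10, activation = "tanh"),
                               keras::layer_dense(units = 2)),
                decoder = list(keras::layer_dense(units = 10, activation = "tanh"),
                               keras::layer_dense(units = ncol(getData(dat))))))
plot(emb4, type = "2vars")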
}
}
\seealso{
Other dimensionality reduction methods:
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/mixColorRamps.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mixColorSpaces.R
\name{mixColorRamps}
\alias{mixColorRamps}
\alias{mixColor1Ramps}
\alias{mixColor2Ramps}
\alias{mixColor3Ramps}
\title{Mixing color ramps}
\usage{
mixColorRamps(vars, ramps)
mixColor1Ramps(vars, ramps = colorRamp(c("blue", "black", "red")))
mixColor2Ramps(
vars,
ramps = list(colorRamp(c("blue", "green")), colorRamp(c("blue", "red")))
)
mixColor3Ramps(
vars,
ramps = list(colorRamp(c("#001A00", "#00E600")), colorRamp(c("#00001A", "#0000E6")),
colorRamp(c("#1A0000", "#E60000")))
)
}
\arguments{
\item{vars}{a list of variables}
\item{ramps}{a list of color ramps, one for each variable.}
}
\description{
Mix different color ramps.
}
\details{
Automatically create colors to represent a varying number of
dimensions.
}
\examples{
cols <- expand.grid(x = seq(0, 1, length.out = 10),
y = seq(0, 1, length.out = 10),
z = seq(0, 1, length.out = 10))
mixed <- mixColor3Ramps(cols)
\dontrun{
library(rgl)
plot3d(cols$x, cols$y, cols$z, col = mixed, pch = 15)
}
cols <- expand.grid(x = seq(0, 1, length.out = 10),
                    y = seq(0, 1, length.out = 10))
mixed <- mixColor2Ramps(cols)
plot(cols$x, cols$y, col = mixed, pch = 15)
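## a single variable maps onto one ramp (sketch; variable name is
## illustrative):
v <- data.frame(x = seq(0, 1, length.out = 100))
plot(v$x, rep(1, 100), col = mixColor1Ramps(v), pch = 15)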
}
dimRed/man/mean_R_NX-dimRedResult-method.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{mean_R_NX,dimRedResult-method}
\alias{mean_R_NX,dimRedResult-method}
\alias{mean_R_NX}
\title{Method mean_R_NX}
\usage{
\S4method{mean_R_NX}{dimRedResult}(object)
}
\arguments{
\item{object}{of class dimRedResult}
}
\description{
Calculate the mean_R_NX score to assess the quality of a dimensionality reduction.
}
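\examples{
## A minimal sketch; may need suggested packages, see
## installSuggests(). Data set and method are illustrative choices.
\dontrun{
dat <- loadDataSet("3D S Curve", n = 300)
emb <- embed(dat, "PCA")
mean_R_NX(emb)
}
}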
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/getRotationMatrix.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/get_info.R
\name{getRotationMatrix}
\alias{getRotationMatrix}
\title{getRotationMatrix}
\usage{
getRotationMatrix(x)
}
\arguments{
\item{x}{of type \code{\link{dimRedResult}}}
}
\value{
a matrix
}
\description{
Extract the rotation matrix from \code{\link{dimRedResult}} objects derived from PCA and FastICA.
}
\details{
The data has to be pre-processed the same way as the method does, e.g.
centering and/or scaling.
}
\examples{
dat <- loadDataSet("Iris")
pca <- embed(dat, "PCA")
ica <- embed(dat, "FastICA")
rot_pca <- getRotationMatrix(pca)
rot_ica <- getRotationMatrix(ica)
scale(getData(dat), TRUE, FALSE) \%*\% rot_pca - getData(getDimRedData(pca))
scale(getData(dat), TRUE, FALSE) \%*\% rot_ica - getData(getDimRedData(ica))
}
\concept{convenience functions}
dimRed/man/distance_correlation-dimRedResult-method.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{distance_correlation,dimRedResult-method}
\alias{distance_correlation,dimRedResult-method}
\alias{distance_correlation}
\title{Method distance_correlation}
\usage{
\S4method{distance_correlation}{dimRedResult}(object)
}
\arguments{
\item{object}{of class dimRedResult}
}
\description{
Calculate the distance correlation between the distance matrices in
high and low dimensional space.
}
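\examples{
## A minimal sketch; may need a suggested package, see
## installSuggests(). Data set and method are illustrative choices.
\dontrun{
dat <- loadDataSet("3D S Curve", n = 300)
emb <- embed(dat, "PCA")
distance_correlation(emb)
}
}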
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/tSNE-class.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tsne.R
\docType{class}
\name{tSNE-class}
\alias{tSNE-class}
\alias{tSNE}
\title{t-Distributed Stochastic Neighborhood Embedding}
\description{
An S4 Class for t-SNE.
}
\details{
t-SNE embeds the data by minimizing the Kullback-Leibler divergence
between neighborhood probability distributions derived from the
distance matrices in high- and low-dimensional space. The method is
very well suited to visualize complex structures in low dimensions.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the \code{embed} function and
parameters can be given via \code{...}, in which case
missing parameters will be replaced by the ones in
\code{@stdpars}.
}
\section{Parameters}{
t-SNE can take the following parameters:
\describe{
\item{d}{A distance function, defaults to Euclidean distances.}
\item{perplexity}{The perplexity parameter, roughly equivalent to neighborhood size.}
\item{theta}{Approximation parameter for the nearest neighbor search; larger values are less accurate.}
\item{ndim}{The number of embedding dimensions.}
}
}
\section{Implementation}{
Wraps around \code{\link[Rtsne]{Rtsne}}, which is very well
documented. Setting \code{theta = 0} performs an exact t-SNE; values
\code{0 < theta < 1} use the Barnes-Hut approximation, which
scales much better with data size. Larger values for perplexity take
larger neighborhoods into account.
}
\examples{
\dontrun{
dat <- loadDataSet("3D S Curve", n = 300)
emb <- embed(dat, "tSNE", perplexity = 80)
plot(emb, type = "2vars")
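## theta = 0 computes an exact t-SNE, slower but more accurate
## (a sketch, see the Implementation section):
emb_exact <- embed(dat, "tSNE", theta = 0, perplexity = 30)
plot(emb_exact, type = "2vars")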
}
}
\references{
Maaten, L. van der, 2014. Accelerating t-SNE using Tree-Based
Algorithms. Journal of Machine Learning Research 15, 3221-3245.
van der Maaten, L., Hinton, G., 2008. Visualizing Data using
t-SNE. J. Mach. Learn. Res. 9, 2579-2605.
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/LCMC-dimRedResult-method.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{LCMC,dimRedResult-method}
\alias{LCMC,dimRedResult-method}
\alias{LCMC}
\title{Method LCMC}
\usage{
\S4method{LCMC}{dimRedResult}(object)
}
\arguments{
\item{object}{of class dimRedResult}
}
\description{
Calculates the Local Continuity Meta Criterion, which is
\code{\link{Q_NX}} adjusted for random overlap inside the K-ary
neighborhood.
}
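\examples{
## A minimal sketch; may need suggested packages, see
## installSuggests(). Data set and method are illustrative choices.
\dontrun{
dat <- loadDataSet("3D S Curve", n = 300)
emb <- embed(dat, "PCA")
lcmc <- LCMC(emb)
plot(lcmc, type = "l", xlab = "K", ylab = "LCMC")
}
}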
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/getOtherData.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{getOtherData}
\alias{getOtherData}
\title{Method getOtherData}
\usage{
getOtherData(object, ...)
}
\arguments{
\item{object}{The object to extract data from.}
\item{...}{other arguments.}
}
\description{
Extract other data produced by a dimRedMethod.
}
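\examples{
## Sketch: Isomap can store the geodesic distance matrix,
## see ?Isomap for the get_geod parameter:
dat <- loadDataSet("3D S Curve", n = 200)
emb <- embed(dat, "Isomap", knn = 10, get_geod = TRUE)
geod <- getOtherData(emb)$geod
}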
dimRed/man/DiffusionMaps-class.Rd
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/diffmap.R
\docType{class}
\name{DiffusionMaps-class}
\alias{DiffusionMaps-class}
\alias{DiffusionMaps}
\title{Diffusion Maps}
\description{
An S4 Class implementing Diffusion Maps
}
\details{
Diffusion Maps uses a diffusion probability matrix to robustly
approximate a manifold.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the \code{embed} function and
parameters can be given via \code{...}, in which case
missing parameters will be replaced by the ones in
\code{@stdpars}.
}
\section{Parameters}{
Diffusion Maps can take the following parameters:
\describe{
\item{d}{a function transforming a matrix row-wise into a
distance matrix or \code{dist} object,
e.g. \code{\link[stats]{dist}}.}
\item{ndim}{The number of dimensions.}
\item{eps}{The epsilon parameter that determines the
diffusion weight matrix from a distance matrix \code{d},
\eqn{exp(-d^2/eps)}, if set to \code{"auto"} it will
be set to the median distance to the 0.01*n nearest
neighbor.}
\item{t}{Time-scale parameter. The recommended value, 0,
uses multiscale geometry.}
\item{delta}{Sparsity cut-off for the symmetric graph Laplacian;
a higher value results in more sparsity and faster calculation.
The default value is 10^-5.}
}
}
\section{Implementation}{
Wraps around \code{\link[diffusionMap]{diffuse}}, see there for
details. It uses the notation of Richards et al. (2009) which is
slightly different from the one in the original paper (Coifman and
Lafon, 2006) and there is no \eqn{\alpha} parameter.
There is also an out-of-sample extension, see examples.
}
\examples{
dat <- loadDataSet("3D S Curve", n = 300)
emb <- embed(dat, "DiffusionMaps")
plot(emb, type = "2vars")
# predicting is possible:
samp <- sample(floor(nrow(dat) / 10))
emb2 <- embed(dat[samp], "DiffusionMaps")
emb3 <- predict(emb2, dat[-samp])
plot(emb2, type = "2vars")
points(getData(emb3))
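## explicit parameters (a sketch): t = 0 is the recommended
## multiscale setting, see the Parameters section:
emb4 <- embed(dat, "DiffusionMaps", ndim = 2, t = 0)
plot(emb4, type = "2vars")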
}
\references{
Richards, J.W., Freeman, P.E., Lee, A.B., Schafer,
C.M., 2009. Exploiting Low-Dimensional Structure in
Astronomical Spectra. ApJ 691,
32. doi:10.1088/0004-637X/691/1/32
Coifman, R.R., Lafon, S., 2006. Diffusion maps. Applied and
Computational Harmonic Analysis 21,
5-30. doi:10.1016/j.acha.2006.04.006
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/maximize_correlation-dimRedResult-method.Rd 0000644 0001762 0000144 00000001757 13753034327 022644 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/rotate.R
\name{maximize_correlation,dimRedResult-method}
\alias{maximize_correlation,dimRedResult-method}
\alias{maximize_correlation}
\title{Maximize Correlation with the Axes}
\usage{
\S4method{maximize_correlation}{dimRedResult}(
object,
naxes = ncol(object@data@data),
cor_method = "pearson"
)
}
\arguments{
\item{object}{A dimRedResult object}
\item{naxes}{the number of axes to optimize for.}
\item{cor_method}{which correlation method to use}
}
\description{
Rotates the data in such a way that the correlation with the first
\code{naxes} axes is maximized.
}
\details{
Methods that do not use eigenvector decomposition, like t-SNE, often
do not align the data with the axes according to the correlation of
the variables with the data. \code{maximize_correlation} uses the
\code{\link[optimx]{optimx}} package to rotate the data in such a
way that the original variables have maximum correlation with the
embedding axes.
}
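\examples{
\dontrun{
## A sketch: rotate a t-SNE embedding so the original variables
## correlate maximally with the embedding axes (assumes the
## suggested Rtsne and optimx packages are installed):
emb <- embed(loadDataSet("Iris"), "tSNE")
emb_rot <- maximize_correlation(emb)
}
}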
dimRed/man/dimRedMethod-class.Rd 0000644 0001762 0000144 00000003407 13753034327 016203 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dimRedMethod-class.R
\docType{class}
\name{dimRedMethod-class}
\alias{dimRedMethod-class}
\title{Class "dimRedMethod"}
\description{
A virtual class "dimRedMethod" to serve as a template to implement
methods for dimensionality reduction.
}
\details{
Implementations of dimensionality reductions should inherit from
this class.
The \code{fun} slot should be a function that takes three arguments
\describe{
\item{data}{An object of class \code{\link{dimRedData}}.}
\item{pars}{A list with the standard parameters.}
\item{keep.org.data}{Logical. If the original data should be kept in the output.}
}
and returns an object of class \code{\link{dimRedResult}}.
The \code{stdpars} slot should take a list that contains standard
parameters for the implemented methods.
This way the method can be called by \code{embed(data, "method-name",
...)}, where \code{...} can be used to change single parameters.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding.}
\item{\code{stdpars}}{A list with the default parameters for the \code{fun}
slot.}
}}
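\examples{
## A minimal sketch of the two calling conventions described in the
## details, using the PCA method shipped with dimRed:
dat <- loadDataSet("Iris")
## via embed(), missing parameters are filled in from @stdpars:
emb1 <- embed(dat, "PCA", ndim = 2)
## directly, handing a complete parameter list to the @fun slot:
pca <- new("PCA")
emb2 <- pca@fun(dat, pca@stdpars)
}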
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/kPCA-class.Rd 0000644 0001762 0000144 00000005014 13753034327 014410 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kpca.R
\docType{class}
\name{kPCA-class}
\alias{kPCA-class}
\alias{kPCA}
\title{Kernel PCA}
\description{
An S4 Class implementing Kernel PCA
}
\details{
Kernel PCA is a nonlinear extension of PCA using kernel methods.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the embed function and
parameters can be given to \code{...}, in which case missing
parameters will be replaced by the ones in \code{@stdpars}.
}
\section{Parameters}{
Kernel PCA can take the following parameters:
\describe{
\item{ndim}{the number of output dimensions, defaults to 2}
\item{kernel}{The kernel function, either as a function or a
character vector with the name of the kernel. Defaults to
\code{"rbfdot"}}
\item{kpar}{A list with the parameters for the kernel function,
defaults to \code{list(sigma = 0.1)}}
}
The most comprehensive collection of kernel functions can be found in
\code{\link[kernlab]{kpca}}. In case the function does not take any
parameters \code{kpar} has to be an empty list.
}
\section{Implementation}{
Wraps around \code{\link[kernlab]{kpca}}, but provides additionally
forward and backward projections.
}
\examples{
\dontrun{
dat <- loadDataSet("3D S Curve")
emb <- embed(dat, "kPCA")
plot(emb, type = "2vars")
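## A sketch with a kernel that takes no parameters; kpar must then
## be an empty list (see the Parameters section):
emb2 <- embed(dat, "kPCA", kernel = "vanilladot", kpar = list())
plot(emb2, type = "2vars")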
}
}
\references{
Sch\"olkopf, B., Smola, A., M\"uller, K.-R., 1998. Nonlinear Component Analysis
as a Kernel Eigenvalue Problem. Neural Computation 10, 1299-1319.
https://doi.org/10.1162/089976698300017467
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/dimRedData-class.Rd 0000644 0001762 0000144 00000006025 13753034327 015633 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dimRedData-class.R
\docType{class}
\name{dimRedData-class}
\alias{dimRedData-class}
\alias{dimRedData}
\alias{as.data.frame,dimRedData-method}
\alias{getData,dimRedData-method}
\alias{getMeta,dimRedData-method}
\alias{nrow,dimRedData-method}
\alias{[,dimRedData,ANY,ANY,ANY-method}
\alias{ndims,dimRedData-method}
\title{Class "dimRedData"}
\usage{
\S4method{as.data.frame}{dimRedData}(x, meta.prefix = "meta.", data.prefix = "")
\S4method{getData}{dimRedData}(object)
\S4method{getMeta}{dimRedData}(object)
\S4method{nrow}{dimRedData}(x)
\S4method{[}{dimRedData,ANY,ANY,ANY}(x, i)
\S4method{ndims}{dimRedData}(object)
}
\arguments{
\item{x}{Of class dimRedData}
\item{meta.prefix}{Prefix for the columns of the meta data names.}
\item{data.prefix}{Prefix for the columns of the variable names.}
\item{object}{Of class dimRedData.}
\item{i}{a valid index for subsetting rows.}
}
\description{
A class to hold data for dimensionality reduction and methods.
}
\details{
The class has two slots, \code{data} and \code{meta}. The
\code{data} slot contains a \code{numeric matrix} with variables in
columns and observations in rows. The \code{meta} slot may contain
a \code{data.frame} with additional information. Both slots need to
have the same number of rows or the \code{meta} slot needs to
contain an empty \code{data.frame}.
See examples for easy conversion from and to \code{data.frame}.
For plotting functions see \code{\link{plot.dimRedData}}.
}
\section{Methods (by generic)}{
\itemize{
\item \code{as.data.frame}: convert to data.frame
\item \code{getData}: Get the data slot.
\item \code{getMeta}: Get the meta slot.
\item \code{nrow}: Get the number of observations.
\item \code{[}: Subset rows.
\item \code{ndims}: Extract the number of Variables from the data.
}}
\section{Slots}{
\describe{
\item{\code{data}}{of class \code{matrix}, holds the data, observations in
rows, variables in columns}
\item{\code{meta}}{of class \code{data.frame}, holds meta data such as
classes, internal manifold coordinates, or simply additional
data of the data set. Must have the same number of rows as the
\code{data} slot or be an empty data frame.}
}}
\examples{
## Load an example data set:
s3d <- loadDataSet("3D S Curve")
## Create using a constructor:
### without meta information:
dimRedData(iris[, 1:4])
### with meta information:
dimRedData(iris[, 1:4], iris[, 5])
### using slot names:
dimRedData(data = iris[, 1:4], meta = iris[, 5])
## Convert to a dimRedData objects:
Iris <- as(iris[, 1:4], "dimRedData")
## Convert to data.frame:
head(as(s3d, "data.frame"))
head(as.data.frame(s3d))
head(as.data.frame(as(iris[, 1:4], "dimRedData")))
## Extract slots:
head(getData(s3d))
head(getMeta(s3d))
## Get the number of observations:
nrow(s3d)
## Subset:
s3d[1:5, ]
## Shuffle data:
s3 <- s3d[sample(nrow(s3d))]
## Get the number of variables:
ndims(s3d)
}
\seealso{
Other dimRedData:
\code{\link{as.dimRedData}()}
}
\concept{dimRedData}
dimRed/man/dataSets.Rd 0000644 0001762 0000144 00000002534 13065033470 014275 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataSets.R
\name{dataSets}
\alias{dataSets}
\alias{loadDataSet}
\alias{dataSetList}
\title{Example Data Sets for dimensionality reduction}
\usage{
loadDataSet(name = dataSetList(), n = 2000, sigma = 0.05)
dataSetList()
}
\arguments{
\item{name}{A character vector that specifies the name of the data
set.}
\item{n}{In generated data sets the number of points to be
generated, else ignored.}
\item{sigma}{In generated data sets the standard deviation of the
noise added, else ignored.}
}
\value{
\code{loadDataSet} returns an object of class
\code{\link{dimRedData}}; \code{dataSetList()} returns a
character vector with the names of the implemented data sets.
}
\description{
A compilation of standard data sets that are often being used to
showcase dimensionality reduction techniques.
}
\details{
The argument \code{name} should be one of
\code{dataSetList()}. Partial matching is possible, see
\code{\link{match.arg}}. Generated data sets contain the internal
coordinates of the manifold in the \code{meta} slot. Call
\code{dataSetList()} to see what data sets are available.
}
\examples{
## a list of available data sets:
dataSetList()
## Load a data set:
swissRoll <- loadDataSet("Swiss Roll")
\donttest{plot(swissRoll, type = "3vars")}
## Load Iris data set, partial matching:
loadDataSet("I")
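## Generated data sets carry the internal manifold coordinates in
## the meta slot:
head(getMeta(swissRoll))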
}
dimRed/man/Q_global-dimRedResult-method.Rd 0000644 0001762 0000144 00000002144 13753034327 020127 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{Q_global,dimRedResult-method}
\alias{Q_global,dimRedResult-method}
\alias{Q_global}
\title{Method Q_global}
\usage{
\S4method{Q_global}{dimRedResult}(object)
}
\arguments{
\item{object}{of class dimRedResult}
}
\description{
Calculate the Q_global score to assess the quality of a dimensionality reduction.
}
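\examples{
\dontrun{
## A minimal sketch (assumes the suggested coRanking package is
## installed, which the rank-based scores rely on):
emb <- embed(loadDataSet("Iris"), "PCA")
Q_global(emb)
}
}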
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/installSuggests.Rd 0000644 0001762 0000144 00000001125 13065033470 015713 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{installSuggests}
\alias{installSuggests}
\title{Install suggested packages}
\usage{
installSuggests()
}
\description{
Install packages which are suggested by dimRed.
}
\details{
By default dimRed will not install all the dependencies, because
there are quite a lot and in case some of them are not available
for your platform you will not be able to install dimRed without
problems.
To solve this I provide a function which automatically installs
all the suggested packages.
}
\examples{
\dontrun{
installSuggests()
}
}
dimRed/man/cophenetic_correlation-dimRedResult-method.Rd 0000644 0001762 0000144 00000002442 13753034327 023132 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{cophenetic_correlation,dimRedResult-method}
\alias{cophenetic_correlation,dimRedResult-method}
\alias{cophenetic_correlation}
\title{Method cophenetic_correlation}
\usage{
\S4method{cophenetic_correlation}{dimRedResult}(object, d = stats::dist, cor_method = "pearson")
}
\arguments{
\item{object}{of class dimRedResult}
\item{d}{the distance function to use.}
\item{cor_method}{The correlation method.}
}
\description{
Calculate the correlation between the distance matrices in high and
low dimensional space.
}
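\examples{
\dontrun{
## A minimal sketch using the defaults (stats::dist and Pearson
## correlation):
emb <- embed(loadDataSet("Iris"), "PCA")
cophenetic_correlation(emb)
}
}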
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/LaplacianEigenmaps-class.Rd 0000644 0001762 0000144 00000004150 13562225201 017336 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/leim.R
\docType{class}
\name{LaplacianEigenmaps-class}
\alias{LaplacianEigenmaps-class}
\alias{LaplacianEigenmaps}
\title{Laplacian Eigenmaps}
\description{
An S4 Class implementing Laplacian Eigenmaps
}
\details{
Laplacian Eigenmaps use a kernel and were originally developed to
separate non-convex clusters under the name spectral clustering.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the embed function and
parameters can be given to \code{...}, in which case missing
parameters will be replaced by the ones in \code{@stdpars}.
}
\section{Parameters}{
\code{LaplacianEigenmaps} can take the following parameters:
\describe{
\item{ndim}{the number of output dimensions.}
\item{sparse}{A character vector specifying how to make the graph
    sparse, \code{"knn"} means that a K-nearest neighbor graph is
    constructed, \code{"eps"} means that an epsilon-neighborhood graph
    is constructed, else a dense distance matrix is used.}
\item{knn}{The number of nearest neighbors to use for the knn graph.}
\item{eps}{The distance for the epsilon neighborhood graph.}
\item{t}{Parameter for the transformation of the distance matrix
by \eqn{w=exp(-d^2/t)}, larger values give less weight to
differences in distance, \code{t == Inf} treats all distances != 0 equally.}
\item{norm}{logical, should the normed Laplacian be used?}
}
}
\section{Implementation}{
Wraps around \code{\link[loe]{spec.emb}}.
}
\examples{
dat <- loadDataSet("3D S Curve")
emb <- embed(dat, "LaplacianEigenmaps")
plot(emb@data@data)
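\dontrun{
## A sketch with an epsilon-neighborhood graph instead of knn; the
## eps value is chosen arbitrarily for illustration:
emb2 <- embed(dat, "LaplacianEigenmaps", sparse = "eps", eps = 1)
plot(emb2@data@data)
}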
}
\references{
Belkin, M., Niyogi, P., 2003. Laplacian Eigenmaps for
Dimensionality Reduction and Data Representation. Neural
Computation 15, 1373.
}
dimRed/man/getDimRedData.Rd 0000644 0001762 0000144 00000000502 13065033470 015154 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{getDimRedData}
\alias{getDimRedData}
\title{Method getDimRedData}
\usage{
getDimRedData(object, ...)
}
\arguments{
\item{object}{The object to extract data from.}
\item{...}{other arguments.}
}
\description{
Extract dimRedData.
}
dimRed/man/total_correlation-dimRedResult-method.Rd 0000644 0001762 0000144 00000002631 13753034327 022134 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{total_correlation,dimRedResult-method}
\alias{total_correlation,dimRedResult-method}
\alias{total_correlation}
\title{Method total_correlation}
\usage{
\S4method{total_correlation}{dimRedResult}(
object,
naxes = ndims(object),
cor_method = "pearson",
is.rotated = FALSE
)
}
\arguments{
\item{object}{of class dimRedResult}
\item{naxes}{the number of axes to use for optimization.}
\item{cor_method}{the correlation method to use.}
\item{is.rotated}{if \code{FALSE} the object is rotated.}
}
\description{
Calculate the total correlation of the variables with the axes to
assess the quality of a dimensionality reduction.
}
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/getNDim.Rd 0000644 0001762 0000144 00000000510 13141064362 014043 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{getNDim}
\alias{getNDim}
\title{Method getNDim}
\usage{
getNDim(object, ...)
}
\arguments{
\item{object}{The object to get the dimensions from.}
\item{...}{other arguments.}
}
\description{
Extract the number of embedding dimensions.
}
dimRed/man/KamadaKawai-class.Rd 0000644 0001762 0000144 00000005613 13753034327 015772 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/graph_embed.R
\docType{class}
\name{KamadaKawai-class}
\alias{KamadaKawai-class}
\alias{KamadaKawai}
\title{Graph Embedding via the Kamada Kawai Algorithm}
\description{
An S4 Class implementing the Kamada Kawai Algorithm for graph embedding.
}
\details{
Graph embedding algorithms see the data as a graph. Between the
nodes of the graph exist attracting and repelling forces which can
be modeled as electrical fields or springs connecting the
nodes. The graph is then forced into a lower dimensional
representation that tries to represent the forces between the nodes
accurately by minimizing the total energy of the attracting and
repelling forces.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the embed function and
parameters can be given to \code{...}, in which case missing
parameters will be replaced by the ones in \code{@stdpars}.
}
\section{Parameters}{
KamadaKawai can take the following parameters:
\describe{
\item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3}
\item{knn}{Reduce the graph to keep only the nearest neighbors. Defaults to 100.}
\item{d}{The distance function to determine the weights of the graph edges. Defaults to euclidean distances.}
}
}
\section{Implementation}{
Wraps around \code{\link[igraph]{layout_with_kk}}. The parameters
maxiter, epsilon and kkconst are set to the default values and
cannot be changed; this may change in a future release. The dimRed
package adds an extra sparsity parameter by constructing a knn
graph, which may also improve visualization quality.
}
\examples{
dat <- loadDataSet("Swiss Roll", n = 200)
emb <- embed(dat, "KamadaKawai")
plot(emb, type = "2vars")
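\dontrun{
## A sketch with a sparser knn graph; the value is chosen for
## illustration only:
emb2 <- embed(dat, "KamadaKawai", knn = 15)
plot(emb2, type = "2vars")
}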
}
\references{
Kamada, T., Kawai, S., 1989. An algorithm for drawing general undirected
graphs. Information Processing Letters 31, 7-15.
https://doi.org/10.1016/0020-0190(89)90102-6
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/PCA_L1-class.Rd 0000644 0001762 0000144 00000005433 13753034327 014576 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/l1pca.R
\docType{class}
\name{PCA_L1-class}
\alias{PCA_L1-class}
\alias{PCA_L1}
\title{Principal Component Analysis with L1 error.}
\description{
S4 Class implementing PCA with L1 error.
}
\details{
PCA transforms the data so that the L2 reconstruction error is minimized or
the variance of the projected data is maximized. This is sensitive to
outliers, L1 PCA minimizes the L1 reconstruction error or maximizes the sum
of the L1 norm of the projected observations.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the embed function and
parameters can be given to \code{...}, in which case missing
parameters will be replaced by the ones in \code{@stdpars}.
}
\section{Parameters}{
PCA can take the following parameters:
\describe{
\item{ndim}{The number of output dimensions.}
\item{center}{logical, should the data be centered, defaults to \code{TRUE}.}
\item{scale.}{logical, should the data be scaled, defaults to \code{FALSE}.}
\item{fun}{character or function, the method to apply, see the \code{pcaL1} package}
\item{\ldots}{other parameters for \code{fun}}
}
}
\section{Implementation}{
Wraps around the different methods in the \code{pcaL1} package. Because PCA
can be reduced to a simple rotation, forward and backward projection
functions are supplied.
}
\examples{
if(requireNamespace("pcaL1", quietly = TRUE)) {
dat <- loadDataSet("Iris")
emb <- embed(dat, "PCA_L1")
plot(emb, type = "2vars")
plot(inverse(emb, getData(getDimRedData(emb))), type = "3vars")
}
}
\references{
Park, Y.W., Klabjan, D., 2016. Iteratively Reweighted Least Squares
Algorithms for L1-Norm Principal Component Analysis, in: Data Mining (ICDM),
2016 IEEE 16th International Conference On. IEEE, pp. 430-438.
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/LLE-class.Rd 0000644 0001762 0000144 00000004376 13753034327 014260 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/lle.R
\docType{class}
\name{LLE-class}
\alias{LLE-class}
\alias{LLE}
\title{Locally Linear Embedding}
\description{
An S4 Class implementing Locally Linear Embedding (LLE)
}
\details{
LLE approximates each point in the manifold by a linear combination
of its neighbors. These linear combinations are the same inside the
manifold and in the high-dimensional space.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the embed function and
parameters can be given to \code{...}, in which case missing
parameters will be replaced by the ones in \code{@stdpars}.
}
\section{Parameters}{
LLE can take the following parameters:
\describe{
\item{knn}{the number of neighbors for the knn graph, defaults to 50.}
\item{ndim}{the number of embedding dimensions, defaults to 2.}
}
}
\section{Implementation}{
Wraps around \code{\link[lle]{lle}}, only
exposes the parameters \code{k} and \code{m}.
}
\examples{
dat <- loadDataSet("3D S Curve", n = 500)
emb <- embed(dat, "LLE", knn = 45)
plot(emb, type = "2vars")
}
\references{
Roweis, S.T., Saul, L.K., 2000. Nonlinear Dimensionality Reduction
by Locally Linear Embedding. Science 290,
2323-2326. doi:10.1126/science.290.5500.2323
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/AUC_lnK_R_NX-dimRedResult-method.Rd 0000644 0001762 0000144 00000003475 13753034327 020521 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{AUC_lnK_R_NX,dimRedResult-method}
\alias{AUC_lnK_R_NX,dimRedResult-method}
\alias{AUC_lnK_R_NX}
\title{Method AUC_lnK_R_NX}
\usage{
\S4method{AUC_lnK_R_NX}{dimRedResult}(object, weight = "inv")
}
\arguments{
\item{object}{of class dimRedResult}
\item{weight}{the weight function used, one of \code{c("inv", "log", "log10")}}
}
\description{
Calculate the Area under the R_NX(ln K), used in Lee et al. (2015). Note
that despite the name, this does not weight the mean by the logarithm, but by
1/K. If explicit weighting by the logarithm is desired use \code{weight =
"log"} or \code{weight = "log10"}
}
\details{
The naming confusion originated from equation 17 in Lee et al. (2015) and the
name of this method may change in the future to avoid confusion.
}
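\examples{
\dontrun{
## A sketch comparing the default 1/K weighting with explicit log
## weighting (assumes the suggested coRanking package is installed):
emb <- embed(loadDataSet("3D S Curve", n = 300), "PCA")
AUC_lnK_R_NX(emb)                   # default: weight = "inv"
AUC_lnK_R_NX(emb, weight = "log10")
}
}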
\references{
Lee, J.A., Peluffo-Ordonez, D.H., Verleysen, M., 2015.
Multi-scale similarities in stochastic neighbour embedding: Reducing
dimensionality while preserving both local and global structure.
Neurocomputing 169, 246-261. https://doi.org/10.1016/j.neucom.2014.12.095
}
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/getPars.Rd 0000644 0001762 0000144 00000000451 13065033470 014126 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{getPars}
\alias{getPars}
\title{Method getPars}
\usage{
getPars(object, ...)
}
\arguments{
\item{object}{The object to extract the parameters from.}
\item{...}{other arguments.}
}
\description{
Extracts the pars slot.
}
dimRed/man/quality.Rd 0000644 0001762 0000144 00000011017 13753034327 014217 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{quality,dimRedResult-method}
\alias{quality,dimRedResult-method}
\alias{quality}
\alias{quality.dimRedResult}
\alias{dimRedQualityList}
\title{Quality Criteria for dimensionality reduction.}
\usage{
\S4method{quality}{dimRedResult}(.data, .method = dimRedQualityList(), .mute = character(0), ...)
dimRedQualityList()
}
\arguments{
\item{.data}{object of class \code{dimRedResult}}
\item{.method}{character vector naming one of the methods}
\item{.mute}{what output from the embedding method should be muted.}
\item{...}{the parameters, internally passed as a list to the
quality method as \code{pars = list(...)}}
}
\value{
a number
}
\description{
A collection of functions to compute quality measures on
\code{\link{dimRedResult}} objects.
}
\section{Methods (by class)}{
\itemize{
\item \code{dimRedResult}: Calculate a quality index from a dimRedResult object.
}}
\section{Implemented methods}{
Method must be one of \code{"\link{Q_local}", "\link{Q_global}",
"\link{mean_R_NX}", "\link{total_correlation}",
"\link{cophenetic_correlation}", "\link{distance_correlation}",
"\link{reconstruction_rmse}"}
}
\section{Rank based criteria}{
\code{Q_local}, \code{Q_global}, and \code{mean_R_NX} are
quality criteria based on the Co-ranking matrix. \code{Q_local}
and \code{Q_global} determine the local/global quality of the
embedding, while \code{mean_R_NX} determines the quality of the
overall embedding. They are parameter free and return a single
number. The object must include the original data. The number
returned is in the range [0, 1]; higher values mean a better
local/global embedding.
}
\section{Correlation based criteria}{
\code{total_correlation} calculates the sum of the mean squared
correlations of the original axes with the axes in reduced
dimensions. Because some methods do not care about correlations
with axes, there is an option to rotate data in reduced space to
maximize this criterion. The number may be greater than one if more
dimensions are summed up.
\code{cophenetic_correlation} calculates the correlation between the
lower triangles of distance matrices, the correlation and distance
methods may be specified. The result is in range [-1, 1].
\code{distance_correlation} measures the independence of samples by
calculating the correlation of distances. For details see
\code{\link[energy]{dcor}}.
}
\section{Reconstruction error}{
\code{reconstruction_rmse} calculates the root mean squared error
of the reconstruction. \code{object} requires an inverse function.
}
\examples{
\dontrun{
embed_methods <- dimRedMethodList()
quality_methods <- dimRedQualityList()
dataset <- loadDataSet("Iris")
quality_results <- matrix(NA, length(embed_methods), length(quality_methods),
dimnames = list(embed_methods, quality_methods))
embedded_data <- list()
for (e in embed_methods) {
message("embedding: ", e)
embedded_data[[e]] <- embed(dataset, e, .mute = c("message", "output"))
for (q in quality_methods) {
message(" quality: ", q)
quality_results[e, q] <- tryCatch(
quality(embedded_data[[e]], q),
error = function (e) NA
)
}
}
print(quality_results)
}
}
\references{
Lueks, W., Mokbel, B., Biehl, M., Hammer, B., 2011. How
to Evaluate Dimensionality Reduction? - Improving the
Co-ranking Matrix. arXiv:1110.3917 [cs].
Szekely, G.J., Rizzo, M.L., Bakirov, N.K., 2007. Measuring and
testing dependence by correlation of distances. Ann. Statist. 35,
2769-2794. doi:10.1214/009053607000000505
Lee, J.A., Peluffo-Ordonez, D.H., Verleysen, M., 2015. Multi-scale
similarities in stochastic neighbour embedding: Reducing
dimensionality while preserving both local and global
structure. Neurocomputing, 169,
246-261. doi:10.1016/j.neucom.2014.12.095
}
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\author{
Guido Kraemer
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/HLLE-class.Rd 0000644 0001762 0000144 00000004313 13753034327 014357 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/hlle.R
\docType{class}
\name{HLLE-class}
\alias{HLLE-class}
\alias{HLLE}
\title{Hessian Locally Linear Embedding}
\description{
An S4 Class implementing Hessian Locally Linear Embedding (HLLE)
}
\details{
HLLE uses local Hessians to approximate the curvature of the
manifold and is an extension to non-convex subsets in
low-dimensional space.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the embed function and
parameters can be given to \code{...}, in which case missing
parameters will be replaced by the ones in \code{@stdpars}.
}
\section{Parameters}{
HLLE can take the following parameters:
\describe{
\item{knn}{neighborhood size}
\item{ndim}{number of output dimensions}
}
}
\section{Implementation}{
Own implementation, sticks to the algorithm in Donoho and Grimes
(2003). Makes use of sparsity to speed up final embedding.
}
\examples{
dat <- loadDataSet("3D S Curve", n = 300)
emb <- embed(dat, "HLLE", knn = 15)
plot(emb, type = "2vars")
}
\references{
Donoho, D.L., Grimes, C., 2003. Hessian eigenmaps: Locally linear
embedding techniques for high-dimensional data. PNAS 100,
5591-5596. doi:10.1073/pnas.1031596100
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/embed.Rd 0000644 0001762 0000144 00000006053 13753034327 013607 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/embed.R
\name{embed}
\alias{embed}
\alias{embed,formula-method}
\alias{embed,ANY-method}
\alias{embed,dimRedData-method}
\title{dispatches the different methods for dimensionality reduction}
\usage{
embed(.data, ...)
\S4method{embed}{formula}(
.formula,
.data,
.method = dimRedMethodList(),
.mute = character(0),
.keep.org.data = TRUE,
...
)
\S4method{embed}{ANY}(
.data,
.method = dimRedMethodList(),
.mute = character(0),
.keep.org.data = TRUE,
...
)
\S4method{embed}{dimRedData}(
.data,
.method = dimRedMethodList(),
.mute = character(0),
.keep.org.data = TRUE,
...
)
}
\arguments{
\item{.data}{object of class \code{\link{dimRedData}}, will be converted to
be of class \code{\link{dimRedData}} if necessary; see examples for
details.}
\item{...}{the parameters, internally passed as a list to the dimensionality
reduction method as \code{pars = list(...)}}
\item{.formula}{a formula, see \code{\link{as.dimRedData}}.}
\item{.method}{character vector naming one of the dimensionality reduction
techniques.}
\item{.mute}{a character vector containing the elements you want to mute
(\code{c("message", "output")}), defaults to \code{character(0)}.}
\item{.keep.org.data}{\code{TRUE}/\code{FALSE} keep the original data.}
}
\value{
an object of class \code{\link{dimRedResult}}
}
\description{
wraps around all dimensionality reduction functions.
}
\details{
Method must be one of \code{\link{dimRedMethodList}()}, partial matching
is performed. All parameters start with a dot, to avoid clashes
with partial argument matching (see the R manual section 4.3.2). If
any clashes in the arguments should ever occur, call the
function with all arguments named, e.g. \code{embed(.data = dat,
.method = "mymethod", .d = "some parameter")}.
}
\section{Methods (by class)}{
\itemize{
\item \code{formula}: embed a data.frame using a formula.
\item \code{ANY}: Embed anything as long as it can be coerced to
\code{\link{dimRedData}}.
\item \code{dimRedData}: Embed a dimRedData object
}}
\examples{
## embed a data.frame using a formula:
as.data.frame(
embed(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
iris, "PCA")
)
## embed a data.frame and return a data.frame
as.data.frame(embed(iris[, 1:4], "PCA"))
## embed a matrix and return a data.frame
as.data.frame(embed(as.matrix(iris[, 1:4]), "PCA"))
\dontrun{
## embed dimRedData objects
embed_methods <- dimRedMethodList()
quality_methods <- dimRedQualityList()
dataset <- loadDataSet("Iris")
quality_results <- matrix(NA, length(embed_methods), length(quality_methods),
dimnames = list(embed_methods, quality_methods))
embedded_data <- list()
for (e in embed_methods) {
message("embedding: ", e)
embedded_data[[e]] <- embed(dataset, e, .mute = c("message", "output"))
for (q in quality_methods) {
message(" quality: ", q)
quality_results[e, q] <- tryCatch(
quality(embedded_data[[e]], q),
error = function(e) NA
)
}
}
print(quality_results)
}
}
dimRed/man/UMAP-class.Rd 0000644 0001762 0000144 00000006502 14153200601 014360 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/umap.R
\docType{class}
\name{UMAP-class}
\alias{UMAP-class}
\alias{UMAP}
\title{UMAP embedding}
\description{
An S4 Class implementing the UMAP algorithm
}
\details{
Uniform Manifold Approximation and Projection is a gradient descent
based algorithm that gives results similar to t-SNE, but scales
better with the number of points.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the embed function and
parameters can be given to \code{...}, in which case missing
parameters will be replaced by the ones in \code{@stdpars}.
}
\section{Parameters}{
UMAP can take the following parameters:
\describe{
\item{ndim}{The number of embedding dimensions.}
\item{knn}{The number of neighbors to be used.}
\item{d}{The distance metric to use.}
\item{method}{\code{"naive"} for an R implementation, \code{"python"}
for the reference implementation.}
}
Other method parameters can also be passed, see
\code{\link[umap]{umap.defaults}} for details. The ones above have been
standardized for the use with \code{dimRed} and will get automatically
translated for \code{\link[umap]{umap}}.
}
\section{Implementation}{
The dimRed package wraps the \code{\link[umap]{umap}} package, which provides
an implementation in pure R and also a wrapper around the original python
package \code{umap-learn} (https://github.com/lmcinnes/umap/). This requires
\code{umap-learn} version 0.4 to be installed; at the time of writing, there
is already \code{umap-learn} 0.5, but it is not supported by the R package
\code{\link[umap]{umap}}.
The \code{"naive"} implementation is a pure R implementation and considered
experimental at the time of writing; it is also much slower than the
python implementation.
The \code{"python"} implementation is the reference implementation used by
McInnes et al. (2018). It requires the \code{\link[reticulate]{reticulate}}
package for the interaction with python and the python package
\code{umap-learn} installed (use \code{pip install umap-learn}).
}
\examples{
\dontrun{
dat <- loadDataSet("3D S Curve", n = 300)
emb <- embed(dat, "UMAP", .mute = NULL, knn = 10)
plot(emb, type = "2vars")
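## A sketch of forwarding a method parameter to umap; n_epochs is an
## entry of umap::umap.defaults and the value is for illustration only:
emb2 <- embed(dat, "UMAP", knn = 10, n_epochs = 50)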
}
}
\references{
McInnes, Leland, and John Healy.
"UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction."
https://arxiv.org/abs/1802.03426
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/reconstruction_error-dimRedResult-method.Rd 0000644 0001762 0000144 00000004067 13753034327 022707 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{reconstruction_error,dimRedResult-method}
\alias{reconstruction_error,dimRedResult-method}
\alias{reconstruction_error}
\title{Method reconstruction_error}
\usage{
\S4method{reconstruction_error}{dimRedResult}(object, n = seq_len(ndims(object)), error_fun = "rmse")
}
\arguments{
\item{object}{of class dimRedResult}
\item{n}{a positive integer or vector of integers \code{<= ndims(object)}}
\item{error_fun}{a function or string indicating an error function; if
a function is given it must take two matrices of the same size and return
a scalar.}
}
\value{
a vector of numbers with the same length as \code{n} containing the reconstruction errors.
}
\description{
Calculate the error using only the first \code{n} dimensions of the embedded
data. \code{error_fun} can either be one of \code{c("rmse", "mae")} to
calculate the root mean square error or the mean absolute error respectively,
or a function that takes two equally sized vectors as input and returns a
single number as output.
}
\examples{
\dontrun{
ir <- loadDataSet("Iris")
ir.drr <- embed(ir, "DRR", ndim = ndims(ir))
ir.pca <- embed(ir, "PCA", ndim = ndims(ir))
rmse <- data.frame(
rmse_drr = reconstruction_error(ir.drr),
rmse_pca = reconstruction_error(ir.pca)
)
matplot(rmse, type = "l")
plot(ir)
plot(ir.drr)
plot(ir.pca)
}
}
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\author{
Guido Kraemer
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/DRR-class.Rd 0000644 0001762 0000144 00000010774 13753034327 014272 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/drr.R
\docType{class}
\name{DRR-class}
\alias{DRR-class}
\alias{DRR}
\title{Dimensionality Reduction via Regression}
\description{
An S4 Class implementing Dimensionality Reduction via Regression (DRR).
}
\details{
DRR is a non-linear extension of PCA that uses Kernel Ridge regression.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the embed function and
parameters can be given to \code{...}, in which case missing
parameters will be replaced by the ones in \code{@stdpars}.
}
\section{Parameters}{
DRR can take the following parameters:
\describe{
\item{ndim}{The number of dimensions}
\item{lambda}{The regularization parameter for the ridge
regression.}
\item{kernel}{The kernel to use for KRR, defaults to
\code{"rbfdot"}.}
\item{kernel.pars}{A list with kernel parameters, elements depend
on the kernel used, \code{"rbfdot"} uses \code{"sigma"}.}
\item{pca}{logical, should an initial pca step be performed,
defaults to \code{TRUE}.}
\item{pca.center}{logical, should the data be centered before the
pca step. Defaults to \code{TRUE}.}
\item{pca.scale}{logical, should the data be scaled before the
pca step. Defaults to \code{FALSE}.}
\item{fastcv}{logical, should \code{\link[CVST]{fastCV}} from the
CVST package be used instead of normal cross-validation.}
\item{fastcv.test}{If \code{fastcv = TRUE}, separate test data set for fastcv.}
\item{cv.folds}{if \code{fastcv = FALSE}, specifies the number of
folds for crossvalidation.}
\item{fastkrr.nblocks}{integer, higher values sacrifice numerical
accuracy for speed and less memory, see below for details.}
\item{verbose}{logical, should the cross-validation results be
printed out.}
}
}
\section{Implementation}{
Wraps around \code{\link[DRR]{drr}}, see there for details. DRR is
a non-linear extension of principal components analysis using Kernel
Ridge Regression (KRR, details see \code{\link[CVST]{constructKRRLearner}}
and \code{\link[DRR]{constructFastKRRLearner}}). Non-linear
regression is used to explain more variance than PCA. DRR provides
an out-of-sample extension and a backward projection.
The most expensive computations are matrix inversions, therefore the
implementation profits a lot from a multithreaded BLAS library.
The best parameters for each KRR are determined by cross-validation
over all parameter combinations of \code{lambda} and
\code{kernel.pars}; using fewer parameter values will speed up
computation time. Calculation of KRR can be accelerated by
increasing \code{fastkrr.nblocks}; it should be smaller than
\eqn{n^{1/3}}, at the cost of some accuracy, for details see
\code{\link[DRR]{constructFastKRRLearner}}. Another way to speed up
is to use \code{pars$fastcv = TRUE}, which might provide a more
efficient way to search the parameter space but may also miss the
global maximum; I have not run tests on the accuracy of this method.
}
\examples{
\dontrun{
dat <- loadDataSet("variable Noise Helix", n = 200)[sample(200)]
emb <- embed(dat, "DRR", ndim = 3)
plot(dat, type = "3vars")
plot(emb, type = "3vars")
# We even have function to reconstruct, also working for only the first few dimensions
rec <- inverse(emb, getData(getDimRedData(emb))[, 1, drop = FALSE])
plot(rec, type = "3vars")
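## A sketch trading some accuracy for speed; fastkrr.nblocks should
## stay below n^(1/3), the value here is for illustration only:
emb2 <- embed(dat, "DRR", ndim = 3, fastkrr.nblocks = 5)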
}
}
\references{
Laparra, V., Malo, J., Camps-Valls, G.,
2015. Dimensionality Reduction via Regression in Hyperspectral
Imagery. IEEE Journal of Selected Topics in Signal Processing
9, 1026-1036. doi:10.1109/JSTSP.2015.2417833
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/nMDS-class.Rd 0000644 0001762 0000144 00000004154 13753034327 014437 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nmds.R
\docType{class}
\name{nMDS-class}
\alias{nMDS-class}
\alias{nMDS}
\title{Non-Metric Dimensional Scaling}
\description{
An S4 Class implementing Non-Metric Dimensional Scaling.
}
\details{
A non-linear extension of MDS using monotonic regression.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the embed function and
parameters can be given to \code{...}, in which case missing
parameters will be replaced by the ones in \code{@stdpars}.
}
\section{Parameters}{
nMDS can take the following parameters:
\describe{
\item{d}{A distance function.}
\item{ndim}{The number of embedding dimensions.}
}
}
\section{Implementation}{
Wraps around
\code{\link[vegan]{monoMDS}}. For parameters that are not
available here, the standard configuration is used.
}
\examples{
dat <- loadDataSet("3D S Curve", n = 300)
emb <- embed(dat, "nMDS")
plot(emb, type = "2vars")
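## A sketch with a different distance function; any function that
## returns a dist object can be supplied:
emb2 <- embed(dat, "nMDS", d = function(x) dist(x, method = "manhattan"))
plot(emb2, type = "2vars")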
}
\references{
Kruskal, J.B., 1964. Nonmetric multidimensional scaling: A numerical method.
Psychometrika 29, 115-129. https://doi.org/10.1007/BF02289694
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/PCA-class.Rd 0000644 0001762 0000144 00000005155 13753034327 014243 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pca.R
\docType{class}
\name{PCA-class}
\alias{PCA-class}
\alias{PCA}
\title{Principal Component Analysis}
\description{
S4 Class implementing PCA.
}
\details{
PCA transforms the data into orthogonal components so that the first
axis accounts for the largest variance in the data, and each
following axis accounts for the highest remaining variance under the
constraint that it is orthogonal to the preceding axes. PCA is
sensitive to the scaling of the variables. PCA is by far the
fastest and simplest method of dimensionality reduction and should
probably always be applied as a baseline if other methods are tested.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be
used directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the embed function and
parameters can be given to \code{...}, in which case missing
parameters will be replaced by the ones in \code{@stdpars}.
}
\section{Parameters}{
PCA can take the following parameters:
\describe{
\item{ndim}{The number of output dimensions.}
\item{center}{logical, should the data be centered, defaults to \code{TRUE}.}
\item{scale.}{logical, should the data be scaled, defaults to \code{FALSE}.}
}
}
\section{Implementation}{
Wraps around \code{\link{prcomp}}. Because PCA can be reduced to a
simple rotation, forward and backward projection functions are
supplied.
}
\examples{
dat <- loadDataSet("Iris")
emb <- embed(dat, "PCA")
plot(emb, type = "2vars")
plot(inverse(emb, getDimRedData(emb)), type = "3vars")
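## A sketch with scaled variables (see the Parameters section):
emb2 <- embed(dat, "PCA", scale. = TRUE)
plot(emb2, type = "2vars")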
}
\references{
Pearson, K., 1901. On lines and planes of closest fit to systems of points in
space. Philosophical Magazine 2, 559-572.
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/dimRedResult-class.Rd 0000644 0001762 0000144 00000007600 13753034327 016240 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dimRedResult-class.R
\docType{class}
\name{dimRedResult-class}
\alias{dimRedResult-class}
\alias{dimRedResult}
\alias{predict,dimRedResult-method}
\alias{inverse,dimRedResult-method}
\alias{inverse}
\alias{as.data.frame,dimRedResult-method}
\alias{getPars,dimRedResult-method}
\alias{getNDim,dimRedResult-method}
\alias{print,dimRedResult-method}
\alias{getOrgData,dimRedResult-method}
\alias{getDimRedData,dimRedResult-method}
\alias{ndims,dimRedResult-method}
\alias{getOtherData,dimRedResult-method}
\title{Class "dimRedResult"}
\usage{
\S4method{predict}{dimRedResult}(object, xnew)
\S4method{inverse}{dimRedResult}(object, ynew)
\S4method{as.data.frame}{dimRedResult}(
x,
org.data.prefix = "org.",
meta.prefix = "meta.",
data.prefix = ""
)
\S4method{getPars}{dimRedResult}(object)
\S4method{getNDim}{dimRedResult}(object)
\S4method{print}{dimRedResult}(x)
\S4method{getOrgData}{dimRedResult}(object)
\S4method{getDimRedData}{dimRedResult}(object)
\S4method{ndims}{dimRedResult}(object)
\S4method{getOtherData}{dimRedResult}(object)
}
\arguments{
\item{object}{Of class \code{dimRedResult}}
\item{xnew}{new data, of type \code{\link{dimRedData}}}
\item{ynew}{embedded data, of type \code{\link{dimRedData}}}
\item{x}{Of class \code{dimRedResult}}
\item{org.data.prefix}{Prefix for the columns of the org.data slot.}
\item{meta.prefix}{Prefix for the columns of \code{x@data@meta}.}
\item{data.prefix}{Prefix for the columns of \code{x@data@data}.}
}
\description{
A class to hold the results of of a dimensionality reduction.
}
\section{Methods (by generic)}{
\itemize{
\item \code{predict}: apply a trained method to new data; does not work
with all methods and will give an error if there is no \code{apply}.
In some cases the apply function may only be an approximation.
\item \code{inverse}: inverse transformation of embedded data; does not
work with all methods and will give an error if there is no \code{inverse}.
In some cases the inverse function may only be an approximation.
\item \code{as.data.frame}: convert to \code{data.frame}
\item \code{getPars}: Get the parameters with which the method
was called.
\item \code{getNDim}: Get the number of embedding dimensions.
\item \code{print}: Method for printing.
\item \code{getOrgData}: Get the original data and meta.data
\item \code{getDimRedData}: Get the embedded data
\item \code{ndims}: Extract the number of embedding dimensions.
\item \code{getOtherData}: Get other data produced by the method
}}
\section{Slots}{
\describe{
\item{\code{data}}{Output data of class dimRedData.}
\item{\code{org.data}}{original data, a matrix.}
\item{\code{apply}}{a function to apply the method to out-of-sample data,
may not exist.}
\item{\code{inverse}}{a function to calculate the original coordinates from
reduced space, may not exist.}
\item{\code{has.org.data}}{logical, if the original data is included in the object.}
\item{\code{has.apply}}{logical, if a forward method exists.}
\item{\code{has.inverse}}{logical, if an inverse method exists.}
\item{\code{method}}{saves the method used.}
\item{\code{pars}}{saves the parameters used.}
\item{\code{other.data}}{other data produced by the method, e.g. a distance matrix.}
}}
\examples{
## Create object by embedding data
iris.pca <- embed(loadDataSet("Iris"), "PCA")
## Convert the result to a data.frame
head(as(iris.pca, "data.frame"))
head(as.data.frame(iris.pca))
## There are no name clashes to avoid here:
head(as.data.frame(iris.pca,
org.data.prefix = "",
meta.prefix = "",
data.prefix = ""))
## Print it more or less nicely:
print(iris.pca)
## Get the embedded data as a dimRedData object:
getDimRedData(iris.pca)
## Get the original data including meta information:
getOrgData(iris.pca)
## Get the number of variables:
ndims(iris.pca)
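## Hedged sketch: the has.apply/has.inverse slots indicate whether the
## forward and inverse mappings are available (both exist for PCA):
if (iris.pca@has.apply) predict(iris.pca, loadDataSet("Iris")[1:5])
if (iris.pca@has.inverse) inverse(iris.pca, getDimRedData(iris.pca)[1:5])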
}
\concept{dimRedResult}
dimRed/man/ndims.Rd 0000644 0001762 0000144 00000000506 13065033470 013634 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{ndims}
\alias{ndims}
\title{Method ndims}
\usage{
ndims(object, ...)
}
\arguments{
\item{object}{To extract the number of dimensions from.}
\item{...}{Arguments for further methods}
}
\description{
Extract the number of dimensions.
}
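\examples{
## A hedged usage sketch (assuming the PCA method and the built-in Iris
## data set): ndims() returns the embedding dimensionality of a result.
ndims(embed(loadDataSet("Iris"), "PCA", ndim = 2))
}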
dimRed/man/NNMF-class.Rd 0000644 0001762 0000144 00000005447 13753034327 014402 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/nnmf.R
\docType{class}
\name{NNMF-class}
\alias{NNMF-class}
\alias{NNMF}
\title{Non-Negative Matrix Factorization}
\description{
S4 Class implementing NNMF.
}
\details{
NNMF is a method for decomposing a matrix into a lower-dimensional
representation under the constraint that the data (and the
projection) are non-negative.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be used
directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the \code{embed} function and
parameters can be given via \code{...}, in which case
missing parameters will be replaced by the ones in
\code{@stdpars}.
}
\section{Parameters}{
The method can take the following parameters:
\describe{
\item{ndim}{The number of output dimensions.}
\item{method}{character, which algorithm should be used. See
\code{\link[NMF]{nmf}} for possible values. Defaults to
"brunet"}
\item{nrun}{integer, the number of times the computations are
conducted. See \code{\link[NMF]{nmf}}}
\item{seed}{integer, a value to control the random numbers used.}
\item{options}{named list, other options to pass to \code{\link[NMF]{nmf}}}
}
}
\section{Implementation}{
Wraps around \code{\link[NMF]{nmf}}. Note that the estimation uses random
numbers. To create reproducible results, set the random number seed in the
function call. Also, in many cases, the computations will be conducted
in parallel using multiple cores. To disable this, use the option
\code{.pbackend = NULL}.
}
\examples{
set.seed(4646)
dat <- loadDataSet("Iris")
emb <- embed(dat, "NNMF")
plot(emb)
# project new values:
nn_proj <- predict(emb, dat[1:7])
plot(nn_proj)
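# hedged sketch: seed and options are forwarded to NMF::nmf, so a
# reproducible, non-parallel run (as described above) might look like:
emb2 <- embed(dat, "NNMF", seed = 4646, options = list(.pbackend = NULL))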
}
\references{
Lee, D.D., Seung, H.S., 1999. Learning the parts of objects by non-negative
matrix factorization. Nature 401, 788-791. https://doi.org/10.1038/44565
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/Q_NX-dimRedResult-method.Rd 0000644 0001762 0000144 00000002311 13753034327 017210 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{Q_NX,dimRedResult-method}
\alias{Q_NX,dimRedResult-method}
\alias{Q_NX}
\title{Method Q_NX}
\usage{
\S4method{Q_NX}{dimRedResult}(object)
}
\arguments{
\item{object}{of class dimRedResult}
}
\description{
Calculate the Q_NX score (Chen & Buja 2006, the notation in the
publication is M_k), which is the fraction of points that remain inside
the same K-ary neighborhood in high- and low-dimensional space.
}
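\examples{
## A hedged sketch (requires the suggested coRanking package):
## Q_NX returns one value per neighborhood size K.
emb <- embed(loadDataSet("Iris"), "PCA")
head(Q_NX(emb))
}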
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/Q_local-dimRedResult-method.Rd 0000644 0001762 0000144 00000002307 13753034327 017762 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{Q_local,dimRedResult-method}
\alias{Q_local,dimRedResult-method}
\alias{Q_local}
\title{Method Q_local}
\usage{
\S4method{Q_local}{dimRedResult}(object, ndim = getNDim(object))
}
\arguments{
\item{object}{of class dimRedResult.}
\item{ndim}{use the first ndim columns of the embedded data for calculation.}
}
\description{
Calculate the Q_local score to assess the quality of a dimensionality reduction.
}
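\examples{
## A hedged sketch (requires the suggested coRanking package):
emb <- embed(loadDataSet("Iris"), "PCA")
Q_local(emb)
}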
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/dimRedMethodList.Rd 0000644 0001762 0000144 00000002225 13753034327 015731 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dimRedMethod-class.R
\name{dimRedMethodList}
\alias{dimRedMethodList}
\title{dimRedMethodList}
\usage{
dimRedMethodList()
}
\value{
a character vector with the names of classes that inherit
from \code{dimRedMethod}.
}
\description{
Get the names of all methods for dimensionality reduction.
}
\details{
Returns the name of all classes that inherit from
\code{\link{dimRedMethod-class}} to use with \code{\link{embed}}.
}
\examples{
dimRedMethodList()
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/getData.Rd 0000644 0001762 0000144 00000000406 13065033470 014072 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{getData}
\alias{getData}
\title{Method getData}
\usage{
getData(object)
}
\arguments{
\item{object}{The object to be converted.}
}
\description{
Extracts the data slot.
}
dimRed/man/getMeta.Rd 0000644 0001762 0000144 00000000451 13065033470 014107 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{getMeta}
\alias{getMeta}
\title{Method getMeta}
\usage{
getMeta(object, ...)
}
\arguments{
\item{object}{The object to be converted.}
\item{...}{other arguments.}
}
\description{
Extracts the meta slot.
}
dimRed/man/MDS-class.Rd 0000644 0001762 0000144 00000005113 13753034327 014255 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mds.R
\docType{class}
\name{MDS-class}
\alias{MDS-class}
\alias{MDS}
\title{Metric Dimensional Scaling}
\description{
An S4 Class implementing classical scaling (MDS).
}
\details{
MDS tries to maintain distances in high- and low-dimensional space.
It has the advantage over PCA that arbitrary distance functions can
be used, but it is computationally more demanding.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be used
directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the \code{embed} function and
parameters can be given via \code{...}, in which case
missing parameters will be replaced by the ones in
\code{@stdpars}.
}
\section{Parameters}{
MDS can take the following parameters:
\describe{
\item{ndim}{The number of dimensions.}
\item{d}{The function to calculate the distance matrix from the input coordinates, defaults to Euclidean distances.}
}
}
\section{Implementation}{
Wraps around \code{\link[stats]{cmdscale}}. The implementation also
provides an out-of-sample extension which is not completely
optimized yet.
}
\examples{
\dontrun{
dat <- loadDataSet("3D S Curve")
emb <- embed(dat, "MDS")
plot(emb, type = "2vars")
# a "manual" kPCA:
emb2 <- embed(dat, "MDS", d = function(x) exp(stats::dist(x)))
plot(emb2, type = "2vars")
# a "manual", more customizable, and slower Isomap:
emb3 <- embed(dat, "MDS", d = function(x) vegan::isomapdist(vegan::vegdist(x, "manhattan"), k = 20))
plot(emb3)
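# a hedged sketch of the (approximate) out-of-sample extension noted above:
emb_oos <- predict(emb, dat[1:10])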
}
}
\references{
Torgerson, W.S., 1952. Multidimensional scaling: I. Theory and method.
Psychometrika 17, 401-419. https://doi.org/10.1007/BF02288916
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/dimRed-package.Rd 0000644 0001762 0000144 00000002757 14153177013 015332 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dimRed.R
\docType{package}
\name{dimRed-package}
\alias{dimRed}
\alias{dimRed-package}
\title{The dimRed package}
\description{
This package simplifies dimensionality reduction in R by
providing a framework of S4 classes and methods. dimRed collects
dimensionality reduction methods that are implemented in R and implements
others. It gives them a common interface and provides plotting
functions for visualization and functions for quality assessment.
Funding provided by the Department for Biogeochemical Integration,
Empirical Inference of the Earth System Group, at the Max Planck
Institute for Biogeochemistry, Jena.
}
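\examples{
## A hedged end-to-end sketch of the common interface described above
## (quality() additionally requires the suggested coRanking package):
emb <- embed(loadDataSet("Iris"), "PCA")
plot(emb, type = "2vars")
quality(emb, "Q_local")
}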
\references{
Lee, J.A., Renard, E., Bernard, G., Dupont, P., Verleysen, M.,
2013. Type 1 and 2 mixtures of Kullback-Leibler divergences as cost
functions in dimensionality reduction based on similarity
preservation. Neurocomputing. 112,
92-107. doi:10.1016/j.neucom.2012.12.036
Lee, J.A., Lee, J.A., Verleysen, M., 2008. Rank-based quality
assessment of nonlinear dimensionality reduction. Proceedings of
ESANN 2008 49-54.
Chen, L., Buja, A., 2006. Local Multidimensional Scaling for
Nonlinear Dimension Reduction, Graph Layout and Proximity Analysis.
}
\seealso{
Useful links:
\itemize{
\item \url{https://www.guido-kraemer.com/software/dimred/}
\item Report bugs at \url{https://github.com/gdkrmr/dimRed/issues}
}
}
\author{
\strong{Maintainer}: Guido Kraemer \email{guido.kraemer@uni-leipzig.de}
}
dimRed/man/makeKNNgraph.Rd 0000644 0001762 0000144 00000001415 13065033470 015030 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{makeKNNgraph}
\alias{makeKNNgraph}
\title{makeKNNgraph}
\usage{
makeKNNgraph(x, k, eps = 0, diag = FALSE)
}
\arguments{
\item{x}{data, a matrix, observations in rows, dimensions in
columns}
\item{k}{the number of nearest neighbors.}
\item{eps}{number, if \code{eps > 0} the KNN search is approximate,
see \code{\link[RANN]{nn2}}}
\item{diag}{logical, if \code{TRUE} every node of the returned
graph will have an edge with weight \code{0} to itself.}
}
\value{
an object of type \code{\link[igraph]{igraph}} with edge
weights being the distances.
}
\description{
Create a K-nearest neighbor graph from data x. Uses
\code{\link[RANN]{nn2}} as a fast way to find the nearest neighbors.
}
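\examples{
## A hedged sketch (makeKNNgraph is internal, hence the ::: access;
## requires the suggested RANN and igraph packages):
g <- dimRed:::makeKNNgraph(as.matrix(iris[, 1:4]), k = 5)
igraph::ecount(g)
}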
dimRed/man/getOrgData.Rd 0000644 0001762 0000144 00000000475 13065033470 014550 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{getOrgData}
\alias{getOrgData}
\title{Method getOrgData}
\usage{
getOrgData(object, ...)
}
\arguments{
\item{object}{The object to extract data from.}
\item{...}{other arguments.}
}
\description{
Extract the Original data.
}
dimRed/man/as.dimRedData.Rd 0000644 0001762 0000144 00000001746 13753034327 015137 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R, R/dimRedData-class.R
\name{as.dimRedData}
\alias{as.dimRedData}
\alias{as.dimRedData,formula-method}
\title{Converts to dimRedData}
\usage{
as.dimRedData(formula, ...)
\S4method{as.dimRedData}{formula}(formula, data)
}
\arguments{
\item{formula}{The formula, left hand side is assigned to the meta slot right
hand side is assigned to the data slot.}
\item{...}{other arguments.}
\item{data}{Will be coerced into a \code{\link{data.frame}} with
\code{\link{as.data.frame}}}
}
\description{
Conversion functions to dimRedData.
}
\section{Methods (by class)}{
\itemize{
\item \code{formula}: Convert a \code{data.frame} to a dimRedData
object using a formula
}}
\examples{
## create a dimRedData object using a formula
as.dimRedData(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
iris)[1:5]
}
\seealso{
Other dimRedData:
\code{\link{dimRedData-class}}
}
\concept{dimRedData}
dimRed/man/DrL-class.Rd 0000644 0001762 0000144 00000005041 13753034327 014313 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/graph_embed.R
\docType{class}
\name{DrL-class}
\alias{DrL-class}
\alias{DrL}
\title{Distributed Recursive Graph Layout}
\description{
An S4 Class implementing Distributed recursive Graph Layout.
}
\details{
DrL uses a complex multi-step algorithm to avoid local minima in the
graph embedding.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be used
directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the \code{embed} function and
parameters can be given via \code{...}, in which case
missing parameters will be replaced by the ones in
\code{@stdpars}.
}
\section{Parameters}{
DrL can take the following parameters:
\describe{
\item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3}
\item{knn}{Reduce the graph to keep only the nearest neighbors. Defaults to 100.}
\item{d}{The distance function to determine the weights of the graph edges. Defaults to Euclidean distances.}
}
}
\section{Implementation}{
Wraps around \code{\link[igraph]{layout_with_drl}}. The parameters
maxiter, epsilon and kkconst are set to the default values and
cannot be changed; this may change in a future release. The dimRed
package adds an extra sparsity parameter by constructing a knn
graph, which may also improve visualization quality.
}
\examples{
\dontrun{
dat <- loadDataSet("Swiss Roll", n = 200)
emb <- embed(dat, "DrL")
plot(emb, type = "2vars")
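# hedged sketch: the knn sparsity parameter described above can be lowered
emb2 <- embed(dat, "DrL", knn = 50)
plot(emb2, type = "2vars")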
}
}
\references{
Martin, S., Brown, W.M., Wylie, B.N., 2007. Dr.l: Distributed Recursive
(graph) Layout (No. dRl; 002182MLTPL00). Sandia National Laboratories.
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{FastICA-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/as.data.frame.Rd 0000644 0001762 0000144 00000001242 13065033470 015124 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{as.data.frame}
\alias{as.data.frame}
\title{Converts to data.frame}
\usage{
as.data.frame(x, row.names, optional, ...)
}
\arguments{
\item{x}{The object to be converted}
\item{row.names}{unused in \code{dimRed}}
\item{optional}{unused in \code{dimRed}}
\item{...}{other arguments.}
}
\description{
General conversions of objects created by \code{dimRed} to \code{data.frame}.
See class documentations for details (\code{\link{dimRedData}},
\code{\link{dimRedResult}}). For the documentation of this function in base
package, see here: \code{\link[base]{as.data.frame.default}}.
}
dimRed/man/plot.Rd 0000644 0001762 0000144 00000003700 13753034327 013505 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot.R
\name{plot}
\alias{plot}
\alias{plot.dimRed}
\alias{plot,dimRedData,ANY-method}
\alias{plot.dimRedData}
\alias{plot,dimRedResult,ANY-method}
\alias{plot.dimRedResult}
\title{Plotting of dimRed* objects}
\usage{
plot(x, y, ...)
\S4method{plot}{dimRedData,ANY}(
x,
type = "pairs",
vars = seq_len(ncol(x@data)),
col = seq_len(min(3, ncol(x@meta))),
...
)
\S4method{plot}{dimRedResult,ANY}(
x,
type = "pairs",
vars = seq_len(ncol(x@data@data)),
col = seq_len(min(3, ncol(x@data@meta))),
...
)
}
\arguments{
\item{x}{dimRedResult/dimRedData class, e.g. output of
\code{embed}/\code{loadDataSet}}
\item{y}{Ignored}
\item{...}{handed over to the underlying plotting function.}
\item{type}{plot type, one of \code{c("pairs", "parpl", "2vars",
"3vars", "3varsrgl")}}
\item{vars}{the axes of the embedding to use for plotting}
\item{col}{the columns of the meta slot to use for coloring, can be
referenced as the column names or number of x@data}
}
\description{
Plots a object of class dimRedResult and dimRedData. For the
documentation of the plotting function in base see here:
\code{\link{plot.default}}.
}
\details{
Plotting functions for the classes used in \code{dimRed}. They are
intended to give a quick overview of the results, so they are
somewhat inflexible, e.g. it is hard to modify color scales or
plotting parameters.
If you require more control over plotting, it is better to convert
the object to a \code{data.frame} first and use the standard
functions for plotting.
}
\section{Methods (by class)}{
\itemize{
\item \code{x = dimRedData,y = ANY}: Plotting of dimRedData objects
\item \code{x = dimRedResult,y = ANY}: Plotting of dimRedResult objects.
}}
\examples{
scurve = loadDataSet("3D S Curve")
plot(scurve, type = "pairs", main = "pairs plot of S curve")
plot(scurve, type = "parpl")
plot(scurve, type = "2vars", vars = c("y", "z"))
plot(scurve, type = "3vars")
}
dimRed/man/reconstruction_rmse-dimRedResult-method.Rd 0000644 0001762 0000144 00000002270 13753034327 022516 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/quality.R
\name{reconstruction_rmse,dimRedResult-method}
\alias{reconstruction_rmse,dimRedResult-method}
\alias{reconstruction_rmse}
\title{Method reconstruction_rmse}
\usage{
\S4method{reconstruction_rmse}{dimRedResult}(object)
}
\arguments{
\item{object}{of class dimRedResult}
}
\description{
Calculate the reconstruction root mean squared error of a dimensionality reduction; the method must have an inverse mapping.
}
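\examples{
## A hedged sketch: PCA provides an inverse mapping, so the score is defined
emb <- embed(loadDataSet("Iris"), "PCA", ndim = 2)
reconstruction_rmse(emb)
}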
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{plot_R_NX}()},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/FruchtermanReingold-class.Rd 0000644 0001762 0000144 00000004520 13753034327 017575 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/graph_embed.R
\docType{class}
\name{FruchtermanReingold-class}
\alias{FruchtermanReingold-class}
\alias{FruchtermanReingold}
\title{Fruchterman Reingold Graph Layout}
\description{
An S4 Class implementing the Fruchterman Reingold Graph Layout
algorithm.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be used
directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the \code{embed} function and
parameters can be given via \code{...}, in which case
missing parameters will be replaced by the ones in
\code{@stdpars}.
}
\section{Parameters}{
\describe{
\item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3}
\item{knn}{Reduce the graph to keep only the nearest neighbors. Defaults to 100.}
\item{d}{The distance function to determine the weights of the graph edges. Defaults to Euclidean distances.}
}
}
\section{Implementation}{
Wraps around \code{\link[igraph]{layout_with_fr}}, see there for
details. The Fruchterman Reingold algorithm places the data in
a circle and pulls connected points close to each other.
}
\examples{
dat <- loadDataSet("Swiss Roll", n = 100)
emb <- embed(dat, "FruchtermanReingold")
plot(emb, type = "2vars")
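## hedged sketch: the knn parameter described above sparsifies the graph
emb2 <- embed(dat, "FruchtermanReingold", knn = 15)
plot(emb2, type = "2vars")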
}
\references{
Fruchterman, T.M.J., Reingold, E.M., 1991. Graph drawing by force-directed
placement. Softw: Pract. Exper. 21, 1129-1164.
https://doi.org/10.1002/spe.4380211102
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FastICA-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/man/print.Rd 0000644 0001762 0000144 00000000477 13065033470 013665 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/misc.R
\name{print}
\alias{print}
\title{Method print}
\usage{
print(x, ...)
}
\arguments{
\item{x}{The object to be printed.}
\item{...}{Other arguments for printing.}
}
\description{
Imports the print method into the package namespace.
}
dimRed/man/plot_R_NX.Rd 0000644 0001762 0000144 00000004252 13753034327 014376 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot.R
\name{plot_R_NX}
\alias{plot_R_NX}
\title{plot_R_NX}
\usage{
plot_R_NX(x, ndim = NA, weight = "inv")
}
\arguments{
\item{x}{a list of \code{\link{dimRedResult}} objects. The names of the list
will appear in the legend with the AUC_lnK value.}
\item{ndim}{the number of dimensions, if \code{NA} the original number of
embedding dimensions is used, can be a vector giving the embedding
dimensionality for each single list element of \code{x}.}
\item{weight}{the weight function used for K when calculating the AUC, one of
\code{c("inv", "log", "log10")}}
}
\value{
A ggplot object, the design can be changed by appending
\code{theme(...)}
}
\description{
Plot the R_NX curve for different embeddings. Takes a list of
\code{\link{dimRedResult}} objects as input.
The area under the curve (AUC) values are also computed for a weighted K
(see \link{AUC_lnK_R_NX} for details) and appear in the legend.
}
\examples{
## define which methods to apply
embed_methods <- c("Isomap", "PCA")
## load test data set
data_set <- loadDataSet("3D S Curve", n = 200)
## apply dimensionality reduction
data_emb <- lapply(embed_methods, function(x) embed(data_set, x))
names(data_emb) <- embed_methods
## plot the R_NX curves:
plot_R_NX(data_emb) +
ggplot2::theme(legend.title = ggplot2::element_blank(),
legend.position = c(0.5, 0.1),
legend.justification = c(0.5, 0.1))
}
\seealso{
Other Quality scores for dimensionality reduction:
\code{\link{AUC_lnK_R_NX,dimRedResult-method}},
\code{\link{LCMC,dimRedResult-method}},
\code{\link{Q_NX,dimRedResult-method}},
\code{\link{Q_global,dimRedResult-method}},
\code{\link{Q_local,dimRedResult-method}},
\code{\link{R_NX,dimRedResult-method}},
\code{\link{cophenetic_correlation,dimRedResult-method}},
\code{\link{distance_correlation,dimRedResult-method}},
\code{\link{mean_R_NX,dimRedResult-method}},
\code{\link{quality,dimRedResult-method}},
\code{\link{reconstruction_error,dimRedResult-method}},
\code{\link{reconstruction_rmse,dimRedResult-method}},
\code{\link{total_correlation,dimRedResult-method}}
}
\concept{Quality scores for dimensionality reduction}
dimRed/man/FastICA-class.Rd 0000644 0001762 0000144 00000004514 13753034327 015050 0 ustar ligges users % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fastica.R
\docType{class}
\name{FastICA-class}
\alias{FastICA-class}
\alias{FastICA}
\title{Independent Component Analysis}
\description{
An S4 Class implementing the FastICA algorithm for Independent
Component Analysis.
}
\details{
ICA is used for blind signal separation of different sources. It is
a linear projection.
}
\section{Slots}{
\describe{
\item{\code{fun}}{A function that does the embedding and returns a
dimRedResult object.}
\item{\code{stdpars}}{The standard parameters for the function.}
}}
\section{General usage}{
Dimensionality reduction methods are S4 Classes that can either be used
directly, in which case they have to be initialized and a full
list of parameters has to be handed to the \code{@fun()}
slot, or the method name can be passed to the \code{embed} function and
parameters can be given via \code{...}, in which case
missing parameters will be replaced by the ones in
\code{@stdpars}.
}
\section{Parameters}{
FastICA can take the following parameters:
\describe{
\item{ndim}{The number of output dimensions. Defaults to \code{2}}
}
}
\section{Implementation}{
Wraps around \code{\link[fastICA]{fastICA}}. FastICA uses a very
fast approximation for negentropy to estimate statistical
independences between signals. Because it is a simple
rotation/projection, forward and backward functions can be given.
}
\examples{
dat <- loadDataSet("3D S Curve")
emb <- embed(dat, "FastICA", ndim = 2)
plot(getData(getDimRedData(emb)))
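## hedged sketch of the forward/backward functions mentioned above:
ica_proj <- predict(emb, dat[1:10])
ica_rec  <- inverse(emb, ica_proj)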
}
\references{
Hyvarinen, A., 1999. Fast and robust fixed-point algorithms for independent
component analysis. IEEE Transactions on Neural Networks 10, 626-634.
https://doi.org/10.1109/72.761722
}
\seealso{
Other dimensionality reduction methods:
\code{\link{AutoEncoder-class}},
\code{\link{DRR-class}},
\code{\link{DiffusionMaps-class}},
\code{\link{DrL-class}},
\code{\link{FruchtermanReingold-class}},
\code{\link{HLLE-class}},
\code{\link{Isomap-class}},
\code{\link{KamadaKawai-class}},
\code{\link{LLE-class}},
\code{\link{MDS-class}},
\code{\link{NNMF-class}},
\code{\link{PCA-class}},
\code{\link{PCA_L1-class}},
\code{\link{UMAP-class}},
\code{\link{dimRedMethod-class}},
\code{\link{dimRedMethodList}()},
\code{\link{kPCA-class}},
\code{\link{nMDS-class}},
\code{\link{tSNE-class}}
}
\concept{dimensionality reduction methods}
dimRed/DESCRIPTION 0000644 0001762 0000144 00000003010 14153365332 013163 0 ustar ligges users Package: dimRed
Title: A Framework for Dimensionality Reduction
Version: 0.2.4
Authors@R: c(
person("Guido", "Kraemer",
email = "guido.kraemer@uni-leipzig.de",
role = c("aut", "cre"))
)
Description: A collection of dimensionality reduction
techniques from R packages and a common
interface for calling the methods.
Depends: R (>= 3.0.0), DRR
Imports: magrittr, methods
Suggests: NMF, MASS, Matrix, RANN, RSpectra, Rtsne, cccd, coRanking,
diffusionMap, energy, fastICA, ggplot2, graphics, igraph,
keras, kernlab, knitr, lle, loe, optimx, pcaL1, pcaPP,
reticulate, rgl, scales, scatterplot3d, stats, tensorflow,
testthat, tidyr, tinytex, umap, vegan
VignetteBuilder: knitr
License: GPL-3 | file LICENSE
BugReports: https://github.com/gdkrmr/dimRed/issues
URL: https://www.guido-kraemer.com/software/dimred/
Encoding: UTF-8
Collate: 'dimRedMethod-class.R' 'misc.R' 'dimRedData-class.R'
'dimRedResult-class.R' 'autoencoder.R' 'dataSets.R' 'diffmap.R'
'dimRed.R' 'drr.R' 'embed.R' 'fastica.R' 'get_info.R'
'graph_embed.R' 'hlle.R' 'isomap.R' 'kpca.R' 'l1pca.R' 'leim.R'
'lle.R' 'loe.R' 'mds.R' 'mixColorSpaces.R' 'nmds.R' 'nnmf.R'
'pca.R' 'plot.R' 'quality.R' 'rotate.R' 'soe.R' 'tsne.R'
'umap.R'
RoxygenNote: 7.1.2
NeedsCompilation: yes
Packaged: 2021-12-05 20:26:06 UTC; gkraemer
Author: Guido Kraemer [aut, cre]
Maintainer: Guido Kraemer <guido.kraemer@uni-leipzig.de>
Repository: CRAN
Date/Publication: 2021-12-06 10:50:02 UTC
dimRed/build/ 0000755 0001762 0000144 00000000000 14153220135 012551 5 ustar ligges users dimRed/build/vignette.rds 0000644 0001762 0000144 00000000333 14153220135 015107 0 ustar ligges users [binary vignette index omitted] dimRed/tests/ 0000755 0001762 0000144 00000000000 14153220136 012615 5 ustar ligges users dimRed/tests/testthat/ 0000755 0001762 0000144 00000000000 14153365332 014465 5 ustar ligges users dimRed/tests/testthat/test_HLLE.R 0000644 0001762 0000144 00000000247 13464507204 016376 0 ustar ligges users context("HLLE")
test_that("HLLE", {
expect_error(embed(iris[1:4], "HLLE", ndim = 1, .mute = c("message", "output")),
"ndim must be 2 or larger.")
})
dimRed/tests/testthat/test_dimRedMethod-class.R 0000644 0001762 0000144 00000000716 13464507204 021323 0 ustar ligges users context("dimRedMethod-class")
test_that("pars matching", {
for (m in dimRedMethodList()) {
mo <- getMethodObject(m)
expect(
all.equal(
mo@stdpars,
matchPars(mo, list())
),
paste("par matching for", m, "failed")
)
}
expect_warning(
embed(iris[1:4], "PCA", asdf = 1234),
"Parameter matching: asdf is not a standard parameter, ignoring."
)
})
dimRed/tests/testthat/test_isomap.R 0000644 0001762 0000144 00000002104 13371631672 017140 0 ustar ligges users
context("isomap")
## no isomap specific tests, because forward method is not really
## exact.
test_that("check vs vegan isomap", {
eps <- 1e-8
a <- loadDataSet("3D S Curve", n = 200)
vegiso <- vegan::isomap(dist(getData(a)), k = 8, ndim = 2)
vegy <- vegan::scores(vegiso)
drdiso <- embed(a, "Isomap", knn = 8, ndim = 2)
drdy <- drdiso@data@data
## Randomly fails:
## expect_equivalent(drdy, vegy)
err1 <- max(abs(drdy - vegy))
drdy[, 2] <- -drdy[, 2]
err2 <- max(abs(drdy - vegy))
drdy[, 1] <- -drdy[, 1]
err3 <- max(abs(drdy - vegy))
drdy[, 2] <- -drdy[, 2]
err4 <- max(abs(drdy - vegy))
err <- min(err1, err2, err3, err4)
expect_true(err < eps, info = paste0("err = ", err,
", eps = ", eps,
", expected err < eps"))
})
test_that("check other.data", {
a <- loadDataSet("3D S Curve", n = 200)
drdiso <- embed(a, "Isomap", knn = 8, ndim = 2, get_geod = TRUE)
expect_true(inherits(getOtherData(drdiso)$geod, "dist"))
})
dimRed/tests/testthat/test_diffmap.R 0000644 0001762 0000144 00000000537 13464507204 017262 0 ustar ligges users context("DiffusionMaps")
test_that("DiffusionMaps", {
expect_s4_class(embed(iris[1:4], "DiffusionMaps", ndim = 1,
.mute = c("message", "output")),
"dimRedResult")
x <- embed(iris[1:4], "DiffusionMaps", ndim = 1,
.mute = c("message", "output"))
expect_equal(dim(x@data@data), c(150, 1))
})
dimRed/tests/testthat/test_umap.R 0000644 0001762 0000144 00000003764 14153173664 016630 0 ustar ligges users
context("UMAP")
skip_if_no_umap_learn <- function() {
if (!reticulate::py_module_available("umap") &&
Sys.getenv("BNET_FORCE_UMAP_TESTS") != 1)
skip("umap-learn not available, install with `pip install umap-learn==0.4`")
}
test_that("UMAP python", {
skip_if_no_umap_learn()
res1 <- embed(iris[1:4], "UMAP", .mute = c("message", "output"))
res2 <- embed(iris[1:4], "UMAP", .mute = c("message", "output"), knn = 20)
expect_s4_class(res1, "dimRedResult")
expect_equal(res1@method, "UMAP")
expect_equal(res1@pars$d, "euclidean")
expect_equal(res1@pars$knn, 15)
expect_equal(res1@pars$method, "umap-learn")
expect_equal(res1@pars$ndim, 2)
expect_s4_class(res2, "dimRedResult")
expect_equal(res2@method, "UMAP")
expect_equal(res2@pars$d, "euclidean")
expect_equal(res2@pars$knn, 20)
expect_equal(res2@pars$method, "umap-learn")
expect_equal(res2@pars$ndim, 2)
expect_true(any(res1@data@data != res2@data@data))
pred1 <- predict(res1, iris[1:4])
pred2 <- predict(res2, iris[1:4])
expect_equal(dim(pred1@data), dim(res1@data@data))
expect_equal(dim(pred2@data), dim(res2@data@data))
})
test_that("UMAP R", {
res1 <- embed(iris[1:4], "UMAP", method = "naive", .mute = c("message", "output"))
res2 <- embed(iris[1:4], "UMAP", method = "naive", .mute = c("message", "output"), knn = 20)
expect_s4_class(res1, "dimRedResult")
expect_equal(res1@method, "UMAP")
expect_equal(res1@pars$d, "euclidean")
expect_equal(res1@pars$knn, 15)
expect_equal(res1@pars$method, "naive")
expect_equal(res1@pars$ndim, 2)
expect_s4_class(res2, "dimRedResult")
expect_equal(res2@method, "UMAP")
expect_equal(res2@pars$d, "euclidean")
expect_equal(res2@pars$knn, 20)
expect_equal(res2@pars$method, "naive")
expect_equal(res2@pars$ndim, 2)
expect_true(any(res1@data@data != res2@data@data))
pred1 <- predict(res1, iris[1:4])
pred2 <- predict(res2, iris[1:4])
expect_equal(dim(pred1@data), dim(res1@data@data))
expect_equal(dim(pred2@data), dim(res2@data@data))
})
dimRed/tests/testthat/test_kPCA.R 0000644 0001762 0000144 00000005455 13371631672 016442 0 ustar ligges users
data(iris)
context("kPCA")
test_that("general data conversions", {
irisData <- loadDataSet("Iris")
expect_equal(class(irisData)[1], "dimRedData")
irisPars <- list()
irisPars[[length(irisPars) + 1]] <-
list(kernel = "rbfdot",
kpar = list(sigma = 0.1))
irisPars[[length(irisPars) + 1]] <-
list(kernel = "rbfdot",
kpar = list(sigma = 1))
irisPars[[length(irisPars) + 1]] <-
list(kernel = "polydot",
kpar = list(degree = 3))
irisPars[[length(irisPars) + 1]] <-
list(kernel = "vanilladot",
kpar = list())
irisPars[[length(irisPars) + 1]] <-
list(kernel = "laplacedot",
kpar = list(sigma = 1))
irisPars[[length(irisPars) + 1]] <-
list(kernel = "laplacedot",
kpar = list(sigma = 0.1))
irisPars[[length(irisPars) + 1]] <-
list(kernel = "besseldot",
kpar = list(sigma = 0.1,
order = 1,
degree = 1))
irisPars[[length(irisPars) + 1]] <-
list(kernel = "besseldot",
kpar = list(sigma = 1,
order = 2,
degree = 3))
irisPars[[length(irisPars) + 1]] <-
list(kernel = "splinedot",
kpar = list())
irisRes <- lapply(irisPars, function(x)
do.call(
function(...) tryCatch(embed(.data = irisData,
.method = "kPCA", ...),
error = function(e) as.character(e)),
x
) )
for (i in 1:length(irisRes)) {
if (inherits(irisRes[[i]], "character")){
expect(grepl("singular", irisRes[[i]]),
"singular")
} else {
expect(inherits(irisRes[[i]], "dimRedResult"),
'should be of class "dimRedResult"')
}
}
## This test fails with multithreaded blas
## for (i in 1:length(irisRes)){
## if (inherits(irisRes[[i]], "dimRedResult")){
## expect_equal(irisRes[[i]]@apply(irisData)@data[, 1:2],
## irisRes[[i]]@data@data)
## expect_equal(2, getNDim(irisRes[[i]]))
## ## the reverse is an approximate:
## expect_less_than(
## max(
## irisRes[[i]]@inverse(irisRes[[i]]@data)@data - irisData@data
## ), 300,
## ## paste0("inverse of kpca is an approximate, ",
## ## "so this may fail due to numerical inaccuracy")
## )
## }
## }
## This one cannot calculate an inverse:
kpca.fit <- embed(loadDataSet("3D S", n = 200),
"kPCA", kernel = "splinedot", kpar = list())
expect( is.na(kpca.fit@inverse(1)), "The inverse should return NA" )
})
dimRed/tests/testthat/test_dimRedData.R 0000644 0001762 0000144 00000002601 13371631672 017650 0 ustar ligges users
context("the dimRedData class")
test_that("constructor", {
expect_equal(dimRedData(), new("dimRedData",
data = matrix(numeric(0),
nrow = 0, ncol = 0),
meta = data.frame()))
expect_error(dimRedData(iris))
expect_s4_class(dimRedData(iris[, 1:4], iris[, 5]), "dimRedData")
expect_s4_class(dimRedData(iris[, 1:4]), "dimRedData")
expect_error(dimRedData(iris))
})
test_that("conversion functions", {
expect_equal(as(iris[, 1:4], "dimRedData"), dimRedData(iris[, 1:4]))
expect_error(as(iris, "dimRedData"))
expect_equal(as(loadDataSet("Iris"), "data.frame"),
as.data.frame(loadDataSet("Iris")))
expect_equivalent(as.dimRedData(
Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, iris),
loadDataSet("Iris")
)
})
test_that("misc functions", {
Iris <- loadDataSet("Iris")
expect_equal(getData(Iris), Iris@data)
expect_equal(getMeta(Iris), Iris@meta)
## No idea why this one is broken with --run-donttest --run-dontrun --timings
## Also broken for devtools::test("dimRed")
expect_equal(nrow(Iris), 150)
expect_equal(Iris[1:4], Iris[1:4, ])
expect_equal(Iris[1:4], Iris[c(rep(TRUE, 4), rep(FALSE, 146))])
expect_equal(Iris[1:4], Iris[c(rep(TRUE, 4), rep(FALSE, 146)), ])
})
dimRed/tests/testthat/test_PCA_L1.R 0000644 0001762 0000144 00000006450 13464507273 016621 0 ustar ligges users
context("PCA L1")
test_that("general data conversions", {
skip_if_not_installed("pcaL1")
irisData <- as(iris[, 1:4], "dimRedData")
expect_equal(class(irisData)[1], "dimRedData")
irisParsCS <- list(center = TRUE, .mute = c("message", "output"), ndim = 4, scale. = TRUE, projections = "l1", fun = "l1pca")
irisParsC <- list(center = TRUE, .mute = c("message", "output"), ndim = 4, scale. = FALSE, projections = "l1", fun = "l1pca")
irisParsS <- list(center = TRUE, .mute = c("message", "output"), ndim = 4, scale. = TRUE, projections = "l1", fun = "l1pcahp")
irisPars <- list(center = FALSE, .mute = c("message", "output"), ndim = 4, scale. = FALSE, projections = "l1", fun = "l1pcastar")
irisResCS <- do.call(function(...) embed(irisData, "PCA_L1", ...), irisParsCS)
irisResS <- do.call(function(...) embed(irisData, "PCA_L1", ...), irisParsS)
irisResC <- do.call(function(...) embed(irisData, "PCA_L1", ...), irisParsC)
irisRes <- do.call(function(...) embed(irisData, "PCA_L1", ...), irisPars)
expect_equal(4, getNDim(irisResCS))
expect_equal(4, getNDim(irisResS))
expect_equal(4, getNDim(irisResC))
expect_equal(4, getNDim(irisRes))
expect_equal(class(irisResCS)[1], "dimRedResult")
expect_equal(class(irisResS)[1], "dimRedResult")
expect_equal(class(irisResC)[1], "dimRedResult")
expect_equal(class(irisRes)[1], "dimRedResult")
expect_equal(irisResCS@apply(irisData), irisResCS@data)
expect_equal(irisResS@apply(irisData), irisResS@data)
expect_equal(irisResC@apply(irisData), irisResC@data)
expect_equal(irisRes@apply(irisData), irisRes@data)
expect(sqrt(mean(
(irisResCS@inverse(irisResCS@data)@data - irisData@data) ^ 2
)) < 0.3,
"error too large"
)
expect(sqrt(mean(
(irisResS@inverse(irisResS@data)@data - irisData@data) ^ 2
)) < 0.3,
"error too large"
)
expect(sqrt(mean(
(irisResC@inverse(irisResC@data)@data - irisData@data) ^ 2
)) < 0.3,
"error too large"
)
expect(sqrt(mean(
(irisRes@inverse(irisRes@data)@data - irisData@data) ^ 2
)) < 0.3,
"error too large"
)
scale2 <- function(x, center, scale.) scale(x, center, scale.)
expect_equal(
do.call(function(...) scale2(iris[1:4], ...) %*% getRotationMatrix(irisResCS),
irisParsCS[c("center", "scale.")]),
getData( getDimRedData(irisResCS) ),
tolerance = 1e-2
)
expect_equal(
do.call(function(...) scale2(iris[1:4], ...) %*% getRotationMatrix(irisResS),
irisParsS[c("center", "scale.")]),
getData( getDimRedData(irisResS) ),
tolerance = 1e-2
)
expect_equal(
do.call(function(...) scale2(iris[1:4], ...) %*% getRotationMatrix(irisResC),
irisParsC[c("center", "scale.")]),
getData( getDimRedData(irisResC) ),
tolerance = 1e-2
)
expect_equal(
do.call(function(...) scale2(iris[1:4], ...) %*% getRotationMatrix(irisRes),
irisPars[c("center", "scale.")]),
getData( getDimRedData(irisRes) ),
tolerance = 1e-2
)
expect_s4_class({ embed(iris[1:4], "PCA_L1", ndim = 1,
.mute = c("message", "output")) },
"dimRedResult")
})
dimRed/tests/testthat/test_misc.R 0000644 0001762 0000144 00000001760 13371631672 016612 0 ustar ligges users context("misc functions")
a <- matrix(rnorm(25), 5, 5)
b <- matrix(rnorm(25), 5, 5)
test_that("squared euclidean distance", {
expect_equivalent(
t(as.matrix(dist(rbind(a, b)))[6:10, 1:5] ^ 2),
pdist2(a, b)
)
})
test_that("formula functions", {
expect_equal(rhs(a + b ~ c + d), ~ c + d + 0)
expect_equal(lhs(a + b ~ c + d), ~ a + b + 0)
})
test_that("makeEpsGraph", {
check_makeEpsGraph <- function(x, eps){
naive <- as.matrix(dist(x))
naive[naive >= eps] <- 0
epsSp <- as.matrix(makeEpsSparseMatrix(x, eps))
all(naive == epsSp)
}
expect_true(check_makeEpsGraph(iris[1:4], 1000))
expect_true(check_makeEpsGraph(iris[1:4], 1))
expect_true(check_makeEpsGraph(iris[1:4], 0.5))
})
test_that("getRotationMatrixFail", {
irisData <- as(iris[, 1:4], "dimRedData")
expect_equal(class(irisData)[1], "dimRedData")
irisRes <- embed(irisData, "tSNE")
expect_error(getRotationMatrix(irisRes), "Not implemented for")
})
dimRed/tests/testthat/test_embed.R 0000644 0001762 0000144 00000000172 13371631672 016727 0 ustar ligges users
context("embed")
test_that("standard method is PCA", {
res <- embed(iris[1:4])
expect_equal(res@method, "PCA")
})
dimRed/tests/testthat/test_drr.R 0000644 0001762 0000144 00000000523 13371631672 016442 0 ustar ligges users
context("DRR")
test_that("drr forward and backward passes", {
spiral <- loadDataSet("Helix", n = 200)
drr_spiral <- embed(spiral, "DRR", ndim = 3, .mute = c("message", "output"))
expect_equal(3, getNDim(drr_spiral))
dsa <- drr_spiral@apply(spiral)
dsi <- drr_spiral@inverse(dsa)
expect_equal(dsi, spiral)
})
dimRed/tests/testthat/test_PCA.R 0000644 0001762 0000144 00000005045 13142050030 016235 0 ustar ligges users
context("PCA")
test_that("general data conversions", {
irisData <- as(iris[, 1:4], "dimRedData")
expect_equal(class(irisData)[1], "dimRedData")
irisParsCS <- list(center = TRUE, scale. = TRUE)
irisParsC <- list(center = TRUE, scale. = FALSE)
irisParsS <- list(center = FALSE, scale. = TRUE)
irisPars <- list(center = FALSE, scale. = FALSE)
irisResCS <- do.call(function(...) embed(irisData, "PCA", ...), irisParsCS)
irisResS <- do.call(function(...) embed(irisData, "PCA", ...), irisParsS)
irisResC <- do.call(function(...) embed(irisData, "PCA", ...), irisParsC)
irisRes <- do.call(function(...) embed(irisData, "PCA", ...), irisPars)
expect_equal(2, getNDim(irisResCS))
expect_equal(2, getNDim(irisResS))
expect_equal(2, getNDim(irisResC))
expect_equal(2, getNDim(irisRes))
expect_equal(class(irisResCS)[1], "dimRedResult")
expect_equal(class(irisResS)[1], "dimRedResult")
expect_equal(class(irisResC)[1], "dimRedResult")
expect_equal(class(irisRes)[1], "dimRedResult")
expect_equal(irisResCS@apply(irisData), irisResCS@data)
expect_equal(irisResS@apply(irisData), irisResS@data)
expect_equal(irisResC@apply(irisData), irisResC@data)
expect_equal(irisRes@apply(irisData), irisRes@data)
expect(sqrt(mean(
(irisResCS@inverse(irisResCS@data)@data - irisData@data) ^ 2
)) < 0.3,
"error too large"
)
expect(sqrt(mean(
(irisResS@inverse(irisResS@data)@data - irisData@data) ^ 2
)) < 0.3,
"error too large"
)
expect(sqrt(mean(
(irisResC@inverse(irisResC@data)@data - irisData@data) ^ 2
)) < 0.3,
"error too large"
)
expect(sqrt(mean(
(irisRes@inverse(irisRes@data)@data - irisData@data) ^ 2
)) < 0.3,
"error too large"
)
scale2 <- function(x, center, scale.) scale(x, center, scale.)
expect_equal(
do.call(function(...) scale2(iris[1:4], ...) %*% getRotationMatrix(irisResCS), irisParsCS),
getData( getDimRedData(irisResCS) )
)
expect_equal(
do.call(function(...) scale2(iris[1:4], ...) %*% getRotationMatrix(irisResS), irisParsS),
getData( getDimRedData(irisResS) )
)
expect_equal(
do.call(function(...) scale2(iris[1:4], ...) %*% getRotationMatrix(irisResC), irisParsC),
getData( getDimRedData(irisResC) )
)
expect_equal(
do.call(function(...) scale2(iris[1:4], ...) %*% getRotationMatrix(irisRes), irisPars),
getData( getDimRedData(irisRes) )
)
})
dimRed/tests/testthat/test_dimRedResult.R 0000644 0001762 0000144 00000001066 13464507204 020255 0 ustar ligges users
context("dimRedResult-class")
test_that("predict/inverse methods", {
dat <- loadDataSet("Iris")
emb <- embed(dat, "PCA", ndim = 4)
pred <- predict(emb, dat)
inv <- inverse(emb, pred)
expect_equal(getDimRedData(emb), pred)
expect_equal(dat, inv)
emb2 <- embed(dat, "tSNE")
expect_error(predict(emb2, dat))
expect_error(inverse(emb2, dat))
})
test_that("conversion", {
iris_data_frame_as <- as(embed(loadDataSet("Iris"), "PCA"), "data.frame")
expect_equal(colnames(iris_data_frame_as), c("meta.Species", "PC1", "PC2", colnames(iris)[-5]))
})
dimRed/tests/testthat/test_NNMF.R 0000644 0001762 0000144 00000012556 13464507273 016424 0 ustar ligges users skip_if_no_NMF <- function() {
if (!requireNamespace("NMF", quietly = TRUE) &&
Sys.getenv("BNET_FORCE_NNMF_TESTS") != "1")
skip("NMF not available for testing")
}
context("NNMF")
## if we don't load the library explicitly, the predict function does not work
## (sometimes...).
## library(NMF)
ints_trn <- matrix(seq(0, 98, by = 2), ncol = 5)
input_trn <- dimRedData(as.data.frame(ints_trn))
input_tst <- dimRedData(ints_trn[1:3,] + 1)
test_that("2D projection", {
skip_if_no_NMF()
dim_2_defaults <- embed(input_trn, "NNMF", seed = 13, nrun = 1)
expect_equal(dim_2_defaults@method, "NNMF")
## Expected results from
## tmp <- NMF::nmf(t(ints_trn), rank = 2, nrun = 1, seed = 13)
## coefs <- basis(tmp)
## rownames(coefs) <- paste0("V", 1:5)
## colnames(coefs) <- paste0("NNMF", 1:2)
## coefs
## dput(coefs)
dim_2_coef <- structure(
c(18.807241710186, 30.2191667888959,
32.1069052462692, 9.53490906878683,
164.109205703974, 0.00064246562138093,
24.3924277525021, 56.4301459918642,
108.103923297376, 17.566220349863),
.Dim = c(5L, 2L),
.Dimnames = list(c("V1", "V2", "V3", "V4", "V5"),
c("NNMF1", "NNMF2")))
expect_equivalent(dim_2_defaults@other.data$w, dim_2_coef)
dim_2_apply <- dim_2_defaults@apply(input_tst)@data
dim_2_pred <- predict(dim_2_defaults, input_tst)@data
## Expected results from
## t(solve(crossprod(basis(tmp)), t(input_tst@data %*% basis(tmp))))
## preds <- getData(input_tst) %*% t(MASS::ginv(basis(tmp)))
## getData(getDimRedData(dim_2_defaults))
## colnames(preds) <- paste0("NNMF", 1:2)
## dput(preds)
dim_2_exp <- structure(
c(0.427476458116875, 0.440237021147746, 0.452997584178617,
0.512256378881175, 0.5332094651398, 0.554162551398426),
.Dim = c(3L, 2L),
.Dimnames = list(NULL, c("NNMF1", "NNMF2"))
)
expect_equivalent(dim_2_apply, dim_2_exp, tolerance = 0.01)
expect_equivalent(dim_2_pred, dim_2_exp, tolerance = 0.01)
})
test_that("other arguments", {
skip_if_no_NMF()
dim_3_args <- embed(input_trn, "NNMF", seed = 13, nrun = 10,
ndim = 3, method = "KL",
options = list(.pbackend = NULL))
## Expected results from
## tmp <- NMF::nmf(t(ints_trn), rank = 3, nrun = 10, seed = 13,
## method = "KL", .pbackend = NULL)
## coefs <- t(NMF::coef(tmp))
## colnames(coefs) <- paste0("NNMF", 1:ncol(coefs))
## coefs
## dput(coefs)
## rot <- NMF::basis(tmp)
## rownames(rot) <- paste0("V", 1:nrow(rot))
## dput(rot)
dim_3_rot <- structure(
c(11.624951277152, 31.2554213278975, 50.8858913786408,
70.5163614293837, 90.1468314801264, 2.22044604925031e-16,
36.4357899711133, 72.8715799422292, 109.307369913346,
145.743159884462, 22.4019808842378, 42.1081005773292,
61.8142202704197, 81.52033996351, 101.2264596566),
.Dim = c(5L, 3L),
.Dimnames = list(c("V1", "V2", "V3", "V4", "V5"), NULL)
)
dim_3_pred <- structure(
c(2.22044604925031e-16, 0.0731742704517501, 0.194863499580201,
0.50224638618713, 0.557517908619563, 0.197219538171418,
0.0860784848917408, 0.159094934700865, 0.10366866301249,
0.216483929440989, 0.54891083782883, 0.481738298195276, 0.40204352636632,
0.274419226004639, 0.211867578024856, 0.256578985276104,
0.236980211423017, 0.16984840699324, 0.135869049278152,
0.0584647425861749, 2.22044604925031e-16, 0.0513058500137363,
0.0774360678481537, 0.00720517673339281, 0.0678012129377125,
0.344046917890136, 0.49099862480747, 0.542386371921862, 0.660426277478513,
0.691161417731563),
.Dim = c(10L, 3L),
.Dimnames = list(NULL, c("NNMF1", "NNMF2", "NNMF3"))
)
expect_equivalent(dim_3_args@other.data$w, dim_3_rot)
expect_equivalent(getData(getDimRedData(dim_3_args)), dim_3_pred)
dim_3_apply <- dim_3_args@apply(input_tst)@data
dim_3_pred <- predict(dim_3_args, input_tst)@data
## Expected results from
## crossprod(basis(tmp)) does not have full rank!!! This needs to be considered
## w <- getOtherData(dim_3_args)$w
## preds <- t(solve(crossprod(w), t(input_trn@data %*% w)))
## preds <- t(qr.solve(crossprod(w), t(input_trn@data %*% w)))
## preds <- getData(input_tst) %*% t(MASS::ginv(w))
## preds
## dput(preds)
## getData(getDimRedData(dim_3_args))
## preds - getData(getDimRedData(dim_3_args))
## input_trn@data
## input_tst@data %*% basis(tmp)
## colnames(preds) <- paste0("NNMF", 1:3)
dim_3_exp <- structure(
c(0.118730450278164, 0.144080695556738, 0.169430940835312,
0.494122495652466, 0.439293850852014, 0.384465206051563,
-0.0169733070286198, 0.0591496323928872, 0.135272571814394),
.Dim = c(3L, 3L)
)
expect_equivalent(dim_3_apply, dim_3_exp, tolerance = 0.01)
expect_equivalent(dim_3_pred, dim_3_exp, tolerance = 0.01)
})
test_that("Bad args", {
skip_if_no_NMF()
expect_error(embed(iris, "NNMF"))
expect_error(embed(iris[, 1], "NNMF"),
"`ndim` should be less than the number of columns")
expect_error(embed(iris[1:4], "NNMF", method = c("a", "b")),
"only supply one `method`")
expect_error(embed(scale(iris[1:4]), "NNMF"), "negative entries")
})
test_that("Full_rank", {
dim_2_full_rank_example <- embed(input_trn, "NNMF", ndim = ncol(input_trn@data))
dim_2_recon <- inverse(dim_2_full_rank_example, dim_2_full_rank_example@data@data)
expect_equivalent(dim_2_recon, input_trn)
})
dimRed/tests/testthat/test_ICA.R 0000644 0001762 0000144 00000001211 13142050040 016216 0 ustar ligges users
context("FastICA")
test_that("general data conversions", {
irisData <- as(iris[, 1:4], "dimRedData")
expect_equal(class(irisData)[1], "dimRedData")
irisRes <- embed(irisData, "FastICA")
expect_equal(class(irisRes)[1], "dimRedResult")
expect_equal(2, getNDim(irisRes))
expect_equal(irisRes@apply(irisData), irisRes@data)
expect(sqrt(mean(
(irisRes@inverse(irisRes@data)@data - irisData@data) ^ 2
)) < 0.3,
"error too large"
)
expect_equal(
scale(iris[1:4], TRUE, FALSE) %*% getRotationMatrix(irisRes),
unname(as.matrix(getData( getDimRedData(irisRes) )) )
)
})
dimRed/tests/testthat/test_autoencoder.R 0000644 0001762 0000144 00000022343 14153207515 020161 0 ustar ligges users
context("AutoEncoder")
skip_if_no_tensorflow <- function() {
if (!reticulate::py_module_available("tensorflow") &&
Sys.getenv("BNET_FORCE_AUTOENCODER_TESTS") != "1")
skip("TensorFlow not available for testing")
}
skip_if_no_keras <- function() {
if (!keras::is_keras_available() &&
Sys.getenv("BNET_FORCE_AUTOENCODER_TESTS") != "1")
skip("Keras not available for testing")
}
test_that("Check if tensorflow is installed correctly.", {
skip_if_no_tensorflow()
library(tensorflow)
tensorflow::tf$compat$v1$disable_v2_behavior()
# I have not found a way to suppress the warning tf gives on first use.
sess <- tf$compat$v1$Session()
hello <- "Hello, TensorFlow!"
tf_hello <- tf$compat$v1$constant(hello)
tf_hello_res <- sess$run(tf_hello)
# in python 3 this returns a `bytes` object $decode() transforms it into a
# sting, in python 2 this is a simple string
if(!is.character(tf_hello_res))
tf_hello_res <- tf_hello_res$decode()
## print("tf_hello_res:")
## print(str(tf_hello_res))
## print(tf_hello_res)
expect(tf_hello_res == hello, paste("tensorflow does not work:\n",
"hello =", hello, "\n",
"sess$run(tf_hello) =", tf_hello_res))
})
test_that("Check errors when building autoencoder.", {
skip_if_no_tensorflow()
iris_data <- as(iris[, 1:4], "dimRedData")
expect_error(embed(iris_data, "AutoEncoder", activation = "sigmoid"),
"declare an activation function for each layer")
expect_error(embed(iris_data, "AutoEncoder", n_hidden = c(1, 2, 2, 1)),
"the number of layers must be impair")
expect_error(embed(iris_data, "AutoEncoder", weight_decay = -1),
"weight decay must be > 0")
expect_error(embed(iris_data, "AutoEncoder", learning_rate = -1),
"learning rate must be > 0")
expect_error(embed(iris_data, "AutoEncoder", n_steps = -1),
"n_steps must be > 0")
expect_error(embed(iris_data, "AutoEncoder", n_hidden = c(4, 2, 4), ndim = 3),
"the middle of n_hidden must be equal to ndim")
})
test_that("using autoencoder with parameters", {
skip_if_no_tensorflow()
iris_data <- as(iris[, 1:4], "dimRedData")
expect_equal(class(iris_data)[1], "dimRedData")
ae <- lapply(1:2, function(x) embed(iris_data, "AutoEncoder",
n_hidden = c(10, x, 10),
ndim = x,
n_steps = 100))
aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse"))
lapply(ae, function(x) expect_s4_class(x, "dimRedResult"))
## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions")
## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions")
## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions")
lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]])))
ae <- lapply(1:2, function(x) embed(iris_data,
"AutoEncoder",
n_hidden = c(10, x, 10),
ndim = x,
weight_decay = 0.1,
n_steps = 100))
aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse"))
lapply(ae, function(x) expect_s4_class(x, "dimRedResult"))
## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions")
## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions")
## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions")
lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]])))
ae <- lapply(1:2, function(x) embed(iris_data,
"AutoEncoder",
n_hidden = c(10, x, 10),
ndim = x,
learning_rate = 0.1,
weight_decay = 0.1,
n_steps = 100))
aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse"))
lapply(ae, function(x) expect_s4_class(x, "dimRedResult"))
## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions")
## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions")
## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions")
lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]])))
ae <- lapply(1:2, function(x) embed(iris_data,
"AutoEncoder",
n_hidden = c(10, x, 10),
activation = c("sigmoid", "sigmoid", "sigmoid"),
ndim = x,
learning_rate = 0.1,
weight_decay = 0.1,
n_steps = 100))
aq <- lapply(ae, function(x) quality(x, "reconstruction_rmse"))
lapply(ae, function(x) expect_s4_class(x, "dimRedResult"))
aa <- lapply(c("tanh", "sigmoid", "relu", "elu"),
function(x) embed(iris_data,
"AutoEncoder",
n_hidden = c(10, 2, 10),
activation = c("sigmoid", "sigmoid", "sigmoid"),
ndim = 2,
learning_rate = 0.1,
weight_decay = 0.1,
n_steps = 100))
aaq <- lapply(aa, function(x) quality(x, "reconstruction_rmse"))
lapply(aa, function(x) expect_s4_class(x, "dimRedResult"))
## expect(aq[[1]] > aq[[2]], "the error should decrease with more dimensions")
## expect(aq[[2]] > aq[[3]], "the error should decrease with more dimensions")
## expect(aq[[3]] > aq[[4]], "the error should decrease with more dimensions")
lapply(1:length(ae), function(x) expect_equal(x, getNDim(ae[[x]])))
})
test_that("using autoencoder with autoencoder results", {
skip_if_no_tensorflow()
tensorflow::tf$compat$v1$set_random_seed(2)
iris_data <- as(iris[, 1:4], "dimRedData")
expect_equal(class(iris_data)[1], "dimRedData")
ae1 <- lapply(1:2, function(x) embed(iris_data, "AutoEncoder",
n_hidden = c(10, x, 10),
ndim = x, n_steps = 1))
aq1 <- lapply(ae1, function(x) quality(x, "reconstruction_rmse"))
ae2 <- lapply(ae1, function(x) embed(iris_data, "AutoEncoder",
autoencoder = x, n_steps = 1000))
aq2 <- lapply(ae2, function(x) quality(x, "reconstruction_rmse"))
lapply(ae1, function(x) expect_s4_class(x, "dimRedResult"))
lapply(ae2, function(x) expect_s4_class(x, "dimRedResult"))
expect(aq1[[1]] > aq2[[1]], "the error should decrease with more steps")
expect(aq1[[2]] > aq2[[2]], "the error should decrease with more steps")
## expect(aq1[[3]] > aq2[[3]], "the error should decrease with more steps")
## expect(aq1[[4]] > aq2[[4]], "the error should decrease with more steps")
lapply(1:length(ae1), function(x) expect_equal(x, getNDim(ae1[[x]])))
lapply(1:length(ae2), function(x) expect_equal(x, getNDim(ae2[[x]])))
})
test_that("using autoencoder with keras", {
skip_if_no_tensorflow()
skip_if_no_keras()
encoder <- function(i) list(keras::layer_dense(units = 10,
activation = "tanh"),
keras::layer_dense(units = i))
decoder <- function() list(keras::layer_dense(units = 10,
activation = "tanh"),
keras::layer_dense(units = 4))
iris_data <- as(iris[, 1:4], "dimRedData")
ae1 <- lapply(1:2, function(x) embed(iris_data, "AutoEncoder",
keras_graph = list(encoder = encoder(x),
decoder = decoder()),
n_steps = 2))
aq1 <- lapply(ae1, function(x) quality(x, "reconstruction_rmse"))
ae2 <- lapply(ae1, function(x) embed(iris_data, "AutoEncoder",
autoencoder = x))
aq2 <- lapply(ae2, function(x) quality(x, "reconstruction_rmse"))
lapply(ae1, function(x) expect_s4_class(x, "dimRedResult"))
lapply(ae2, function(x) expect_s4_class(x, "dimRedResult"))
## expect(aq1[[1]] > aq2[[1]], "the error should decrease with more steps")
## expect(aq1[[2]] > aq2[[2]], "the error should decrease with more steps")
## expect(aq1[[3]] > aq2[[3]], "the error should decrease with more steps")
## expect(aq1[[4]] > aq2[[4]], "the error should decrease with more steps")
lapply(1:length(ae1), function(x) expect_equal(x, getNDim(ae1[[x]])))
lapply(1:length(ae2), function(x) expect_equal(x, getNDim(ae2[[x]])))
})
## test_that("garbage collection", {
## skip_if_no_tensorflow()
## tmp <- tf$get_session_handle(environment(ae[[1]]@apply)$dec)
## tmp <- tf$get_default_session()
## tmp$close
## tmp
## tf$get_session_handle()
## tf$Session()
## })
dimRed/tests/testthat/test_all.R 0000644 0001762 0000144 00000003053 13373523703 016421 0 ustar ligges users context("high level functions")
test_that("high level functions working?", {
embed_methods <- dimRedMethodList()
quality_methods <- dimRedQualityList()
scurve <- loadDataSet("3D S Curve", n = 300)
for(i in 1:ncol(scurve@data)){
scurve@data[, i] <- scurve@data[, i] - min(scurve@data[, i])
}
quality_results <- matrix(NA, length(embed_methods),
length(quality_methods),
dimnames = list(embed_methods, quality_methods))
embedded_data <- list()
for (e in embed_methods) {
message("embedding: ", e)
if ((e != "AutoEncoder" || reticulate::py_module_available("tensorflow")) &&
(e != "UMAP" || reticulate::py_module_available("umap-learn")) &&
(e != "PCA_L1" || ("pcaL1" %in% rownames(installed.packages()))) ) {
suppressWarnings(
embedded_data[[e]] <- embed(
scurve, e,
.mute = c("message", "output")))
for (q in quality_methods) {
message(" quality: ", q)
quality_results[e, q] <- tryCatch(
suppressWarnings(quality(embedded_data[[e]], q,
.mute = c("message", "output"))),
error = function (e) NA
)
}
}
}
lapply(embedded_data, function(x) expect_equal(2, getNDim(x)))
expect(inherits(quality_results, "matrix"), "should be matrix")
expect(storage.mode(quality_results) == "double",
'storage should be "double"')
})
dimRed/tests/testthat/test_quality.R 0000644 0001762 0000144 00000004742 13475160740 017350 0 ustar ligges users
context("quality")
test_that("quality", {
irisData <- loadDataSet("Iris")
parsPCA <- list(center = TRUE, scale. = TRUE)
resPCA <- do.call(function(...) embed(irisData, "PCA", ...), parsPCA)
suppressWarnings(
resQual <- list(
Q_local(resPCA),
Q_global(resPCA),
mean_R_NX(resPCA),
total_correlation(resPCA),
cophenetic_correlation(resPCA),
distance_correlation(resPCA),
reconstruction_rmse(resPCA)
)
)
lapply(resQual, function(x) expect_true(is.numeric(x)))
})
test_that("Q_local ndim", {
irisData <- loadDataSet("Iris")
irisData <- irisData[!duplicated(irisData@data)]
parsPCA <- list(center = TRUE, scale. = TRUE, ndim = 4)
resPCA <- do.call(function(...) embed(irisData, "PCA", ...), parsPCA)
tmp <- sapply(1:4, function(x) quality(resPCA, "Q_local", ndim = x))
expect_equal(rank(tmp), 1:4)
})
test_that("rmse_by_ndim", {
set.seed(1)
ir <- loadDataSet("Iris")
ir.drr <- embed(ir, "DRR", .mute = c("message", "output"), ndim = ndims(ir))
ir.pca <- embed(ir, "PCA", ndim = ndims(ir))
rmse_res <- data.frame(
drr = reconstruction_error(ir.drr),
pca = reconstruction_error(ir.pca)
)
for (i in 1:length(rmse_res$pca)) {
expect_true(rmse_res$pca[i] - rmse_res$drr[i] + 1e-12 > 0, info = paste0(
"ndim = ", i,
", rmse pca = ", rmse_res$pca[i],
", rmse drr = ", rmse_res$drr[i]
))
}
# expect_true(all((rmse_res$pca - rmse_res$drr) + 1e-12 > 0))
expect_error(reconstruction_error(ir.pca, 5))
expect_error(reconstruction_error(ir.pca, 0))
})
test_that("AUC_lnK_R_NX", {
irisData <- loadDataSet("Iris")
irisData <- irisData[!duplicated(irisData@data)]
parsPCA <- list(center = TRUE, scale. = TRUE, ndim = 4)
resPCA <- do.call(function(...) embed(irisData, "PCA", ...), parsPCA)
expect_true(length(AUC_lnK_R_NX(resPCA, weight = "inv")) == 1)
expect_true(length(AUC_lnK_R_NX(resPCA, weight = "log")) == 1)
expect_true(length(AUC_lnK_R_NX(resPCA, weight = "ln")) == 1)
expect_true(length(AUC_lnK_R_NX(resPCA, weight = "log10")) == 1)
expect_true(AUC_lnK_R_NX(resPCA, weight = "log") == AUC_lnK_R_NX(resPCA, weight = "ln"))
expect_error(AUC_lnK_R_NX(resPCA, weight = "asdf"))
})
dimRed/tests/testthat/test_dataSets.R 0000644 0001762 0000144 00000000304 13024272076 017412 0 ustar ligges users context("dataSets")
test_that("datasets load", {
for (d in dataSetList()) {
ds <- loadDataSet(d)
expect(inherits(ds, "dimRedData"), "must be of class 'dimRedData'")
}
})
dimRed/tests/testthat.R 0000644 0001762 0000144 00000000125 13464507273 014614 0 ustar ligges users library(testthat)
library(dimRed)
test_check("dimRed", reporter = LocationReporter)
dimRed/vignettes/ 0000755 0001762 0000144 00000000000 14153220136 013463 5 ustar ligges users dimRed/vignettes/classification_tree.tex 0000644 0001762 0000144 00000007225 13371631672 020241 0 ustar ligges users \newcommand{\imp}[1] {\textbf{#1}} % style for implemented methods
\newcommand{\noimp}[1] {#1} % style for not implemented methods
\tikz[
% tree layout,
grow cyclic, %
level 1/.style={level distance=1.2cm, sibling
angle=180, text width=1.5cm, font={\small}}, %
level 2/.style={level distance=1.9cm, sibling angle=40,
font={\scriptsize}},%, text width=1.4cm},
level 3/.style={level distance=2.2cm, sibling angle=30},
level 4/.style={level distance=2.3cm},
% text width=1.2cm,
font=\tiny,
innernode/.style={align=flush center},%, text width=1.2},
leaf/.style={%
% draw, very thin,
% fill=red!30,
rounded corners,
align=flush left,
text width=,
inner sep=2pt,
font={\hangindent=0.2cm\scriptsize\sffamily}},
]{
\node[innernode, draw, align=flush center, rounded corners, font={\normalsize\bfseries}]{
Dimensionality \\ reduction}
child[] { node[innernode] {Convex} % level 1
child[sibling angle=55]{ node[innernode] {Full spectral} % level 2
child { node[innernode] {Euclidean distances}
child { node[leaf, text width=1.3cm]{
\imp{PCA} \\
\imp{Classical scaling}
} } }
child { node[innernode] {Geodesic distances}
child { node[leaf]{
\imp{Isomap}
} } }
child { node[innernode] {Kernel-based}
child { node[leaf]{
\imp{Kernel PCA} \\
\noimp{MVU}
} } }
child { node[innernode] {Diffusion distance}
child { node[leaf]{
\imp{Diffusion maps}
} } } }
child[] { node[innernode] {Removal of shared information
by regression} %level 2
child{ node[leaf]{
\imp{DRR}
} } }
child[sibling angle=55] { node[innernode] {Sparse spectral} % level 2
child[sibling angle=45] { node[innernode] {Reconstruction weights}
child {node[leaf]{
\imp{Local Linear Embedding}
} } }
child[sibling angle=45] { node[innernode] {Neighborhood graph Laplacian}
child { node[leaf]{
\imp{Laplacian Eigenmaps}
} } }
child[sibling angle=45] { node[innernode] {Local tangent space}
child { node[leaf, text width=2cm]{
\imp{Hessian LLE} \\
\noimp{Local tangent space alignment}
} } } } }
child[level distance=1.8cm] { node[innernode] {Non-convex} %level 1
child { node[innernode] {Weighted Euclidean distances} % level 2
child { node[leaf, text width=2cm]{
\imp{Non-linear MDS} \\
\noimp{Sammon's mapping} \\
\noimp{Stochastic Proximity Embedding}
} } }
child { node[innernode] {Alignment of local linear models} % level 2
child { node[leaf]{
\noimp{LLC} \\
\noimp{Man.\ charting}
} } }
child { node[innernode] {Neural network} % level 2
child { node[leaf]{
\imp{Autoencoder}
} } }
child { node[innernode] {Discrete mapping} % level 2
child { node[leaf,text width=2.5cm]{
\noimp{Self Organizing Maps} \\
\noimp{Generative Topographic Mapping} \\
\noimp{Elastic Net}
} } }
child { node[innernode] {Stochastic methods} % level 2
child { node[leaf]{
\noimp{SNE} \\
\imp{t-SNE} \\
\noimp{NeRV} \\
\noimp{JNE}
} } }
child { node[innernode] {Force directed} % level 2
child { node[leaf, text width=2cm]{
\imp{Kamada-Kawai} \\
\imp{Fruchterman-Reingold} \\
\imp{DrL}
} } } };
}
%%% Local Variables:
%%% mode: LaTeX
%%% TeX-command-extra-options: "-shell-escape"
%%% TeX-engine: default
%%% TeX-master: "dimensionality-reduction"
%%% End: dimRed/vignettes/bibliography.bib 0000644 0001762 0000144 00000062200 13371631672 016630 0 ustar ligges users
@book{rojo-alvarez_digital_2017,
edition = {1st},
title = {Digital {Signal} {Processing} with {Kernel} {Methods}},
isbn = {978-1-118-61179-1},
publisher = {Wiley},
author = {Rojo-Álvarez, J. L. and Martínez-Ramón, M. and Muñoz-Marí, J. and Camps-Valls, G.},
month = dec,
year = {2017}
}
@article{arenas-garcia_kernel_2013,
title = {Kernel {Multivariate} {Analysis} {Framework} for {Supervised} {Subspace} {Learning}: {A} {Tutorial} on {Linear} and {Kernel} {Multivariate} {Methods}},
volume = {30},
issn = {1053-5888},
shorttitle = {Kernel {Multivariate} {Analysis} {Framework} for {Supervised} {Subspace} {Learning}},
doi = {10.1109/MSP.2013.2250591},
number = {4},
journal = {IEEE Signal Processing Magazine},
author = {Arenas-Garcia, J. and Petersen, K. B. and Camps-Valls, G. and Hansen, L. K.},
month = jul,
year = {2013},
pages = {16--29},
}
@inproceedings{scholkopf_generalized_2001,
title = {A {Generalized} {Representer} {Theorem}},
url = {https://link.springer.com/chapter/10.1007/3-540-44581-1_27},
doi = {10.1007/3-540-44581-1_27},
language = {en},
urldate = {2017-06-12},
booktitle = {Computational {Learning} {Theory}},
publisher = {Springer, Berlin, Heidelberg},
author = {Schölkopf, Bernhard and Herbrich, Ralf and Smola, Alex J.},
month = jul,
year = {2001},
pages = {416--426},
}
@incollection{bakir_learning_2004,
title = {Learning to {Find} {Pre}-{Images}},
url = {http://papers.nips.cc/paper/2417-learning-to-find-pre-images.pdf},
doi = {10.1007/978-3-540-28649-3_31},
urldate = {2017-06-12},
booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 16},
publisher = {MIT Press},
author = {Bakir, Gökhan H. and Weston, Jason and Schölkopf, Bernhard},
editor = {Thrun, S. and Saul, L. K. and Schölkopf, P. B.},
year = {2004},
pages = {449--456},
}
@inproceedings{babaee_assessment_2013,
title = {Assessment of dimensionality reduction based on communication channel model; application to immersive information visualization},
url = {http://elib.dlr.de/88828/},
doi = {10.1109/BigData.2013.6691726},
booktitle = {Big {Data} 2013},
publisher = {IEEE Xplore},
author = {Babaee, Mohammadreza and Datcu, Mihai and Rigoll, Gerald},
year = {2013},
pages = {1--6},
}
@article{mahecha_nonlinear_2007,
title = {Nonlinear dimensionality reduction: {Alternative} ordination approaches for extracting and visualizing biodiversity patterns in tropical montane forest vegetation data},
volume = {2},
issn = {1574-9541},
shorttitle = {Nonlinear dimensionality reduction},
url = {http://www.sciencedirect.com/science/article/pii/S1574954107000325},
doi = {10.1016/j.ecoinf.2007.05.002},
number = {2},
urldate = {2016-08-26},
journal = {Ecological Informatics},
author = {Mahecha, Miguel D. and Martínez, Alfredo and Lischeid, Gunnar and Beck, Erwin},
month = jun,
year = {2007},
pages = {138--149},
}
@inproceedings{bengio_out--sample_2003,
title = {Out-of-{Sample} {Extensions} for {LLE}, {Isomap}, {MDS}, {Eigenmaps}, and {Spectral} {Clustering}},
booktitle = {In {Advances} in {Neural} {Information} {Processing} {Systems}},
publisher = {MIT Press},
author = {Bengio, Yoshua and Paiement, Jean-Francois and Vincent, Pascal},
year = {2004},
pages = {177--184},
}
@misc{noauthor_scopus_nodate,
author = {Elsevier},
year = {2017},
title = {Scopus - {Advanced} search},
url = {https://www.scopus.com/},
urldate = {2017-03-28}
}
@article{diaz_global_2016,
title = {The global spectrum of plant form and function},
volume = {529},
issn = {0028-0836},
url = {http://www.nature.com/nature/journal/v529/n7585/full/nature16489.html},
doi = {10.1038/nature16489},
language = {en},
number = {7585},
urldate = {2017-03-22},
journal = {Nature},
author = {Díaz, Sandra and Kattge, Jens and Cornelissen, Johannes H. C. and Wright, Ian J. and Lavorel, Sandra and Dray, Stéphane and Reu, Björn and Kleyer, Michael and Wirth, Christian and Colin Prentice, I. and Garnier, Eric and Bönisch, Gerhard and Westoby, Mark and Poorter, Hendrik and Reich, Peter B. and Moles, Angela T. and Dickie, John and Gillison, Andrew N. and Zanne, Amy E. and Chave, Jérôme and Joseph Wright, S. and Sheremet’ev, Serge N. and Jactel, Hervé and Baraloto, Christopher and Cerabolini, Bruno and Pierce, Simon and Shipley, Bill and Kirkup, Donald and Casanoves, Fernando and Joswig, Julia S. and Günther, Angela and Falczuk, Valeria and Rüger, Nadja and Mahecha, Miguel D. and Gorné, Lucas D.},
month = jan,
year = {2016},
pages = {167--171},
}
first application of pca in ecology?
@article{aart_distribution_1972,
title = {Distribution {Analysis} of {Wolfspiders} ({Araneae}, {Lycosidae}) in a {Dune} {Area} {By} {Means} of {Principal} {Component} {Analysis}},
volume = {23},
issn = {1568-542X},
url = {http://booksandjournals.brillonline.com/content/journals/10.1163/002829673x00076},
doi = {10.1163/002829673X00076},
number = {3},
urldate = {2016-07-18},
journal = {Netherlands Journal of Zoology},
author = {Aart, P. J. M. Van Der},
month = jan,
year = {1972},
pages = {266--329},
}
@article{morrall_soil_1974,
title = {Soil microfungi associated with aspen in {Saskatchewan}: synecology and quantitative analysis},
volume = {52},
issn = {0008-4026},
shorttitle = {Soil microfungi associated with aspen in {Saskatchewan}},
url = {http://www.nrcresearchpress.com/doi/abs/10.1139/b74-233},
doi = {10.1139/b74-233},
number = {8},
urldate = {2016-07-18},
journal = {Can. J. Bot.},
author = {Morrall, R. A. A.},
month = aug,
year = {1974},
pages = {1803--1817},
}
@article{pearson_lines_1901,
title = {On lines and planes of closest fit to systems of points in space},
volume = {2},
number = {6},
journal = {Philosophical Magazine},
doi = {10.1080/14786440109462720},
author = {Pearson, K},
year = {1901},
pages = {559--572},
}
@article{kramer_nonlinear_1991,
title = {Nonlinear principal component analysis using autoassociative neural networks},
volume = {37},
issn = {1547-5905},
doi = {10.1002/aic.690370209},
language = {en},
number = {2},
urldate = {2016-07-15},
journal = {AIChE J.},
author = {Kramer, Mark A.},
month = feb,
year = {1991},
pages = {233--243},
}
@article{hsieh_nonlinear_2004,
title = {Nonlinear multivariate and time series analysis by neural network methods},
volume = {42},
issn = {1944-9208},
doi = {10.1029/2002RG000112},
language = {en},
number = {1},
urldate = {2016-07-15},
journal = {Rev. Geophys.},
author = {Hsieh, William W.},
month = mar,
year = {2004},
pages = {RG1003},
}
@article{optimx,
author = {John Nash},
title = {On Best Practice Optimization Methods in R},
journal = {Journal of Statistical Software},
volume = 60,
number = 1,
year = 2014,
issn = {1548-7660},
pages = {1--14},
doi = {10.18637/jss.v060.i02},
url = {https://www.jstatsoft.org/index.php/jss/article/view/v060i02}
}
@manual{energy,
title = {energy: E-statistics (energy statistics)},
author = {Maria L. Rizzo and Gabor J. Szekely},
year = {2014},
note = {R package version 1.6.2},
url = {https://CRAN.R-project.org/package=energy},
}
@misc{soeren_sonnenburg_2017_1067840,
author = {Soeren Sonnenburg and
Heiko Strathmann and
Sergey Lisitsyn and
Viktor Gal and
Fernando J. Iglesias García and
Wu Lin and
Soumyajit De and
Chiyuan Zhang and
frx and
tklein23 and
Evgeniy Andreev and
JonasBehr and
sploving and
Parijat Mazumdar and
Christian Widmer and
Pan Deng / Zora and
Giovanni De Toni and
Saurabh Mahindre and
Abhijeet Kislay and
Kevin Hughes and
Roman Votyakov and
khalednasr and
Sanuj Sharma and
Alesis Novik and
Abinash Panda and
Evangelos Anagnostopoulos and
Liang Pang and
Alex Binder and
serialhex and
Björn Esser},
title = {shogun-toolbox/shogun: Shogun 6.1.0},
month = nov,
year = 2017,
doi = {10.5281/zenodo.1067840},
url = {https://doi.org/10.5281/zenodo.1067840}
}
@article{scikit-learn,
title={Scikit-learn: Machine Learning in {P}ython},
author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
journal={Journal of Machine Learning Research},
volume={12},
pages={2825--2830},
year={2011}
}
@article{torgerson_multidimensional_1952,
title = {Multidimensional scaling: {I}. {Theory} and method},
volume = {17},
issn = {0033-3123, 1860-0980},
shorttitle = {Multidimensional scaling},
url = {http://link.springer.com/article/10.1007/BF02288916},
doi = {10.1007/BF02288916},
language = {en},
number = {4},
urldate = {2016-08-16},
journal = {Psychometrika},
author = {Torgerson, Warren S.},
year = {1952},
pages = {401--419},
}
@article{tenenbaum_global_2000,
title = {A {Global} {Geometric} {Framework} for {Nonlinear} {Dimensionality} {Reduction}},
volume = {290},
issn = {0036-8075, 1095-9203},
url = {http://science.sciencemag.org/content/290/5500/2319},
doi = {10.1126/science.290.5500.2319},
language = {en},
number = {5500},
urldate = {2016-07-13},
journal = {Science},
author = {Tenenbaum, Joshua B. and Silva, Vin de and Langford, John C.},
month = dec,
year = {2000},
pmid = {11125149},
pages = {2319--2323},
}
@article{roweis_nonlinear_2000,
title = {Nonlinear {Dimensionality} {Reduction} by {Locally} {Linear} {Embedding}},
volume = {290},
issn = {0036-8075, 1095-9203},
url = {http://science.sciencemag.org/content/290/5500/2323},
doi = {10.1126/science.290.5500.2323},
language = {en},
number = {5500},
urldate = {2016-08-16},
journal = {Science},
author = {Roweis, Sam T. and Saul, Lawrence K.},
month = dec,
year = {2000},
pmid = {11125150},
pages = {2323--2326},
}
@article{kruskal_multidimensional_1964,
title = {Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis},
volume = {29},
issn = {0033-3123, 1860-0980},
url = {http://link.springer.com/article/10.1007/BF02289565},
doi = {10.1007/BF02289565},
language = {en},
number = {1},
urldate = {2016-12-22},
journal = {Psychometrika},
author = {Kruskal, J. B.},
month = mar,
year = {1964},
pages = {1--27},
}
@article{kruskal_nonmetric_1964,
title = {Nonmetric multidimensional scaling: {A} numerical method},
volume = {29},
issn = {0033-3123, 1860-0980},
shorttitle = {Nonmetric multidimensional scaling},
url = {http://link.springer.com/article/10.1007/BF02289694},
doi = {10.1007/BF02289694},
language = {en},
number = {2},
urldate = {2016-12-22},
journal = {Psychometrika},
author = {Kruskal, J. B.},
month = jun,
year = {1964},
pages = {115--129},
}
@article{coifman_geometric_2005,
title = {Geometric diffusions as a tool for harmonic analysis and structure definition of data: {Diffusion} maps},
volume = {102},
issn = {0027-8424, 1091-6490},
shorttitle = {Geometric diffusions as a tool for harmonic analysis and structure definition of data},
url = {http://www.pnas.org/content/102/21/7426},
doi = {10.1073/pnas.0500334102},
language = {en},
number = {21},
urldate = {2016-03-30},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
author = {Coifman, R. R. and Lafon, S. and Lee, A. B. and Maggioni, M. and Nadler, B. and Warner, F. and Zucker, S. W.},
month = may,
year = {2005},
pmid = {15899970},
pages = {7426--7431},
}
@article{coifman_diffusion_2006,
title = {Diffusion maps},
volume = {21},
issn = {10635203},
url = {http://linkinghub.elsevier.com/retrieve/pii/S1063520306000546},
doi = {10.1016/j.acha.2006.04.006},
language = {en},
number = {1},
urldate = {2016-08-16},
journal = {Applied and Computational Harmonic Analysis},
author = {Coifman, Ronald R. and Lafon, Stéphane},
month = jul,
year = {2006},
pages = {5--30},
}
@article{scholkopf_nonlinear_1998,
title = {Nonlinear {Component} {Analysis} as a {Kernel} {Eigenvalue} {Problem}},
volume = {10},
issn = {08997667},
doi = {10.1162/089976698300017467},
number = {5},
journal = {Neural Computation},
author = {Schölkopf, Bernhard and Smola, Alexander and Müller, Klaus-Robert},
year = {1998},
pages = {1299--1319},
}
@article{hyvarinen_fast_1999,
title = {Fast and robust fixed-point algorithms for independent component analysis},
volume = {10},
issn = {1045-9227},
doi = {10.1109/72.761722},
number = {3},
journal = {IEEE Transactions on Neural Networks},
author = {Hyvarinen, A.},
month = may,
year = {1999},
pages = {626--634},
}
@article{comon_independent_1994,
title = {Independent component analysis, {A} new concept?},
volume = {36},
issn = {01651684},
url = {http://linkinghub.elsevier.com/retrieve/pii/0165168494900299},
doi = {10.1016/0165-1684(94)90029-9},
language = {en},
number = {3},
urldate = {2016-08-17},
journal = {Signal Processing},
author = {Comon, Pierre},
month = apr,
year = {1994},
pages = {287--314},
}
@article{kamada_algorithm_1989,
title = {An algorithm for drawing general undirected graphs},
volume = {31},
issn = {0020-0190},
url = {http://www.sciencedirect.com/science/article/pii/0020019089901026},
doi = {10.1016/0020-0190(89)90102-6},
number = {1},
urldate = {2016-08-17},
journal = {Information Processing Letters},
author = {Kamada, Tomihisa and Kawai, Satoru},
month = apr,
year = {1989},
pages = {7--15},
}
@article{fruchterman_graph_1991,
title = {Graph drawing by force-directed placement},
volume = {21},
issn = {1097-024X},
doi = {10.1002/spe.4380211102},
language = {en},
number = {11},
urldate = {2016-08-17},
journal = {Softw: Pract. Exper.},
author = {Fruchterman, Thomas M. J. and Reingold, Edward M.},
month = nov,
year = {1991},
pages = {1129--1164},
}
@techreport{martin_dr.l:_2007,
title = {Dr.l: {Distributed} {Recursive} (graph) {Layout}},
shorttitle = {Dr.l},
url = {http://www.osti.gov/scitech/biblio/1231060-dr-distributed-recursive-graph-layout},
number = {dRl; 002182MLTPL00},
urldate = {2016-08-17},
institution = {Sandia National Laboratories},
author = {Martin, Shawn and Brown, W. Michael and Wylie, Brian N.},
month = nov,
year = {2007},
}
@article{belkin_laplacian_2003,
title = {Laplacian {Eigenmaps} for {Dimensionality} {Reduction} and {Data} {Representation}},
volume = 15,
issn = 08997667,
doi = {10.1162/089976603321780317},
number = 6,
urldate = {2016-08-17},
journal = {Neural Computation},
author = {Belkin, Mikhail and Niyogi, Partha},
month = jun,
year = 2003,
pages = {1373--1396},
}
@inproceedings{terada_local_2014,
title = {Local {Ordinal} {Embedding}},
url = {http://jmlr.org/proceedings/papers/v32/terada14.html},
urldate = {2016-04-21},
author = {Terada, Yoshikazu and Luxburg, Ulrike von},
year = {2014},
pages = {847--855}
}
@article{van_der_maaten_visualizing_2008,
title = {Visualizing {Data} using t-{SNE}},
volume = {9},
issn = {1532-4435},
language = {English},
journal = {J. Mach. Learn. Res.},
author = {van der Maaten, Laurens and Hinton, Geoffrey},
month = nov,
year = {2008},
note = {WOS:000262637600007},
pages = {2579--2605},
}
@incollection{hinton_stochastic_2003,
title = {Stochastic {Neighbor} {Embedding}},
url = {http://papers.nips.cc/paper/2276-stochastic-neighbor-embedding.pdf},
urldate = {2016-08-17},
booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 15},
publisher = {MIT Press},
author = {Hinton, Geoffrey E. and Roweis, Sam T.},
editor = {Becker, S. and Thrun, S. and Obermayer, K.},
year = {2003},
pages = {857--864},
}
@article{lee_multi-scale_2015,
series = {Learning for {Visual} {Semantic} {Understanding} in {Big} {Data}, {ESANN} 2014, Industrial {Data} {Processing} and {Analysis}, Selected papers from the 22nd {European} {Symposium} on {Artificial} {Neural} {Networks}, {Computational} {Intelligence} and {Machine} {Learning} ({ESANN} 2014), Selected papers from the 11th {World} {Congress} on {Intelligent} {Control} and {Automation} ({WCICA} 2014)},
title = {Multi-scale similarities in stochastic neighbour embedding: {Reducing} dimensionality while preserving both local and global structure},
volume = {169},
issn = {0925-2312},
shorttitle = {Multi-scale similarities in stochastic neighbour embedding},
url = {http://www.sciencedirect.com/science/article/pii/S0925231215003641},
doi = {10.1016/j.neucom.2014.12.095},
urldate = {2016-04-28},
journal = {Neurocomputing},
author = {Lee, John A. and Peluffo-Ordóñez, Diego H. and Verleysen, Michel},
month = dec,
year = {2015},
pages = {246--261},
}
@article{lee_type_2013,
series = {Advances in artificial neural networks, machine learning, and computational intelligence, Selected papers from the 20th {European} {Symposium} on {Artificial} {Neural} {Networks} ({ESANN} 2012)},
title = {Type 1 and 2 mixtures of {Kullback}–{Leibler} divergences as cost functions in dimensionality reduction based on similarity preservation},
volume = {112},
issn = {0925-2312},
url = {http://www.sciencedirect.com/science/article/pii/S0925231213001471},
doi = {10.1016/j.neucom.2012.12.036},
urldate = {2016-04-28},
journal = {Neurocomputing},
author = {Lee, John A. and Renard, Emilie and Bernard, Guillaume and Dupont, Pierre and Verleysen, Michel},
month = jul,
year = {2013},
pages = {92--108},
}
@article{venna_information_2010,
title = {Information {Retrieval} {Perspective} to {Nonlinear} {Dimensionality} {Reduction} for {Data} {Visualization}},
volume = {11},
issn = {1532-4435},
language = {English},
journal = {J. Mach. Learn. Res.},
author = {Venna, Jarkko and Peltonen, Jaakko and Nybo, Kristian and Aidos, Helena and Kaski, Samuel},
month = feb,
year = {2010},
note = {WOS:000277186500001},
pages = {451--490},
}
@article{laparra_dimensionality_2015,
title = {Dimensionality {Reduction} via {Regression} in {Hyperspectral} {Imagery}},
volume = {9},
issn = {1932-4553},
doi = {10.1109/JSTSP.2015.2417833},
number = {6},
journal = {IEEE Journal of Selected Topics in Signal Processing},
author = {Laparra, V. and Malo, J. and Camps-Valls, G.},
month = sep,
year = {2015},
pages = {1026--1036},
}
@article{chen_local_2006,
author = {Lisha Chen and Andreas Buja},
title = {Local Multidimensional Scaling for Nonlinear Dimension Reduction, Graph Drawing, and Proximity Analysis},
journal = {Journal of the American Statistical Association},
volume = {104},
number = {485},
pages = {209-219},
year = {2009},
publisher = {Taylor \& Francis},
doi = {10.1198/jasa.2009.0111},
URL = {https://doi.org/10.1198/jasa.2009.0111},
eprint = {https://doi.org/10.1198/jasa.2009.0111}
}
@inproceedings{saunders_ridge_1998,
author = {Saunders, Craig and Gammerman, Alexander and Vovk, Volodya},
title = {Ridge Regression Learning Algorithm in Dual Variables},
booktitle = {Proceedings of the Fifteenth International Conference on Machine Learning},
series = {ICML '98},
year = {1998},
isbn = {1-55860-556-8},
pages = {515--521},
numpages = {7},
url = {http://dl.acm.org/citation.cfm?id=645527.657464},
acmid = {657464},
publisher = {Morgan Kaufmann Publishers Inc.},
address = {San Francisco, CA, USA},
}
@article{lee_quality_2009,
series = {Advances in {Machine} {Learning} and {Computational} {Intelligence}, 16th {European} {Symposium} on {Artificial} {Neural} {Networks} 2008},
title = {Quality assessment of dimensionality reduction: {Rank}-based criteria},
volume = {72},
issn = {0925-2312},
shorttitle = {Quality assessment of dimensionality reduction},
url = {http://www.sciencedirect.com/science/article/pii/S0925231209000101},
doi = {10.1016/j.neucom.2008.12.017},
number = {7–9},
urldate = {2016-04-04},
journal = {Neurocomputing},
author = {Lee, John A. and Verleysen, Michel},
month = mar,
year = {2009},
pages = {1431--1443},
}
@article{sokal_comparison_1962,
title = {The {Comparison} of {Dendrograms} by {Objective} {Methods}},
volume = {11},
issn = {0040-0262},
url = {http://www.jstor.org/stable/1217208},
doi = {10.2307/1217208},
number = {2},
urldate = {2016-08-15},
journal = {Taxon},
author = {Sokal, Robert R. and Rohlf, F. James},
year = {1962},
pages = {33--40},
}
@article{szekely_measuring_2007,
title = {Measuring and testing dependence by correlation of distances},
volume = {35},
issn = {0090-5364, 2168-8966},
url = {http://projecteuclid.org/euclid.aos/1201012979},
doi = {10.1214/009053607000000505},
language = {EN},
number = {6},
urldate = {2016-06-10},
journal = {The Annals of Statistics},
author = {Székely, Gábor J. and Rizzo, Maria L. and Bakirov, Nail K.},
month = dec,
year = {2007},
mrnumber = {MR2382665},
zmnumber = {1129.62059},
pages = {2769--2794},
}
@article{kireeva_nonlinear_2014,
title = {Nonlinear {Dimensionality} {Reduction} for {Visualizing} {Toxicity} {Data}: {Distance}-{Based} {Versus} {Topology}-{Based} {Approaches}},
volume = {9},
issn = {1860-7187},
shorttitle = {Nonlinear {Dimensionality} {Reduction} for {Visualizing} {Toxicity} {Data}},
doi = {10.1002/cmdc.201400027},
language = {en},
number = {5},
urldate = {2016-08-19},
journal = {ChemMedChem},
author = {Kireeva, Natalia V. and Ovchinnikova, Svetlana I. and Tetko, Igor V. and Asiri, Abdullah M. and Balakin, Konstantin V. and Tsivadze, Aslan Yu.},
month = may,
year = {2014},
pages = {1047--1059},
}
@article{han_deep_2016,
author = {Han, Yoonchang and Kim, Jaehun and Lee, Kyogu},
title = {Deep {Convolutional} {Neural} {Networks} for {Predominant} {Instrument} {Recognition} in {Polyphonic} {Music}},
journal = {IEEE-ACM Transactions on Audio Speech and Language Processing},
year = {2017},
volume = {25},
number = {1},
pages = {208--221},
month = jan,
publisher = {IEEE-INST Electrical Electronics Engineers Inc},
language = {English},
doi = {10.1109/TASLP.2016.2632307},
issn = {2329-9290},
}
@article{van_der_maaten_dimensionality_2009,
title = {Dimensionality reduction: a comparative review},
volume = {10},
shorttitle = {Dimensionality reduction},
urldate = {2016-06-28},
journal = {J Mach Learn Res},
author = {Van Der Maaten, Laurens and Postma, Eric and Van den Herik, Jaap},
year = {2009},
pages = {66--71},
}
@article{lueks_how_2011,
title = {How to {Evaluate} {Dimensionality} {Reduction}? - {Improving} the {Co}-ranking {Matrix}},
shorttitle = {How to {Evaluate} {Dimensionality} {Reduction}?},
url = {http://arxiv.org/abs/1110.3917},
urldate = {2016-03-18},
journal = {arXiv:1110.3917 [cs]},
author = {Lueks, Wouter and Mokbel, Bassam and Biehl, Michael and Hammer, Barbara},
month = oct,
year = {2011},
note = {arXiv: 1110.3917},
}
@techreport{de_silva_sparse_2004,
title = {Sparse multidimensional scaling using landmark points},
author = {De Silva, Vin and Tenenbaum, Joshua B.},
year = {2004},
}
@article{groenen_multidimensional_2016,
title = {Multidimensional {Scaling} by {Majorization}: {A} {Review}},
volume = {73},
issn = {1548-7660},
url = {https://www.jstatsoft.org/index.php/jss/article/view/v073i08},
doi = {10.18637/jss.v073.i08},
number = {1},
journal = {Journal of Statistical Software},
author = {Groenen, Patrick and Velden, Michel van de},
year = {2016},
pages = {1--26},
}
@article{leeuw_multidimensional_2009,
author = {de Leeuw, Jan and Mair, Patrick},
title = {Multidimensional {Scaling} {Using} {Majorization}: {SMACOF} in {R}},
journal = {Journal of Statistical Software},
year = {2009},
volume = {31},
number = {3},
pages = {1--30},
month = aug,
issn = {1548-7660},
}
@article{bengio_learning_2004,
title = {Learning {Eigenfunctions} {Links} {Spectral} {Embedding} and {Kernel} {PCA}},
volume = {16},
issn = {0899-7667},
url = {http://dx.doi.org/10.1162/0899766041732396},
doi = {10.1162/0899766041732396},
number = {10},
urldate = {2016-10-05},
journal = {Neural Computation},
author = {Bengio, Yoshua and Delalleau, Olivier and Roux, Nicolas Le and Paiement, Jean-François and Vincent, Pascal and Ouimet, Marie},
month = oct,
year = {2004},
pages = {2197--2219},
}
@misc{_gdkrmr/dimred_????,
title = {gdkrmr/{dimRed}},
url = {https://github.com/gdkrmr/dimRed},
urldate = {2016-11-30},
journal = {GitHub},
}
@book{luxburg_tutorial_2007,
title = {A {Tutorial} on {Spectral} {Clustering}},
author = {Luxburg, Ulrike Von},
year = {2007},
}
@article{kraemer_dimred_2018,
title = {{dimRed} and {coRanking} - {Unifying} {Dimensionality} {Reduction} in {R}},
url = {https://journal.r-project.org/archive/2018/RJ-2018-039/index.html},
journal = {The R Journal},
author = {Kraemer, Guido and Reichstein, Markus and Mahecha, Miguel D.},
year = {2018},
} dimRed/vignettes/dimensionality-reduction.Rnw 0000644 0001762 0000144 00000173524 13371631672 021225 0 ustar ligges users \documentclass{article}
%\VignetteEngine{knitr::knitr}
%\VignetteIndexEntry{Dimensionality Reduction}
%\VignetteKeyword{Dimensionality Reduction}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{amsmath,amssymb}
\usepackage{booktabs}
\usepackage{tikz}
\usetikzlibrary{trees}
\usepackage[sectionbib,round]{natbib}
\title{\pkg{dimRed} and \pkg{coRanking}---Unifying Dimensionality Reduction in R}
\author{Guido Kraemer \and Markus Reichstein \and Miguel D.\ Mahecha}
% these are taken from RJournal.sty:
\makeatletter
\DeclareRobustCommand\code{\bgroup\@noligs\@codex}
\def\@codex#1{\texorpdfstring%
{{\normalfont\ttfamily\hyphenchar\font=-1 #1}}%
{#1}\egroup}
\newcommand{\kbd}[1]{{\normalfont\texttt{#1}}}
\newcommand{\key}[1]{{\normalfont\texttt{\uppercase{#1}}}}
\DeclareRobustCommand\samp{`\bgroup\@noligs\@sampx}
\def\@sampx#1{{\normalfont\texttt{#1}}\egroup'}
\newcommand{\var}[1]{{\normalfont\textsl{#1}}}
\let\env=\code
\newcommand{\file}[1]{{`\normalfont\textsf{#1}'}}
\let\command=\code
\let\option=\samp
\newcommand{\dfn}[1]{{\normalfont\textsl{#1}}}
% \acronym is effectively disabled since not used consistently
\newcommand{\acronym}[1]{#1}
\newcommand{\strong}[1]{\texorpdfstring%
{{\normalfont\fontseries{b}\selectfont #1}}%
{#1}}
\let\pkg=\strong
\newcommand{\CRANpkg}[1]{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}}%
\let\cpkg=\CRANpkg
\newcommand{\ctv}[1]{\href{https://CRAN.R-project.org/view=#1}{\emph{#1}}}
\newcommand{\BIOpkg}[1]{\href{https://www.bioconductor.org/packages/release/bioc/html/#1.html}{\pkg{#1}}}
\makeatother
\begin{document}
\maketitle
\abstract{ %
This document is based on the manuscript of \citet{kraemer_dimred_2018} which
was published in The R Journal and has been modified and extended to fit the
format of a package vignette and to match the extended functionality of the
\pkg{dimRed} package.
``Dimensionality reduction'' (DR) is a widely used approach to find low
dimensional and interpretable representations of data that are natively
embedded in high-dimensional spaces. %
DR can be realized by a plethora of methods with different properties,
objectives, and, hence, (dis)advantages. The resulting low-dimensional data
embeddings are often difficult to compare with objective criteria. %
Here, we introduce the \CRANpkg{dimRed} and \CRANpkg{coRanking} packages for
the R language. %
These open source software packages enable users to easily access multiple
classical and advanced DR methods using a common interface. %
The packages also provide quality indicators for the embeddings and easy
visualization of high dimensional data. %
The \pkg{coRanking} package provides the functionality for assessing DR methods in the
co-ranking matrix framework. %
In tandem, these packages allow for uncovering complex structures in high
dimensional data. %
Currently 15 DR methods are available in the package, some of which were not
previously available to R users. %
Here, we outline the \pkg{dimRed} and \pkg{coRanking} packages and
make the implemented methods understandable to the interested reader. %
}
\section{Introduction}
\label{sec:intro}
Dimensionality Reduction (DR) essentially aims to find low dimensional
representations of data while preserving their key properties. %
Many methods exist in the literature, optimizing different criteria: %
maximizing the variance or the statistical independence of the projected data, %
minimizing the reconstruction error under different constraints, %
or optimizing for different error metrics, %
just to name a few. %
Choosing an inadequate method may imply that much of the underlying structure
remains undiscovered. %
Often the structures of interest in a data set can be well represented by fewer
dimensions than exist in the original data. %
Data compression of this kind has the additional benefit of making the encoded
information better conceivable to our brains for further analysis tasks
like classification or regression problems. %
For example, the morphology of a plant's leaves, stems, and seeds reflect the
environmental conditions the species usually grow in (e.g.,\ plants with large
soft leaves will never grow in a desert but might have an advantage in a humid
and shadowy environment). %
Because the morphology of the entire plant depends on the environment, many
morphological combinations will never occur in nature and the morphological
space of all plant species is tightly constrained. %
\citet{diaz_global_2016} found that out of six observed morphological characteristics
only two embedding dimensions were enough to represent three quarters of the total
observed variability. %
DR is a widely used approach for the detection of structure in multivariate
data, and has applications in a variety of fields. %
In climatology, DR is used to find the modes of some phenomenon, e.g.,\ the first
Empirical Orthogonal Function of monthly mean sea surface temperature of a given
region over the Pacific is often linked to the El Ni\~no Southern
Oscillation or
ENSO \citep[e.g.,\ ][]{hsieh_nonlinear_2004}. %
In ecology the comparison of sites with different species abundances is a
classical multivariate problem: each observed species adds an extra dimension,
and because species are often bound to certain habitats, there is a lot of
redundant information. DR is a popular technique to represent the sites in
few dimensions, e.g.,\ \citet{aart_distribution_1972} matches wolfspider
communities to habitat and \citet{morrall_soil_1974} match soil fungi data to
soil types. (In ecology the general name for DR is ordination or indirect
gradient analysis.) %
Today, hyperspectral satellite imagery collects so many bands that it is very
difficult to analyze and interpret the data directly. %
Summarizing the data into a set of few, yet independent, components is one way to
reduce complexity \citep[e.g.,\ see][]{laparra_dimensionality_2015}. %
DR can also be used to visualize the interiors of deep neural networks
\citep[e.g.,\ see ][]{han_deep_2016}, where the high dimensionality comes from
the large number of weights used in a neural network and convergence can be
visualized by means of DR\@. %
We could find many more example applications here but this is not the main focus
of this publication. %
The difficulty in applying DR is that each DR method is designed to maintain
certain aspects of the original data and therefore may be appropriate for one
task and inappropriate for another. %
Most methods also have parameters to tune and follow different assumptions. The
quality of the outcome may strongly depend on their tuning, which adds
additional complexity. %
DR methods can be modeled after physical models with attracting and repelling
forces (Force Directed Methods), projections onto low dimensional planes (PCA,
ICA), divergence of statistical distributions (SNE family), or the reconstruction
of local spaces or points by their neighbors (LLE). %
As an example for how changing internal parameters of a method can have a great
impact, the breakthrough for Stochastic Neighborhood Embedding (SNE) methods
came when a Student's $t$-distribution was used instead of a normal distribution
to model probabilities in low dimensional space to avoid the ``crowding
problem'', that is,\ a sphere in high dimensional space has a much larger volume
than in low dimensional space and may contain too many points to be represented
accurately in few dimensions. %
The $t$-distribution allows medium distances to be accurately represented in
few dimensions by larger distances due to its heavier tails. %
The result is called $t$-SNE and is especially good at preserving local
structures in very few dimensions. This feature made $t$-SNE useful for a wide
array of data visualization tasks, and the method became much more popular than
standard SNE (around six times more citations of
\citet{van_der_maaten_visualizing_2008} compared to
\citet{hinton_stochastic_2003} in Scopus \citep{noauthor_scopus_nodate}). %
There are a number of software packages for other languages providing collections of methods: In
Python there is scikit-learn \citep{scikit-learn}, which contains a module for
DR. In Julia we currently find ManifoldLearning.jl for nonlinear and
MultivariateStats.jl for linear DR methods. %
There are several toolboxes for DR implemented in Matlab
\citep{van_der_maaten_dimensionality_2009,
arenas-garcia_kernel_2013}. The Shogun
toolbox \citep{soeren_sonnenburg_2017_1067840} implements a variety of methods for
dimensionality reduction in C++ and offers bindings for a many common high level
languages (including R, but the installation is anything but simple, as
there is no CRAN package). %
However, there is no comprehensive package for R and none of the former
mentioned software packages provides means to consistently compare the quality
of different methods for DR. %
For many applications it can be difficult to objectively find the right method
or parameterization for the DR task. %
This paper presents the \pkg{dimRed} and \pkg{coRanking} packages for
the popular programming language R. Together, they
provide a standardized interface to various dimensionality reduction methods and quality
metrics for embeddings. They are implemented using the S4 class system
of R, making the packages
both easy to use and to extend.
The design goal for these packages is to enable researchers, who may not necessarily be experts in DR, to
apply the methods in their own work and to objectively identify the
most suitable
methods for their data. %
This paper provides an overview of the methods collected in the
packages and contains examples of how
to use the packages. %
The notation in this paper will be as follows: $X = [x_i]_{1\leq i \leq n}^T \in
\mathbb{R}^{n\times p}$, and the observations $x_i \in \mathbb{R}^p$. %
These observations may be transformed prior to the dimensionality reduction
step (e.g.,\ centering and/or standardization) resulting in $X' = [x'_i]_{1\leq i
\leq n}^T \in \mathbb{R}^{n\times p}$. %
A DR method then embeds each vector in $X'$ onto a vector in $Y = [y_i]_{1\leq i
\leq n}^T \in \mathbb{R}^{n\times q}$ with $y_i \in \mathbb{R}^q$, ideally
with $q \ll p$. %
Some methods provide an explicit mapping $f(x'_i) = y_i$. Some even offer an
inverse mapping $f^{-1}(y_{i}) = \hat x'_{i}$, such that one can reconstruct a
(usually approximate) sample from the low-dimensional representation. %
For some methods, pairwise distances between points are needed; we set $d_{ij} =
d(x_{i}, x_{j})$ and $\hat{d}_{ij} = d(y_i, y_j)$, where $d$ is some appropriate
distance function.
When referring to \code{functions} in the \pkg{dimRed} package or base R simply the
function name is mentioned, functions from other packages are
referenced with their namespace, as with \code{package::function}.
\begin{figure}[htbp]
\centering
\input{classification_tree.tex}
\caption{%
Classification of dimensionality reduction methods. Methods
in bold face are implemented in \pkg{dimRed}.
Modified from \citet{van_der_maaten_dimensionality_2009}.
}\label{fig:classification}
\end{figure}
\section{Dimensionality Reduction Methods}
\label{sec:dimredtec}
In the following section we do not aim for an exhaustive explanation to every
method in \pkg{dimRed} but rather to provide a general idea on how the
methods work. %
An overview and classification of the most commonly used DR methods can be found
in Figure~\ref{fig:classification}.
In all methods, parameters have to be optimized or decisions have to be made,
even if it is just about the preprocessing steps of data. %
The \pkg{dimRed} package tries to make the optimization process for parameters as easy as
possible, but, if possible, the parameter space should be narrowed down using
prior knowledge. %
Often decisions can be made based on theoretical knowledge. For example,\ sometimes an
analysis requires data to be kept in their original scales, and sometimes this is
exactly what has to be avoided, as when comparing different physical
units. %
Sometimes decisions based on the experience of others can be made, e.g.,\ the
Gaussian kernel is probably the most universal kernel and therefore should be
tested first if there is a choice. %
All methods presented here have the embedding dimensionality, $q$, as a
parameter (or \code{ndim} as a parameter for \code{embed}). %
For methods based on eigenvector decomposition, the result generally does not
depend on the number of dimensions, i.e.,\ the first dimension will be the same,
no matter if we decide to calculate only two dimensions or more. %
If more dimensions are added, more information is maintained, the first
dimension is the most important and higher dimensions are successively less
important. %
This means that a method based on eigenvalue decomposition only has to be run
once if one wishes to compare the embedding in different dimensions. %
In optimization-based methods this is generally not the case: the number of
dimensions has to be chosen a priori, an embedding in 2 and 3 dimensions may
vary significantly, and there is no ordered importance of dimensions. %
This means that comparing dimensions of optimization-based methods is
computationally much more expensive. %
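As a small illustration of this point, the following sketch (using only
functions exported by \pkg{dimRed}) computes a single PCA embedding with the
maximum \code{ndim} and then evaluates a quality score at each lower
dimensionality without re-embedding:
<<eigen-ndim-sketch, eval=FALSE>>=
irisData <- loadDataSet("Iris")
resPCA <- embed(irisData, "PCA", ndim = 4)
## quality of the embedding at increasing output dimensionality,
## all from a single embedding run:
sapply(1:4, function(d) quality(resPCA, "Q_local", ndim = d))
@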
We try to give the computational complexity of the methods. Depending on the
actual implementation, computation times may differ widely. %
R is an interpreted language, so all parts of an algorithm that are implemented
in R will often tend to be slow compared to methods that call efficient
implementations in a compiled language. %
Methods where most of the computing time is spent on eigenvalue decomposition
do have very efficient implementations, as R uses optimized linear algebra
libraries. However, eigenvalue decomposition itself does not scale very well in
naive implementations ($\mathcal{O}(n^3)$).
\subsection{PCA}
\label{sec:pca}
Principal Component Analysis (PCA) is the most basic technique for reducing
dimensions. It dates back to \citet{pearson_lines_1901}. PCA finds a linear
projection ($U$) of the high dimensional space into a low dimensional space $Y =
XU$, maintaining maximum variance of the data. It is based on solving the
following eigenvalue problem:
\begin{equation}
(C_{XX}-\lambda_k I)u_k=0\label{eq:pca}
\end{equation}
where $C_{XX} = \frac 1 n X^TX$ is the covariance matrix, $\lambda_k$ and $u_k$
are the $k$-th eigenvalue and eigenvector, and $I$ is the identity matrix. %
The equation has several solutions for different values of $\lambda_k$ (leaving
aside the trivial solution $u_k = 0$). %
PCA can be efficiently applied to large data sets, because it computationally
scales as $\mathcal{O}(np^2 + p^3)$, that is, it scales linearly with the number of
samples, and R uses specialized linear algebra libraries for such
computations.
PCA is a rotation around the origin and there exist a forward and inverse
mapping. %
PCA may suffer from a scale problem, i.e.,\ one variable may dominate the
variance simply because it is measured on a larger scale. To remedy this, the
data can be scaled to zero mean and unit variance, depending on the use case,
if this is necessary or desired. %
Base R implements PCA in the functions \code{prcomp} and \code{princomp}, but
several other implementations exist, e.g.,\ \BIOpkg{pcaMethods} from Bioconductor,
which implements versions of PCA that can deal with missing data. %
The \pkg{dimRed} package wraps \code{prcomp}.
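As a minimal usage sketch (all functions shown here are exported by
\pkg{dimRed}):
<<pca-sketch, eval=FALSE>>=
## embed the numeric columns of iris with PCA, centering and scaling first
irisData <- as(iris[, 1:4], "dimRedData")
resPCA <- embed(irisData, "PCA", ndim = 2, center = TRUE, scale. = TRUE)
head(getData(getDimRedData(resPCA)))  # the embedded coordinates Y
getRotationMatrix(resPCA)             # the linear projection U
@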
\subsection{kPCA}
\label{sec:kpca}
Kernel Principal Component Analysis (kPCA) extends PCA to deal with nonlinear
dependencies among variables. %
The idea behind kPCA is to map the data into a high dimensional space using a
possibly non-linear function $\phi$ and then to perform a PCA in this high
dimensional space. %
Some mathematical tricks are used for efficient computation. %
If the columns of $X$ are centered around $0$, then the principal components can
also be computed from the inner product matrix $K = XX^T$. %
Due to this way of calculating a PCA, we do not need to explicitly map all points
into the high dimensional space and do the calculations there; it is enough to
obtain the inner product matrix or kernel matrix $K \in \mathbb{R}^{n\times n}$
of the mapped points \citep{scholkopf_nonlinear_1998}. %
Here is an example calculating the kernel matrix using a Gaussian kernel:
\begin{equation}\label{eq:gauss}
K_{ij} = \phi(x_i)^T \phi(x_j) = \kappa(x_i, x_j) = \exp\left(
-\frac{\| x_i- x_j\|^2}{2 \sigma^2}
\right),
\end{equation}
where $\sigma$ is a length scale parameter accounting for the width of the
kernel. %
The other trick used is known as the ``representer theorem.'' The interested
reader is referred to \citet{scholkopf_generalized_2001}.
The kPCA method is very flexible and there exist many kernels for special
purposes. The most common kernel function is the Gaussian kernel
(Equation\ \ref{eq:gauss}). %
The flexibility comes at the price that the method has to be finely
tuned for the data set because some parameter combinations are simply
unsuitable for certain data. %
The method is not suitable for very large data sets, because memory
scales with $\mathcal{O}(n^2)$ and computation time with
$\mathcal{O}(n^3)$. %
Diffusion Maps, Isomap, Locally Linear Embedding, and some other techniques can
be seen as special cases of kPCA, in which case an out-of-sample extension using the Nyström
formula can be applied \citep{bengio_learning_2004}. %
This can also yield applications for bigger data, where an embedding is trained
with a sub-sample of all data and then the data is embedded using the Nyström
formula.
Kernel PCA in R is implemented in the \CRANpkg{kernlab} package using the function
\code{kernlab::kpca}, which supports a number of kernels and
user-defined functions. For details see the help page for \code{kernlab::kpca}.
The \pkg{dimRed} package wraps \code{kernlab::kpca} but additionally
provides forward and inverse methods \citep{bakir_learning_2004} which can be
used to fit out-of-sample data or to visualize the transformation of the data
space. %
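A minimal sketch follows; the \code{kernel} and \code{kpar} arguments are
assumed to be passed through unchanged to \code{kernlab::kpca}.
<<kpca-sketch, eval=FALSE>>=
irisData <- as(iris[, 1:4], "dimRedData")
resKPCA <- embed(irisData, "kPCA", ndim = 2,
                 kernel = "rbfdot", kpar = list(sigma = 0.5))
emb <- predict(resKPCA, irisData)  # forward mapping for (new) data
rec <- inverse(resKPCA, emb)       # approximate pre-image reconstruction
@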
\subsection{Classical Scaling}
\label{sec:classscale}
What today is called Classical Scaling was first introduced by
\citet{torgerson_multidimensional_1952}. It uses an eigenvalue decomposition of
a transformed distance matrix to find an embedding that maintains the distances
of the distance matrix. %
The method works for the same reason that kPCA works, i.e.,\ classical
scaling can be seen as a kPCA with kernel $x^Ty$. %
A matrix of Euclidean distances can be transformed into an inner product matrix
by some simple transformations and therefore yields the same result as a
PCA\@. %
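Concretely, if $D^{(2)}$ denotes the matrix of squared Euclidean distances, the
well-known double-centering transformation recovers the inner product matrix of
the centered data,
\begin{equation*}
  K = -\frac{1}{2} J D^{(2)} J, \qquad J = I - \tfrac{1}{n} \mathbf{1}\mathbf{1}^T,
\end{equation*}
where $\mathbf{1}$ is the vector of ones; an eigenvalue decomposition of $K$
then yields the embedding. %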
Classical scaling is conceptually more general than PCA in that arbitrary
distance matrices can be used, i.e.,\ the method does not even need the original
coordinates, just a distance matrix $D$. %
Then it tries to find an embedding $Y$ so that $\hat d_{ij}$ is as similar to
$d_{ij}$ as possible.
The disadvantage is that it is computationally much more demanding, i.e.,\
an eigenvalue decomposition of an $n\times n$ matrix has to be computed.
This step requires $\mathcal{O}(n^2)$ memory and $\mathcal{O}(n^3)$
computation time, while PCA requires only the eigenvalue decomposition
of a $p\times p$ matrix and usually $n \gg p$. %
R implements classical scaling in the \code{cmdscale}
function. %
The \pkg{dimRed} package wraps \code{cmdscale} and allows the specification
of arbitrary distance functions for calculating the distance matrix. Additionally
a forward method is implemented.
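A sketch of this interface follows; note that the name of the
distance-function argument (\code{d}) is an assumption here and should be
checked against \code{?MDS}.
<<mds-sketch, eval=FALSE>>=
scurve <- loadDataSet("3D S Curve", n = 300)
## default: Euclidean distances, equivalent to PCA up to rotation
resMDS <- embed(scurve, "MDS", ndim = 2)
## an arbitrary distance function can be plugged in instead:
resMDSl1 <- embed(scurve, "MDS", ndim = 2,
                  d = function(x) dist(x, method = "manhattan"))
@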
\subsection{Isomap}
\label{sec:isomap}
As Classical Scaling can deal with arbitrarily defined distances,
\citet{tenenbaum_global_2000} suggested approximating the
structure of the manifold by using geodesic distances. %
In practice, a graph is created by either keeping only
the connections between every point and its $k$ nearest neighbors to
produce a $k$-nearest neighbor graph ($k$-NNG), or simply by keeping all
distances smaller than a value $\varepsilon$ producing an
$\varepsilon$-neighborhood graph ($\varepsilon$-NNG). %
Geodesic distances are obtained by recording the distance on the
graph and classical scaling is used to find an embedding in fewer
dimensions. This leads to an ``unfolding'' of possibly convoluted
structures (see Figure~\ref{fig:knn}).
Isomap's computational cost is dominated by the eigenvalue decomposition and
therefore scales with $\mathcal{O}(n^3)$. %
Other related techniques can use more efficient algorithms
because the distance matrix becomes sparse due to a different preprocessing.
In R, Isomap is implemented in the \CRANpkg{vegan} package:
\code{vegan::isomap} calculates an Isomap embedding and \code{vegan::isomapdist}
a geodesic distance matrix. %
The \pkg{dimRed} package uses its own implementation. This implementation is
faster mainly due to using a KD-tree for the nearest neighbor search (from the
\CRANpkg{RANN} package) and to a faster implementation for the shortest path
search in the $k$-NNG (from the \CRANpkg{igraph} package). %
The implementation in \pkg{dimRed} also includes a forward method that can be
used to train the embedding on a subset of data points and then use these points
to approximate an embedding for the remaining points. This technique is
generally referred to as landmark Isomap \citep{de_silva_sparse_2004}. %
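A sketch of this landmark approach using the \pkg{dimRed} interface (not
evaluated): %
<<eval=FALSE>>=
dat <- loadDataSet("3D S Curve", n = 500)
samp <- sample(nrow(dat), size = 200)
emb <- embed(dat[samp], "Isomap", knn = 10)  # train on landmark points
emb_rest <- predict(emb, dat[-samp])         # embed the remaining points
@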
\subsection{Locally Linear Embedding}
\label{sec:lle}
Points that lie on a manifold in a high-dimensional space can be
reconstructed through linear combinations of their neighbors if the
manifold is well sampled and the neighborhoods lie on a locally linear
patch. %
These reconstruction weights, $W$, are the same in the high-dimensional
space as in the internal coordinates of the manifold. %
Locally Linear Embedding \citep[LLE; ][]{roweis_nonlinear_2000} is a
technique that constructs a weight matrix
$W \in \mathbb{R}^{n\times n}$ with elements $w_{ij}$ so that
\begin{equation}
\sum_{i=1}^n \bigg\| x_i-
\sum_{j=1}^{n} w_{ij}x_j \bigg\|^2\label{eq:lle}
\end{equation}
is minimized under the constraint that $w_{ij} = 0 $ if $x_j$ does not belong
to the neighborhood and the constraint that $\sum_{j=1}^n w_{ij} = 1$. %
Finally the embedding is made in such a way that the following cost function is
minimized for $Y$,
\begin{equation}
\sum_{i=1}^n\bigg\| y_i - \sum_{j=1}^n w_{ij}y_j
\bigg\|^2.\label{eq:lle2}
\end{equation}
This can be solved using an eigenvalue decomposition.
Conceptually the method is similar to Isomap but it is
computationally much nicer because the weight matrix is
sparse and there exist efficient solvers. %
In R, LLE is implemented by the \CRANpkg{lle} package; the embedding can be
calculated with \code{lle::lle}. %
Unfortunately this implementation does not make use of the sparsity of the
weight matrix $W$. %
The manifold must be well sampled and the neighborhood size must be
chosen appropriately for LLE to give good results. %
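A minimal sketch (not evaluated; we assume the neighborhood size parameter is
called \code{knn}, as for Isomap): %
<<eval=FALSE>>=
dat <- loadDataSet("3D S Curve", n = 500)
emb <- embed(dat, "LLE", knn = 25)  # knn must be tuned to the data
plot(emb, type = "2vars")
@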
\subsection{Laplacian Eigenmaps}
\label{sec:laplaceigenmaps}
Laplacian Eigenmaps were originally developed under the name spectral clustering
to separate non-convex clusters. %
Later it was also used for graph embedding and DR
\citep{belkin_laplacian_2003}. %
A number of variants have been proposed. %
First, a graph is constructed, usually from a distance matrix; it can be
made sparse by keeping only the $k$ nearest neighbors or by specifying an
$\varepsilon$-neighborhood. %
Then, a similarity matrix $W$ is calculated using a Gaussian kernel (see
Equation~\ref{eq:gauss}): if $c = 2 \sigma^2 = \infty$, all distances are
treated equally; the smaller $c$, the more emphasis is given to differences in
distance. %
The degree of vertex $i$ is $d_i = \sum_{j=1}^n w_{ij}$ and the degree
matrix, $D$, is the diagonal matrix with entries $d_i$. %
From these, the graph Laplacian $L = D - W$ is formed; there are several ways
to proceed from here, an overview can be found in \citet{luxburg_tutorial_2007}. %
The \pkg{dimRed} package implements the algorithm from
\citet{belkin_laplacian_2003}. Analogously to LLE, Laplacian eigenmaps
avoid computational complexity by creating a sparse matrix and not
having to estimate the distances between all pairs of points. %
Then the eigenvectors corresponding to the lowest eigenvalues larger
than $0$ of either the matrix $L$ or the normalized Laplacian
$D^{-1/2}LD^{-1/2}$ are computed and form the embedding.
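For illustration, the core of the unnormalized variant can be sketched in a
few lines of plain R (not evaluated; a dense toy version that assumes a fully
connected graph, whereas the \pkg{dimRed} implementation uses sparse
matrices): %
<<eval=FALSE>>=
x <- as.matrix(iris[, 1:4])
W <- exp(-as.matrix(dist(x))^2 / 2)  # Gaussian kernel weights, c = 2
D <- diag(rowSums(W))                # degree matrix
L <- D - W                           # unnormalized graph Laplacian
e <- eigen(L, symmetric = TRUE)      # eigenvalues in decreasing order
n <- nrow(L)
Y <- e$vectors[, c(n - 1, n - 2)]    # smallest eigenvalues > 0 (last is 0)
plot(Y)
@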
\subsection{Diffusion Maps}
\label{sec:isodiffmaplle}
Diffusion Maps \citep{coifman_diffusion_2006} take a distance matrix
as input and calculate the transition probability matrix $P$ of a
diffusion process between the points to approximate the manifold. %
The embedding is then obtained from an eigenvalue decomposition of $P$. %
The algorithm for calculating Diffusion Maps shares some elements with
the way Laplacian Eigenmaps are calculated. %
Both algorithms depart from the same weight matrix; Diffusion Maps
calculate the transition probabilities on the graph after $t$ time steps
and perform the embedding on this probability matrix.
The idea is to simulate a diffusion process between the nodes of the
graph, which is more robust to short-circuiting than the $k$-NNG of
Isomap (see the bottom right of Figure~\ref{fig:knn}). %
Diffusion maps in R are accessible via the
\code{diffusionMap::diffuse()} function, which is available in the
\CRANpkg{diffusionMap} package. %
Additional points can be approximated into an existing embedding using
the Nyström formula \citep{bengio_learning_2004}. %
The implementation in \pkg{dimRed} is based on the
\code{diffusionMap::diffuse} function.
% , which does not contain an
% approximation for unequally sampled manifolds
% \citep{coifman_geometric_2005}. %
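As a sketch (not evaluated), an embedding can be trained on a subsample and
extended to new points with \code{predict}: %
<<eval=FALSE>>=
dat  <- loadDataSet("3D S Curve", n = 300)
samp <- sample(nrow(dat), size = 100)
emb  <- embed(dat[samp], "DiffusionMaps")
emb2 <- predict(emb, dat[-samp])  # uses the Nystroem formula
@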
\subsection{Non-Metric Dimensional Scaling}
\label{sec:nmds}
While Classical Scaling and derived methods (see section
\nameref{sec:classscale}) use eigenvector decomposition to embed the data in
such a way that the given distances are maintained, non-Metric Dimensional
Scaling \citep[nMDS, ][]{kruskal_multidimensional_1964,kruskal_nonmetric_1964}
uses optimization methods to reach the same goal. %
Therefore a stress function,
\begin{equation}
  \label{eq:stress}
  S = \sqrt{\frac{\sum_{i<j} \left( d_{ij} - \hat d_{ij} \right)^2}{\sum_{i<j} d_{ij}^2}},
\end{equation}
where $d_{ij}$ denotes the distance in the original space and $\hat d_{ij}$
the distance in the embedding, is minimized using monotonic regression. %
In R, nMDS is available via \code{vegan::monoMDS}, which the \pkg{dimRed}
package wraps. %
\begin{figure}[htp]
  \centering
<<"pca_isomap_example",include=FALSE>>=
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
library(dimRed); library(ggplot2); #library(dplyr); library(tidyr)
## define which methods to apply
embed_methods <- c("Isomap", "PCA")
## load test data set
data_set <- loadDataSet("3D S Curve", n = 1000)
## apply dimensionality reduction
data_emb <- lapply(embed_methods, function(x) embed(data_set, x))
names(data_emb) <- embed_methods
## plot data set, embeddings, and quality analysis
## plot(data_set, type = "3vars")
## lapply(data_emb, plot, type = "2vars")
## plot_R_NX(data_emb)
add_label <- function(label)
grid::grid.text(label, 0.2, 1, hjust = 0, vjust = 1,
gp = grid::gpar(fontface = "bold",
cex = 1.5))
## pdf('~/phd/text/dimRedPackage/plots/plot_example.pdf', width = 4, height = 4)
## plot the results
plot(data_set, type = "3vars", angle = 15, mar = c(3, 3, 0, 0), box = FALSE, grid = FALSE, pch = 16)
add_label("a")
par(mar = c(4, 4, 0, 0) + 0.1, bty = "n", las = 1)
plot(data_emb$Isomap, type = "2vars", pch = 16)
add_label("b")
plot(data_emb$PCA, type = "2vars", pch = 16)
add_label("d")
## calculate quality scores
print(
plot_R_NX(data_emb) +
theme(legend.title = element_blank(),
legend.position = c(0.5, 0.1),
legend.justification = c(0.5, 0.1))
)
add_label("c")
} else {
# These cannot all be plot(1:10)!!! It's a mystery to me.
plot(1:10)
barplot(1:10)
hist(1:10)
plot(1:10)
}
@
\includegraphics[page=1,width=.45\textwidth]{figure/pca_isomap_example-1.pdf}
\includegraphics[page=1,width=.45\textwidth]{figure/pca_isomap_example-2.pdf}
\includegraphics[page=1,width=.45\textwidth]{figure/pca_isomap_example-3.pdf}
\includegraphics[page=1,width=.45\textwidth]{figure/pca_isomap_example-4.pdf}
\caption[dimRed example]{%
Comparing PCA and Isomap: %
(a) An S-shaped manifold, colors represent the internal coordinates of the
manifold. %
(b) Isomap embedding, the S-shaped manifold is unfolded. %
(c) $R_{NX}$ plotted against the neighborhood size; Isomap is much better at
preserving local distances and PCA is better at preserving global Euclidean
distances. %
The numbers in the legend are the $\text{AUC}_{1 / K}$ values.
(d) PCA projection of the data, the directions of maximum variance are preserved. %
}\label{fig:plotexample}
\end{figure}
<<eval=FALSE>>=
## define which methods to apply
embed_methods <- c("Isomap", "PCA")
## load test data set
data_set <- loadDataSet("3D S Curve", n = 1000)
## apply dimensionality reduction
data_emb <- lapply(embed_methods, function(x) embed(data_set, x))
names(data_emb) <- embed_methods
## figure \ref{fig:plotexample}a, the data set
plot(data_set, type = "3vars")
## figures \ref{fig:plotexample}b (Isomap) and \ref{fig:plotexample}d (PCA)
lapply(data_emb, plot, type = "2vars")
## figure \ref{fig:plotexample}c, quality analysis
plot_R_NX(data_emb)
@
The function \code{plot\_R\_NX} produces a figure that plots the neighborhood
size ($k$, on a log scale) against the quality measure $\text{R}_{NX}(k)$ (see
Equation~\ref{eq:rnx}). %
This gives an overview of the general behavior of methods: if $\text{R}_{NX}$ is
high for low values of $K$, then local neighborhoods are maintained well; if
$\text{R}_{NX}$ is high for large values of $K$, then global gradients are
maintained well. %
It also provides a way to directly compare methods by plotting more than one
$\text{R}_{NX}$ curve; the area under the curve (see Equation~\ref{eq:auclnk})
serves as an indicator for the overall quality of the embedding and is shown
as a number in the legend.
Therefore we can see from Figure~\ref{fig:plotexample}c that Isomap is very
good at maintaining close and medium distances for the given data set, whereas
PCA is only better at maintaining the very large distances. %
The large distances are dominated by the overall bent shape of the S in 3D
space, while the close distances are not affected by this bending. %
This is reflected in the properties recovered by the different methods: the PCA
embedding recovers the S-shape, while Isomap ignores the S-shape and recovers
the inner structure of the manifold.
% Example 2:
Often the quality of an embedding strongly depends on the choice of parameters;
the interface of \pkg{dimRed} can be used to facilitate searching the
parameter space.
Isomap has one parameter $k$ which determines
the number of neighbors used to construct the $k$-NNG\@. %
If this number is too large, then Isomap will resemble an MDS
(Figure~\ref{fig:knn}e); if the number is too small, the resulting embedding
contains holes (Figure~\ref{fig:knn}c). %
The following code finds the optimal value, $k_{\text{max}}$, for $k$ using the
$Q_{\text{local}}$ criterion; the results are visualized in
Figure~\ref{fig:knn}a:
\begin{figure}[htp]
\centering
<<include=FALSE>>=
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
library(dimRed)
library(cccd)
## Load data
ss <- loadDataSet("3D S Curve", n = 500)
## Parameter space
kk <- floor(seq(5, 100, length.out = 40))
## Embedding over parameter space
emb <- lapply(kk, function(x) embed(ss, "Isomap", knn = x))
## Quality over embeddings
qual <- sapply(emb, function(x) quality(x, "Q_local"))
## Find best value for K
ind_max <- which.max(qual)
k_max <- kk[ind_max]
add_label <- function(label){
par(xpd = TRUE)
b = par("usr")
text(b[1], b[4], label, adj = c(0, 1), cex = 1.5, font = 2)
par(xpd = FALSE)
}
names(qual) <- kk
}
@
<<"select_k",include=FALSE,fig.width=11,fig.height=5>>=
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
par(mfrow = c(1, 2),
mar = c(5, 4, 0, 0) + 0.1,
oma = c(0, 0, 0, 0))
plot(kk, qual, type = "l", xlab = "k", ylab = expression(Q[local]), bty = "n")
abline(v = k_max, col = "red")
add_label("a")
plot(ss, type = "3vars", angle = 15, mar = c(3, 3, 0, 0), box = FALSE, grid = FALSE, pch = 16)
add_label("b")
} else {
plot(1:10)
plot(1:10)
}
@
<<"knngraphs",include=FALSE,fig.width=8,fig.height=3>>=
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
par(mfrow = c(1, 3),
mar = c(5, 4, 0, 0) + 0.1,
oma = c(0, 0, 0, 0))
add_knn_graph <- function(ind) {
nn1 <- nng(ss@data, k = kk[ind])
el <- get.edgelist(nn1)
segments(x0 = emb[[ind]]@data@data[el[, 1], 1],
y0 = emb[[ind]]@data@data[el[, 1], 2],
x1 = emb[[ind]]@data@data[el[, 2], 1],
y1 = emb[[ind]]@data@data[el[, 2], 2],
col = "#00000010")
}
plot(emb[[2]]@data@data, type = "n", bty = "n")
add_knn_graph(2)
points(emb[[2]]@data@data, col = dimRed:::colorize(ss@meta),
pch = 16)
add_label("c")
plot(emb[[ind_max]]@data@data, type = "n", bty = "n")
add_knn_graph(ind_max)
points(emb[[ind_max]]@data@data, col = dimRed:::colorize(ss@meta),
pch = 16)
add_label("d")
plot(emb[[length(emb)]]@data@data, type = "n", bty = "n")
add_knn_graph(length(emb))
points(emb[[length(emb)]]@data@data, col = dimRed:::colorize(ss@meta),
pch = 16)
add_label("e")
} else {
plot(1:10)
plot(1:10)
plot(1:10)
}
@
\includegraphics[width=.95\textwidth]{figure/select_k-1.pdf}
\includegraphics[width=.95\textwidth]{figure/knngraphs-1.pdf}
\caption[Estimating $k$ using $Q_\text{local}$]{%
Using \pkg{dimRed} and the $Q_\text{local}$ indicator to estimate a
good value for the parameter $k$ in Isomap. %
(a) $Q_\text{local}$ for different values of $k$, the vertical red
line indicates the maximum $k_{\text{max}}$. %
(b) The original data set, a 2 dimensional manifold bent in an
S-shape in 3 dimensional space. %
Bottom row: Embeddings and $k$-NNG for different values of $k$. %
(c) When $k = 5$, the value for $k$ is too small, resulting in holes in the
embedding, although the manifold itself is still unfolded correctly. %
(d) With $k = k_\text{max}$, the best representation of the original
manifold in two dimensions achievable with Isomap. %
(e) When $k = 100$, the value is too large and the $k$-NNG does not
approximate the manifold any more. %
}\label{fig:knn}
\end{figure}
<<eval=FALSE>>=
## Load data
ss <- loadDataSet("3D S Curve", n = 500)
## Parameter space
kk <- floor(seq(5, 100, length.out = 40))
## Embedding over parameter space
emb <- lapply(kk, function(x) embed(ss, "Isomap", knn = x))
## Quality over embeddings
qual <- sapply(emb, function(x) quality(x, "Q_local"))
## Find best value for K
ind_max <- which.max(qual)
k_max <- kk[ind_max]
@
Figure~\ref{fig:knn}a shows how the $Q_{\text{local}}$ criterion changes when
varying the neighborhood size $k$ for Isomap; the gray lines in
Figure~\ref{fig:knn} represent the edges of the $k$-NN graph. %
If the value for $k$ is too low, the inner structure of the manifold will still
be recovered, but imperfectly (Figure~\ref{fig:knn}c; note that the holes
appear in places that are not covered by the edges of the $k$-NN graph),
and therefore the $Q_{\text{local}}$ score is lower than optimal. %
If $k$ is too large, the error of the embedding is much larger due to short
circuiting and we observe a very steep drop in the $Q_{\text{local}}$ score. %
The short circuiting can be observed in Figure~\ref{fig:knn}e with the edges that
cross the gap between the tips and the center of the S-shape. %
% Example 3:
It is also very easy to compare across methods and quality scores. %
The following code produces a matrix of methods and quality scores;
\code{dimRedMethodList} returns a character vector with all methods. A
visualization of the matrix can be found in Figure~\ref{fig:qualityexample}. %
\begin{figure}[htp]
\centering
<<"plot_quality",include=FALSE>>=
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
embed_methods <- dimRedMethodList()
quality_methods <- c("Q_local", "Q_global", "AUC_lnK_R_NX",
"cophenetic_correlation")
iris_data <- loadDataSet("Iris")
quality_results <- matrix(
NA, length(embed_methods), length(quality_methods),
dimnames = list(embed_methods, quality_methods)
)
embedded_data <- list()
for (e in embed_methods) {
try(embedded_data[[e]] <- embed(iris_data, e))
for (q in quality_methods)
try(quality_results[e,q] <- quality(embedded_data[[e]], q))
}
quality_results <- quality_results[order(rowMeans(quality_results)), ]
palette(c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e"))
col_hsv <- rgb2hsv(col2rgb(palette()))
## col_hsv["v", ] <- col_hsv["v", ] * 3 / 1
palette(hsv(col_hsv["h",], col_hsv["s",], col_hsv["v",]))
par(mar = c(2, 8, 0, 0) + 0.1)
barplot(t(quality_results), beside = TRUE, col = 1:4,
legend.text = quality_methods, horiz = TRUE, las = 1,
cex.names = 0.85,
args.legend = list(x = "topleft", bg = "white", cex = 0.8))
} else {
plot(1:10)
}
@
\includegraphics[width=.5\textwidth]{figure/plot_quality-1.pdf}
\caption[Quality comparison]{%
A visualization of the \code{quality\_results} matrix. %
The methods are ordered by mean quality score. %
The reconstruction error was omitted because a higher value means
a worse embedding, whereas for the quality scores shown a higher value
means a better embedding. %
Parameters were not tuned for the example, therefore it should not
be seen as a general quality assessment of the methods. %
}\label{fig:qualityexample}
\end{figure}
<<eval=FALSE>>=
embed_methods <- dimRedMethodList()
quality_methods <- c("Q_local", "Q_global", "AUC_lnK_R_NX",
"cophenetic_correlation")
scurve <- loadDataSet("3D S Curve", n = 2000)
quality_results <- matrix(
NA, length(embed_methods), length(quality_methods),
dimnames = list(embed_methods, quality_methods)
)
embedded_data <- list()
for (e in embed_methods) {
embedded_data[[e]] <- embed(scurve, e)
for (q in quality_methods)
try(quality_results[e, q] <- quality(embedded_data[[e]], q))
}
@
This example showcases the simplicity with which different methods and quality criteria
can be combined. %
Because of the strong dependence on parameters, it is not advisable to apply
this kind of analysis without tuning the parameters for each method
separately. %
There is no automated way to tune parameters in \pkg{dimRed}. %
\section{Conclusion}
\label{sec:conc}
This paper presents the \pkg{dimRed} and \pkg{coRanking} packages and
provides a brief overview of the methods implemented therein. %
The \pkg{dimRed} package is written in R, one of the most popular
languages for data analysis, and is freely available from CRAN. %
The package is object-oriented and completely open source, and therefore
easily accessible and extensible. %
Although most of the DR methods already had implementations in R,
\pkg{dimRed} adds some new methods for dimensionality reduction, and
\pkg{coRanking} adds methods for an independent quality control of DR
methods to the R ecosystem. %
DR is a widely used technique. However, due to the lack of easily usable tools,
choosing the right method for DR is complex and depends upon a variety of factors. %
The \pkg{dimRed} package aims to facilitate experimentation with different
techniques, parameters, and quality measures so that choosing the right method
becomes easier. %
The \pkg{dimRed} package intends to enable the user to objectively compare
methods that rely on very different algorithmic approaches. %
It also makes the life of the programmer easier, because all methods are
aggregated in one place and a single interface and standardized classes
provide access to the functionality. %
\section{Acknowledgments}
\label{sec:ack}
We thank Dr.\ G.\ Camps-Valls and an anonymous reviewer for many useful
comments. %
This study was supported by the European Space Agency (ESA) via the Earth System
Data Lab project (\url{http://earthsystemdatacube.org}) and the EU via the H2020
project BACI, grant agreement No 640176. %
\bibliographystyle{abbrvnat}
\bibliography{bibliography}
\end{document}
dimRed/vignettes/Makefile 0000644 0001762 0000144 00000000434 14153220136 015124 0 ustar ligges users all:
echo "BNET_BUILD_VIGNETTE: $(BNET_BUILD_VIGNETTE)"
$(R_HOME)/bin/Rscript -e "knitr::knit2pdf('dimensionality-reduction.Rnw')"
$(R_HOME)/bin/Rscript -e "tools::compactPDF('dimensionality-reduction.pdf', gs_quality = 'ebook')"
rm -rf dimensionality-reduction.tex figure/ auto/
dimRed/R/ 0000755 0001762 0000144 00000000000 14153220136 011654 5 ustar ligges users dimRed/R/diffmap.R 0000644 0001762 0000144 00000010670 13562225201 013412 0 ustar ligges users #' Diffusion Maps
#'
#' An S4 Class implementing Diffusion Maps
#'
#' Diffusion Maps uses a diffusion probability matrix to robustly
#' approximate a manifold.
#'
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' Diffusion Maps can take the following parameters:
#' \describe{
#' \item{d}{a function transforming a matrix row wise into a
#' distance matrix or \code{dist} object,
#' e.g. \code{\link[stats]{dist}}.}
#' \item{ndim}{The number of dimensions}
#' \item{eps}{The epsilon parameter that determines the
#' diffusion weight matrix from a distance matrix \code{d},
#' \eqn{exp(-d^2/eps)}; if set to \code{"auto"} it will
#' be set to the median distance to the 0.01*n nearest
#' neighbor.}
#' \item{t}{Time-scale parameter. The recommended value, 0,
#' uses multiscale geometry.}
#' \item{delta}{Sparsity cut-off for the symmetric graph Laplacian,
#' a higher value results in more sparsity and faster calculation.
#' The predefined value is 10^-5.}
#' }
#'
#' @section Implementation:
#' Wraps around \code{\link[diffusionMap]{diffuse}}, see there for
#' details. It uses the notation of Richards et al. (2009) which is
#' slightly different from the one in the original paper (Coifman and
#' Lafon, 2006) and there is no \eqn{\alpha} parameter.
#' There is also an out-of-sample extension, see examples.
#'
#'
#' @references
#' Richards, J.W., Freeman, P.E., Lee, A.B., Schafer,
#' C.M., 2009. Exploiting Low-Dimensional Structure in
#' Astronomical Spectra. ApJ 691,
#' 32. doi:10.1088/0004-637X/691/1/32
#'
#' Coifman, R.R., Lafon, S., 2006. Diffusion maps. Applied and
#' Computational Harmonic Analysis 21,
#' 5-30. doi:10.1016/j.acha.2006.04.006
#'
#' @examples
#' dat <- loadDataSet("3D S Curve", n = 300)
#' emb <- embed(dat, "DiffusionMaps")
#'
#' plot(emb, type = "2vars")
#'
#' # predicting is possible:
#' samp <- sample(floor(nrow(dat) / 10))
#' emb2 <- embed(dat[samp], "DiffusionMaps")
#' emb3 <- predict(emb2, dat[-samp])
#'
#' plot(emb2, type = "2vars")
#' points(getData(emb3))
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export DiffusionMaps
#' @exportClass DiffusionMaps
DiffusionMaps <- setClass(
"DiffusionMaps",
contains = "dimRedMethod",
prototype = list(
stdpars = list(d = stats::dist,
ndim = 2,
eps = "auto",
t = 0,
delta = 1e-5),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("diffusionMap")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
distmat <- pars$d(indata)
if (pars$eps == "auto")
pars$eps <- diffusionMap::epsilonCompute(distmat)
diffres <- diffusionMap::diffuse(
D = distmat,
t = pars$t,
eps.val = pars$eps,
neigen = pars$ndim,
maxdim = pars$ndim,
delta = pars$delta
)
outdata <- as.matrix(diffres$X)
appl <- function(x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(proj) != ncol(data@data))
stop("x must have the same number of dimensions ",
"as the original data")
dd <- sqrt(pdist2(proj, indata))
appl.res <-
diffusionMap::nystrom(diffres, dd, sigma = diffres$epsilon)
dimnames(appl.res) <- list(
rownames(x), paste0("diffMap", seq_len(ncol(outdata)))
)
new("dimRedData", data = appl.res, meta = appl.meta)
}
colnames(outdata) <- paste0("diffMap", seq_len(ncol(outdata)))
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
apply = appl,
has.apply = TRUE,
has.org.data = keep.org.data,
method = "diffmap",
pars = pars
))
})
)
dimRed/R/fastica.R 0000644 0001762 0000144 00000006663 13562225201 013425 0 ustar ligges users #' Independent Component Analysis
#'
#' An S4 Class implementing the FastICA algorithm for Independent
#' Component Analysis.
#'
#' ICA is used for blind signal separation of different sources. It is
#' a linear projection.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' FastICA can take the following parameters:
#' \describe{
#' \item{ndim}{The number of output dimensions. Defaults to \code{2}}
#' }
#'
#' @section Implementation:
#' Wraps around \code{\link[fastICA]{fastICA}}. FastICA uses a very
#' fast approximation of negentropy to estimate statistical
#' independence between signals. Because it is a simple
#' rotation/projection, forward and backward functions can be given.
#'
#' @references
#'
#' Hyvarinen, A., 1999. Fast and robust fixed-point algorithms for independent
#' component analysis. IEEE Transactions on Neural Networks 10, 626-634.
#' https://doi.org/10.1109/72.761722
#'
#' @examples
#' dat <- loadDataSet("3D S Curve")
#' emb <- embed(dat, "FastICA", ndim = 2)
#' plot(getData(getDimRedData(emb)))
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export FastICA
#' @exportClass FastICA
FastICA <- setClass(
"FastICA",
contains = "dimRedMethod",
prototype = list(
stdpars = list(ndim = 2),
fun = function (data,
pars,
keep.org.data = TRUE) {
chckpkg("fastICA")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
orgdata.colmeans <- colMeans(orgdata)
indata <- data@data
res <- fastICA::fastICA(indata, n.comp = pars$ndim, method = "C")
outdata <- res$S
colnames(outdata) <- paste0("ICA", 1:ncol(outdata))
appl <- function(x){
appl.meta <- if (inherits(x, "dimRedData"))
x@meta
else
matrix(numeric(0), 0, 0)
proj <- if (inherits(x, "dimRedData"))
x@data
else
x
out <- scale(proj, center = orgdata.colmeans, scale = FALSE) %*%
res$K %*%
res$W
colnames(out) <- paste0("ICA", 1:ncol(out))
return(new("dimRedData", data = out, meta = appl.meta))
}
inv <- function(x){
appl.meta <- if (inherits(x, "dimRedData"))
x@meta
else
matrix(numeric(0), 0, 0)
proj <- if (inherits(x, "dimRedData"))
x@data
else
x
out <- scale(proj %*% res$A[1:ncol(proj), ],
center = -orgdata.colmeans,
scale = FALSE)
reproj <- new("dimRedData", data = out, meta = appl.meta)
return(reproj)
}
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
has.org.data = keep.org.data,
apply = appl,
inverse = inv,
has.apply = TRUE,
has.inverse = TRUE,
method = "FastICA",
pars = pars
))
})
)
dimRed/R/nmds.R 0000644 0001762 0000144 00000003502 13562225201 012741 0 ustar ligges users #' Non-Metric Dimensional Scaling
#'
#' An S4 Class implementing Non-Metric Dimensional Scaling.
#'
#' A non-linear extension of MDS using monotonic regression
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' nMDS can take the following parameters:
#' \describe{
#' \item{d}{A distance function.}
#' \item{ndim}{The number of embedding dimensions.}
#' }
#'
#' @section Implementation:
#' Wraps around \code{\link[vegan]{monoMDS}}. For parameters that are not
#' available here, the standard configuration is used.
#'
#' @references
#'
#' Kruskal, J.B., 1964. Nonmetric multidimensional scaling: A numerical method.
#' Psychometrika 29, 115-129. https://doi.org/10.1007/BF02289694
#'
#' @examples
#' dat <- loadDataSet("3D S Curve", n = 300)
#' emb <- embed(dat, "nMDS")
#' plot(emb, type = "2vars")
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export nMDS
#' @exportClass nMDS
nMDS <- setClass(
"nMDS",
contains = "dimRedMethod",
prototype = list(
stdpars = list(d = stats::dist, ndim = 2),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("vegan")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
outdata <- vegan::monoMDS(pars$d(indata), k = pars$ndim)$points
colnames(outdata) <- paste0("NMDS", 1:ncol(outdata))
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
has.org.data = keep.org.data,
method = "nmds",
pars = pars
))
})
)
dimRed/R/tsne.R 0000644 0001762 0000144 00000005335 13562225201 012757 0 ustar ligges users #' t-Distributed Stochastic Neighborhood Embedding
#'
#' An S4 Class for t-SNE.
#'
#' t-SNE is a method that uses Kullback-Leibler divergence between the
#' distance matrices in high and low-dimensional space to embed the
#' data. The method is very well suited to visualize complex
#' structures in low dimensions.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' t-SNE can take the following parameters:
#' \describe{
#' \item{d}{A distance function, defaults to euclidean distances}
#' \item{perplexity}{The perplexity parameter, roughly equivalent to neighborhood size.}
#' \item{theta}{Approximation parameter for the nearest neighbour search; larger values are less accurate.}
#' \item{ndim}{The number of embedding dimensions.}
#' }
#'
#' @section Implementation:
#'
#' Wraps around \code{\link[Rtsne]{Rtsne}}, which is very well
#' documented. Setting \code{theta = 0} does a normal t-SNE, larger
#' values for \code{theta < 1} use the Barnes-Hut algorithm which
#' scales much nicer with data size. Larger values for perplexity take
#' larger neighborhoods into account.
#'
#' @references
#' Maaten, L. van der, 2014. Accelerating t-SNE using Tree-Based
#' Algorithms. Journal of Machine Learning Research 15, 3221-3245.
#'
#' van der Maaten, L., Hinton, G., 2008. Visualizing Data using
#' t-SNE. J. Mach. Learn. Res. 9, 2579-2605.
#'
#' @examples
#' \dontrun{
#' dat <- loadDataSet("3D S Curve", n = 300)
#' emb <- embed(dat, "tSNE", perplexity = 80)
#' plot(emb, type = "2vars")
#' }
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export tSNE
#' @exportClass tSNE
tSNE <- setClass(
"tSNE",
contains = "dimRedMethod",
prototype = list(
stdpars = list(d = stats::dist,
perplexity = 30,
theta = 0.5,
ndim = 2),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("Rtsne")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
outdata <- Rtsne::Rtsne(pars$d(indata),
perplexity = pars$perplexity,
theta = pars$theta,
dims = pars$ndim)$Y
colnames(outdata) <- paste0("tSNE", 1:ncol(outdata))
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
has.org.data = keep.org.data,
method = "tsne",
pars = pars
))
})
)
dimRed/R/kpca.R 0000644 0001762 0000144 00000010512 13562225201 012715 0 ustar ligges users #' Kernel PCA
#'
#' An S4 Class implementing Kernel PCA
#'
#' Kernel PCA is a nonlinear extension of PCA using kernel methods.
#'
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' Kernel PCA can take the following parameters:
#' \describe{
#' \item{ndim}{the number of output dimensions, defaults to 2}
#' \item{kernel}{The kernel function, either as a function or a
#' character vector with the name of the kernel. Defaults to
#' \code{"rbfdot"}}
#' \item{kpar}{A list with the parameters for the kernel function,
#' defaults to \code{list(sigma = 0.1)}}
#' }
#'
#' The most comprehensive collection of kernel functions can be found in
#' \code{\link[kernlab]{kpca}}. In case the function does not take any
#' parameters \code{kpar} has to be an empty list.
#'
#' @section Implementation:
#'
#' Wraps around \code{\link[kernlab]{kpca}}, but provides additionally
#' forward and backward projections.
#'
#' @references
#'
#' Sch\"olkopf, B., Smola, A., M\"uller, K.-R., 1998. Nonlinear Component Analysis
#' as a Kernel Eigenvalue Problem. Neural Computation 10, 1299-1319.
#' https://doi.org/10.1162/089976698300017467
#'
#' @examples
#' \dontrun{
#' dat <- loadDataSet("3D S Curve")
#' emb <- embed(dat, "kPCA")
#' plot(emb, type = "2vars")
#' }
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export kPCA
#' @exportClass kPCA
kPCA <- setClass(
"kPCA",
contains = "dimRedMethod",
prototype = list(
stdpars = list(kernel = "rbfdot",
kpar = list(sigma = 0.1),
ndim = 2),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("kernlab")
if (is.null(pars$ndim)) pars$ndim <- 2
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
message(Sys.time(), ": Calculating kernel PCA")
res <- do.call(kernlab::kpca, c(list(x = indata), pars))
kernel <- get_kernel_fun(pars$kernel, pars$kpar)
message(Sys.time(), ": Trying to calculate reverse")
K_rev <- kernlab::kernelMatrix(kernel, res@rotated)
diag(K_rev) <- 0.1 + diag(K_rev)
dual_coef <- try(solve(K_rev, indata), silent = TRUE)
appl <- function (x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
proj <- kernlab::predict(res, proj)[, 1:pars$ndim, drop = FALSE]
colnames(proj) <- paste0("kPCA", 1:ncol(proj))
new("dimRedData", data = proj, meta = appl.meta)
}
inv <-
if (inherits(dual_coef, "try-error")) {
message("No inverse function.")
function(x) NA
} else {
function (x) {
appl.meta <-
if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
resrot <- res@rotated[, 1:ncol(proj)]
rot <- kernlab::kernelMatrix(kernel, proj, resrot)
proj <- rot %*% dual_coef
new("dimRedData", data = proj, meta = appl.meta)
}
}
outdata <- res@rotated[, 1:pars$ndim, drop = FALSE]
colnames(outdata) <- paste0("kPCA", 1:ncol(outdata))
message(Sys.time(), ": DONE")
return(
new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
apply = appl,
inverse = inv,
has.org.data = keep.org.data,
has.apply = TRUE,
has.inverse = TRUE,
method = "kpca",
pars = pars
)
)
})
)
## get the kernel function out of the kernlab namespace:
get_kernel_fun <- function (kernel, pars) {
if (!is(kernel, "kernel")) {
if (is(kernel, "function")) {
kernel <- deparse(substitute(kernel))
} else {
kernel <- get(kernel, asNamespace("kernlab"))
}
kernel <- do.call(kernel, pars)
}
return(kernel)
}
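## Usage sketch (for illustration only, not part of the API):
## get_kernel_fun("rbfdot", list(sigma = 0.1)) returns the same kernel
## object as kernlab::rbfdot(sigma = 0.1).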
dimRed/R/autoencoder.R 0000644 0001762 0000144 00000036355 14153211745 014331 0 ustar ligges users #' AutoEncoder
#'
#' An S4 Class implementing an Autoencoder
#'
#' Autoencoders are neural networks that try to reproduce their input. Consider
#' this method unstable, as the internals may still be changed.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' Autoencoder can take the following parameters:
#' \describe{
#' \item{ndim}{The number of dimensions for reduction.}
#' \item{n_hidden}{The number of neurons in the hidden
#' layers; the length specifies the number of layers,
#' the length must be odd, and the middle number must
#' be the same as ndim.}
#' \item{activation}{The activation functions for the layers,
#' one of "tanh", "sigmoid", "relu", "elu", everything
#' else will silently be ignored and there will be no
#' activation function for the layer.}
#' \item{weight_decay}{the coefficient for weight decay,
#' set to 0 if no weight decay desired.}
#' \item{learning_rate}{The learning rate for gradient descent}
#' \item{graph}{Optional: A list of bits and pieces that define the
#' autoencoder in tensorflow, see details.}
#' \item{keras_graph}{Optional: A list of keras layers that define
#' the encoder and decoder, specifying this, will ignore all
#' other topology related variables, see details.}
#' \item{batchsize}{If NA, all data will be used for training,
#' else only a random subset of size batchsize will be used}
#' \item{n_steps}{the number of training steps.}
#' }
#'
#' @section Details:
#' There are several ways to specify an autoencoder. The simplest is to pass the
#' number of neurons per layer in \code{n_hidden}; this must be a vector of
#' integers of odd length, it must be symmetric, and the middle number must
#' be equal to \code{ndim}. For every layer an activation function can be
#' specified with \code{activation}.
#'
#' For regularization weight decay can be specified by setting
#' \code{weight_decay} > 0.
#'
#' Currently only a gradient descent optimizer is used; the learning rate can be
#' specified by setting \code{learning_rate}.
#' The learner can operate on batches if \code{batchsize} is not \code{NA}.
#' The number of steps the learner uses is specified using \code{n_steps}.
#'
#' @section Further training a model:
#' If the model did not converge in the first training phase or training with
#' different data is desired, the \code{\link{dimRedResult}} object may be
#' passed as \code{autoencoder} parameter; In this case all topology related
#' parameters will be ignored.
#'
#' @section Using Keras layers:
#' The encoder and decoder part can be specified using a list of \pkg{keras}
#' layers. This requires a list with two entries, \code{encoder} should contain
#' a LIST of keras layers WITHOUT the \code{\link[keras]{layer_input}}
#' that will be concatenated in order to form the encoder part.
#' \code{decoder} should be
#' defined accordingly, the output of \code{decoder} must have the same number
#' of dimensions as the input data.
#'
#' @section Using Tensorflow:
#' The model can be entirely defined in \pkg{tensorflow}, it must contain a
#' list with the following entries:
#' \describe{
#' \item{encoder}{A tensor that defines the encoder.}
#' \item{decoder}{A tensor that defines the decoder.}
#' \item{network}{A tensor that defines the reconstruction (encoder + decoder).}
#' \item{loss}{A tensor that calculates the loss (network + loss function).}
#' \item{in_data}{A \code{placeholder} that points to the data input of
#' the network AND the encoder.}
#' \item{in_decoder}{A \code{placeholder} that points to the input of
#' the decoder.}
#' \item{session}{A \pkg{tensorflow} \code{Session} object that holds
#' the values of the tensors.}
#' }
#'
#' @section Implementation:
#' Uses \pkg{tensorflow} as a backend; for details and
#' problems relating to tensorflow, see \url{https://tensorflow.rstudio.com}.
#'
#' @examples
#' \dontrun{
#' dat <- loadDataSet("3D S Curve")
#'
#' emb <- embed(dat, "AutoEncoder")
#'
#' # predicting is possible:
#' samp <- sample(floor(nrow(dat) / 10))
#' emb2 <- embed(dat[samp], "AutoEncoder")
#' emb3 <- predict(emb2, dat[-samp])
#'
#' plot(emb, type = "2vars")
#' plot(emb2, type = "2vars")
#' points(getData(emb3))
#' }
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export AutoEncoder
#' @exportClass AutoEncoder
AutoEncoder <- setClass(
"AutoEncoder",
contains = "dimRedMethod",
prototype = list(
stdpars = list(ndim = 2,
n_hidden = c(10, 2, 10),
activation = c("tanh", "lin", "tanh"),
weight_decay = 0.001,
learning_rate = 0.15,
graph = NULL,
keras_graph = NULL,
## is.na() of an S4 class gives a warning
autoencoder = NULL,
batchsize = NA,
n_steps = 500),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("tensorflow")
tensorflow::tf$compat$v1$disable_v2_behavior()
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
graph <-
if (!is.null(pars$graph)) {
message("using predefined graph, ",
"ignoring other parameters that define topology, ",
"be sure to set ndim to the correct value ",
"else you might run into trouble.")
pars$graph
} else if (!is.null(pars$autoencoder)) {
message("using predefined autoencoder object, ",
" ignoring other parameters that define topology.")
if (!(inherits(pars$autoencoder, "dimRedResult") &&
pars$autoencoder@method == "AutoEncoder"))
stop("autoencoder must be NULL, ",
"or of type dimRedResult by an AutoEncoder object.")
## setting topology related parameters from autoencoder
pars$ndim <- pars$autoencoder@pars$ndim
pars$n_hidden <- pars$autoencoder@pars$n_hidden
pars$activation <- pars$autoencoder@pars$activation
pars$autoencoder@pars$graph
} else if (!is.null(pars$keras_graph)) {
message("using predefined keras graph, ",
"ignoring parameters that define topology")
tmp <- graph_keras(encoder = pars$keras_graph$encoder,
decoder = pars$keras_graph$decoder,
n_in = ncol(indata))
pars$ndim <- tmp$encoder$shape$dims[[2]]$value
tmp
} else {
with(pars, {
graph_params(
d_in = ncol(indata),
n_hidden = n_hidden,
activation = activation,
weight_decay = weight_decay,
learning_rate = learning_rate,
n_steps = n_steps,
ndim = ndim
)
})
}
if (!"encoder" %in% names(graph)) stop("no encoder in graph")
if (!"decoder" %in% names(graph)) stop("no decoder in graph")
if (!"network" %in% names(graph)) stop("no network in graph")
if (!"loss" %in% names(graph)) stop("no loss in graph")
if (!"in_decoder" %in% names(graph)) stop("no in_decoder in graph")
if (!"in_data" %in% names(graph)) stop("no in_data in graph")
if (!"session" %in% names(graph)) stop("no session in graph")
## TODO: I am not sure if there is a way to do this directly on the list
## objects
graph_data_input <- graph$in_data
graph_decoder_input <- graph$in_decoder
sess <- graph$session
optimizer <-
tensorflow::tf$compat$v1$train$GradientDescentOptimizer(pars$learning_rate)
train <- optimizer$minimize(graph$loss)
## TODO: do proper batching and hold out
for (step in 1:pars$n_steps) {
sess$run(train, feed_dict =
tensorflow::dict(
graph_data_input =
if (is.na(pars$batchsize)) {
indata
} else {
indata[
sample(seq_len(nrow(indata)), pars$batchsize),
]
}
)
)
}
outdata <-
sess$run(graph$encoder,
feed_dict = tensorflow::dict(graph_data_input = indata))
appl <- function(x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(proj) != ncol(data@data))
stop("x must have the same number of dimensions ",
"as the original data")
res <-
sess$run(graph$encoder,
feed_dict = tensorflow::dict(graph_data_input = proj))
colnames(res) <- paste0("AE", seq_len(ncol(res)))
new("dimRedData", data = res, meta = appl.meta)
}
inv <- function(x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(proj) != pars$ndim)
stop("x must have the same number of dimensions ",
"as ndim data")
res <- sess$run(
graph$decoder,
feed_dict = tensorflow::dict(
graph_decoder_input = proj
))
colnames(res) <- colnames(indata)
new("dimRedData", data = res, meta = appl.meta)
}
## TODO: this is a hack and there should be an "official" way to save
## extra data in a dimRedResult object
pars$graph <- graph
colnames(outdata) <- paste0("AE", seq_len(ncol(outdata)))
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
apply = appl,
inverse = inv,
has.apply = TRUE,
has.inverse = TRUE,
has.org.data = keep.org.data,
method = "AutoEncoder",
pars = pars
))
})
)
get_activation_function <- function(x) {
switch(
x,
tanh = tensorflow::tf$compat$v1$tanh,
sigmoid = tensorflow::tf$compat$v1$sigmoid,
relu = tensorflow::tf$compat$v1$nn$relu,
elu = tensorflow::tf$compat$v1$elu,
I
)
}
## no idea why these and variants do not work:
## chain_list <- function(x1, x2) Reduce(`%>%`, x2, init = x1)
## chain_list <- function(x) Reduce(`%>%`, x)
chain_list <- function (x1, x2 = NULL) {
if(is.null(x2)) {
stopifnot(is.list(x1))
result <- x1[[1]]
if(length(x1) > 1) for (i in 2:length(x1)) {
result <- result %>% (x1[[i]])
}
} else {
stopifnot(is.list(x2))
result <- x1
for (i in 1:length(x2)) {
result <- result %>% (x2[[i]])
}
}
return(result)
}
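## Usage sketch: chain_list(x, list(f, g)) evaluates x %>% f %>% g; with a
## single list argument, the first element is piped through the rest.
## graph_keras() below uses this to apply a list of keras layers in sequence.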
graph_keras <- function(encoder, decoder, n_in) {
chckpkg("keras")
chckpkg("tensorflow")
inenc <- keras::layer_input(shape = n_in)
enc <- inenc %>% chain_list(encoder)
ndim <- enc$shape$dims[[2]]$value
indec <- keras::layer_input(shape = ndim)
dec <- indec %>% chain_list(decoder)
encdec <- inenc %>% chain_list(encoder) %>% chain_list(decoder)
## TODO: check if this uses weight decay, probably not:
loss <- tensorflow::tf$compat$v1$reduce_mean((encdec - inenc) ^ 2)
sess <- tensorflow::tf$compat$v1$keras$backend$get_session()
return(list(
encoder = enc,
decoder = dec,
network = encdec,
loss = loss,
in_data = inenc,
in_decoder = indec,
session = sess
))
}
graph_params <- function (
d_in,
n_hidden,
activation,
weight_decay,
learning_rate,
n_steps,
ndim
) {
if (length(n_hidden) %% 2 == 0)
stop("the number of layers must be impair")
if (ndim != n_hidden[ceiling(length(n_hidden) / 2)])
stop("the middle of n_hidden must be equal to ndim")
if (length(n_hidden) != length(activation))
stop("declare an activation function for each layer:",
"\nn_hidden: ", paste(n_hidden, collapse = " "),
"\nactivation functions: ", paste(activation, collapse = " "))
if (weight_decay < 0)
stop("weight decay must be > 0")
if (learning_rate <= 0)
stop("learning rate must be > 0")
if (n_steps <= 0)
stop("n_steps must be > 0")
input <- tensorflow::tf$compat$v1$placeholder(
"float", shape = tensorflow::shape(NULL, d_in),
name = "input"
)
indec <- tensorflow::tf$compat$v1$placeholder(
"float",
shape = tensorflow::shape(NULL, ndim),
name = "nlpca"
)
w <- lapply(seq_len(length(n_hidden) + 1), function(x) {
n1 <- if (x == 1) d_in else n_hidden[x - 1]
n2 <- if (x > length(n_hidden)) d_in else n_hidden[x]
tensorflow::tf$compat$v1$Variable(tensorflow::tf$compat$v1$random_uniform(tensorflow::shape(n1, n2), 1.0, -1.0),
name = paste0("w_", x))
})
b <- lapply(seq_len(length(n_hidden) + 1), function (x) {
n <- if (x > length(n_hidden)) d_in else n_hidden[x]
tensorflow::tf$compat$v1$Variable(tensorflow::tf$compat$v1$zeros(tensorflow::shape(n)),
name = paste0("b_", x))
})
enc <- input
for (i in 1:ceiling(length(n_hidden) / 2)) {
sigma <- get_activation_function(activation[i])
enc <- sigma(tensorflow::tf$compat$v1$matmul(enc, w[[i]]) + b[[i]])
}
dec <- indec
for (i in (ceiling(length(n_hidden) / 2) + 1):(length(n_hidden) + 1)) {
sigma <- get_activation_function(activation[i])
dec <- sigma(tensorflow::tf$compat$v1$matmul(dec, w[[i]]) + b[[i]])
}
encdec <- enc
for (i in (ceiling(length(n_hidden) / 2) + 1):(length(n_hidden) + 1)) {
sigma <- get_activation_function(activation[i])
encdec <- sigma(tensorflow::tf$compat$v1$matmul(encdec, w[[i]]) + b[[i]])
}
loss <- Reduce(`+`, lapply(w, function (x) tensorflow::tf$compat$v1$reduce_sum(tensorflow::tf$compat$v1$pow(x, 2))), 0)
loss <- Reduce(`+`, lapply(b, function (x) tensorflow::tf$compat$v1$reduce_sum(tensorflow::tf$compat$v1$pow(x, 2))), loss)
loss <- tensorflow::tf$compat$v1$reduce_mean((encdec - input) ^ 2) + weight_decay * loss
sess <- tensorflow::tf$compat$v1$Session()
## This closes sess if it is garbage collected.
reg.finalizer(sess, function(x) x$close())
sess$run(tensorflow::tf$compat$v1$global_variables_initializer())
return(list(
encoder = enc,
decoder = dec,
network = encdec,
loss = loss,
in_data = input,
in_decoder = indec,
session = sess
))
}
dimRed/R/isomap.R 0000644 0001762 0000144 00000020244 13562225201 013272 0 ustar ligges users #' Isomap embedding
#'
#' An S4 Class implementing the Isomap Algorithm
#'
#' The Isomap algorithm approximates a manifold using geodesic
#' distances on a k nearest neighbor graph. Then classical scaling is
#' performed on the resulting distance matrix.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' Isomap can take the following parameters:
#' \describe{
#' \item{knn}{The number of nearest neighbors in the graph. Defaults to 50.}
#' \item{ndim}{The number of embedding dimensions, defaults to 2.}
#' \item{get_geod}{Should the geodesic distance matrix be kept,
#' if \code{TRUE}, access it as \code{getOtherData(x)$geod}}
#' }
#'
#' @section Implementation:
#'
#' The dimRed package uses its own implementation of Isomap which also
#' comes with an out-of-sample extension (known as landmark
#' Isomap). The default Isomap algorithm does not scale very well
#' computationally; the implementation here uses \code{\link[RANN]{nn2}} for
#' a faster search of the nearest neighbors. If data are too large it
#' may be useful to fit a subsample of the data and use the
#' out-of-sample extension for the other points.
#'
#' @references
#' Tenenbaum, J.B., Silva, V. de, Langford, J.C., 2000. A Global Geometric
#' Framework for Nonlinear Dimensionality Reduction. Science 290, 2319-2323.
#' https://doi.org/10.1126/science.290.5500.2319
#'
#' @examples
#' dat <- loadDataSet("3D S Curve", n = 500)
#' emb <- embed(dat, "Isomap", knn = 10)
#' plot(emb)
#'
#' ## or simpler, use embed():
#' samp <- sample(nrow(dat), size = 200)
#' emb2 <- embed(dat[samp], "Isomap", .mute = NULL, knn = 10)
#' emb3 <- predict(emb2, dat[-samp])
#'
#' plot(emb2, type = "2vars")
#' plot(emb3, type = "2vars")
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export Isomap
#' @exportClass Isomap
Isomap <- setClass(
"Isomap",
contains = "dimRedMethod",
prototype = list(
stdpars = list(knn = 50,
ndim = 2,
get_geod = FALSE),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("RSpectra")
chckpkg("igraph")
chckpkg("RANN")
message(Sys.time(), ": Isomap START")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
if (is.null(pars$eps)) pars$eps <- 0
if (is.null(pars$get_geod)) pars$get_geod <- FALSE
## geodesic distances
message(Sys.time(), ": constructing knn graph")
knng <- makeKNNgraph(x = indata, k = pars$knn, eps = pars$eps)
message(Sys.time(), ": calculating geodesic distances")
geodist <- igraph::distances(knng, algorithm = "dijkstra")
message(Sys.time(), ": Classical Scaling")
## TODO: add regularization
k <- geodist ^ 2
k <- .Call(stats:::C_DoubleCentre, k)
k <- - k / 2
## TODO: explicit symmetrizing
## TODO: return eigenvectors?
e <- RSpectra::eigs_sym(k, pars$ndim, which = "LA",
opts = list(retvec = TRUE))
e_values <- e$values
e_vectors <- e$vectors
neig <- sum(e_values > 0)
if (neig < pars$ndim) {
warning("Isomap: eigenvalues < 0, returning less dimensions!")
e_values <- e_values[seq_len(neig)]
e_vectors <- e_vectors[, seq_len(neig), drop = FALSE]
}
e_vectors <- e_vectors * rep(sqrt(e_values), each = nrow(e_vectors))
colnames(e_vectors) <- paste("iso", seq_len(neig))
appl <- function (x) {
message(Sys.time(), ": L-Isomap embed START")
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
indata <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(indata) != ncol(data@data))
stop("x must have the same number of dimensions as the original data")
nindata <- nrow(indata)
norg <- nrow(orgdata)
message(Sys.time(), ": constructing knn graph")
lknng <- makeKNNgraph(rbind(indata, orgdata),
k = pars$knn, eps = pars$eps)
message(Sys.time(), ": calculating geodesic distances")
lgeodist <- igraph::distances(lknng,
seq_len(nindata),
nindata + seq_len(norg))
message(Sys.time(), ": embedding")
dammu <- sweep(lgeodist ^ 2, 2, colMeans(geodist ^ 2), "-")
Lsharp <- sweep(e_vectors, 2, e_values, "/")
out <- -0.5 * (dammu %*% Lsharp)
message(Sys.time(), ": DONE")
return(new("dimRedData", data = out, meta = appl.meta))
}
return(new(
"dimRedResult",
data = new("dimRedData",
data = e_vectors,
meta = meta),
org.data = orgdata,
has.org.data = keep.org.data,
apply = appl,
has.apply = TRUE,
method = "Isomap",
pars = pars,
other.data = if (pars$get_geod) list(geod = as.dist(geodist))
else list()
))
})
)
## Takes input data (matrix or data frame) and returns a knn graph;
## implements "smart" choices of the RANN::nn2 parameters. We ignore
## radius search.
## TODO: find out a good limit to switch from kd to bd trees.
## COMMENT: bd trees are buggy, they don't work if there are duplicated
## data points and checking would neutralize the performance gain, so bd
## trees are not really usable.
makeKNNgraph <- function (x, k, eps = 0, diag = FALSE){
## requireNamespace("RANN")
## requireNamespace("igraph")
## consts
INF_VAL <- 1.340781e+15
NA_IDX <- 0
BDKD_LIM <- 1000000 #todo: figure out a good value here
## select parameters
M <- nrow(x)
treetype <- "kd" # if (M < BDKD_LIM) "kd" else "bd"
# see:
# https://github.com/jefferis/RANN/issues/19
searchtype <- if (eps == 0) "standard" else "priority"
## RANN::nn2 returns the points in data with respect to query
## e.g. the rows in the output are the points in query and the
## columns the points in data.
nn2res <- RANN::nn2(data = x, query = x, k = k + 1, treetype = treetype,
searchtype = searchtype, eps = eps)
## create graph: the first ny nodes will be y, the last nx nodes
## will be x, if x != y
## it is not really pretty to create a
## directed graph first and then make it undirected.
g <- igraph::make_empty_graph(M, directed = TRUE)
g[from = if (diag) rep(seq_len(M), times = k + 1)
else rep(seq_len(M), times = k),
to = if (diag) as.vector(nn2res$nn.idx)
else as.vector(nn2res$nn.idx[, -1]),
attr = "weight"] <-
if (diag) as.vector(nn2res$nn.dists)
else as.vector(nn2res$nn.dists[, -1])
return(igraph::as.undirected(g, mode = "collapse", edge.attr.comb = "first"))
}
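## Usage sketch (internal helper, hypothetical data):
## g <- makeKNNgraph(as.matrix(iris[, 1:4]), k = 10)
## returns an undirected igraph object with euclidean edge weights;
## igraph::distances(g) then yields the geodesic distance matrix.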
## the original isomap method I'll keep it here for completeness:
## isomap <- new("dimRedMethod",
## stdpars = list(knn = 50,
## d = dist,
## ndim = 2)
## fun = function (data, pars,
## keep.org.data = TRUE) {
## chckpkg("vegan")
## meta <- data@meta
## orgdata <- if (keep.org.data) data@data else NULL
## indata <- data@data
## outdata <- vegan::isomap(pars$d(indata),
## ndim = pars$ndim,
## k = pars$knn)$points
## colnames(outdata) <- paste0("Iso", 1:ncol(outdata))
## return(new(
## "dimRedResult",
## data = new("dimRedData",
## data = outdata,
## meta = meta),
## org.data = orgdata,
## has.org.data = keep.org.data,
## method = "isomap",
## pars = pars
## ))
## })
dimRed/R/umap.R 0000644 0001762 0000144 00000010644 14153200510 012740 0 ustar ligges users #' Umap embedding
#'
#' An S4 Class implementing the UMAP algorithm
#'
#' Uniform Manifold Approximation and Projection is a gradient descent-based
#' algorithm that gives results similar to t-SNE, but scales better with the
#' number of points.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#'
#' UMAP can take the following parameters:
#' \describe{
#' \item{ndim}{The number of embedding dimensions.}
#' \item{knn}{The number of neighbors to be used.}
#' \item{d}{The distance metric to use.}
#' \item{method}{\code{"naive"} for an R implementation, \code{"python"}
#' for the reference implementation.}
#' }
#'
#' Other method parameters can also be passed, see
#' \code{\link[umap]{umap.defaults}} for details. The ones above have been
#' standardized for the use with \code{dimRed} and will get automatically
#' translated for \code{\link[umap]{umap}}.
#'
#' @section Implementation:
#'
#' The dimRed package wraps the \code{\link[umap]{umap}} packages which provides
#' an implementation in pure R and also a wrapper around the original python
#' package \code{umap-learn} (https://github.com/lmcinnes/umap/). This requires
#' \code{umap-learn} version 0.4 to be installed; at the time of writing, there
#' is already \code{umap-learn} 0.5, but it is not supported by the R package
#' \code{\link[umap]{umap}}.
#'
#' The \code{"naive"} implementation is a pure R implementation and considered
#' experimental at the point of writing this, it is also much slower than the
#' python implementation.
#'
#' The \code{"python"} implementation is the reference implementation used by
#' McInnes et al. (2018). It requires the \code{\link[reticulate]{reticulate}}
#' package for the interaction with python and the python package
#' \code{umap-learn} installed (use \code{pip install umap-learn}).
#'
#' @references
#'
#' McInnes, Leland, and John Healy.
#' "UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction."
#' https://arxiv.org/abs/1802.03426
#'
#' @examples
#' \dontrun{
#' dat <- loadDataSet("3D S Curve", n = 300)
#' emb <- embed(dat, "UMAP", .mute = NULL, knn = 10)
#' plot(emb, type = "2vars")
#' }
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export UMAP
#' @exportClass UMAP
UMAP <- setClass(
"UMAP",
contains = "dimRedMethod",
prototype = list(
stdpars = list(
knn = 15,
ndim = 2,
d = "euclidean",
method = "umap-learn"
),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("umap")
if (pars$method == "python") {
chckpkg("reticulate")
if (!reticulate::py_module_available("umap"))
stop("cannot find python umap, install with `pip install umap-learn`")
}
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
## Create config
umap_call_pars <- umap::umap.defaults
umap_call_pars$n_neighbors <- pars$knn
umap_call_pars$n_components <- pars$ndim
umap_call_pars$metric <- pars$d
umap_call_pars$method <- pars$method
umap_call_pars$d <- indata
pars_2 <- pars
pars_2$knn <- NULL
pars_2$ndim <- NULL
pars_2$d <- NULL
pars_2$method <- NULL
for (n in names(pars_2))
umap_call_pars[[n]] <- pars_2[[n]]
## Do the embedding
outdata <- do.call(umap::umap, umap_call_pars)
## Post processing
colnames(outdata$layout) <- paste0("UMAP", 1:ncol(outdata$layout))
appl <- function(x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(proj) != ncol(orgdata))
stop("x must have the same number of dimensions ",
"as the original data")
new_proj <- umap:::predict.umap(outdata, as.matrix(proj))
colnames(new_proj) <- paste0("UMAP", 1:ncol(new_proj))
rownames(new_proj) <- NULL
out_data <- new("dimRedData", data = new_proj, meta = appl.meta)
return(out_data)
}
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata$layout,
meta = meta),
org.data = orgdata,
apply = appl,
has.org.data = keep.org.data,
has.apply = TRUE,
method = "UMAP",
pars = pars
))
}
)
)
dimRed/R/mds.R
#' Metric Multidimensional Scaling
#'
#' An S4 Class implementing classical scaling (MDS).
#'
#' MDS tries to maintain distances in high- and low-dimensional space. It
#' has the advantage over PCA that arbitrary distance functions can be
#' used, but it is computationally more demanding.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' MDS can take the following parameters:
#' \describe{
#' \item{ndim}{The number of dimensions.}
#' \item{d}{The function to calculate the distance matrix from the input coordinates, defaults to euclidean distances.}
#' }
#'
#' @section Implementation:
#'
#' Wraps around \code{\link[stats]{cmdscale}}. The implementation also
#' provides an out-of-sample extension which is not completely
#' optimized yet.
#'
#' @references
#'
#' Torgerson, W.S., 1952. Multidimensional scaling: I. Theory and method.
#' Psychometrika 17, 401-419. https://doi.org/10.1007/BF02288916
#'
#' @examples
#' \dontrun{
#' dat <- loadDataSet("3D S Curve")
#' emb <- embed(dat, "MDS")
#' plot(emb, type = "2vars")
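#'
#' # out-of-sample extension, available for the default euclidean
#' # distance (a sketch):
#' emb_oos <- predict(emb, dat[1:100])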
#'
#' # a "manual" kPCA:
#' emb2 <- embed(dat, "MDS", d = function(x) exp(stats::dist(x)))
#' plot(emb2, type = "2vars")
#'
#' # a "manual", more customizable, and slower Isomap:
#' emb3 <- embed(dat, "MDS", d = function(x) vegan::isomapdist(vegan::vegdist(x, "manhattan"), k = 20))
#' plot(emb3)
#'
#' }
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export MDS
#' @exportClass MDS
MDS <- setClass(
"MDS",
contains = "dimRedMethod",
prototype = list(
stdpars = list(d = stats::dist, ndim = 2),
fun = function (data, pars,
keep.org.data = TRUE) {
##
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
## The out-of-sample extension is only available for euclidean
## distances. A more efficient implementation would be possible:
## D is squared several times, so computing the squared distances
## right away would be much faster.
## identical(..., TRUE) is necessary because all.equal returns
## either TRUE or a character vector describing the differences.
has.apply <- identical(all.equal(pars$d, stats::dist), TRUE)
D <- as.matrix(pars$d(indata))
if (has.apply) mD2 <- mean(D ^ 2)
## cmdscale squares the matrix internally
res <- stats::cmdscale(D, k = pars$ndim)
outdata <- res
D <- NULL
## Untested: remove D from the environment before creating the
## appl function, else it would stay in the function's
## environment forever.
appl <- if (!has.apply) function(x) NA else function(x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
## double center new data with respect to old: TODO: optimize
## this method, according to the de Silva, Tenenbaum(2004)
## paper. Need an efficient method to calculate the distance
## matrices between different point sets and arbitrary
## distances.
Kab <- as.matrix(pars$d(proj) ^ 2)
Exa <- colMeans(pdist2(indata, proj))
Kab <- sweep(Kab, 1, Exa) #, "-")
Kab <- sweep(Kab, 2, Exa) #, "-")
Kab <- -0.5 * (Kab + mD2)
## Eigenvalue decomposition
tmp <- eigen(Kab, symmetric = TRUE)
ev <- tmp$values[seq_len(pars$ndim)]
evec <- tmp$vectors[, seq_len(pars$ndim), drop = FALSE]
k1 <- sum(ev > 0)
if (k1 < pars$ndim) {
warning(gettextf("only %d of the first %d eigenvalues are > 0",
k1, k), domain = NA)
evec <- evec[, ev > 0, drop = FALSE]
ev <- ev[ev > 0]
}
points <- evec * rep(sqrt(ev), each = nrow(proj))
dimnames(points) <- list(NULL, paste0("MDS", seq_len(ncol(points))))
new("dimRedData", data = points, meta = appl.meta)
}
colnames(outdata) <- paste0("MDS", seq_len(ncol(outdata)))
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
apply = appl,
has.org.data = keep.org.data,
has.apply = has.apply,
method = "mds",
pars = pars
))
})
)
dimRed/R/dimRedMethod-class.R
#' Class "dimRedMethod"
#'
#' A virtual class "dimRedMethod" to serve as a template to implement
#' methods for dimensionality reduction.
#'
#' Implementations of dimensionality reductions should inherit from
#' this class.
#'
#' The \code{fun} slot should be a function that takes three arguments
#' \describe{
#' \item{data}{An object of class \code{\link{dimRedData}}.}
#' \item{pars}{A list with the standard parameters.}
#' \item{keep.org.data}{Logical. If the original data should be kept in the output.}
#' }
#' and returns an object of class \code{\link{dimRedResult}}.
#'
#' The \code{stdpars} slot should take a list that contains standard
#' parameters for the implemented methods.
#'
#' This way the method can be called by \code{embed(data, "method-name",
#' ...)}, where \code{...} can be used to change single parameters.
#'
#'
#' @slot fun A function that does the embedding.
#' @slot stdpars A list with the default parameters for the \code{fun}
#' slot.
#'
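#' @examples
#' ## A minimal sketch of a custom method (the class "FirstCols" is
#' ## hypothetical); it simply returns the first `ndim` columns of the data:
#' \dontrun{
#' FirstCols <- setClass(
#' "FirstCols",
#' contains = "dimRedMethod",
#' prototype = list(
#' stdpars = list(ndim = 2),
#' fun = function(data, pars, keep.org.data = TRUE) {
#' outdata <- getData(data)[, seq_len(pars$ndim), drop = FALSE]
#' colnames(outdata) <- paste0("FC", seq_len(ncol(outdata)))
#' new("dimRedResult",
#' data = new("dimRedData", data = outdata, meta = getMeta(data)),
#' org.data = getData(data),
#' has.org.data = TRUE,
#' method = "FirstCols",
#' pars = pars)
#' }
#' )
#' )
#' emb <- embed(loadDataSet("Iris"), "FirstCols")
#' plot(emb, type = "2vars")
#' }
#'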
#' @family dimensionality reduction methods
#' @export
setClass("dimRedMethod",
contains = "VIRTUAL",
slots = c(fun = "function",
stdpars = "list"))
#' dimRedMethodList
#'
#' Get the names of all methods for dimensionality reduction.
#'
#' Returns the name of all classes that inherit from
#' \code{\link{dimRedMethod-class}} to use with \code{\link{embed}}.
#'
#' @return a character vector with the names of classes that inherit
#' from \code{dimRedMethod}.
#'
#' @examples
#' dimRedMethodList()
#'
#' @family dimensionality reduction methods
#' @export
dimRedMethodList <- function () {
## return(c(
## "graph_kk",
## "graph_drl",
## "graph_fr",
## "drr",
## "isomap",
## "diffmap",
## "tsne",
## "nmds",
## "mds",
## "ica",
## "pca",
## "lle",
## ## those two methods are buggy and can produce segfaults:
## ## "loe", "soe",
## "leim",
## "kpca"
## ))
names(completeClassDefinition("dimRedMethod", doExtends = FALSE)@subclasses)
}
## matchPars puts standard values in for omitted arguments
setGeneric("matchPars", function(object, pars) standardGeneric("matchPars"),
valueClass = c("list"))
setMethod("matchPars",
signature(object = "dimRedMethod",
pars = "list"),
definition = function(object, pars) {
nsp <- names(object@stdpars)
ncp <- names(pars)
nap <- union(nsp, ncp)
res <- list()
## exists can deal with elements being NULL
## to assign list@el <- NULL do:
## list["el"] <- list(NULL)
for (np in nap) {
miss.std <- !exists(np, where = object@stdpars)
miss.par <- !exists(np, where = pars)
if (miss.std) {
warning("Parameter matching: ", np,
" is not a standard parameter, ignoring.")
} else if (miss.par) {
res[np] <- object@stdpars[np]
} else {
res[np] <- pars[np]
}
}
## If a method did not accept any parameters we would have to
## return NULL here, so that embed() does not create args$pars and
## pass it on via do.call():
## if (length(res) != 0) return(res) else return(NULL)
## For now we try without this, as all methods should have a pars
## argument.
return(res)
})
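## e.g. (a sketch): matchPars(MDS(), list(ndim = 3)) keeps the supplied
## `ndim` and fills `d` from the standard parameters of MDS(); unknown
## names trigger the warning above and are dropped.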
dimRed/R/mixColorSpaces.R
#' Mixing color ramps
#'
#' Mix different color ramps.
#'
#' Automatically creates colors to represent a varying number of
#' dimensions.
#'
#' @param vars a list of variables
#' @param ramps a list of color ramps, one for each variable.
#'
#' @examples
#' cols <- expand.grid(x = seq(0, 1, length.out = 10),
#' y = seq(0, 1, length.out = 10),
#' z = seq(0, 1, length.out = 10))
#' mixed <- mixColor3Ramps(cols)
#'
#' \dontrun{
#' library(rgl)
#' plot3d(cols$x, cols$y, cols$z, col = mixed, pch = 15)
#'
#' cols <- expand.grid(x = seq(0, 1, length.out = 10),
#' y = seq(0, 1, length.out = 10))
#' mixed <- mixColor2Ramps(cols)
#' }
#'
#' plot(cols$x, cols$y, col = mixed, pch = 15)
#' @importFrom grDevices colorRamp
#' @importFrom grDevices rgb
#' @export
mixColorRamps <- function (vars, ramps) {
if (length(vars) > length(ramps)) stop("need at least as many ramps as vars")
nvars <- length(vars)
rgbs <- list()
for (i in 1:nvars){
rgbs[[i]] <- ramps[[i]](scale01(as.numeric(vars[[i]])))
}
retrgb <- Reduce(`+`, rgbs)
res <- apply(retrgb, 2, function(x) (x - min(x)) / (max(x) - min(x)))
res[is.nan(res)] <- 0
return(rgb(res))
}
#' @rdname mixColorRamps
#' @export
mixColor1Ramps <- function (vars,
ramps = colorRamp(c("blue", "black", "red"))) {
mixColorRamps(vars, list(ramps))
}
#' @rdname mixColorRamps
#' @export
mixColor2Ramps <- function (vars,
ramps = list(colorRamp(c("blue", "green")),
colorRamp(c("blue", "red")))) {
mixColorRamps(vars, ramps)
}
#' @rdname mixColorRamps
#' @export
mixColor3Ramps <- function (vars,
ramps = list(colorRamp(c("#001A00", "#00E600")),
colorRamp(c("#00001A", "#0000E6")),
colorRamp(c("#1A0000", "#E60000")))) {
mixColorRamps(vars, ramps)
}
colorize <- function (vars) {
l <- length(vars)
if (l == 1) return(mixColor1Ramps(vars))
if (l == 2) return(mixColor2Ramps(vars))
if (l == 3) return(mixColor3Ramps(vars))
return("#000000")
}
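## scale x linearly to [0, 1], e.g.: scale01(c(2, 4, 6)) returns
## c(0, 0.5, 1).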
scale01 <- function(x,
low = min(x, na.rm = TRUE),
high = max(x, na.rm = TRUE)) {
x <- (x - low) / (high - low)
x
}
dimRed/R/get_info.R
#' getRotationMatrix
#'
#' Extract the rotation matrix from \code{\link{dimRedResult}} objects derived from PCA and FastICA
#'
#' The data has to be pre-processed the same way as the method does, e.g.
#' centering and/or scaling.
#'
#' @param x of type \code{\link{dimRedResult}}
#' @return a matrix
#'
#' @examples
#' dat <- loadDataSet("Iris")
#'
#' pca <- embed(dat, "PCA")
#' ica <- embed(dat, "FastICA")
#'
#' rot_pca <- getRotationMatrix(pca)
#' rot_ica <- getRotationMatrix(ica)
#'
#' scale(getData(dat), TRUE, FALSE) %*% rot_pca - getData(getDimRedData(pca))
#' scale(getData(dat), TRUE, FALSE) %*% rot_ica - getData(getDimRedData(ica))
#'
#' @family convenience functions
#' @export
getRotationMatrix <- function(x) {
if(!inherits(x, "dimRedResult")) stop("x must be of type 'dimRedResult'")
if(x@method == "PCA") return(environment(x@apply)$rot)
if(x@method == "PCA_L1") return(environment(x@apply)$rot)
if(x@method == "FastICA") return(environment(x@apply)$res$K %*% environment(x@apply)$res$W)
stop(paste("Not implemented for", x@method))
}
dimRed/R/l1pca.R
#' Principal Component Analysis with L1 error.
#'
#' S4 Class implementing PCA with L1 error.
#'
#' PCA transforms the data so that the L2 reconstruction error is minimized
#' or the variance of the projected data is maximized. This is sensitive to
#' outliers; L1 PCA minimizes the L1 reconstruction error or maximizes the
#' sum of the L1 norms of the projected observations.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' PCA_L1 can take the following parameters:
#' \describe{
#' \item{ndim}{The number of output dimensions.}
#' \item{center}{logical, should the data be centered, defaults to \code{TRUE}.}
#' \item{scale.}{logical, should the data be scaled, defaults to \code{FALSE}.}
#' \item{fun}{character or function, the method to apply, see the \code{pcaL1} package}
#' \item{\ldots}{other parameters for \code{fun}}
#' }
#'
#' @section Implementation:
#'
#' Wraps around the different methods in the \code{pcaL1} package. Because PCA
#' can be reduced to a simple rotation, forward and backward projection
#' functions are supplied.
#'
#' @references
#'
#' Park, Y.W., Klabjan, D., 2016. Iteratively Reweighted Least Squares
#' Algorithms for L1-Norm Principal Component Analysis, in: Data Mining (ICDM),
#' 2016 IEEE 16th International Conference On. IEEE, pp. 430-438.
#'
#' @examples
#' if(requireNamespace("pcaL1", quietly = TRUE)) {
#' dat <- loadDataSet("Iris")
#' emb <- embed(dat, "PCA_L1")
#'
#' plot(emb, type = "2vars")
#' plot(inverse(emb, getData(getDimRedData(emb))), type = "3vars")
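#'
#' ## a sketch: use a different algorithm from the pcaL1 package
#' emb2 <- embed(dat, "PCA_L1", fun = "l1pca")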
#' }
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export PCA_L1
#' @exportClass PCA_L1
PCA_L1 <- setClass(
"PCA_L1",
contains = "dimRedMethod",
prototype = list(
stdpars = list(ndim = 2,
center = TRUE,
scale. = FALSE,
fun = "awl1pca",
projections = "l1"),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("pcaL1")
ndim <- pars$ndim
orgnames <- colnames(data@data)
newnames <- paste0("PC", seq_len(ndim))
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
data <- data@data
fun2 <- if(!is.function(pars$fun)) {
get(pars$fun, asNamespace("pcaL1"))
} else {
pars$fun
}
ce <- if (is.numeric(pars$center)) {
if (length(pars$center) != dim(data)[2])
error("center must be logical or have the same length as the data dimensions")
pars$center
} else if (is.logical(pars$center)) {
if (pars$center) colMeans(data) else FALSE
}
sc <- if (is.numeric(pars$scale.)) {
if (length(pars$scale.) != dim(data)[2])
stop("center must be logical or have the same length as the data dimensions")
pars$scale.
} else if (is.logical(pars$scale.)) {
if (pars$scale.) apply(data, 2, sd) else FALSE
}
if (!(isFALSE(pars$center) && isFALSE(pars$scale.)))
data <- scale(data, ce, sc)
pars$center <- NULL
pars$scale. <- NULL
pars$ndim <- NULL
pars$fun <- NULL
res <- do.call(
fun2,
c(list(X = data, projDim = ndim, center = FALSE), pars)
)
## evaluate results here for functions
data <- res$scores
colnames(data) <- paste0("PC", seq_len(ndim))
rot <- res$loadings[, seq_len(ndim), drop = FALSE]
dimnames(rot) <- list(orgnames, newnames)
rerot <- t(rot)
appl <- function(x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(proj) != ncol(orgdata))
stop("x must have the same number of dimensions ",
"as the original data")
if (ce[1] != FALSE) proj <- t(apply(proj, 1, function(x) x - ce))
if (sc[1] != FALSE) proj <- t(apply(proj, 1, function(x) x / sc))
proj <- if (pars$projections == "l1") {
tmp <- pcaL1::l1projection(proj, rot)$scores
colnames(tmp) <- paste0("PC", seq_len(ndim))
tmp
} else if (pars$projections == "l2") {
proj %*% rot
} else {
stop("projections must be eiter 'l1' or 'l2'")
}
proj <- new("dimRedData", data = proj, meta = appl.meta)
return(proj)
}
inv <- function(x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(proj) > ncol(data))
stop("x must have less or equal number of dimensions ",
"as the original data")
d <- ncol(proj)
reproj <- proj %*% rerot[seq_len(d), ]
if (sc[1] != FALSE)
reproj <- t(apply(reproj, 1, function(x) x * sc))
if (ce[1] != FALSE)
reproj <- t(apply(reproj, 1, function(x) x + ce))
colnames(reproj) <- colnames(orgdata)
reproj <- new("dimRedData", data = reproj, meta = appl.meta)
return(reproj)
}
res <- new(
"dimRedResult",
data = new("dimRedData",
data = data,
meta = meta),
org.data = orgdata,
apply = appl,
inverse = inv,
has.org.data = keep.org.data,
has.apply = TRUE,
has.inverse = TRUE,
method = "PCA_L1",
pars = pars
)
return(res)
})
)
dimRed/R/dimRedResult-class.R
#' @include misc.R
#' @include dimRedData-class.R
NULL
#' Class "dimRedResult"
#'
#' A class to hold the results of a dimensionality reduction.
#'
#' @slot data Output data of class dimRedData.
#' @slot org.data original data, a matrix.
#' @slot apply a function to apply the method to out-of-sample data,
#' may not exist.
#' @slot inverse a function to calculate the original coordinates from
#' reduced space, may not exist.
#' @slot has.org.data logical, if the original data is included in the object.
#' @slot has.apply logical, if a forward method exists.
#' @slot has.inverse logical, if an inverse method exists.
#' @slot method saves the method used.
#' @slot pars saves the parameters used.
#' @slot other.data other data produced by the method, e.g. a distance matrix.
#'
#' @examples
#' ## Create object by embedding data
#' iris.pca <- embed(loadDataSet("Iris"), "PCA")
#'
#' ## Convert the result to a data.frame
#' head(as(iris.pca, "data.frame"))
#' head(as.data.frame(iris.pca))
#'
#' ## There are no name clashes to avoid here:
#' head(as.data.frame(iris.pca,
#' org.data.prefix = "",
#' meta.prefix = "",
#' data.prefix = ""))
#'
#' ## Print it more or less nicely:
#' print(iris.pca)
#'
#' ## Get the embedded data as a dimRedData object:
#' getDimRedData(iris.pca)
#'
#' ## Get the original data including meta information:
#' getOrgData(iris.pca)
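#'
#' ## Apply the trained method to new data and map an embedding back
#' ## to the original space (a sketch; PCA provides both functions):
#' predict(iris.pca, loadDataSet("Iris")[1:5])
#' inverse(iris.pca, getDimRedData(iris.pca)[1:5])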
#'
#' @family dimRedResult
#' @export dimRedResult
#' @exportClass dimRedResult
dimRedResult <- setClass(
"dimRedResult",
slots = c(
data = "dimRedData",
org.data = "matrix",
apply = "function",
inverse = "function",
has.org.data = "logical",
has.apply = "logical",
has.inverse = "logical",
method = "character",
pars = "list",
other.data = "list"
),
prototype = list(
data = new("dimRedData"),
org.data = matrix(numeric(0), 0, 0),
apply = function(x) NA,
inverse = function(x) NA,
has.org.data = FALSE,
has.apply = FALSE,
has.inverse = FALSE,
method = "",
pars = list(),
other.data = list()
)
)
setAs(
from = "dimRedResult",
to = "data.frame",
def = function(from){
if (from@has.org.data) {
org.data <- from@org.data
names(org.data) <- paste("org", names(org.data), sep = ".")
cbind(as(from@data, "data.frame"), as.data.frame(org.data))
} else {
as(from@data, "data.frame")
}
}
)
#' @importFrom stats predict
#' @export
setGeneric(
"predict", function(object, ...) standardGeneric("predict"),
useAsDefault = stats::predict
)
#' @describeIn dimRedResult apply a trained method to new data, does not work
#' with all methods, will give an error if there is no \code{apply}.
#' In some cases the apply function may only be an approximation.
#' @param xnew new data, of type \code{\link{dimRedData}}
#'
#' @export
setMethod(f = "predict",
signature = "dimRedResult",
definition = function(object, xnew) {
if (object@has.apply) object@apply(xnew)
else stop("object does not have an apply function")
})
#' @export
setGeneric(
"inverse",
function(object, ...) standardGeneric("inverse")
)
#' @describeIn dimRedResult inverse transformation of embedded data, does not
#' work with all methods, will give an error if there is no \code{inverse}.
#' In some cases the inverse function may only be an approximation.
#' @param ynew embedded data, of type \code{\link{dimRedData}}
#'
#' @aliases inverse
#' @export
setMethod(f = "inverse",
signature = c("dimRedResult"),
definition = function(object, ynew) {
if (object@has.inverse) object@inverse(ynew)
else stop("object does not have an inverse function")
})
#' @param x Of class \code{dimRedResult}
#' @param org.data.prefix Prefix for the columns of the org.data slot.
#' @param meta.prefix Prefix for the columns of \code{x@@data@@meta}.
#' @param data.prefix Prefix for the columns of \code{x@@data@@data}.
#'
#' @describeIn dimRedResult convert to \code{data.frame}
#' @export
setMethod(f = "as.data.frame",
signature = c("dimRedResult"),
definition = function(x, org.data.prefix = "org.",
meta.prefix = "meta.",
data.prefix = "") {
tmp <- list()
if (nrow(x@data@meta) > 0){
tmp$meta <- as.data.frame(x@data@meta)
names(tmp$meta) <- paste0(meta.prefix,
colnames(x@data@meta))
}
tmp$data <- as.data.frame(x@data@data)
names(tmp$data) <- paste0(data.prefix, colnames(x@data@data))
if (x@has.org.data){
tmp$org.data <- as.data.frame(x@org.data)
names(tmp$org.data) <- paste0(org.data.prefix, colnames(x@org.data))
}
names(tmp) <- NULL
data.frame(tmp, stringsAsFactors = FALSE)
})
#' @param object Of class \code{dimRedResult}
#' @describeIn dimRedResult Get the parameters with which the method
#' was called.
#' @export
setMethod(
f = "getPars",
signature = "dimRedResult",
definition = function (object) {
object@pars
}
)
#' @describeIn dimRedResult Get the number of embedding dimensions.
#' @export
setMethod(
f = "getNDim",
signature = "dimRedResult",
definition = function (object) {
result <- getPars(object)$ndim
if(is.null(result)) dim(object@data@data)[2] else result
}
)
#' @describeIn dimRedResult Method for printing.
#' @import utils
#' @export
setMethod(
f = "print",
signature = "dimRedResult",
definition = function(x) {
cat("Method:\n")
cat(x@method, "\n")
cat("Parameters:\n")
utils::str(x@pars)
}
)
#' @describeIn dimRedResult Get the original data and meta.data
#' @export
setMethod(
f = "getOrgData",
signature = "dimRedResult",
definition = function(object) {
return(new("dimRedData",
data = object@org.data,
meta = object@data@meta))
}
)
#' @describeIn dimRedResult Get the embedded data
#' @export
setMethod(
f = "getDimRedData",
signature = "dimRedResult",
definition = function(object) {
return(object@data)
}
)
#' @describeIn dimRedResult Extract the number of embedding dimensions.
#'
#' @examples
#' ## Get the number of variables:
#' ndims(iris.pca)
#'
#' @export
setMethod(
"ndims",
"dimRedResult",
function(object) ncol(object@data@data)
)
#' @describeIn dimRedResult Get other data produced by the method
#' @export
setMethod(
f = "getOtherData",
signature = "dimRedResult",
definition = function(object) object@other.data
)
dimRed/R/dimRed.R
#' @title The dimRed package
#'
#' @description This package simplifies dimensionality reduction in R by
#' providing a framework of S4 classes and methods. dimRed collects
#' dimensionality reduction methods that are implemented in R and implements
#' others. It gives them a common interface and provides plotting
#' functions for visualization and functions for quality assessment.
#'
#' Funding provided by the Department for Biogeochemical Integration,
#' Empirical Inference of the Earth System Group, at the Max Planck
#' Institute for Biogeochemistry, Jena.
#'
#' @references
#'
#' Lee, J.A., Renard, E., Bernard, G., Dupont, P., Verleysen, M.,
#' 2013. Type 1 and 2 mixtures of Kullback-Leibler divergences as cost
#' functions in dimensionality reduction based on similarity
#' preservation. Neurocomputing. 112,
#' 92-107. doi:10.1016/j.neucom.2012.12.036
#'
#' Lee, J.A., Lee, J.A., Verleysen, M., 2008. Rank-based quality
#' assessment of nonlinear dimensionality reduction. Proceedings of
#' ESANN 2008 49-54.
#'
#' Chen, L., Buja, A., 2006. Local Multidimensional Scaling for
#' Nonlinear Dimension Reduction, Graph Layout and Proximity Analysis.
#'
#'
#' @import methods
#' @importFrom magrittr %>%
#'
"_PACKAGE"
dimRed/R/embed.R
#' dispatches the different methods for dimensionality reduction
#'
#' wraps around all dimensionality reduction functions.
#'
#' Method must be one of \code{\link{dimRedMethodList}()}; partial matching
#' is performed. All parameters start with a dot, to avoid clashes
#' with partial argument matching (see the R manual, section 4.3.2). If
#' there should ever occur any clashes in the arguments, call the
#' function with all arguments named, e.g. \code{embed(.data = dat,
#' .method = "mymethod", .d = "some parameter")}.
#'
#' @param .data object of class \code{\link{dimRedData}}, will be converted to
#' be of class \code{\link{dimRedData}} if necessary; see examples for
#' details.
#' @param .method character vector naming one of the dimensionality reduction
#' techniques.
#' @param .mute a character vector containing the elements you want to mute
#' (\code{c("message", "output")}), defaults to \code{character(0)}.
#' @param .keep.org.data \code{TRUE}/\code{FALSE} keep the original data.
#' @param ... the parameters, internally passed as a list to the dimensionality
#' reduction method as \code{pars = list(...)}
#' @return an object of class \code{\link{dimRedResult}}
#'
#' @examples
#' ## embed a data.frame using a formula:
#' as.data.frame(
#' embed(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
#' iris, "PCA")
#' )
#'
#' ## embed a data.frame and return a data.frame
#' as.data.frame(embed(iris[, 1:4], "PCA"))
#'
#' ## embed a matrix and return a data.frame
#' as.data.frame(embed(as.matrix(iris[, 1:4]), "PCA"))
#'
#' \dontrun{
#' ## embed dimRedData objects
#' embed_methods <- dimRedMethodList()
#' quality_methods <- dimRedQualityList()
#' dataset <- loadDataSet("Iris")
#'
#' quality_results <- matrix(NA, length(embed_methods), length(quality_methods),
#' dimnames = list(embed_methods, quality_methods))
#' embedded_data <- list()
#'
#' for (e in embed_methods) {
#' message("embedding: ", e)
#' embedded_data[[e]] <- embed(dataset, e, .mute = c("message", "output"))
#' for (q in quality_methods) {
#' message(" quality: ", q)
#' quality_results[e, q] <- tryCatch(
#' quality(embedded_data[[e]], q),
#' error = function(e) NA
#' )
#' }
#' }
#'
#' print(quality_results)
#' }
#' @export
setGeneric("embed", function(.data, ...) standardGeneric("embed"),
valueClass = "dimRedResult")
#' @describeIn embed embed a data.frame using a formula.
#' @param .formula a formula, see \code{\link{as.dimRedData}}.
#' @export
setMethod(
"embed",
"formula",
function(.formula, .data, .method = dimRedMethodList(),
.mute = character(0), .keep.org.data = TRUE,
...) {
if (!is.data.frame(.data)) stop(".data must be a data.frame")
.data <- as.dimRedData(.formula, .data)
embed(.data, .method, .mute, .keep.org.data, ...)
}
)
#' @describeIn embed Embed anything as long as it can be coerced to
#' \code{\link{dimRedData}}.
#' @export
setMethod(
"embed",
"ANY",
function(.data, .method = dimRedMethodList(),
.mute = character(0), .keep.org.data = TRUE,
...) {
embed(as(.data, "dimRedData"), .method, .mute, .keep.org.data, ...)
}
)
#' @describeIn embed Embed a dimRedData object
#' @export
setMethod(
"embed",
"dimRedData",
function(.data, .method = dimRedMethodList(),
.mute = character(0), #c("message", "output"),
.keep.org.data = TRUE,
...) {
.method <- if (all(.method == dimRedMethodList())) "PCA"
else match.arg(.method)
methodObject <- getMethodObject(.method)
args <- list(
data = as(.data, "dimRedData"),
keep.org.data = .keep.org.data
)
args$pars <- matchPars(methodObject, list(...))
devnull <- if (Sys.info()["sysname"] != "Windows")
"/dev/null"
else
"NUL"
if ("message" %in% .mute){
devnull1 <- file(devnull, "wt")
sink(devnull1, type = "message")
on.exit({
sink(file = NULL, type = "message")
close(devnull1)
}, add = TRUE)
}
if ("output" %in% .mute) {
devnull2 <- file(devnull, "wt")
sink(devnull2, type = "output")
on.exit({
sink()
close(devnull2)
}, add = TRUE)
}
do.call(methodObject@fun, args)
}
)
getMethodObject <- function (method) {
## switch(
## method,
## graph_kk = kamada_kawai,
## graph_drl = drl,
## graph_fr = fruchterman_reingold,
## drr = drr,
## isomap = isomap,
## diffmap = diffmap,
## tsne = tsne,
## nmds = nmds,
## mds = mds,
## ica = fastica,
## pca = pca,
## lle = lle,
## loe = loe,
## soe = soe,
## leim = leim,
## kpca = kpca
## )
method <- match.arg(method, dimRedMethodList())
do.call(method, list())
}
dimRed/R/rotate.R
## rotate X in such a way that the values of Y have maximum squared
## correlation with the dimensions specified in axes. We optimize
## axes[1] first, then axes[2] without axes[1], ...
## We maximize the squared correlations of the original variables
## with the axes of the embedding; the final result is
## sum over axes of sum(correlation(variables, axis)^2).
setGeneric(
"maximize_correlation",
function(object, ...) standardGeneric("maximize_correlation"),
valueClass = "dimRedResult"
)
#' Maximize Correlation with the Axes
#'
#' Rotates the data in such a way that the correlation with the first
#' \code{naxes} axes is maximized.
#'
#' Methods that do not use eigenvector decomposition, like t-SNE, often
#' do not align the data with the axes according to the correlation of
#' variables with the data. \code{maximize_correlation} uses the
#' \code{\link[optimx]{optimx}} package to rotate the data in such a
#' way that the original variables have maximum correlation with the
#' embedding axes.
#'
#' @param object A dimRedResult object
#' @param naxes the number of axes to optimize for.
#' @param cor_method which correlation method to use
#'
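#' @examples
#' \dontrun{
#' ## a sketch, assuming the optimx and Rtsne packages are installed:
#' dat <- loadDataSet("3D S Curve", n = 300)
#' emb <- embed(dat, "tSNE")
#' emb_rot <- maximize_correlation(emb)
#' plot(emb_rot, type = "2vars")
#' }
#'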
#' @aliases maximize_correlation
#' @export
setMethod(
"maximize_correlation",
"dimRedResult",
function(object, naxes = ncol(object@data@data), cor_method = "pearson"){
## if (missing(naxes)) naxes <- ncol(object@data@data)
## if (missing(cor_method)) cor_method <- "pearson"
if (!object@has.org.data) stop("object requires original data")
if (length(naxes) != 1 || naxes < 1 || naxes > ncol(object@data@data))
stop("naxes must specify the numbers of axes to optimize for, ",
"i.e. a single integer between 1 and ncol(object@data@data)")
## try to partially match cor_method:
cor_method <-
cor_method[pmatch(cor_method, c("pearson", "kendall", "spearman"))]
if (is.na(cor_method))
stop("cor_method must match one of ",
"'pearson', 'kendall', or 'spearman', ",
"at least partially.")
mcres <- .maximize_correlation(object@data@data,
object@org.data,
1:naxes,
cor_method)
res <- object
res@data@data <- mcres$rotated
return(res)
}
)
.maximize_correlation <- function(X, Y,
axes = 1:ncol(X),
cor_method = "pearson"){
if (nrow(X) != nrow(Y))
stop("'X' and 'Y' must have the same number of rows")
if (max(axes) > ncol(X)){
axes <- axes[ axes <= ncol(X) ]
warning("'max(axes)' must be <= 'ncol(X)', removing some axes")
}
chckpkg("optimx")
xndim <- ncol(X)
without_axes <- integer(0)
res <- list()
for (axis in axes){
without_axes <- c(without_axes, axis)
nplanes <- xndim - length(without_axes)
planes <- matrix(NA, 2, nplanes)
planes[1, ] <- axis
planes[2, ] <- (1:xndim)[-without_axes]
if (ncol(planes) == 0)
break
o <- optimx::optimx(
par = rep(0, nplanes),
fn = obj,
method = "L-BFGS-B",
lower = 0,
upper = 2 * pi,
X = as.matrix(X),
Y = as.matrix(Y),
axis = axis,
without_axes = without_axes,
cor_method = cor_method
)
## The result looks like this:
## p1 value fevals gevals niter convcode kkt1 kkt2 xtimes
## L-BFGS-B 0 -0.1613494 1 1 NA 0 FALSE NA 0.016
if (o$convcode > 0) stop("rotation did not converge.")
res_idx <- length(res) + 1
res[[res_idx]] <- list()
res[[res_idx]]$axis <- axis
res[[res_idx]]$without_axes <- without_axes
res[[res_idx]]$angs <- unname( unlist(o[1, 1:nplanes]) )
res[[res_idx]]$planes <- planes
res[[res_idx]]$X <- rotate(res[[res_idx]]$angs, planes, X)
res[[res_idx]]$cor <- -o$value
}
## calculate the correlation for axes
nres <- length(res)
if (nres > 0) {
## the result is the sum of the mean squared correlations of the
## original variables with the axes. "res[[i]]$cor" contains the
## mean squared correlation of the variables with axis "i"
res$result <- 0
for (i in 1:nres)
res$result <- res$result + res[[i]]$cor ^ 2
## res$result <- res$result / length(res)
## rotate the input to maximize correlations
res$rotated <- X
for (i in 1:nres)
res$rotated <- rotate(res[[i]]$angs, res[[i]]$planes, res$rotated)
} else {
## if we only had one dimension, simply return the mean squared
## correlation and don't rotate
res$result <- sum(correlate(X, Y, cor_method) ^ 2)
res$rotated <- X
}
res
}
#### helper functions for rotation
## we create a number or rotation matrices around the 2d planes
## spanned by the orthonormal matrices, multiply them for a general
## rotation which is then applied to the data X
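## e.g. (a sketch): rotate(pi / 2, matrix(c(1, 2), 2, 1), diag(2))
## rotates the two unit vectors by 90 degrees in the (1, 2)-plane.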
rotate <- function (angs, planes, X) {
ndim <- ncol(X)
nplanes <- ncol(planes)
if (length(angs) != nplanes)
stop("length(angs) must equal ncol(planes)")
## loop over the planes to construct general rotation matrix
rotmat <- diag(ndim)
for (p in 1:nplanes) {
## 2d rotation
## possible optimization: create large rotation matrix
## directly and insert values linearly without a for loop
rotmat2d <- matrix(
c(cos(angs[p]), -sin(angs[p]),
sin(angs[p]), cos(angs[p])),
2, 2, byrow = TRUE
)
p_rotmat <- diag(ndim)
for (i in 1:2)
for (j in 1:2)
p_rotmat[ planes[i, p], planes[j, p] ] <- rotmat2d[i, j]
rotmat <- rotmat %*% p_rotmat
}
t(rotmat %*% t(X))
}
get_planes <- function(ndims, axis, without_axes){
nplanes <- ndims - length(without_axes)
planes <- matrix(NA, 2, nplanes)
planes[1, ] <- axis
planes[2, ] <- (1:ndims)[c(-axis, -without_axes)]
planes
}
obj <- function(alpha, X, Y, axis, without_axes, cor_method = "pearson"){
## correlation with first axis
xndim <- ncol(X)
planes <- get_planes(xndim, axis, without_axes)
X2 <- rotate(alpha, planes, X)
## cor(x, y) returns a matrix with the correlations between the
## columns of x = X2 (rows) and the columns of y = Y (columns). We
## want the mean of the squared correlations of all original
## variables with the current axis, i.e. we require the relevant
## (axis) row of the resulting matrix.
## Possible optimization: use only the relevant column of Y
-mean(correlate(
X2, Y,
#use = "pairwise.complete.obs",
method = cor_method
)[axis, ] ^ 2)
}
correlate <- function (x, y, method, ...) {
if (method != "kendall"){
return(stats::cor(x, y, method = method, ...))
} else {
chckpkg("pcaPP")
## make the cor.fk method behave like cor for matrices:
if (is.matrix(x) && is.matrix(y)) {
res <- matrix(
NA, nrow = ncol(x), ncol = ncol(y),
dimnames = list(colnames(x), colnames(y))
)
for (i in 1:ncol(x)) {
for (j in 1:ncol(y)){
res[i, j] <- pcaPP::cor.fk(x[, i], y[, j])
}
}
return(res)
} else if (is.null(dim(x)) && is.null(dim(y))){
return(pcaPP::cor.fk(x, y))
} else {
stop("something is wrong with the input of 'correlate()'")
}
}
}
dimRed/R/soe.R
## #' Soft Ordinal Embedding
## #'
## #' Instance of \code{\link{dimRedMethod}} for Soft Ordinal Embedding.
## #'
## #' For details see \code{\link[loe]{SOE}}.
## #'
## #'
## #' @examples
## #' dat <- loadDataSet("3D S Curve", n = 50)
## #' soe <- SOE()
## #' emb <- soe@fun(dat, soe@stdpars)
## #'
## #'
## #' plot(emb@data@data)
## #'
## #'
## #' @include dimRedResult-class.R
## #' @include dimRedMethod-class.R
## #' @export
## SOE <- setClass(
## "SOE",
## contains = "dimRedMethod",
## prototype = list(
## stdpars = list(d = stats::dist, knn = 50, ndim = 2),
## fun = function (data,
## pars,
## keep.org.data = TRUE) {
## chckpkg("loe")
## meta <- data@meta
## orgdata <- if (keep.org.data) data@data else NULL
## indata <- data@data
## outdata <- loe::SOE(loe::get.order(as.matrix(pars$d(indata))),
## N = nrow(indata), p = pars$ndim)$X
## colnames(outdata) <- paste0("SOE", 1:ncol(outdata))
## return(new(
## "dimRedResult",
## data = new("dimRedData",
## data = outdata,
## meta = meta),
## org.data = orgdata,
## has.org.data = keep.org.data,
## method = "soe",
## pars = pars
## ))
## })
## )
dimRed/R/misc.R
## if (!isClassUnion("missingORnumeric")) setClassUnion("missingORnumeric", c("numeric", "missing"))
## if (!isClassUnion("missingORcharacter")) setClassUnion("missingORcharacter", c("character", "missing"))
## if (!isClassUnion("missingORlogical")) setClassUnion("missingORlogical", c("logical", "missing"))
## if (!isClassUnion("missingORfunction")) setClassUnion("missingORfunction", c("function", "missing"))
# Squared euclidean distance between points in A and B
# taken from http://blog.felixriedel.com/2013/05/pairwise-distances-in-r/
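## e.g.: for a matrix A, pdist2(A, A) equals as.matrix(stats::dist(A))^2
## up to numerical noise.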
pdist2 <- function (A, B) {
an <- rowSums(A ^ 2) # apply(A, 1, function(rvec) crossprod(rvec, rvec))
bn <- rowSums(B ^ 2) # apply(B, 1, function(rvec) crossprod(rvec, rvec))
m <- nrow(A)
n <- nrow(B)
matrix(rep(an, n), nrow = m) +
matrix(rep(bn, m), nrow = m, byrow = TRUE) -
2 * tcrossprod(A, B)
}
## a + b ~ c + d
## becomes
## ~ c + d + 0
rhs <- function (formula) {
fs <- as.character(formula)[3]
stats::as.formula(paste("~", fs, "+ 0"))
}
## a + b ~ c + d
## becomes
## ~ a + b + 0
lhs <- function (formula) {
fs <- as.character(formula)[2]
stats::as.formula(paste("~", fs, "+ 0"))
}
## check if a package is installed
chckpkg <- function (pkg) {
if (!requireNamespace(pkg, quietly = TRUE)) {
stop(paste0("require '", pkg,
"' package, install it using install.packages('",
pkg, "')"))
}
}
## create generics that appear in several different places
#' Converts to data.frame
#'
#' General conversions of objects created by \code{dimRed} to \code{data.frame}.
#' See class documentations for details (\code{\link{dimRedData}},
#' \code{\link{dimRedResult}}). For the documentation of this function in the
#' base package, see \code{\link[base]{as.data.frame.default}}.
#'
#' @param x The object to be converted
#' @param row.names unused in \code{dimRed}
#' @param optional unused in \code{dimRed}
#' @param ... other arguments.
setGeneric(
"as.data.frame",
function(x, row.names, optional, ...) standardGeneric("as.data.frame"),
useAsDefault = base::as.data.frame,
valueClass = "data.frame"
)
#' Converts to dimRedData
#'
#' Conversion functions to dimRedData.
#'
#' @param formula The formula; the left hand side is assigned to the meta
#' slot, the right hand side is assigned to the data slot.
#' @param ... other arguments.
setGeneric(
"as.dimRedData",
function(formula, ...) standardGeneric("as.dimRedData"),
valueClass = "dimRedData"
)
#' Method getData
#'
#' Extracts the data slot.
#'
#' @param object The object to be converted.
setGeneric("getData", function(object) standardGeneric("getData"))
#' Method getMeta
#'
#' Extracts the meta slot.
#'
#' @param object The object to be converted.
#' @param ... other arguments.
setGeneric("getMeta", function(object, ...) standardGeneric("getMeta"))
#' Method getPars
#'
#' Extracts the pars slot.
#'
#' @param object The object to be converted.
#' @param ... other arguments.
setGeneric("getPars", function (object, ...) standardGeneric("getPars"))
#' Method getNDim
#'
#' Extract the number of embedding dimensions.
#'
#' @param object The object to get the dimensions from.
#' @param ... other arguments.
setGeneric("getNDim", function (object, ...) standardGeneric("getNDim"))
#' Method getOrgData
#'
#' Extract the Original data.
#'
#' @param object The object to extract data from.
#' @param ... other arguments.
setGeneric("getOrgData", function (object, ...) standardGeneric("getOrgData"))
#' Method getDimRedData
#'
#' Extract dimRedData.
#' @param object The object to extract data from.
#' @param ... other arguments.
setGeneric("getDimRedData",
function (object, ...) standardGeneric("getDimRedData"))
#' Method getOtherData
#'
#' Extract other data produced by a dimRedMethod
#'
#' @param object The object to extract data from.
#' @param ... other arguments.
setGeneric("getOtherData",
function (object, ...) standardGeneric("getOtherData"),
valueClass = "list")
#' Method print
#'
#' Imports the print method into the package namespace.
#'
#' @param x The object to be printed.
#' @param ... Other arguments for printing.
setGeneric("print", function(x, ...) standardGeneric("print"))
#' Method ndims
#'
#' Extract the number of dimensions.
#'
#' @param object To extract the number of dimensions from.
#' @param ... Arguments for further methods
setGeneric("ndims",
function (object, ...) standardGeneric("ndims"),
valueClass = "integer")
#' installSuggests
#'
#' Install packages which are suggested by dimRed.
#'
#' By default dimRed will not install all of its dependencies, because
#' there are quite a lot and, in case some of them are not available
#' for your platform, you would not be able to install dimRed at all.
#'
#' To solve this, this function automatically installs all the
#' suggested packages.
#'
#' @examples
#' \dontrun{
#' installSuggests()
#' }
#' @export
installSuggests <- function () {
"%w/o%" <- function(x, y) x[!x %in% y]
pkgString <- installed.packages()["dimRed", "Suggests"]
deps <- strsplit(pkgString, ", |,\n")[[1]]
deps <- gsub("\n", "", deps) # Windows needs this
installedPkgs <- rownames(installed.packages())
missingPkgs <- deps %w/o% installedPkgs
if (length(missingPkgs) > 0) {
message("The following packages are missing: ")
cat(missingPkgs, "\n")
message("installing ...")
install.packages(missingPkgs)
pkgString <- installed.packages()["dimRed", "Suggests"]
installedPkgs <- rownames(installed.packages())
missingPkgs <- deps %w/o% installedPkgs
if (length(missingPkgs) > 0) {
message("Could not install the following packages:")
cat(missingPkgs, "\n")
message("please install manually or some methods will not work.")
} else {
message("All necessary packages installed")
message("If things still don't work try 'update.packages()'")
message("If it still does not work file a bugreport!!")
}
} else {
message("All necessary packages installed")
message("If things still don't work try 'update.packages()'")
message("If it still does not work file a bugreport!!")
}
}
## Takes data (matrix or data frame) and returns a KNN graph.
## Implements "smart" choices for the RANN::nn2 parameters; we ignore
## radius search.
## TODO: find out a good limit to switch from kd to bd trees.
## COMMENT: bd trees are buggy, they don't work if there are
## duplicated data points, and checking for duplicates would
## neutralize the performance gain, so bd trees are not really usable.
#' makeKNNgraph
#'
#' Create a K-nearest neighbor graph from data x. Uses
#' \code{\link[RANN]{nn2}} as a fast way to find the nearest neighbors.
#'
#' @param x data, a matrix, observations in rows, dimensions in
#' columns
#' @param k the number of nearest neighbors.
#' @param eps number, if \code{eps > 0} the KNN search is approximate,
#' see \code{\link[RANN]{nn2}}
#' @param diag logical, if \code{TRUE} every node of the returned
#' graph will have an edge with weight \code{0} to itself.
#'
#' @return an object of type \code{\link[igraph]{igraph}} with edge
#' weight being the distances.
#'
makeKNNgraph <- function(x, k, eps = 0, diag = FALSE){
## requireNamespace("RANN")
## requireNamespace("igraph")
## consts
INF_VAL <- 1.340781e+15
NA_IDX <- 0
BDKD_LIM <- 1000000 #todo: figure out a good value here
## select parameters
M <- nrow(x)
treetype <- "kd" # if (M < BDKD_LIM) "kd" else "bd"
# see:
# https://github.com/jefferis/RANN/issues/19
searchtype <- if (eps == 0) "standard" else "priority"
## RANN::nn2 returns the points in data with respect to query
## e.g. the rows in the output are the points in query and the
## columns the points in data.
nn2res <- RANN::nn2(data = x, query = x, k = k + 1, treetype = treetype,
searchtype = searchtype, eps = eps)
## create graph: the first ny nodes will be y, the last nx nodes
## will be x, if x != y
g <- igraph::make_empty_graph(M, directed = FALSE)
g[from = if (diag) rep(seq_len(M), times = k + 1)
else rep(seq_len(M), times = k),
to = if (diag) as.vector(nn2res$nn.idx)
else as.vector(nn2res$nn.idx[, -1]),
attr = "weight"] <- if (diag) as.vector(nn2res$nn.dists)
else as.vector(nn2res$nn.dists[, -1])
return(g)
}
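## e.g. (a sketch): g <- makeKNNgraph(as.matrix(iris[, 1:4]), k = 5)
## gives a graph with nrow(iris) nodes and at most nrow(iris) * 5
## edges, because mutual nearest-neighbor edges are merged.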
makeEpsSparseMatrix <- function(x, eps) {
chckpkg("Matrix")
n <- nrow(x)
dd <- stats::dist(x)
ddind <- dd < eps
rows <- unlist(lapply(2:n, function(x) x:n), use.names = FALSE)
cols <- rep(seq_len(n - 1), times = (n - 1):1)
Matrix::sparseMatrix(i = rows[ddind],
j = cols[ddind],
x = dd[ddind],
dims = c(n, n),
symmetric = TRUE)
}
dimRed/R/nnmf.R
#' Non-Negative Matrix Factorization
#'
#' S4 Class implementing NNMF.
#'
#' NNMF decomposes a matrix into two factors of lower dimension under
#' the constraint that the data (and the projection) are non-negative.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' The method can take the following parameters:
#' \describe{
#' \item{ndim}{The number of output dimensions.}
#' \item{method}{character, which algorithm should be used. See
#' \code{\link[NMF]{nmf}} for possible values. Defaults to
#' "brunet"}
#' \item{nrun}{integer, the number of times the computations are
#' conducted. See \code{\link[NMF]{nmf}}}
#' \item{seed}{integer, a value to control the random numbers used.}
#' \item{options}{named list, other options to pass to \code{\link[NMF]{nmf}}}
#' }
#'
#' @section Implementation:
#'
#' Wraps around \code{\link[NMF]{nmf}}. Note that the estimation uses random
#' numbers. To create reproducible results, set the random number seed in the
#' function call. Also, in many cases, the computations will be conducted
#' in parallel using multiple cores. To disable this, use the option
#' \code{.pbackend = NULL}.
#'
#' @references
#'
#' Lee, D.D., Seung, H.S., 1999. Learning the parts of objects by non-negative
#' matrix factorization. Nature 401, 788-791. https://doi.org/10.1038/44565
#'
#' @examples
#' set.seed(4646)
#' dat <- loadDataSet("Iris")
#' emb <- embed(dat, "NNMF")
#'
#' plot(emb)
#'
#' # project new values:
#' nn_proj <- predict(emb, dat[1:7])
#' plot(nn_proj)
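#'
#' # reconstruct the data from the embedding (a sketch):
#' rec <- inverse(emb, getDimRedData(emb))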
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export NNMF
#' @exportClass NNMF
NNMF <- setClass(
"NNMF",
contains = "dimRedMethod",
prototype = list(
stdpars = list(ndim = 2L,
method = "brunet",
nrun = 1,
seed = sample.int(10^5, 1),
options = list()),
fun = function (data, pars, keep.org.data = TRUE) {
chckpkg("NMF")
chckpkg("MASS")
## TODO: remove this, depends on https://github.com/renozao/NMF/issues/114
## require("NMF")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
data <- data@data
if (!is.matrix(data))
data <- as.matrix(data)
# NMF expects variables in rows and samples in columns
data <- t(data)
if (pars$ndim > nrow(data))
stop("`ndim` should be less than the number of columns.",
call. = FALSE)
if (length(pars$method) != 1)
stop("only supply one `method`", call. = FALSE)
args <- list(x = quote(data), rank = pars$ndim, method = pars$method,
nrun = pars$nrun, seed = pars$seed)
if (length(pars$options) > 0)
args <- c(args, pars$options)
nmf_result <- do.call(NMF::nmf, args)
# this should work but doesn't
# call <- c(list(quote(NMF::nmf)), args)
w <- NMF::basis(nmf_result)
h <- t(NMF::coef(nmf_result))
colnames(w) <- paste0("NNMF", 1:ncol(w))
other.data <- list(w = w)
colnames(h) <- paste0("NNMF", 1:ncol(h))
# evaluate results here for functions
appl <- function (x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
dat <- if (inherits(x, "dimRedData")) x@data else x
if (!is.matrix(dat))
dat <- as.matrix(dat)
if (ncol(dat) != nrow(w))
stop("x must have the same number of columns ",
"as the original data (", nrow(w), ")",
call. = FALSE)
res <- dat %*% t(MASS::ginv(w))
colnames(res) <- paste0("NNMF", 1:ncol(res))
scores <- new("dimRedData", data = res, meta = appl.meta)
return(scores)
}
inv <- function (x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(proj) > ncol(w))
stop("x must have less or equal number of dimensions ",
"as the original data")
res <- tcrossprod(proj, w)
colnames(res) <- colnames(data)
res <- new("dimRedData", data = res, meta = appl.meta)
return(res)
}
## inv <- function(x) {
## appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
## proj <- if (inherits(x, "dimRedData")) x@data else x
## if (ncol(proj) > ncol(data))
## stop("x must have less or equal number of dimensions ",
## "as the original data")
## reproj <- proj %*% other.data$H
## reproj <- new("dimRedData", data = reproj, meta = appl.meta)
## return(reproj)
## }
res <- new(
"dimRedResult",
data = new("dimRedData",
data = h,
meta = meta),
org.data = orgdata,
apply = appl,
inverse = inv,
has.org.data = keep.org.data,
has.apply = TRUE,
has.inverse = TRUE,
method = "NNMF",
pars = pars,
other.data = other.data
)
return(res)
})
)
dimRed/R/hlle.R
#' Hessian Locally Linear Embedding
#'
#' An S4 Class implementing Hessian Locally Linear Embedding (HLLE)
#'
#' HLLE uses local Hessians to approximate the curvature of the manifold
#' and extends LLE to data that are non-convex in low-dimensional space.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' HLLE can take the following parameters:
#' \describe{
#' \item{knn}{neighborhood size}
#' \item{ndim}{number of output dimensions}
#' }
#'
#' @section Implementation:
#' Own implementation, sticks to the algorithm in Donoho and Grimes
#' (2003). Makes use of sparsity to speed up the final embedding.
#'
#' @references
#' Donoho, D.L., Grimes, C., 2003. Hessian eigenmaps: Locally linear
#' embedding techniques for high-dimensional data. PNAS 100,
#' 5591-5596. doi:10.1073/pnas.1031596100
#'
#' @examples
#' dat <- loadDataSet("3D S Curve", n = 300)
#' emb <- embed(dat, "HLLE", knn = 15)
#' plot(emb, type = "2vars")
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export HLLE
#' @exportClass HLLE
HLLE <- setClass(
"HLLE",
contains = "dimRedMethod",
prototype = list(
stdpars = list(knn = 50, ndim = 2),
fun = function(data, pars,
keep.org.data = TRUE) {
chckpkg("RSpectra")
chckpkg("Matrix")
chckpkg("RANN")
if (is.null(pars$knn)) pars$knn <- 50
if (is.null(pars$ndim)) pars$ndim <- 2
if (pars$ndim < 2) stop("ndim must be 2 or larger.")
indata <- data@data
n <- nrow(indata)
hs <- pars$ndim * (pars$ndim + 1) / 2
W <- Matrix::sparseMatrix(i = numeric(0),
j = numeric(0),
x = numeric(0),
dims = c(n, hs * n))
ii <- jj <- ww <- list()
## Identify neighbors:
message(Sys.time(), ": Finding nearest neighbors", sep = "")
nnidx <- RANN::nn2(data = indata, query = indata, k = pars$knn + 1,
treetype = "kd", "standard", eps = 0)$nn.idx#[, -1]
message(Sys.time(), ": Calculating Hessian", sep = "")
for (i in seq_len(n)) {
cat(i, "/", n, "\r", sep = "")
## get neighborhood
Nui <- indata[nnidx[i, ], , drop = FALSE]
## Form tangent coordinates:
Nui <- sweep(Nui, 2, colMeans(Nui), "-")
tc <- svd(Nui, nu = pars$ndim, nv = 0)$u
## Develop Hessian Estimator
Xi <- cbind(
1, tc, tc ^ 2,
apply(combn(seq_len(pars$ndim), 2), 2,
function(x) tc[, x[1]] * tc[, x[2]])
)
tHi <- qr.Q(qr(Xi))[, -(1:(pars$ndim + 1)),
drop = FALSE]
## Add quadratic form to hessian
ii[[i]] <- rep(nnidx[i, ], hs)
jj[[i]] <- rep((i - 1) * hs + (1:hs), each = ncol(nnidx))
ww[[i]] <- as.vector(tHi)
}
H <- as(Matrix::tcrossprod(Matrix::spMatrix(
i = unlist(ii, FALSE, FALSE),
j = unlist(jj, FALSE, FALSE),
x = unlist(ww, FALSE, FALSE),
nrow = n, ncol = n * hs)
), "dgCMatrix")
## Find null space:
message(Sys.time(), ": Embedding", sep = "")
## eigs and eigs_sym converges much more reliably and faster
## with sigma = -eps than with which = "L*"
outdata <- RSpectra::eigs_sym(H, k = pars$ndim + 1, sigma = -1e-5)
message(paste(c("Eigenvalues:", format(outdata$values)),
collapse = " "))
outdata <- outdata$vectors[, order(outdata$values)[-1], drop = FALSE]
colnames(outdata) <- paste0("HLLE", seq_len(ncol(outdata)))
message(Sys.time(), ": DONE", sep = "")
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = data@meta),
org.data = if (keep.org.data) data@data else NULL,
has.org.data = keep.org.data,
method = "HLLE",
pars = pars
))
})
)
dimRed/R/plot.R
#' Plotting of dimRed* objects
#'
#' Plots a object of class dimRedResult and dimRedData. For the
#' documentation of the plotting function in base see here:
#' \code{\link{plot.default}}.
#'
#' Plotting functions for the classes used in \code{dimRed}. They are
#' intended to give a quick overview over the results, so they are
#' somewhat inflexible, e.g. it is hard to modify color scales or
#' plotting parameters.
#'
#' If you require more control over plotting, it is better to convert
#' the object to a \code{data.frame} first and use the standard
#' functions for plotting.
#'
#' @param x dimRedResult/dimRedData class, e.g. output of
#' embedded/loadDataSet
#' @param y Ignored
#' @param type plot type, one of \code{c("pairs", "parpl", "2vars",
#' "3vars", "3varsrgl")}
#' @param col the columns of the meta slot to use for coloring, can be
#' referenced as the column names or numbers of x@@meta
#' @param vars the axes of the embedding to use for plotting
#' @param ... handed over to the underlying plotting function.
#'
#' @examples
#' scurve = loadDataSet("3D S Curve")
#' plot(scurve, type = "pairs", main = "pairs plot of S curve")
#' plot(scurve, type = "parpl")
#' plot(scurve, type = "2vars", vars = c("y", "z"))
#' plot(scurve, type = "3vars")
#'
#' @include mixColorSpaces.R
#' @include dimRedData-class.R
#' @importFrom graphics plot
#'
#' @aliases plot.dimRed
#' @export
setGeneric(
"plot", function(x, y, ...) standardGeneric("plot"),
useAsDefault = graphics::plot
)
#' @describeIn plot Plotting of dimRedData objects
#' @aliases plot.dimRedData
#' @export
setMethod(
f = "plot",
signature = c("dimRedData"),
definition = function(x, type = "pairs",
vars = seq_len(ncol(x@data)),
col = seq_len(min(3, ncol(x@meta))), ...) {
cols <- colorize(x@meta[, col, drop = FALSE])
switch(
type,
"pairs" = {
chckpkg("graphics")
graphics::pairs(x@data[, vars], col = cols, ... )
},
"parpl" = {
chckpkg("MASS")
MASS::parcoord(x@data[, vars], col = cols, ... )
},
"2vars" = {
chckpkg("graphics")
graphics::plot(x@data[, vars[1:2]], col = cols, ... )
},
"3vars" = {
chckpkg("scatterplot3d")
scatterplot3d::scatterplot3d(x@data[, vars[1:3]],
color = cols,
...)
},
"3varsrgl" = {
chckpkg("rgl")
rgl::plot3d(x@data[, vars[1:3]], col = cols, ... )
},
stop("wrong argument to plot.dimRedData")
)
}
)
#' @describeIn plot Plotting of dimRedResult objects.
#' @aliases plot.dimRedResult
#' @export
setMethod(
f = "plot",
signature = c("dimRedResult"),
definition = function (x, type = "pairs",
vars = seq_len(ncol(x@data@data)),
col = seq_len(min(3, ncol(x@data@meta))), ...) {
plot(x = x@data, type = type, vars = vars, col = col, ...)
}
)
#' plot_R_NX
#'
#' Plot the R_NX curve for different embeddings. Takes a list of
#' \code{\link{dimRedResult}} objects as input.
#' Also, the area under the curve is computed for a weighted K
#' (see \link{AUC_lnK_R_NX} for details) and appears in the legend.
#'
#' @param x a list of \code{\link{dimRedResult}} objects. The names of the list
#' will appear in the legend with the AUC_lnK value.
#' @param ndim the number of dimensions, if \code{NA} the original number of
#' embedding dimensions is used, can be a vector giving the embedding
#' dimensionality for each single list element of \code{x}.
#' @param weight the weight function used for K when calculating the AUC, one of
#' \code{c("inv", "ln", "log", "log10")}
#' @family Quality scores for dimensionality reduction
#' @return A ggplot object, the design can be changed by appending
#' \code{theme(...)}
#'
#' @examples
#'
#' ## define which methods to apply
#' embed_methods <- c("Isomap", "PCA")
#' ## load test data set
#' data_set <- loadDataSet("3D S Curve", n = 200)
#' ## apply dimensionality reduction
#' data_emb <- lapply(embed_methods, function(x) embed(data_set, x))
#' names(data_emb) <- embed_methods
#' ## plot the R_NX curves:
#' plot_R_NX(data_emb) +
#' ggplot2::theme(legend.title = ggplot2::element_blank(),
#' legend.position = c(0.5, 0.1),
#' legend.justification = c(0.5, 0.1))
#'
#' @export
plot_R_NX <- function(x, ndim = NA, weight = "inv") {
chckpkg("ggplot2")
chckpkg("tidyr")
chckpkg("scales")
lapply(
x,
function(x)
if (!inherits(x, "dimRedResult"))
stop("x must be a list and ",
"all items must inherit from 'dimRedResult'")
)
rnx <- mapply(function(x, ndim) if(is.na(ndim)) R_NX(x) else R_NX(x, ndim),
x = x, ndim = ndim)
weight <- match.arg(weight, c("inv", "ln", "log", "log10"))
w_fun <- switch(
weight,
inv = auc_ln_k_inv,
log = auc_log_k,
ln = auc_log_k,
log10 = auc_log10_k,
stop("wrong parameter for weight")
)
auc <- apply(rnx, 2, w_fun)
df <- as.data.frame(rnx)
df$K <- seq_len(nrow(df))
qnxgrid <- expand.grid(K = df$K,
rnx = seq(0.1, 0.9, by = 0.1))
## TODO: FIND OUT WHY THIS IS AS IN THE PUBLICATION BUT IS WRONG!
qnxgrid$qnx <- rnx2qnx(qnxgrid$rnx, K = qnxgrid$K, N = nrow(df)) #
qnxgrid$rnx_group <- factor(qnxgrid$rnx)
df <- tidyr::gather_(df,
key_col = "embedding",
value_col = "R_NX",
names(x))
ggplot2::ggplot(df) +
ggplot2::geom_line(ggplot2::aes_string(y = "R_NX", x = "K",
color = "embedding")) +
## TODO: find out if this is wrong:
## ggplot2::geom_line(data = qnxgrid,
## mapping = ggplot2::aes_string(x = "K", y = "qnx",
## group = "rnx_group"),
## linetype = 2,
## size = 0.1) +
ggplot2::geom_line(data = qnxgrid,
mapping = ggplot2::aes_string(x = "K", y = "rnx",
group = "rnx_group"),
linetype = 3,
size = 0.1) +
ggplot2::scale_x_log10(
labels = scales::trans_format("log10",
scales::math_format()),
expand = c(0, 0)
) +
ggplot2::scale_y_continuous(expression(R[NX]),
limits = c(0, 1),
expand = c(0, 0)) +
ggplot2::annotation_logticks(sides = "b") +
ggplot2::scale_color_discrete(
breaks = names(x),
labels = paste(format(auc, digits = 3),
names(x))) +
ggplot2::labs(title = paste0(
"R_NX vs. K",
if (length(ndim) == 1 && !is.na(ndim))
paste0(", d = ", ndim)
else
""
)) +
ggplot2::theme_classic()
}
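## Minimal sketch (hypothetical helper, not part of the package API) of the
## default "inv" weighting behind the AUC values shown in the legend: a
## weighted mean of the R_NX curve with weights 1 / K, the same formula as
## auc_ln_k_inv() in quality.R.
.sketch_auc_inv_weighting <- function(rnx = exp(-seq_len(100) / 30)) {
  Ks <- seq_along(rnx)        # neighborhood sizes K = 1, ..., N - 2
  sum(rnx / Ks) / sum(1 / Ks) # equals auc_ln_k_inv(rnx)
}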
dimRed/R/loe.R 0000644 0001762 0000144 00000003107 13024273620 012561 0 ustar ligges users
## this function produces segfaults and is super slow
## #' Local Ordinal Embedding
## #'
## #' Instance of \code{\link{dimRedMethod}} for Local Ordinal Embedding.
## #'
## #' For details see \code{\link[loe]{LOE}}
## #'
## #' @examples
## #' # for whatever reason the loe package has problems if I run this
## #' # with R CMD check, running it in the REPL works just fine
## #' dat <- loadDataSet("Iris")[sample(20)]
## #' loe <- LOE()
## #' emb <- loe@fun(dat, loe@stdpars)
## #'
## #'
## #' plot(emb@data@data)
## #'
## #' @include dimRedResult-class.R
## #' @include dimRedMethod-class.R
## #' @export
## LOE <- setClass(
## "LOE",
## contains = "dimRedMethod",
## prototype = list(
## stdpars = list(d = stats::dist, knn = 50, ndim = 2),
## fun = function (data, pars,
## keep.org.data = TRUE) {
## chckpkg("loe")
## meta <- data@meta
## orgdata <- if (keep.org.data) data@data else NULL
## indata <- data@data
## data.adj <- loe:::make.kNNG(as.matrix(pars$d(indata)), k = pars$knn)
## outdata <- loe::LOE(data.adj, p = pars$ndim, method = "MM")$X
## colnames(outdata) <- paste0("LOE", 1:ncol(outdata))
## return(new(
## "dimRedResult",
## data = new("dimRedData",
## data = outdata,
## meta = meta),
## org.data = orgdata,
## has.org.data = keep.org.data,
## method = "loe",
## pars = pars
## ))
## })
## )
dimRed/R/dataSets.R 0000644 0001762 0000144 00000014320 13033377101 013550 0 ustar ligges users #' Example Data Sets for dimensionality reduction
#'
#' A compilation of standard data sets that are often used to
#' showcase dimensionality reduction techniques.
#'
#' The argument \code{name} should be one of
#' \code{dataSetList()}. Partial matching is possible, see
#' \code{\link{match.arg}}. Generated data sets contain the internal
#' coordinates of the manifold in the \code{meta} slot. Call
#' \code{dataSetList()} to see what data sets are available.
#'
#'
#'
#' @param name A character vector that specifies the name of the data
#' set.
#' @param n In generated data sets the number of points to be
#' generated, else ignored.
#' @param sigma In generated data sets the standard deviation of the
#' noise added, else ignored.
#' @return \code{loadDataSet} returns an object of class
#'   \code{\link{dimRedData}}; \code{dataSetList()} returns a
#'   character vector with the names of the implemented data sets.
#'
#' @examples
#' ## a list of available data sets:
#' dataSetList()
#'
#' ## Load a data set:
#' swissRoll <- loadDataSet("Swiss Roll")
#' \donttest{plot(swissRoll, type = "3vars")}
#'
#' ## Load Iris data set, partial matching:
#' loadDataSet("I")
#'
#' @name dataSets
NULL
#' @include dimRedData-class.R
#' @rdname dataSets
#' @export
loadDataSet <- function (name = dataSetList(), n = 2000, sigma = 0.05) {
name <- match.arg(name)
switch(
name,
"Swiss Roll" = swissRoll(n, sigma),
"Broken Swiss Roll" = brokenSwissRoll(n, sigma),
"Helix" = helix(n, sigma),
"Twin Peaks" = twinPeaks(n, sigma),
"Sphere" = sphere(n, sigma),
"FishBowl" = fishbowl(n, sigma),
"Ball" = ball(n, sigma),
"3D S Curve" = sCurve(n, sigma),
"variable Noise Helix" = noisyHelix(n, sigma),
"Cube" = cube(n, sigma),
"Iris" = irisdata()
)
}
#' @rdname dataSets
#' @export
dataSetList <- function () {
return(c(
"Swiss Roll",
"Broken Swiss Roll",
"Helix",
"Twin Peaks",
"Sphere",
"Ball",
"FishBowl",
"3D S Curve",
"variable Noise Helix",
"Iris",
"Cube"
))
}
irisdata <- function() {
dd <- as.matrix(datasets::iris[, 1:4])
new("dimRedData",
data = dd,
meta = datasets::iris[, 5, drop = FALSE])
}
swissRoll <- function (n = 2000, sigma = 0.05) {
x <- stats::runif(n, 1.5 * pi, 4.5 * pi)
y <- stats::runif(n, 0, 30)
new("dimRedData",
data = swissRollMapping(x, y) + stats::rnorm(3 * n, sd = sigma),
meta = data.frame(x = x, y = y))
}
brokenSwissRoll <- function (n = 2000, sigma = 0.05) {
x <- c(
stats::runif(floor(n / 2), 1.5 * pi, 2.7 * pi),
stats::runif(ceiling(n / 2), 3.3 * pi, 4.5 * pi)
)
y <- stats::runif(n, 0, 30)
new("dimRedData",
data = swissRollMapping(x, y) + stats::rnorm(3 * n, sd = sigma),
meta = data.frame(x = x, y = y))
}
swissRollMapping <- function (x, y) {
cbind(x = x * cos(x),
y = y,
z = x * sin(x))
}
helix <- function (n = 2000, sigma = 0.05) {
t <- stats::runif(n, 0, 2 * pi)
new("dimRedData",
data = helixMapping(t) + stats::rnorm(3 * n, sd = sigma),
meta = data.frame(t = t))
}
helixMapping <- function (x) {
cbind(x = (2 + cos(8 * x)) * cos(x),
y = (2 + cos(8 * x)) * sin(x),
z = (sin(8 * x)))
}
twinPeaks <- function (n = 2000, sigma = 0.05) {
x <- stats::runif(n, -1, 1)
y <- stats::runif(n, -1, 1)
new("dimRedData",
data = twinPeaksMapping(x, y) + stats::rnorm(3 * n, sd = sigma),
meta = data.frame(x = x, y = y))
}
twinPeaksMapping <- function (x, y) {
cbind(x = x,
y = y,
z = sin(pi * x) * tanh(3 * y))
}
sphere <- function (n = 2000, sigma = 0.05) {
phi <- stats::runif(n, 0, 2 * pi)
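## drawing cos(psi) uniformly from [-1, 1] and taking acos() yields polar
## angles with density proportional to sin(psi), i.e. points that are
## uniformly distributed on the sphere surface: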
psi <- acos(stats::runif(n, -1, 1))
new("dimRedData",
data = sphereMapping(phi, psi) + stats::rnorm(3 * n, sd = sigma),
meta = data.frame(phi = phi, psi = psi))
}
fishbowl <- function (n = 2000, sigma = 0.05) {
phi <- stats::runif(n, 0, 2 * pi)
psi <- acos(stats::runif(n, -1, 0.8))
new("dimRedData",
data = sphereMapping(phi, psi) + stats::rnorm(3 * n, sd = sigma),
meta = data.frame(psi = psi))
}
sphereMapping <- function (phi, psi) {
cbind(x = cos(phi) * sin(psi),
y = sin(phi) * sin(psi),
z = cos(psi))
}
ball <- function (n = 2000, sigma = 0.05) {
phi <- stats::runif(n, 0, 2 * pi)
psi <- acos(stats::runif(n, -1, 1))
## make it uniformly distributed inside the sphere
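## (the fraction of the ball's volume within radius r is r^3, so the
## inverse-CDF transform runif(n)^(1/3) spreads points evenly over the
## volume)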
r <- stats::runif(n) ^ (1 / 3)
new("dimRedData",
data = ballMapping(phi, psi, r) + stats::rnorm(3 * n, sd = sigma),
meta = data.frame(phi = phi, psi = psi, r = r))
}
ballMapping <- function (phi, psi, r) {
cbind(x = r * cos(phi) * sin(psi),
y = r * sin(phi) * sin(psi),
z = r * cos(psi))
}
sCurve <- function (n = 2000, sigma = 0.05) {
t <- stats::runif(n, -1.5 * pi, 1.5 * pi)
y <- stats::runif(n, 0, 2)
new("dimRedData",
data = sCurveMapping(t, y) + stats::rnorm(3 * n, sd = sigma),
meta = data.frame(x = t, y = y))
}
sCurveMapping <- function (t, y) {
cbind(x = sin(t),
y = y,
z = sign(t) * (cos(t) - 1))
}
noisyHelix <- function (n = 2000, sigma = 0.05) {
t <- stats::runif(n, 0, 4 * pi)
min_noise <- 0.1
max_noise <- 1.4
new("dimRedData",
data = noisyHelixMapping(t, min_noise, max_noise) +
stats::rnorm(3 * n, sd = sigma),
meta = data.frame(t = t))
}
noisyHelixMapping <- function(t, min_noise, max_noise) {
make_noise <- function (t){
stats::rnorm(length(t), sd = t * max_noise / max(t) + min_noise)
}
cbind(x = 3 * cos(t) + make_noise(t),
y = 3 * sin(t) + make_noise(t),
z = 2 * t + make_noise(t))
}
cube <- function(n = 2000, sigma = 0.05){
tmp <- cbind(x = stats::runif(n) + stats::rnorm(n, sd = sigma),
y = stats::runif(n) + stats::rnorm(n, sd = sigma),
z = stats::runif(n) + stats::rnorm(n, sd = sigma))
new("dimRedData", data = tmp, meta = tmp)
}
dimRed/R/drr.R 0000644 0001762 0000144 00000014444 13562225201 012576 0 ustar ligges users #' Dimensionality Reduction via Regression
#'
#' An S4 Class implementing Dimensionality Reduction via Regression (DRR).
#'
#' DRR is a non-linear extension of PCA that uses Kernel Ridge regression.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' DRR can take the following parameters:
#' \describe{
#' \item{ndim}{The number of dimensions}
#' \item{lambda}{The regularization parameter for the ridge
#' regression.}
#' \item{kernel}{The kernel to use for KRR, defaults to
#' \code{"rbfdot"}.}
#' \item{kernel.pars}{A list with kernel parameters, elements depend
#' on the kernel used, \code{"rbfdot"} uses \code{"sigma"}.}
#' \item{pca}{logical, should an initial pca step be performed,
#' defaults to \code{TRUE}.}
#' \item{pca.center}{logical, should the data be centered before the
#' pca step. Defaults to \code{TRUE}.}
#' \item{pca.scale}{logical, should the data be scaled before the
#' pca step. Defaults to \code{FALSE}.}
#' \item{fastcv}{logical, should \code{\link[CVST]{fastCV}} from the
#' CVST package be used instead of normal cross-validation.}
#' \item{fastcv.test}{If \code{fastcv = TRUE}, separate test data set for fastcv.}
#' \item{cv.folds}{if \code{fastcv = FALSE}, specifies the number of
#' folds for crossvalidation.}
#' \item{fastkrr.nblocks}{integer, higher values sacrifice numerical
#' accuracy for speed and less memory, see below for details.}
#' \item{verbose}{logical, should the cross-validation results be
#' printed out.}
#' }
#'
#' @section Implementation:
#' Wraps around \code{\link[DRR]{drr}}, see there for details. DRR is
#' a non-linear extension of principal components analysis using Kernel
#' Ridge Regression (KRR, details see \code{\link[CVST]{constructKRRLearner}}
#' and \code{\link[DRR]{constructFastKRRLearner}}). Non-linear
#' regression is used to explain more variance than PCA. DRR provides
#' an out-of-sample extension and a backward projection.
#'
#' The most expensive computations are matrix inversions, therefore the
#' implementation profits a lot from a multithreaded BLAS library.
#' The best parameters for each KRR are determined by cross-validation
#' over all parameter combinations of \code{lambda} and
#' \code{kernel.pars}; using fewer parameter values speeds up
#' computation. Calculation of KRR can be accelerated by increasing
#' \code{fastkrr.nblocks}; it should stay below n^{1/3}, as larger
#' values trade numerical accuracy for speed, for details see
#' \code{\link[DRR]{constructFastKRRLearner}}. Another way to speed up
#' is to use \code{pars$fastcv = TRUE}, which may search the parameter
#' space more efficiently but can also miss the global optimum; I have
#' not run tests on the accuracy of this method.
#'
#'
#'
#' @references
#' Laparra, V., Malo, J., Camps-Valls, G.,
#' 2015. Dimensionality Reduction via Regression in Hyperspectral
#' Imagery. IEEE Journal of Selected Topics in Signal Processing
#' 9, 1026-1036. doi:10.1109/JSTSP.2015.2417833
#'
#' @examples
#' \dontrun{
#' dat <- loadDataSet("variable Noise Helix", n = 200)[sample(200)]
#'
#' emb <- embed(dat, "DRR", ndim = 3)
#'
#' plot(dat, type = "3vars")
#' plot(emb, type = "3vars")
#'
#' # We even have a function to reconstruct; it also works with only the first few dimensions
#' rec <- inverse(emb, getData(getDimRedData(emb))[, 1, drop = FALSE])
#' plot(rec, type = "3vars")
#' }
#'
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @import DRR
#' @family dimensionality reduction methods
#' @export DRR
#' @exportClass DRR
DRR <- setClass(
"DRR",
contains = "dimRedMethod",
prototype = list(
stdpars = list(ndim = 2,
lambda = c(0, 10 ^ (-3:2)),
kernel = "rbfdot",
kernel.pars = list(sigma = 10 ^ (-3:4)),
pca = TRUE,
pca.center = TRUE,
pca.scale = FALSE,
fastcv = FALSE,
cv.folds = 5,
fastcv.test = NULL,
fastkrr.nblocks = 4,
verbose = TRUE),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("DRR")
chckpkg("kernlab")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
res <- do.call(DRR::drr, c(list(X = indata), pars))
outdata <- res$fitted.data
colnames(outdata) <- paste0("DRR", 1:ncol(outdata))
appl <- function(x){
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(proj) != ncol(data@data))
stop("x must have the same number of dimensions ",
"as the original data")
appl.out <- new("dimRedData",
data = res$apply(proj),
meta = appl.meta)
dimnames(appl.out@data) <- list(
rownames(x), paste0("DRR", seq_len(ncol(appl.out@data)))
)
return(appl.out)
}
inv <- function(x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(proj) > ncol(data@data))
stop("x must have less or equal number of dimensions ",
"as the original data")
inv.out <- new("dimRedData",
data = res$inverse(proj),
meta = appl.meta)
dimnames(inv.out@data) <- list(rownames(proj), colnames(data@data))
return(inv.out)
}
return(
new("dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
apply = appl,
inverse = inv,
has.org.data = keep.org.data,
has.apply = TRUE,
has.inverse = TRUE,
method = "drr",
pars = pars
)
)
})
)
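## Minimal sketch (hypothetical helper, not part of the package API) of the
## parameter grid that the cross-validation described above has to score:
## every combination of lambda and kernel.pars$sigma is tried, so shortening
## either vector shrinks the search multiplicatively.
.sketch_drr_cv_grid <- function(lambda = c(0, 10 ^ (-3:2)),
                                sigma = 10 ^ (-3:4)) {
  expand.grid(lambda = lambda, sigma = sigma) # 7 * 8 = 56 candidate pairs
}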
dimRed/R/pca.R 0000644 0001762 0000144 00000010141 13562225201 012540 0 ustar ligges users #' Principal Component Analysis
#'
#' S4 Class implementing PCA.
#'
#' PCA transforms the data into orthogonal components so that the first
#' axis accounts for the largest variance in the data; each of the
#' following axes accounts for the highest remaining variance under the
#' constraint that it is orthogonal to the preceding axes. PCA is
#' sensitive to the scaling of the variables. PCA is by far the
#' fastest and simplest method of dimensionality reduction and should
#' probably always be applied as a baseline if other methods are tested.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' PCA can take the following parameters:
#' \describe{
#' \item{ndim}{The number of output dimensions.}
#' \item{center}{logical, should the data be centered, defaults to \code{TRUE}.}
#' \item{scale.}{logical, should the data be scaled, defaults to \code{FALSE}.}
#' }
#'
#' @section Implementation:
#'
#' Wraps around \code{\link{prcomp}}. Because PCA can be reduced to a
#' simple rotation, forward and backward projection functions are
#' supplied.
#'
#' @references
#'
#' Pearson, K., 1901. On lines and planes of closest fit to systems of points in
#' space. Philosophical Magazine 2, 559-572.
#'
#' @examples
#' dat <- loadDataSet("Iris")
#' emb <- embed(dat, "PCA")
#'
#' plot(emb, type = "2vars")
#' plot(inverse(emb, getDimRedData(emb)), type = "3vars")
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export PCA
#' @exportClass PCA
PCA <- setClass(
"PCA",
contains = "dimRedMethod",
prototype = list(
stdpars = list(ndim = 2,
center = TRUE,
scale. = FALSE),
fun = function (data, pars,
keep.org.data = TRUE) {
ndim <- pars$ndim
pars$ndim <- NULL
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
data <- data@data
res <- do.call(
prcomp,
c(list(x = data), pars)
)
# evaluate results here for functions
data <- res$x[, seq_len(ndim), drop = FALSE]
ce <- res$center
sc <- res$scale
rot <- res$rotation[, seq_len(ndim)]
rerot <- t(rot)
appl <- function(x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(proj) != ncol(orgdata))
stop("x must have the same number of dimensions ",
"as the original data")
if (ce[1] != FALSE) proj <- t(apply(proj, 1, function(x) x - ce))
if (sc[1] != FALSE) proj <- t(apply(proj, 1, function(x) x / sc))
proj <- proj %*% rot
proj <- new("dimRedData", data = proj, meta = appl.meta)
return(proj)
}
inv <- function(x) {
appl.meta <- if (inherits(x, "dimRedData")) x@meta else data.frame()
proj <- if (inherits(x, "dimRedData")) x@data else x
if (ncol(proj) > ncol(data))
stop("x must have at most as many dimensions ",
"as the embedding")
d <- ncol(proj)
reproj <- proj %*% rerot[seq_len(d), ]
if (sc[1] != FALSE)
reproj <- t(apply(reproj, 1, function(x) x * sc))
if (ce[1] != FALSE)
reproj <- t(apply(reproj, 1, function(x) x + ce))
reproj <- new("dimRedData", data = reproj, meta = appl.meta)
return(reproj)
}
res <- new(
"dimRedResult",
data = new("dimRedData",
data = data,
meta = meta),
org.data = orgdata,
apply = appl,
inverse = inv,
has.org.data = keep.org.data,
has.apply = TRUE,
has.inverse = TRUE,
method = "PCA",
pars = pars
)
return(res)
})
)
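## Minimal sketch (hypothetical helper, not part of the package API) of the
## rotation algebra behind appl() and inv() above: the forward projection is
## the centered data times the rotation matrix, and the inverse re-applies
## the transposed rotation before undoing the centering.
.sketch_pca_roundtrip <- function(x = as.matrix(datasets::iris[, 1:4])) {
  p <- stats::prcomp(x, center = TRUE, scale. = FALSE)
  proj <- sweep(x, 2, p$center) %*% p$rotation              # forward: appl()
  recon <- sweep(proj %*% t(p$rotation), 2, p$center, "+")  # backward: inv()
  all.equal(unname(recon), unname(x)) # TRUE: lossless when all PCs are kept
}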
dimRed/R/graph_embed.R 0000644 0001762 0000144 00000021510 13562225201 014234 0 ustar ligges users #' Graph Embedding via the Kamada Kawai Algorithm
#'
#' An S4 Class implementing the Kamada Kawai Algorithm for graph embedding.
#'
#' Graph embedding algorithms see the data as a graph. Between the
#' nodes of the graph exist attracting and repelling forces which can
#' be modeled as electrical fields or springs connecting the
#' nodes. The graph is then forced into a lower dimensional
#' representation that tries to represent the forces between the nodes
#' accurately by minimizing the total energy of the attracting and
#' repelling forces.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' KamadaKawai can take the following parameters:
#' \describe{
#' \item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3}
#' \item{knn}{Reduce the graph to keep only the nearest neighbors. Defaults to 100.}
#' \item{d}{The distance function to determine the weights of the graph edges. Defaults to euclidean distances.}
#' }
#'
#' @section Implementation:
#' Wraps around \code{\link[igraph]{layout_with_kk}}. The parameters
#' maxiter, epsilon and kkconst are set to the default values and
#' cannot be set; this may change in a future release. The dimRed
#' package adds an extra sparsity parameter by constructing a knn
#' graph, which may also improve visualization quality.
#'
#' @references
#'
#' Kamada, T., Kawai, S., 1989. An algorithm for drawing general undirected
#' graphs. Information Processing Letters 31, 7-15.
#' https://doi.org/10.1016/0020-0190(89)90102-6
#'
#' @examples
#' dat <- loadDataSet("Swiss Roll", n = 200)
#' emb <- embed(dat, "KamadaKawai")
#' plot(emb, type = "2vars")
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export KamadaKawai
#' @exportClass KamadaKawai
KamadaKawai <- setClass(
"KamadaKawai",
contains = "dimRedMethod",
prototype = list(
stdpars = list(ndim = 2,
knn = 100,
d = stats::dist),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("igraph")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
outdata <- em_graph_layout(
indata,
graph_em_method = igraph::layout_with_kk,
knn = pars$knn,
d = pars$d,
ndim = pars$ndim,
weight.trans = I #pars$weight.trans
)
colnames(outdata) <- paste0("KK", 1:ncol(outdata))
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
has.org.data = keep.org.data,
method = "graph_kk",
pars = pars
))
})
)
#' Distributed Recursive Graph Layout
#'
#' An S4 Class implementing Distributed recursive Graph Layout.
#'
#' DrL uses a complex algorithm to avoid local minima in the graph
#' embedding which uses several steps.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' DrL can take the following parameters:
#' \describe{
#' \item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3}
#' \item{knn}{Reduce the graph to keep only the nearest neighbors. Defaults to 100.}
#' \item{d}{The distance function to determine the weights of the graph edges. Defaults to euclidean distances.}
#' }
#'
#' @section Implementation:
#' Wraps around \code{\link[igraph]{layout_with_drl}}. The parameters
#' maxiter, epsilon and kkconst are set to the default values and
#' cannot be set; this may change in a future release. The dimRed
#' package adds an extra sparsity parameter by constructing a knn
#' graph, which may also improve visualization quality.
#'
#' @references
#'
#' Martin, S., Brown, W.M., Wylie, B.N., 2007. Dr.l: Distributed Recursive
#' (graph) Layout (No. dRl; 002182MLTPL00). Sandia National Laboratories.
#'
#' @examples
#' \dontrun{
#' dat <- loadDataSet("Swiss Roll", n = 200)
#' emb <- embed(dat, "DrL")
#' plot(emb, type = "2vars")
#' }
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export DrL
#' @exportClass DrL
DrL <- setClass(
"DrL",
contains = "dimRedMethod",
prototype = list(
stdpars = list(ndim = 2,
knn = 100,
d = stats::dist),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("igraph")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
outdata <- em_graph_layout(
indata,
graph_em_method = igraph::layout_with_drl,
knn = pars$knn,
d = pars$d,
ndim = pars$ndim,
weight.trans = I #pars$weight.trans
)
colnames(outdata) <- paste0("DrL", 1:ncol(outdata))
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
has.org.data = keep.org.data,
method = "graph_drl",
pars = pars
))
})
)
#' Fruchterman Reingold Graph Layout
#'
#' An S4 Class implementing the Fruchterman Reingold Graph Layout
#' algorithm.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' \describe{
#' \item{ndim}{The number of dimensions, defaults to 2. Can only be 2 or 3}
#' \item{knn}{Reduce the graph to keep only the nearest neighbors. Defaults to 100.}
#' \item{d}{The distance function to determine the weights of the graph edges. Defaults to euclidean distances.}
#' }
#'
#' @section Implementation:
#' Wraps around \code{\link[igraph]{layout_with_fr}}, see there for
#' details. The Fruchterman Reingold algorithm puts the data into
#' a circle and puts connected points close to each other.
#'
#' @references
#'
#' Fruchterman, T.M.J., Reingold, E.M., 1991. Graph drawing by force-directed
#' placement. Softw: Pract. Exper. 21, 1129-1164.
#' https://doi.org/10.1002/spe.4380211102
#'
#' @examples
#' dat <- loadDataSet("Swiss Roll", n = 100)
#' emb <- embed(dat, "FruchtermanReingold")
#' plot(emb, type = "2vars")
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export FruchtermanReingold
#' @exportClass FruchtermanReingold
FruchtermanReingold <- setClass(
"FruchtermanReingold",
contains = "dimRedMethod",
prototype = list(
stdpars = list(ndim = 2,
knn = 100,
d = stats::dist),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("igraph")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
outdata <- em_graph_layout(
indata,
graph_em_method = igraph::layout_with_fr,
knn = pars$knn,
d = pars$d,
ndim = pars$ndim,
weight.trans = I #pars$weight.trans
)
colnames(outdata) <- paste0("FR", 1:ncol(outdata))
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
has.org.data = keep.org.data,
method = "graph_fr",
pars = pars
))
})
)
em_graph_layout <- function(data, graph_em_method,
knn = 50, d = stats::dist,
ndim = 2, weight.trans = I){
chckpkg("igraph")
data.dist <- as.matrix(d(data))
data.graph <- construct_knn_graph(data.dist, knn)
## pass weight.trans through so the argument is not silently ignored
embed_graph(data.graph, graph_em_method,
weight.trans = weight.trans, ndim = ndim)
}
embed_graph <- function(graph, f, weight.trans = I, ndim = 2){
f(graph, weights = weight.trans(igraph::E(graph)$weight), dim = ndim)
}
construct_knn_graph <- function (data.dist, knn) {
chckpkg("igraph")
chckpkg("coRanking")
data.graph <- igraph::graph_from_adjacency_matrix(
adjmatrix = data.dist,
mode = "undirected",
weighted = TRUE
)
if (is.infinite(knn) || is.na(knn))
return(data.graph)
## else: remove all unnecessary edges
data.rankm <- coRanking::rankmatrix(data.dist, input = "dist")
data.rankm.ind <- data.rankm <= knn + 1
inds <- which(
!(data.rankm.ind | t(data.rankm.ind)),
arr.ind = TRUE
)
data.graph[ from = inds[, 1], to = inds[, 2] ] <- FALSE
return(data.graph)
}
dimRed/R/lle.R 0000644 0001762 0000144 00000004140 13562225201 012553 0 ustar ligges users #' Locally Linear Embedding
#'
#' An S4 Class implementing Locally Linear Embedding (LLE)
#'
#' LLE approximates each point in the manifold by a linear combination
#' of its neighbors. These linear combinations are the same inside the
#' manifold and in high-dimensional space.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' LLE can take the following parameters:
#' \describe{
#' \item{knn}{the number of neighbors for the knn graph., defaults to 50.}
#' \item{ndim}{the number of embedding dimensions, defaults to 2.}
#' }
#'
#' @section Implementation:
#' Wraps around \code{\link[lle]{lle}}, only
#' exposes the parameters \code{k} and \code{m}.
#'
#' @references
#'
#' Roweis, S.T., Saul, L.K., 2000. Nonlinear Dimensionality Reduction
#' by Locally Linear Embedding. Science 290,
#' 2323-2326. doi:10.1126/science.290.5500.2323
#'
#' @examples
#' dat <- loadDataSet("3D S Curve", n = 500)
#' emb <- embed(dat, "LLE", knn = 45)
#' plot(emb, type = "2vars")
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @family dimensionality reduction methods
#' @export LLE
#' @exportClass LLE
LLE <- setClass(
"LLE",
contains = "dimRedMethod",
prototype = list(
stdpars = list(knn = 50, ndim = 2),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("lle")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
outdata <- lle::lle(indata,
k = pars$knn,
m = pars$ndim)$Y
if (is.null(dim(outdata))) {
dim(outdata) <- c(length(outdata), 1)
}
colnames(outdata) <- paste0("LLE", 1:ncol(outdata))
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
has.org.data = keep.org.data,
method = "lle",
pars = pars
))
})
)
dimRed/R/dimRedData-class.R 0000644 0001762 0000144 00000013730 13464507204 015114 0 ustar ligges users #' @include misc.R
NULL
#' Class "dimRedData"
#'
#' A class to hold data for dimensionality reduction and methods.
#'
#' The class has two slots, \code{data} and \code{meta}. The
#' \code{data} slot contains a \code{numeric matrix} with variables in
#' columns and observations in rows. The \code{meta} slot may contain
#' a \code{data.frame} with additional information. Both slots need to
#' have the same number of rows or the \code{meta} slot needs to
#' contain an empty \code{data.frame}.
#'
#' See examples for easy conversion from and to \code{data.frame}.
#'
#' For plotting functions see \code{\link{plot.dimRedData}}.
#'
#' @slot data of class \code{matrix}, holds the data, observations in
#' rows, variables in columns
#' @slot meta of class \code{data.frame}, holds meta data such as
#' classes, internal manifold coordinates, or simply additional
#' data of the data set. Must have the same number of rows as the
#' \code{data} slot or be an empty data frame.
#'
#'
#' @examples
#' ## Load an example data set:
#' s3d <- loadDataSet("3D S Curve")
#'
#' ## Create using a constructor:
#'
#' ### without meta information:
#' dimRedData(iris[, 1:4])
#'
#' ### with meta information:
#' dimRedData(iris[, 1:4], iris[, 5])
#'
#' ### using slot names:
#' dimRedData(data = iris[, 1:4], meta = iris[, 5])
#'
#' ## Convert to a dimRedData object:
#' Iris <- as(iris[, 1:4], "dimRedData")
#'
#' ## Convert to data.frame:
#' head(as(s3d, "data.frame"))
#' head(as.data.frame(s3d))
#' head(as.data.frame(as(iris[, 1:4], "dimRedData")))
#'
#' ## Extract slots:
#' head(getData(s3d))
#' head(getMeta(s3d))
#'
#' ## Get the number of observations:
#' nrow(s3d)
#'
#' ## Subset:
#' s3d[1:5, ]
#'
#' @family dimRedData
#' @import methods
#' @export dimRedData
#' @exportClass dimRedData
dimRedData <- setClass(
"dimRedData",
slots = c(data = "matrix", meta = "data.frame"),
prototype = prototype(data = matrix(numeric(0), 0, 0), meta = data.frame()),
validity = function (object) {
retval <- NULL
if (!is.matrix(object@data)) {
retval <- c(
retval,
c("data must be a matrix with ",
"observations in rows and dimensions in columns")
)
}
if (!is.numeric(object@data)) {
retval <- c(
retval,
c("data must be numeric")
)
}
if ((nrow(object@meta) != 0) &&
(nrow(object@meta) != nrow(object@data))){
retval <- c(
retval,
c("data and meta must have the same numbers of rows")
)
}
return(if (is.null(retval)) TRUE else retval)
}
)
setMethod("initialize",
signature = c("dimRedData"),
function (.Object,
data = matrix(numeric(0), 0, 0),
meta = data.frame()) {
data <- as.matrix(data)
meta <- as.data.frame(meta)
.Object <- callNextMethod()
return(.Object)
})
setAs(from = "ANY", to = "dimRedData",
def = function(from) new("dimRedData", data = as.matrix(from)))
setAs(from = "dimRedData", to = "data.frame",
def = function(from) as.data.frame(from))
#' @param meta.prefix Prefix for the columns of the meta data names.
#' @param data.prefix Prefix for the columns of the variable names.
#'
#' @family dimRedData
#' @describeIn dimRedData convert to data.frame
#' @export
setMethod(f = "as.data.frame",
signature = c("dimRedData"),
definition = function(x, meta.prefix = "meta.",
data.prefix = "") {
tmp <- list()
if (nrow(x@meta) > 0){
tmp$meta <- as.data.frame(x@meta, stringsAsFactors = FALSE)
names(tmp$meta) <- paste0(meta.prefix, colnames(x@meta))
}
tmp$data <- as.data.frame(x@data, stringsAsFactors = FALSE)
names(tmp$data) <- paste0(data.prefix, colnames(x@data))
names(tmp) <- NULL
data.frame(tmp, stringsAsFactors = FALSE)
})
#' @param data Will be coerced into a \code{\link{data.frame}} with
#' \code{\link{as.data.frame}}
#'
#' @examples
#' ## create a dimRedData object using a formula
#' as.dimRedData(Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
#' iris)[1:5]
#'
#' @include misc.R
#' @family dimRedData
#' @describeIn as.dimRedData Convert a \code{data.frame} to a dimRedData
#' object using a formula
#' @export
setMethod(f = "as.dimRedData",
signature = c("formula"),
definition = function(formula, data) {
data <- as.data.frame(data)
meta <- stats::model.frame(lhs(formula), data)
data <- stats::model.matrix(rhs(formula), data)
return(new("dimRedData", data = data, meta = meta))
})
#' @param object Of class dimRedData.
#' @describeIn dimRedData Get the data slot.
#' @export
setMethod("getData", "dimRedData", function(object) object@data)
#' @describeIn dimRedData Get the meta slot.
#' @export
setMethod("getMeta", "dimRedData", function(object) object@meta)
#' @param x Of class dimRedData
#' @describeIn dimRedData Get the number of observations.
#' @export
setMethod("nrow", "dimRedData", function(x) nrow(x@data))
#' @param i a valid index for subsetting rows.
#' @examples
#' ## Shuffle data:
#' s3 <- s3d[sample(nrow(s3d))]
#'
#' @describeIn dimRedData Subset rows.
#' @export
setMethod("[", signature(x = "dimRedData",
i = "ANY"),
function(x, i) {
x@data <- x@data[i, , drop = FALSE]
if (nrow(x@meta) != 0)
x@meta <- x@meta[i, , drop = FALSE]
# validObject returns a string with the description of what is wrong or
# TRUE, so the following lines have to be as they are!
vv <- validObject(x)
if (vv == TRUE) return(x)
else stop("cannot subset dimRedData object: \n",
paste(vv, collapse = "\n"))
})
#' @describeIn dimRedData Extract the number of variables from the data.
#'
#' @examples
#' ## Get the number of variables:
#' ndims(s3d)
#'
#' @export
setMethod("ndims", "dimRedData", function(object) ncol(object@data))
dimRed/R/quality.R 0000644 0001762 0000144 00000047066 13753034327 013516 0 ustar ligges users #' @include dimRedResult-class.R
#' @include dimRedData-class.R
#' @export
setGeneric("quality",
function (.data, ...) standardGeneric("quality"),
valueClass = "numeric")
#' Quality Criteria for dimensionality reduction.
#'
#' A collection of functions to compute quality measures on
#' \code{\link{dimRedResult}} objects.
#'
#' @section Implemented methods:
#'
#' Method must be one of \code{"\link{Q_local}", "\link{Q_global}",
#' "\link{mean_R_NX}", "\link{total_correlation}",
#' "\link{cophenetic_correlation}", "\link{distance_correlation}",
#' "\link{reconstruction_rmse}"}
#'
#' @section Rank based criteria:
#'
#' \code{Q_local}, \code{Q_global}, and \code{mean_R_NX} are
#' quality criteria based on the Co-ranking matrix. \code{Q_local}
#' and \code{Q_global} determine the local/global quality of the
#' embedding, while \code{mean_R_NX} determines the quality of the
#' overall embedding. They are parameter free and return a single
#' number. The object must include the original data. The number
#' returned is in the range [0, 1]; higher values mean a better
#' local/global embedding.
#'
#' @section Correlation based criteria:
#'
#' \code{total_correlation} calculates the sum of the mean squared
#' correlations of the original axes with the axes in reduced
#' dimensions. Because some methods do not care about correlations
#' with axes, there is an option to rotate the data in reduced space to
#' maximize this criterion. The number may be greater than one if more
#' dimensions are summed up.
#'
#' \code{cophenetic_correlation} calculates the correlation between the
#' lower triangles of the distance matrices; the correlation and distance
#' methods may be specified. The result is in the range [-1, 1].
#'
#' \code{distance_correlation} measures the independence of samples by
#' calculating the correlation of distances. For details see
#' \code{\link[energy]{dcor}}.
#'
#' @section Reconstruction error:
#'
#' \code{reconstruction_rmse} calculates the root mean squared error
#' of the reconstruction. \code{object} requires an inverse function.
#'
#'
#' @references
#'
#' Lueks, W., Mokbel, B., Biehl, M., Hammer, B., 2011. How
#' to Evaluate Dimensionality Reduction? - Improving the
#' Co-ranking Matrix. arXiv:1110.3917 [cs].
#'
#' Szekely, G.J., Rizzo, M.L., Bakirov, N.K., 2007. Measuring and
#' testing dependence by correlation of distances. Ann. Statist. 35,
#' 2769-2794. doi:10.1214/009053607000000505
#'
#' Lee, J.A., Peluffo-Ordonez, D.H., Verleysen, M., 2015. Multi-scale
#' similarities in stochastic neighbour embedding: Reducing
#' dimensionality while preserving both local and global
#' structure. Neurocomputing, 169,
#' 246-261. doi:10.1016/j.neucom.2014.12.095
#'
#'
#'
#' @param .data object of class \code{dimRedResult}
#' @param .method character vector naming one of the methods
#' @param .mute what output from the embedding method should be muted.
#' @param ... the parameters, internally passed as a list to the
#' quality method as \code{pars = list(...)}
#' @return a number
#'
#' @examples
#' \dontrun{
#' embed_methods <- dimRedMethodList()
#' quality_methods <- dimRedQualityList()
#' scurve <- loadDataSet("Iris")
#'
#' quality_results <- matrix(NA, length(embed_methods), length(quality_methods),
#' dimnames = list(embed_methods, quality_methods))
#' embedded_data <- list()
#'
#' for (e in embed_methods) {
#' message("embedding: ", e)
#' embedded_data[[e]] <- embed(scurve, e, .mute = c("message", "output"))
#' for (q in quality_methods) {
#' message(" quality: ", q)
#' quality_results[e, q] <- tryCatch(
#' quality(embedded_data[[e]], q),
#' error = function (e) NA
#' )
#' }
#' }
#'
#' print(quality_results)
#' }
#' @author Guido Kraemer
#' @aliases quality quality.dimRedResult
#' @family Quality scores for dimensionality reduction
#' @describeIn quality Calculate a quality index from a dimRedResult object.
#' @export
setMethod(
"quality",
"dimRedResult",
function (.data, .method = dimRedQualityList(),
.mute = character(0), # c("output", "message"),
...) {
method <- match.arg(.method)
methodFunction <- getQualityFunction(method)
args <- c(list(object = .data), list(...))
devnull <- if (Sys.info()["sysname"] != "Windows")
"/dev/null"
else
"NUL"
if ("message" %in% .mute){
devnull1 <- file(devnull, "wt")
sink(devnull1, type = "message")
on.exit({
sink(file = NULL, type = "message")
close(devnull1)
}, add = TRUE)
}
if ("output" %in% .mute) {
devnull2 <- file(devnull, "wt")
sink(devnull2, type = "output")
on.exit({
sink()
close(devnull2)
}, add = TRUE)
}
do.call(methodFunction, args)
}
)
getQualityFunction <- function (method) {
switch(
method,
Q_local = Q_local,
Q_global = Q_global,
mean_R_NX = mean_R_NX,
AUC_lnK_R_NX = AUC_lnK_R_NX,
total_correlation = total_correlation,
cophenetic_correlation = cophenetic_correlation,
distance_correlation = distance_correlation,
reconstruction_rmse = reconstruction_rmse
)
}
#' @export
setGeneric(
"Q_local",
function(object, ...) standardGeneric("Q_local"),
valueClass = "numeric"
)
#' Method Q_local
#'
#' Calculate the Q_local score to assess the quality of a dimensionality reduction.
#'
#' @param object of class dimRedResult.
#' @param ndim use the first ndim columns of the embedded data for calculation.
#' @family Quality scores for dimensionality reduction
#' @aliases Q_local
#' @export
setMethod(
"Q_local",
"dimRedResult",
function (object, ndim = getNDim(object)) {
if (!object@has.org.data) stop("object requires original data")
chckpkg("coRanking")
Q <- coRanking::coranking(object@org.data,
object@data@data[, seq_len(ndim), drop = FALSE])
nQ <- nrow(Q)
N <- nQ + 1
Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N
lcmc <- Qnx - seq_len(nQ) / nQ
Kmax <- which.max(lcmc)
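## Q_local averages Q_NX over the local regime, i.e. over all K up to
## the one that maximizes the local continuity meta criterion (LCMC)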
Qlocal <- sum(Qnx[1:Kmax]) / Kmax
return(as.vector(Qlocal))
}
)
#' @export
setGeneric(
"Q_global",
function(object, ...) standardGeneric("Q_global"),
valueClass = "numeric"
)
#' Method Q_global
#'
#' Calculate the Q_global score to assess the quality of a dimensionality reduction.
#'
#' @param object of class dimRedResult
#' @family Quality scores for dimensionality reduction
#' @aliases Q_global
#' @export
setMethod(
"Q_global",
"dimRedResult",
function(object){
if (!object@has.org.data) stop("object requires original data")
chckpkg("coRanking")
Q <- coRanking::coranking(object@org.data, object@data@data)
nQ <- nrow(Q)
N <- nQ + 1
Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N
lcmc <- Qnx - seq_len(nQ) / nQ
Kmax <- which.max(lcmc)
Qglobal <- sum(Qnx[(Kmax + 1):nQ]) / (N - Kmax)
return(Qglobal)
}
)
#' @export
setGeneric(
"mean_R_NX",
function(object, ...) standardGeneric("mean_R_NX"),
valueClass = "numeric"
)
#' Method mean_R_NX
#'
#' Calculate the mean_R_NX score to assess the quality of a dimensionality reduction.
#'
#' @param object of class dimRedResult
#' @family Quality scores for dimensionality reduction
#' @aliases mean_R_NX
#' @export
setMethod(
"mean_R_NX",
"dimRedResult",
function(object) mean(R_NX(object))
)
#' @export
setGeneric(
"AUC_lnK_R_NX",
function(object, ...) standardGeneric("AUC_lnK_R_NX"),
valueClass = "numeric"
)
#' Method AUC_lnK_R_NX
#'
#' Calculate the area under the R_NX(ln K) curve, used in Lee et al. (2015). Note
#' that despite the name, this does not weight the mean by the logarithm, but by
#' 1/K. If explicit weighting by the logarithm is desired, use \code{weight =
#' "log"} or \code{weight = "log10"}.
#'
#' The naming confusion originated from equation 17 in Lee et al (2015) and the
#' name of this method may change in the future to avoid confusion.
#'
#' @references Lee, J.A., Peluffo-Ordonez, D.H., Verleysen, M., 2015.
#' Multi-scale similarities in stochastic neighbour embedding: Reducing
#' dimensionality while preserving both local and global structure.
#' Neurocomputing 169, 246-261. https://doi.org/10.1016/j.neucom.2014.12.095
#'
#' @param object of class dimRedResult
#' @param weight the weight function used, one of \code{c("inv", "log", "log10")}
#' @family Quality scores for dimensionality reduction
#' @aliases AUC_lnK_R_NX
#' @export
setMethod(
"AUC_lnK_R_NX",
"dimRedResult",
function(object, weight = "inv") {
rnx <- R_NX(object)
weight <- match.arg(weight, c("inv", "ln", "log", "log10"))
switch(
weight,
inv = auc_ln_k_inv(rnx),
log = auc_log_k(rnx),
ln = auc_log_k(rnx),
log10 = auc_log10_k(rnx),
stop("wrong parameter for weight")
)
}
)
auc_ln_k_inv <- function(rnx) {
Ks <- seq_along(rnx)
return (sum(rnx / Ks) / sum(1 / Ks))
}
auc_log_k <- function(rnx) {
Ks <- seq_along(rnx)
return (sum(rnx * log(Ks)) / sum(log(Ks)))
}
auc_log10_k <- function(rnx) {
Ks <- seq_along(rnx)
return (sum(rnx * log10(Ks)) / sum(log10(Ks)))
}
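## All three AUCs are weighted means of the R_NX curve: auc_ln_k_inv
## uses weights 1 / K (a discrete version of integrating R_NX over
## ln K), auc_log_k uses weights ln(K), and auc_log10_k uses weights
## log10(K).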
#' @export
setGeneric(
"total_correlation",
function(object, ...) standardGeneric("total_correlation"),
valueClass = "numeric"
)
#' Method total_correlation
#'
#' Calculate the total correlation of the variables with the axes to
#' assess the quality of a dimensionality reduction.
#'
#' @param object of class dimRedResult
#' @param naxes the number of axes to use for optimization.
#' @param cor_method the correlation method to use.
#' @param is.rotated if \code{FALSE}, the object is rotated to maximize the
#'   correlations before the score is computed.
#'
#' @family Quality scores for dimensionality reduction
#' @aliases total_correlation
#' @export
setMethod(
"total_correlation",
"dimRedResult",
function(object,
naxes = ndims(object),
cor_method = "pearson",
is.rotated = FALSE){
if (!object@has.org.data) stop("object requires original data")
if (length(naxes) != 1 || naxes < 1 || naxes > ncol(object@data@data))
stop("naxes must specify the numbers of axes to optimize for, ",
"i.e. a single integer between 1 and ncol(object@data@data)")
## try to partially match cor_method:
cor_methods <- c("pearson", "kendall", "spearman")
cor_method <- cor_methods[pmatch(cor_method, cor_methods)]
if (is.na(cor_method))
stop("cor_method must match one of ",
"'pearson', 'kendall', or 'spearman', ",
"at least partially.")
if (!is.rotated) {
rotated_result <- maximize_correlation(
object, naxes, cor_method
)
} else {
rotated_result <- object
}
res <- 0
for (i in 1:naxes)
res <- res + mean(correlate(
rotated_result@data@data,
rotated_result@org.data,
cor_method
)[i, ] ^ 2)
return(res)
}
)
setGeneric("cophenetic_correlation",
function(object, ...) standardGeneric("cophenetic_correlation"),
valueClass = "numeric")
#' Method cophenetic_correlation
#'
#' Calculate the correlation between the distance matrices in high and
#' low dimensional space.
#'
#' @param object of class dimRedResult
#' @param d the distance function to use.
#' @param cor_method The correlation method.
#' @aliases cophenetic_correlation
#' @family Quality scores for dimensionality reduction
#' @export
setMethod(
"cophenetic_correlation",
"dimRedResult",
function(object, d = stats::dist, cor_method = "pearson"){
## if (missing(d)) d <- stats::dist
## if (missing(cor_method)) cor_method <- "pearson"
if (!object@has.org.data) stop("object requires original data")
cor_methods <- c("pearson", "kendall", "spearman")
cor_method <- cor_methods[pmatch(cor_method, cor_methods)]
if (is.na(cor_method))
stop("cor_method must match one of ",
"'pearson', 'kendall', or 'spearman', ",
"at least partially.")
d.org <- d(object@org.data)
d.emb <- d(object@data@data)
if (!inherits(d.org, "dist") || !inherits(d.emb, "dist"))
stop("d must return a dist object")
res <- correlate(
d(object@org.data),
d(object@data@data),
cor_method
)
return(res)
}
)
#' @export
setGeneric(
"distance_correlation",
function(object) standardGeneric("distance_correlation"),
valueClass = "numeric"
)
#' Method distance_correlation
#'
#' Calculate the distance correlation between the distance matrices in
#' high and low dimensional space.
#'
#' @param object of class dimRedResult
#' @aliases distance_correlation
#' @family Quality scores for dimensionality reduction
#' @export
setMethod(
"distance_correlation",
"dimRedResult",
function(object){
if (!object@has.org.data) stop("object requires original data")
chckpkg("energy")
energy::dcor(object@org.data, object@data@data)
}
)
#' @export
setGeneric(
"reconstruction_rmse",
function(object) standardGeneric("reconstruction_rmse"),
valueClass = "numeric"
)
#' Method reconstruction_rmse
#'
#' Calculate the reconstruction root mean squared error of a dimensionality reduction; the method must have an inverse mapping.
#'
#' @param object of class dimRedResult
#' @aliases reconstruction_rmse
#' @family Quality scores for dimensionality reduction
#' @export
setMethod(
"reconstruction_rmse",
"dimRedResult",
function(object){
if (!object@has.org.data) stop("object requires original data")
if (!object@has.inverse) stop("object requires an inverse function")
recon <- object@inverse(object@data)
rmse(recon@data, object@org.data)
}
)
#' @rdname quality
#'
#' @export
dimRedQualityList <- function () {
return(c("Q_local",
"Q_global",
"mean_R_NX",
"AUC_lnK_R_NX",
"total_correlation",
"cophenetic_correlation",
"distance_correlation",
"reconstruction_rmse"))
}
#' @export
setGeneric(
"R_NX",
function(object, ...) standardGeneric("R_NX"),
valueClass = "numeric"
)
#' Method R_NX
#'
#' Calculate the R_NX score from Lee et al. (2013) which shows the neighborhood
#' preservation for the Kth nearest neighbors, corrected for random point
#' distributions and scaled to range [0, 1].
#' @param object of class dimRedResult
#' @param ndim the number of dimensions to take from the embedded data.
#' @family Quality scores for dimensionality reduction
#' @aliases R_NX
#' @export
setMethod(
"R_NX",
"dimRedResult",
function(object, ndim = getNDim(object)) {
chckpkg("coRanking")
if (!object@has.org.data) stop("object requires original data")
Q <- coRanking::coranking(object@org.data,
object@data@data[, seq_len(ndim),
drop = FALSE])
nQ <- nrow(Q)
N <- nQ + 1
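## Q_NX(K) is the mass of the upper-left K x K block of the co-ranking
## matrix, normalized by K * N; the nested cumsum()s build all block
## sums at once and diag() picks out the square ones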
Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) /
seq_len(nQ) / N
Rnx <- ((N - 1) * Qnx - seq_len(nQ)) /
(N - 1 - seq_len(nQ))
Rnx[-nQ]
}
)
#' @export
setGeneric(
"Q_NX",
function(object, ...) standardGeneric("Q_NX"),
valueClass = "numeric"
)
#' Method Q_NX
#'
#' Calculate the Q_NX score (Chen & Buja 2006, the notation in the
#' publication is M_k), which is the fraction of points that remain inside
#' the same K-ary neighborhood in high and low dimensional space.
#'
#' @param object of class dimRedResult
#' @family Quality scores for dimensionality reduction
#' @aliases Q_NX
#' @export
setMethod(
"Q_NX",
"dimRedResult",
function(object) {
chckpkg("coRanking")
Q <- coRanking::coranking(object@org.data, object@data@data)
nQ <- nrow(Q)
N <- nQ + 1
Qnx <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) / seq_len(nQ) / N
Qnx
}
)
#'@export
setGeneric(
"LCMC",
function(object, ...) standardGeneric("LCMC"),
valueClass = "numeric"
)
#' Method LCMC
#'
#' Calculates the Local Continuity Meta Criterion, which is
#' \code{\link{Q_NX}} adjusted for random overlap inside the K-ary
#' neighborhood.
#'
#' @param object of class dimRedResult
#' @family Quality scores for dimensionality reduction
#' @aliases LCMC
#' @export
setMethod(
"LCMC",
"dimRedResult",
function(object) {
chckpkg("coRanking")
Q <- coRanking::coranking(object@org.data, object@data@data)
nQ <- nrow(Q)
N <- nQ + 1
lcmc <- diag(apply(apply(Q, 2, cumsum), 1, cumsum)) /
seq_len(nQ) / N -
seq_len(nQ) / nQ
lcmc
}
)
rnx2qnx <- function(rnx, K = seq_along(rnx), N = length(rnx) + 1) {
(rnx * (N - 1 - K) + K) / (N - 1)
}
qnx2rnx <- function(qnx, K = seq_along(qnx), N = length(qnx) + 1) {
((N - 1) * qnx - K) / (N - 1 - K)
}
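## rnx2qnx() and qnx2rnx() invert each other via
## R_NX(K) = ((N - 1) * Q_NX(K) - K) / (N - 1 - K),
## the rescaling that makes a random embedding score 0 and a perfect one 1.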
#' @export
setGeneric(
"reconstruction_error",
function(object, ...) standardGeneric("reconstruction_error"),
valueClass = "numeric"
)
#' Method reconstruction_error
#'
#' Calculate the error using only the first \code{n} dimensions of the embedded
#' data. \code{error_fun} can either be one of \code{c("rmse", "mae")} to
#' calculate the root mean square error or the mean absolute error respectively,
#' or a function that takes two equally sized vectors as input and returns a
#' single number as output.
#'
#' @param object of class dimRedResult
#' @param n a positive integer or vector of integers \code{<= ndims(object)}
#' @param error_fun a function or string indicating an error function; if
#' a function is given, it must take two matrices of the same size and return
#' a scalar.
#' @return a vector of numbers with the same length as \code{n} containing
#'   the reconstruction errors.
#'
#' @examples
#' \dontrun{
#' ir <- loadDataSet("Iris")
#' ir.drr <- embed(ir, "DRR", ndim = ndims(ir))
#' ir.pca <- embed(ir, "PCA", ndim = ndims(ir))
#'
#' rmse <- data.frame(
#' rmse_drr = reconstruction_error(ir.drr),
#' rmse_pca = reconstruction_error(ir.pca)
#' )
#'
#' matplot(rmse, type = "l")
#' plot(ir)
#' plot(ir.drr)
#' plot(ir.pca)
#' }
#' @author Guido Kraemer
#' @family Quality scores for dimensionality reduction
#' @aliases reconstruction_error
#' @export
setMethod(
"reconstruction_error",
c("dimRedResult"),
function (object, n = seq_len(ndims(object)), error_fun = "rmse") {
if (any(n > ndims(object))) stop("n > ndims(object)")
if (any(n < 1)) stop("n < 1")
ef <- if (inherits(error_fun, "character")) {
switch(
error_fun,
rmse = rmse,
mae = mae
)
} else if (inherits(error_fun, "function")) {
error_fun
} else {
stop("error_fun must be a string or function, see documentation for details")
}
res <- numeric(length(n))
org <- getData(getOrgData(object))
for (i in seq_along(n)) {
rec <- getData(inverse(
object, getData(getDimRedData(object))[, seq_len(n[i]), drop = FALSE]
))
res[i] <- ef(org, rec)
}
res
}
)
rmse <- function (x1, x2) sqrt(mean((x1 - x2) ^ 2))
mae <- function (x1, x2) mean(abs(x1 - x2))
dimRed/R/leim.R 0000644 0001762 0000144 00000013100 14153203477 012731 0 ustar ligges users #' Laplacian Eigenmaps
#'
#' An S4 Class implementing Laplacian Eigenmaps
#'
#' Laplacian Eigenmaps use a kernel and were originally developed to
#' separate non-convex clusters under the name spectral clustering.
#'
#' @template dimRedMethodSlots
#'
#' @template dimRedMethodGeneralUsage
#'
#' @section Parameters:
#' \code{LaplacianEigenmaps} can take the following parameters:
#' \describe{
#' \item{ndim}{the number of output dimensions.}
#'
#' \item{sparse}{A character vector specifying how to make the graph
#' sparse, \code{"knn"} means that a K-nearest neighbor graph is
#' constructed, \code{"eps"} an epsilon neighborhood graph is
#' constructed, else a dense distance matrix is used.}
#'
#' \item{knn}{The number of nearest neighbors to use for the knn graph.}
#' \item{eps}{The distance for the epsilon neighborhood graph.}
#'
#' \item{t}{Parameter for the transformation of the distance matrix
#' by \eqn{w=exp(-d^2/t)}, larger values give less weight to
#' differences in distance, \code{t == Inf} treats all distances != 0 equally.}
#' \item{norm}{logical, should the normed laplacian be used?}
#' }
#'
#' @section Implementation:
#' Wraps around \code{\link[loe]{spec.emb}}.
#'
#' @references
#'
#' Belkin, M., Niyogi, P., 2003. Laplacian Eigenmaps for
#' Dimensionality Reduction and Data Representation. Neural
#' Computation 15, 1373.
#'
#' @examples
#' dat <- loadDataSet("3D S Curve")
#' emb <- embed(dat, "LaplacianEigenmaps")
#' plot(emb@data@data)
#'
#' @include dimRedResult-class.R
#' @include dimRedMethod-class.R
#' @export LaplacianEigenmaps
#' @exportClass LaplacianEigenmaps
LaplacianEigenmaps <- setClass(
"LaplacianEigenmaps",
contains = "dimRedMethod",
prototype = list(
stdpars = list(ndim = 2, sparse = "knn", knn = 50, eps = 0.1,
t = Inf, norm = TRUE),
fun = function (data, pars,
keep.org.data = TRUE) {
chckpkg("loe")
chckpkg("RSpectra")
chckpkg("Matrix")
meta <- data@meta
orgdata <- if (keep.org.data) data@data else NULL
indata <- data@data
if (is.null(pars$d)) pars$d <- dist
if (is.null(pars$knn)) pars$knn <- 50
if (is.null(pars$ndim)) pars$ndim <- 2
if (is.null(pars$t)) pars$t <- Inf
if (is.null(pars$norm)) pars$norm <- TRUE
message(Sys.time(), ": Creating weight matrix")
W <- if (pars$sparse == "knn") {
knng <- makeKNNgraph(indata, k = pars$knn, eps = 0,
diag = TRUE)
if (is.infinite(pars$t)){
knng <- igraph::set_edge_attr(knng, name = "weight", value = 1)
} else {
ea <- igraph::edge_attr(knng, name = "weight")
knng <- igraph::set_edge_attr(
knng, name = "weight", value = exp( -(ea ^ 2) / pars$t ))
}
igraph::as_adj(knng, sparse = TRUE,
attr = "weight", type = "both")
} else if (pars$sparse == "eps") {
tmp <- makeEpsSparseMatrix(indata, pars$eps)
tmp@x <- if (is.infinite(pars$t)) rep(1, length(tmp@i))
else exp(- (tmp@x ^ 2) / pars$t)
## diag(tmp) <- 1
as(tmp, "dgCMatrix")
} else { # dense case
tmp <- dist(indata)
tmp[] <- if (is.infinite(pars$t)) 1
else exp( -(tmp ^ 2) / pars$t)
tmp <- as.matrix(tmp)
diag(tmp) <- 1
tmp
}
## we don't need to test for symmetry, because we know the
## matrix is symmetric
D <- Matrix::Diagonal(x = Matrix::rowSums(W))
L <- D - W
## for the generalized eigenvalue problem, we do not have a solver
## use A u = \lambda B u
## Lgen <- Matrix::Diagonal(x = 1 / Matrix::diag(D) ) %*% L
## but then we get negative eigenvalues and complex eigenvalues
Lgen <- L
message(Sys.time(), ": Eigenvalue decomposition")
outdata <- if (pars$norm) {
DS <- Matrix::Diagonal(x = 1 / sqrt(Matrix::diag(D)))
RSpectra::eigs_sym(DS %*% Lgen %*% DS,
k = pars$ndim + 1,
sigma = -1e-5)
} else {
RSpectra::eigs_sym(Lgen,
k = pars$ndim + 1,
sigma = -1e-5)
}
message("Eigenvalues: ", paste(format(outdata$values),
collapse = " "))
## The eigenvalues are in decreasing order and we remove the
## smallest, which should be approx 0:
outdata <- outdata$vectors[, order(outdata$values)[-1],
drop = FALSE]
if(ncol(outdata) > 0) {
colnames(outdata) <- paste0("LEIM", seq_len(ncol(outdata)))
} else {
warning("no dimensions left, this is probably due to a badly conditioned eigenvalue decomposition.")
}
message(Sys.time(), ": DONE")
return(new(
"dimRedResult",
data = new("dimRedData",
data = outdata,
meta = meta),
org.data = orgdata,
has.org.data = keep.org.data,
method = "leim",
pars = pars
))
})
)
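## Minimal sketch (hypothetical helper, not part of the package API) of the
## symmetric normalized Laplacian factorized in the norm = TRUE branch above:
## L_sym = D^(-1/2) (D - W) D^(-1/2), with D the diagonal matrix of the row
## sums of the weight matrix W.
.sketch_normalized_laplacian <- function(W = matrix(c(0, 1, 1,
                                                      1, 0, 1,
                                                      1, 1, 0), 3, 3)) {
  D <- diag(rowSums(W))         # degree matrix
  DS <- diag(1 / sqrt(diag(D))) # D^(-1/2)
  DS %*% (D - W) %*% DS         # dense analogue of DS %*% Lgen %*% DS
}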
dimRed/NEWS.md 0000644 0001762 0000144 00000001431 13464507204 012560 0 ustar ligges users # dimRed 0.2.1 and 0.2.2
* Bugfix releases to pass CRAN tests
# dimRed 0.2.0
* Added the R-Journal [paper](https://journal.r-project.org/archive/2018/RJ-2018-039/index.html "dimRed and coRanking") as Vignette
* Added UMAP
* Added NMF (thanks @topepo)
* Added the possibility to return other data such as distance
matrices/eigenvalues
* Added Autoencoder
* Added l1 PCA
* Added `getNDim`
* Added an `ndim` parameter to many quality functions.
* fixed bug in kPCA if inverse was not computable.
# dimRed 0.1.0
* Fixed kPCA predict function and documentation typos (@topepo #2)
* Added predict and inverse functions
* Added a function to extract rotation matrices from PCA and FastICA
# dimRed 0.0.3
* First version on CRAN
dimRed/MD5 0000644 0001762 0000144 00000015137 14153365332 012002 0 ustar ligges users 8fb6bb466b44be0ffffde3351503ed68 *DESCRIPTION
ae5a59342168733d9988af23c3ca4c2a *LICENSE
2dd5c5db35ffa4df3b59cc134a5382ad *NAMESPACE
796f16f5f6b46687e1983c5506c7f0e5 *NEWS.md
14bff94b02dc99cbfa7807b0b41e50e2 *R/autoencoder.R
d4e3e654a96439e20fb41d3e10985af0 *R/dataSets.R
52c76dd9c2140c9858f3552801ff6e68 *R/diffmap.R
9e17b49bd7954ac4f67b9cec33eac1b9 *R/dimRed.R
0b64904774785f20eea821044c0d004f *R/dimRedData-class.R
9012058299b382d56cb9a4cbbef96be2 *R/dimRedMethod-class.R
3fb979a5be13d3e18b4dde7ddf0506b0 *R/dimRedResult-class.R
e1b1c9e87513e7d4705876c8fc993b56 *R/drr.R
78dd0eb23aca3ff89346da8e47e4aa1e *R/embed.R
9dd870508ff1e5f4af4b9bc2b81979b6 *R/fastica.R
1ef49b08102bfb5f9fada19d0d6e928f *R/get_info.R
d0b0b1fb18b8534e0cfde6a8a0590551 *R/graph_embed.R
83fed7b8134e99d95ed390be193a9c15 *R/hlle.R
a8b7fea4d6c2d257fba19c3a4bbb2f8a *R/isomap.R
cbe329e1211e5af2490deba0e55a8d2c *R/kpca.R
e7aa15524f1ea9c939ca393835ecc192 *R/l1pca.R
d2ecb14abbe9b6b9f9845760a85f14f9 *R/leim.R
c04a9856b551df5080475fadcaafc241 *R/lle.R
d188fc370cdc5ea1094613ec0b3972da *R/loe.R
b60460a99bd739a082780629c886ff4c *R/mds.R
eb62d31d8645997200186731444ff667 *R/misc.R
c115a187e0c2cbbb12219ac0db73e66d *R/mixColorSpaces.R
2ddd29afc005c76d5b20abebc9c688e6 *R/nmds.R
792917d6a6d66001518646ea6defd557 *R/nnmf.R
f8a40db8dd043d54d357b3a916de8ffe *R/pca.R
f99544db6dff93b3f27f6cba2043695a *R/plot.R
0dd086f5a53d9118e2de93929ac895f9 *R/quality.R
e9a61868c99870bbcc03e8f6e4db0fc3 *R/rotate.R
546e6d5cf4d954c002b0e5d2031eb69f *R/soe.R
cc2c350bbfbca7b74f46d2f3a8c1f0a1 *R/tsne.R
a7ca7aa36b8ab73303354939b77029bb *R/umap.R
7e8d2039a421b5a819163feba460b5e7 *build/vignette.rds
941a4ba018d10bb388f78d2a47a93147 *inst/CITATION
2ab520458263e4a5fbd9ab5d1e14382e *inst/doc/dimensionality-reduction.R
08f8cffd3a53d83eefb2a7a18e3a8cae *inst/doc/dimensionality-reduction.Rnw
c5aa1da66d33de1503ce4b9c9a2de474 *inst/doc/dimensionality-reduction.pdf
bcd35ac3b1f65e2b07692ffb9a7155ae *man/AUC_lnK_R_NX-dimRedResult-method.Rd
065011a39f409eb9759f3001651523f8 *man/AutoEncoder-class.Rd
705d899d9095a33217dfa39df2323f96 *man/DRR-class.Rd
5519095e624e3e204b5e9208085e7fb4 *man/DiffusionMaps-class.Rd
abd2c7b51ada0b47af339c8357f58398 *man/DrL-class.Rd
90711cddd66d9c9ba710483e9e5cf5d2 *man/FastICA-class.Rd
e253b808cc55932ee2e93b4e2192eceb *man/FruchtermanReingold-class.Rd
7bad8e8a2100f98ca9c7e48a1e2ee614 *man/HLLE-class.Rd
e0eb458fdbd0c33dbb7ca4a7ab17f910 *man/Isomap-class.Rd
abdb364c81b67ab0bb3d6ef690284fce *man/KamadaKawai-class.Rd
e66ca8bdfe51f81a92791247fe1d8d2e *man/LCMC-dimRedResult-method.Rd
0e8eaaf5982cb1cda90011cf41da125f *man/LLE-class.Rd
34a958e7e8f907658558535107b36ca2 *man/LaplacianEigenmaps-class.Rd
9687a6e405057c15012ede04b316a198 *man/MDS-class.Rd
594258b8dc019d834d58f49f53e36368 *man/NNMF-class.Rd
329c88ea12f4db10f2e8d2b7af19a249 *man/PCA-class.Rd
1a0538d0bd1f1e096014b59b6545cb0f *man/PCA_L1-class.Rd
d2a64ee29016f6e9c737fecc37608b44 *man/Q_NX-dimRedResult-method.Rd
673691e8e68075b3d22249960a58ed9c *man/Q_global-dimRedResult-method.Rd
db781cbd1ca24c2333ffb1a19c33d0ab *man/Q_local-dimRedResult-method.Rd
c80cf369e22ae6e24de57a6ecfa3439a *man/R_NX-dimRedResult-method.Rd
6182a3d84c9cce8e1e8fbb4c22fa5d8f *man/UMAP-class.Rd
718f4e21d3332957a02215ba01e9125f *man/as.data.frame.Rd
5b85a0e6b1a89a9660bd9e48ab4dd58f *man/as.dimRedData.Rd
7d8095baec2b3d9adafadf37e6048785 *man/cophenetic_correlation-dimRedResult-method.Rd
419144b1768266c642462c3c959788ff *man/dataSets.Rd
f1a7b217df5585e3800a558d72a3df7d *man/dimRed-package.Rd
25963f304d114b9142d96f500419b841 *man/dimRedData-class.Rd
addaab77dc1c4ab13e424987a84ee34b *man/dimRedMethod-class.Rd
3eedfbdf1b1133896a9ef58ce47a6d1b *man/dimRedMethodList.Rd
dd02f619260c835191b8fd4d36d85fb1 *man/dimRedResult-class.Rd
8245ced1f3f0de4235b11f84b1a54024 *man/distance_correlation-dimRedResult-method.Rd
a6c062760361e192c7b614bbe9db1b2b *man/embed.Rd
af8863f32422517f0758352f5773d916 *man/getData.Rd
beb9c9c0b2b9069d2cfde3a7ef78f810 *man/getDimRedData.Rd
3d700516a933d7d8fe95cebd9e3e0365 *man/getMeta.Rd
966f27a61120f340e6967930d5a4dae4 *man/getNDim.Rd
54af2a8843ad16e4c8daf4b75f80fd51 *man/getOrgData.Rd
47659991b1edf53bbf1a544d8cbfc231 *man/getOtherData.Rd
b6a82a60362da823b309497c27b54d61 *man/getPars.Rd
2490bb8ba610ace97954566d69493448 *man/getRotationMatrix.Rd
6df2e37941539cd6055499ba4a0eca83 *man/installSuggests.Rd
a4bc899053dedf014db21d12324b9456 *man/kPCA-class.Rd
e6fda0f5f8483f08ff20d467eab06cac *man/makeKNNgraph.Rd
debb38427c3c0f1d3c3559ad66a3269e *man/maximize_correlation-dimRedResult-method.Rd
889d398a1978c51f13a5319bc6e77ce7 *man/mean_R_NX-dimRedResult-method.Rd
8e8c844dc330706d5ed9331782d6912a *man/mixColorRamps.Rd
b0909632cd8b7d9b9c893699ff10758a *man/nMDS-class.Rd
d129fc72b1b204e139d23885ccd064bc *man/ndims.Rd
9b29258e3f3c712a2722c57e395aeb53 *man/plot.Rd
eb1cb8345ec44a88cb210318aca2f346 *man/plot_R_NX.Rd
ecdac99d07501417ba48b07a28ec5f5d *man/print.Rd
3cfba09d8ba772ac2b17cf8240391f77 *man/quality.Rd
1fe05b1ae61fd475a4b39db0176ae977 *man/reconstruction_error-dimRedResult-method.Rd
bc566baaf50a52c8765a4ff8803e6a6b *man/reconstruction_rmse-dimRedResult-method.Rd
35017e641ea3990b2f39deb7f19e1621 *man/tSNE-class.Rd
b8152e44f03d5cfa9046bef699bea694 *man/total_correlation-dimRedResult-method.Rd
8ace2d0542826960bc7b59ac72f45236 *tests/testthat.R
5d2a1830e69ab3855dee09cbc64d1788 *tests/testthat/test_HLLE.R
38f08498979faa5581533adbd4bffbcd *tests/testthat/test_ICA.R
3268fc53ab376b48bb9f6e982edf8655 *tests/testthat/test_NNMF.R
ac423f8fe5975d8fe26d6e7a1b988da9 *tests/testthat/test_PCA.R
81ac0e38a036949d78ce64a071934063 *tests/testthat/test_PCA_L1.R
977a45bcedfe70de65d7bb0627c6dbbd *tests/testthat/test_all.R
aea4f4374a64f0a754351aa040995d3d *tests/testthat/test_autoencoder.R
eb2b0ed077c46c957b4069250febb644 *tests/testthat/test_dataSets.R
8edfeba82c3cf5d4f8d4155988d32ba4 *tests/testthat/test_diffmap.R
f9ecd97a5a0d177661d7ee22a051bf19 *tests/testthat/test_dimRedData.R
c9add493b83df66c3509203545016ea2 *tests/testthat/test_dimRedMethod-class.R
ff7b55fe97717cdc7f3dc140c110dc5e *tests/testthat/test_dimRedResult.R
5f32a62cc46be17283bb8acb2540d627 *tests/testthat/test_drr.R
84413e591356fb49d6b15763c66fd0e9 *tests/testthat/test_embed.R
e9b780f2fe15468d89d547e41dba6ca1 *tests/testthat/test_isomap.R
d6b6bf63d34d8ff812d45ba834032333 *tests/testthat/test_kPCA.R
a82999dfbdecc5dade5a2804181fc1a4 *tests/testthat/test_misc.R
c97bf6676400f0a0e2c3886bdf877765 *tests/testthat/test_quality.R
1280e99c0257255b1346627c897dd23b *tests/testthat/test_umap.R
085333b04fa2e0963ce84308776bd7c1 *vignettes/Makefile
6b21c813361c55cf8b7361b7a0b3bacf *vignettes/bibliography.bib
b3b0dc9c51a39436ebbc3895a9b2f9f6 *vignettes/classification_tree.tex
08f8cffd3a53d83eefb2a7a18e3a8cae *vignettes/dimensionality-reduction.Rnw
dimRed/inst/ 0000755 0001762 0000144 00000000000 14153220135 012427 5 ustar ligges users dimRed/inst/doc/ 0000755 0001762 0000144 00000000000 14153220135 013174 5 ustar ligges users dimRed/inst/doc/dimensionality-reduction.R 0000644 0001762 0000144 00000015004 14153217725 020354 0 ustar ligges users ## ----"pca_isomap_example",include=FALSE,fig.width=4,fig.height=4--------------
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
library(dimRed); library(ggplot2); #library(dplyr); library(tidyr)
## define which methods to apply
embed_methods <- c("Isomap", "PCA")
## load test data set
data_set <- loadDataSet("3D S Curve", n = 1000)
## apply dimensionality reduction
data_emb <- lapply(embed_methods, function(x) embed(data_set, x))
names(data_emb) <- embed_methods
## plot data set, embeddings, and quality analysis
## plot(data_set, type = "3vars")
## lapply(data_emb, plot, type = "2vars")
## plot_R_NX(data_emb)
add_label <- function(label)
grid::grid.text(label, 0.2, 1, hjust = 0, vjust = 1,
gp = grid::gpar(fontface = "bold",
cex = 1.5))
## pdf('~/phd/text/dimRedPackage/plots/plot_example.pdf', width = 4, height = 4)
## plot the results
plot(data_set, type = "3vars", angle = 15, mar = c(3, 3, 0, 0), box = FALSE, grid = FALSE, pch = 16)
add_label("a")
par(mar = c(4, 4, 0, 0) + 0.1, bty = "n", las = 1)
plot(data_emb$Isomap, type = "2vars", pch = 16)
add_label("b")
plot(data_emb$PCA, type = "2vars", pch = 16)
add_label("d")
## calculate quality scores
print(
plot_R_NX(data_emb) +
theme(legend.title = element_blank(),
legend.position = c(0.5, 0.1),
legend.justification = c(0.5, 0.1))
)
add_label("c")
} else {
# These cannot all be plot(1:10)!!! It's a mystery to me.
plot(1:10)
barplot(1:10)
hist(1:10)
plot(1:10)
}
## ----eval=FALSE---------------------------------------------------------------
# ## define which methods to apply
# embed_methods <- c("Isomap", "PCA")
# ## load test data set
# data_set <- loadDataSet("3D S Curve", n = 1000)
# ## apply dimensionality reduction
# data_emb <- lapply(embed_methods, function(x) embed(data_set, x))
# names(data_emb) <- embed_methods
# ## figure \ref{fig:plotexample}a, the data set
# plot(data_set, type = "3vars")
# ## figures \ref{fig:plotexample}b (Isomap) and \ref{fig:plotexample}d (PCA)
# lapply(data_emb, plot, type = "2vars")
# ## figure \ref{fig:plotexample}c, quality analysis
# plot_R_NX(data_emb)
## ----include=FALSE------------------------------------------------------------
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
library(dimRed)
library(cccd)
## Load data
ss <- loadDataSet("3D S Curve", n = 500)
## Parameter space
kk <- floor(seq(5, 100, length.out = 40))
## Embedding over parameter space
emb <- lapply(kk, function(x) embed(ss, "Isomap", knn = x))
## Quality over embeddings
qual <- sapply(emb, function(x) quality(x, "Q_local"))
## Find best value for K
ind_max <- which.max(qual)
k_max <- kk[ind_max]
add_label <- function(label){
par(xpd = TRUE)
b = par("usr")
text(b[1], b[4], label, adj = c(0, 1), cex = 1.5, font = 2)
par(xpd = FALSE)
}
names(qual) <- kk
}
## ----"select_k",include=FALSE,fig.width=11,fig.height=5-----------------------
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
par(mfrow = c(1, 2),
mar = c(5, 4, 0, 0) + 0.1,
oma = c(0, 0, 0, 0))
plot(kk, qual, type = "l", xlab = "k", ylab = expression(Q[local]), bty = "n")
abline(v = k_max, col = "red")
add_label("a")
plot(ss, type = "3vars", angle = 15, mar = c(3, 3, 0, 0), box = FALSE, grid = FALSE, pch = 16)
add_label("b")
} else {
plot(1:10)
plot(1:10)
}
## ----"knngraphs",include=FALSE,fig.width=8,fig.height=3-----------------------
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
par(mfrow = c(1, 3),
mar = c(5, 4, 0, 0) + 0.1,
oma = c(0, 0, 0, 0))
add_knn_graph <- function(ind) {
nn1 <- nng(ss@data, k = kk[ind])
el <- get.edgelist(nn1)
segments(x0 = emb[[ind]]@data@data[el[, 1], 1],
y0 = emb[[ind]]@data@data[el[, 1], 2],
x1 = emb[[ind]]@data@data[el[, 2], 1],
y1 = emb[[ind]]@data@data[el[, 2], 2],
col = "#00000010")
}
plot(emb[[2]]@data@data, type = "n", bty = "n")
add_knn_graph(2)
points(emb[[2]]@data@data, col = dimRed:::colorize(ss@meta),
pch = 16)
add_label("c")
plot(emb[[ind_max]]@data@data, type = "n", bty = "n")
add_knn_graph(ind_max)
points(emb[[ind_max]]@data@data, col = dimRed:::colorize(ss@meta),
pch = 16)
add_label("d")
plot(emb[[length(emb)]]@data@data, type = "n", bty = "n")
add_knn_graph(length(emb))
points(emb[[length(emb)]]@data@data, col = dimRed:::colorize(ss@meta),
pch = 16)
add_label("e")
} else {
plot(1:10)
plot(1:10)
plot(1:10)
}
## ----eval=FALSE---------------------------------------------------------------
# ## Load data
# ss <- loadDataSet("3D S Curve", n = 500)
# ## Parameter space
# kk <- floor(seq(5, 100, length.out = 40))
# ## Embedding over parameter space
# emb <- lapply(kk, function(x) embed(ss, "Isomap", knn = x))
# ## Quality over embeddings
# qual <- sapply(emb, function(x) quality(x, "Q_local"))
# ## Find best value for K
# ind_max <- which.max(qual)
# k_max <- kk[ind_max]
## ----"plot_quality",include=FALSE---------------------------------------------
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
embed_methods <- dimRedMethodList()
quality_methods <- c("Q_local", "Q_global", "AUC_lnK_R_NX",
"cophenetic_correlation")
iris_data <- loadDataSet("Iris")
quality_results <- matrix(
NA, length(embed_methods), length(quality_methods),
dimnames = list(embed_methods, quality_methods)
)
embedded_data <- list()
for (e in embed_methods) {
try(embedded_data[[e]] <- embed(iris_data, e))
for (q in quality_methods)
try(quality_results[e,q] <- quality(embedded_data[[e]], q))
}
quality_results <- quality_results[order(rowMeans(quality_results)), ]
palette(c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e"))
col_hsv <- rgb2hsv(col2rgb(palette()))
## col_hsv["v", ] <- col_hsv["v", ] * 3 / 1
palette(hsv(col_hsv["h",], col_hsv["s",], col_hsv["v",]))
par(mar = c(2, 8, 0, 0) + 0.1)
barplot(t(quality_results), beside = TRUE, col = 1:4,
legend.text = quality_methods, horiz = TRUE, las = 1,
cex.names = 0.85,
args.legend = list(x = "topleft", bg = "white", cex = 0.8))
} else {
plot(1:10)
}
## ----eval=FALSE---------------------------------------------------------------
# embed_methods <- dimRedMethodList()
# quality_methods <- c("Q_local", "Q_global", "AUC_lnK_R_NX",
# "cophenetic_correlation")
# scurve <- loadDataSet("3D S Curve", n = 2000)
# quality_results <- matrix(
# NA, length(embed_methods), length(quality_methods),
# dimnames = list(embed_methods, quality_methods)
# )
#
# embedded_data <- list()
# for (e in embed_methods) {
# embedded_data[[e]] <- embed(scurve, e)
# for (q in quality_methods)
# try(quality_results[e, q] <- quality(embedded_data[[e]], q))
# }
dimRed/inst/doc/dimensionality-reduction.Rnw 0000644 0001762 0000144 00000173524 13371631672 020737 0 ustar ligges users \documentclass{article}
%\VignetteEngine{knitr::knitr}
%\VignetteIndexEntry{Dimensionality Reduction}
%\VignetteKeyword{Dimensionality Reduction}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{hyperref}
\usepackage{amsmath,amssymb}
\usepackage{booktabs}
\usepackage{tikz}
\usetikzlibrary{trees}
\usepackage[sectionbib,round]{natbib}
\title{\pkg{dimRed} and \pkg{coRanking}---Unifying Dimensionality Reduction in R}
\author{Guido Kraemer \and Markus Reichstein \and Miguel D.\ Mahecha}
% these are taken from RJournal.sty:
\makeatletter
\DeclareRobustCommand\code{\bgroup\@noligs\@codex}
\def\@codex#1{\texorpdfstring%
{{\normalfont\ttfamily\hyphenchar\font=-1 #1}}%
{#1}\egroup}
\newcommand{\kbd}[1]{{\normalfont\texttt{#1}}}
\newcommand{\key}[1]{{\normalfont\texttt{\uppercase{#1}}}}
\DeclareRobustCommand\samp{`\bgroup\@noligs\@sampx}
\def\@sampx#1{{\normalfont\texttt{#1}}\egroup'}
\newcommand{\var}[1]{{\normalfont\textsl{#1}}}
\let\env=\code
\newcommand{\file}[1]{{`\normalfont\textsf{#1}'}}
\let\command=\code
\let\option=\samp
\newcommand{\dfn}[1]{{\normalfont\textsl{#1}}}
% \acronym is effectively disabled since not used consistently
\newcommand{\acronym}[1]{#1}
\newcommand{\strong}[1]{\texorpdfstring%
{{\normalfont\fontseries{b}\selectfont #1}}%
{#1}}
\let\pkg=\strong
\newcommand{\CRANpkg}[1]{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}}%
\let\cpkg=\CRANpkg
\newcommand{\ctv}[1]{\href{https://CRAN.R-project.org/view=#1}{\emph{#1}}}
\newcommand{\BIOpkg}[1]{\href{https://www.bioconductor.org/packages/release/bioc/html/#1.html}{\pkg{#1}}}
\makeatother
\begin{document}
\maketitle
\abstract{ %
This document is based on the manuscript of \citet{kraemer_dimred_2018} which
was published in the R-Journal and has been modified and extended to fit the
format of a package vignette and to match the extended functionality of the
\pkg{dimRed} package.
``Dimensionality reduction'' (DR) is a widely used approach to find low
dimensional and interpretable representations of data that are natively
embedded in high-dimensional spaces. %
DR can be realized by a plethora of methods with different properties,
objectives, and, hence, (dis)advantages. The resulting low-dimensional data
embeddings are often difficult to compare with objective criteria. %
Here, we introduce the \CRANpkg{dimRed} and \CRANpkg{coRanking} packages for
the R language. %
These open source software packages enable users to easily access multiple
classical and advanced DR methods using a common interface. %
The packages also provide quality indicators for the embeddings and easy
visualization of high dimensional data. %
The \pkg{coRanking} package provides the functionality for assessing DR methods in the
co-ranking matrix framework. %
In tandem, these packages allow for uncovering complex structures in high
dimensional data. %
Currently 15 DR methods are available in the package, some of which were not
previously available to R users. %
Here, we outline the \pkg{dimRed} and \pkg{coRanking} packages and
make the implemented methods understandable to the interested reader. %
}
\section{Introduction}
\label{sec:intro}
Dimensionality Reduction (DR) essentially aims to find low dimensional
representations of data while preserving their key properties. %
Many methods exist in the literature, optimizing different criteria: %
maximizing the variance or the statistical independence of the projected data, %
minimizing the reconstruction error under different constraints, %
or optimizing for different error metrics, %
just to name a few. %
Choosing an inadequate method may imply that much of the underlying structure
remains undiscovered. %
Often the structures of interest in a data set can be well represented by fewer
dimensions than exist in the original data. %
Data compression of this kind has the additional benefit of making the encoded
information easier to interpret and to use in further analysis tasks
like classification or regression. %
For example, the morphology of a plant's leaves, stems, and seeds reflect the
environmental conditions the species usually grow in (e.g.,\ plants with large
soft leaves will never grow in a desert but might have an advantage in a humid
and shadowy environment). %
Because the morphology of the entire plant depends on the environment, many
morphological combinations will never occur in nature and the morphological
space of all plant species is tightly constrained. %
\citet{diaz_global_2016} found that out of six observed morphological characteristics
only two embedding dimensions were enough to represent three quarters of the
total observed variability. %
DR is a widely used approach for the detection of structure in multivariate
data, and has applications in a variety of fields. %
In climatology, DR is used to find the modes of some phenomenon, e.g.,\ the first
Empirical Orthogonal Function of monthly mean sea surface temperature of a given
region over the Pacific is often linked to the El Ni\~no Southern
Oscillation or
ENSO \citep[e.g.,\ ][]{hsieh_nonlinear_2004}. %
In ecology the comparison of sites with different species abundances is a
classical multivariate problem: each observed species adds an extra dimension,
and because species are often bound to certain habitats, there is a lot of
redundant information. Using DR is a popular technique to represent the sites in
few dimensions, e.g.,\ \citet{aart_distribution_1972} matches wolf spider
communities to habitat and \citet{morrall_soil_1974} matches soil fungi data to
soil types. (In ecology the general name for DR is ordination or indirect
gradient analysis.) %
Today, hyperspectral satellite imagery collects so many bands that it is very
difficult to analyze and interpret the data directly. %
Summarizing the data into a set of few, yet independent, components is one way to
reduce complexity \citep[e.g.,\ see][]{laparra_dimensionality_2015}. %
DR can also be used to visualize the interiors of deep neural networks
\citep[e.g.,\ see ][]{han_deep_2016}, where the high dimensionality comes from
the large number of weights used in a neural network and convergence can be
visualized by means of DR\@. %
We could find many more example applications here but this is not the main focus
of this publication. %
The difficulty in applying DR is that each DR method is designed to maintain
certain aspects of the original data and therefore may be appropriate for one
task and inappropriate for another. %
Most methods also have parameters to tune and follow different assumptions. The
quality of the outcome may strongly depend on their tuning, which adds
additional complexity. %
DR methods can be modeled after physical models with attracting and repelling
forces (Force Directed Methods), projections onto low dimensional planes (PCA,
ICA), divergence of statistical distributions (SNE family), or the reconstruction
of local spaces or points by their neighbors (LLE). %
As an example of how changing internal parameters of a method can have a great
impact, the breakthrough for Stochastic Neighborhood Embedding (SNE) methods
came when a Student's $t$-distribution was used instead of a normal distribution
to model probabilities in low dimensional space to avoid the ``crowding
problem'', that is,\ a sphere in high dimensional space has a much larger volume
than in low dimensional space and may contain too many points to be represented
accurately in few dimensions. %
The $t$-distribution allows medium distances to be accurately represented in
few dimensions by larger distances, due to its heavier tails. %
The result is called $t$-SNE; it is especially good at preserving local
structures in very few dimensions. This feature made $t$-SNE useful for a wide
array of data visualization tasks, and the method became much more popular than
standard SNE (around six times more citations of
\citet{van_der_maaten_visualizing_2008} compared to
\citet{hinton_stochastic_2003} in Scopus \citep{noauthor_scopus_nodate}). %
There are a number of software packages for other languages providing collections of methods: In
Python there is scikit-learn \citep{scikit-learn}, which contains a module for
DR. In Julia we currently find ManifoldLearning.jl for nonlinear and
MultivariateStats.jl for linear DR methods. %
There are several toolboxes for DR implemented in Matlab
\citep{van_der_maaten_dimensionality_2009,
arenas-garcia_kernel_2013}. The Shogun
toolbox \citep{soeren_sonnenburg_2017_1067840} implements a variety of methods for
dimensionality reduction in C++ and offers bindings for many common high level
languages (including R, but the installation is anything but simple, as
there is no CRAN package). %
However, there is no comprehensive package for R and none of the former
mentioned software packages provides means to consistently compare the quality
of different methods for DR. %
For many applications it can be difficult to objectively find the right method
or parameterization for the DR task. %
This paper presents the \pkg{dimRed} and \pkg{coRanking} packages for
the popular programming language R. Together, they
provide a standardized interface to various dimensionality reduction methods and quality
metrics for embeddings. They are implemented using the S4 class system
of R, making the packages
both easy to use and to extend.
The design goal for these packages is to enable researchers, who may not necessarily be experts in DR, to
apply the methods in their own work and to objectively identify the
most suitable
methods for their data. %
This paper provides an overview of the methods collected in the
packages and contains examples as to how
to use the packages. %
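As a first impression, the basic workflow consists of loading or constructing a
\code{dimRedData} object, embedding it with a method chosen by name, and scoring
the result. The following minimal sketch uses only functions that also appear in
the examples later in this vignette: %
<<eval=FALSE>>=
library(dimRed)
## load an example data set shipped with the package
dat <- loadDataSet("3D S Curve", n = 500)
## embed it, selecting the method by name
emb <- embed(dat, "PCA", ndim = 2)
## assess the embedding with a quality criterion
quality(emb, "Q_local")
@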
The notation in this paper will be as follows: $X = [x_i]_{1\leq i \leq n}^T \in
\mathbb{R}^{n\times p}$, and the observations $x_i \in \mathbb{R}^p$. %
These observations may be transformed prior to the dimensionality reduction
step (e.g.,\ centering and/or standardization) resulting in $X' = [x'_i]_{1\leq i
\leq n}^T \in \mathbb{R}^{n\times p}$. %
A DR method then embeds each vector in $X'$ onto a vector in $Y = [y_i]_{1\leq i
\leq n}^T \in \mathbb{R}^{n\times q}$ with $y_i \in \mathbb{R}^q$, ideally
with $q \ll p$. %
Some methods provide an explicit mapping $f(x'_i) = y_i$. Some even offer an
inverse mapping $f^{-1}(y_{i}) = \hat x'_{i}$, such that one can reconstruct a
(usually approximate) sample from the low-dimensional representation. %
For some methods, pairwise distances between points are needed; we set $d_{ij} =
d(x_{i}, x_{j})$ and $\hat{d}_{ij} = d(y_i, y_j)$, where $d$ is some appropriate
distance function.
When referring to \code{functions} in the \pkg{dimRed} package or base R,
simply the function name is mentioned; functions from other packages are
referenced with their namespace, as in \code{package::function}.
\begin{figure}[htbp]
\centering
\input{classification_tree.tex}
\caption{%
Classification of dimensionality reduction methods. Methods
in bold face are implemented in \pkg{dimRed}.
Modified from \citet{van_der_maaten_dimensionality_2009}.
}\label{fig:classification}
\end{figure}
\section{Dimensionality Reduction Methods}
\label{sec:dimredtec}
In the following section we do not aim for an exhaustive explanation to every
method in \pkg{dimRed} but rather to provide a general idea on how the
methods work. %
An overview and classification of the most commonly used DR methods can be found
in Figure~\ref{fig:classification}.
In all methods, parameters have to be optimized or decisions have to be made,
even if it is just about the preprocessing steps of data. %
The \pkg{dimRed} package tries to make the optimization process for parameters as easy as
possible, but, if possible, the parameter space should be narrowed down using
prior knowledge. %
Often decisions can be made based on theoretical knowledge. For example,\ sometimes an
analysis requires data to be kept in their original scales and sometimes this is
exactly what has to be avoided as when comparing different physical
units. %
Sometimes decisions based on the experience of others can be made, e.g.,\ the
Gaussian kernel is probably the most universal kernel and therefore should be
tested first if there is a choice. %
All methods presented here have the embedding dimensionality, $q$, as a
parameter (or \code{ndim} as a parameter for \code{embed}). %
For methods based on eigenvector decomposition, the result generally does not
depend on the number of dimensions, i.e.,\ the first dimension will be the same,
no matter if we decide to calculate only two dimensions or more. %
If more dimensions are added, more information is maintained, the first
dimension is the most important and higher dimensions are successively less
important. %
This means that a method based on eigenvalue decomposition only has to be run
once if one wishes to compare the embedding in different dimensions. %
In optimization based methods this is generally not the case: the number of
dimensions has to be chosen a priori, embeddings in 2 and in 3 dimensions may
vary significantly, and there is no ordered importance of dimensions. %
This means that comparing dimensions of optimization-based methods is
computationally much more expensive. %
We try to give the computational complexity of the methods, although, depending
on the actual implementation, computation times may differ greatly. %
R is an interpreted language, so all parts of an algorithm that are implemented
in R will tend to be slow compared to methods that call efficient
implementations in a compiled language. %
Methods where most of the computing time is spent on eigenvalue decomposition
have very efficient implementations, as R uses optimized linear algebra
libraries. However, eigenvalue decomposition itself does not scale very well in
naive implementations ($\mathcal{O}(n^3)$).
\subsection{PCA}
\label{sec:pca}
Principal Component Analysis (PCA) is the most basic technique for reducing
dimensions. It dates back to \citet{pearson_lines_1901}. PCA finds a linear
projection ($U$) of the high dimensional space into a low dimensional space $Y =
XU$, maintaining maximum variance of the data. It is based on solving the
following eigenvalue problem:
\begin{equation}
(C_{XX}-\lambda_k I)u_k=0\label{eq:pca}
\end{equation}
where $C_{XX} = \frac 1 n X^TX$ is the covariance matrix, $\lambda_k$ and $u_k$
are the $k$-th eigenvalue and eigenvector, and $I$ is the identity matrix. %
The equation has several solutions for different values of $\lambda_k$ (leaving
aside the trivial solution $u_k = 0$). %
PCA can be efficiently applied to large data sets, because it computationally
scales as $\mathcal{O}(np^2 + p^3)$, that is, linearly with the number of
samples, and because R uses specialized linear algebra libraries for such
computations.
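To make Equation~\ref{eq:pca} concrete, the principal axes found by
\code{prcomp} can be recovered from an eigenvalue decomposition of the
covariance matrix. A small sketch (note that \code{cov} and \code{prcomp}
normalize by $1/(n-1)$ instead of $1/n$, which rescales the $\lambda_k$ but
leaves the $u_k$ unchanged): %
<<eval=FALSE>>=
X <- as.matrix(iris[, 1:4])
## eigenvectors of the covariance matrix C_XX ...
e <- eigen(cov(X))
## ... equal the rotation returned by prcomp, up to sign
p <- prcomp(X)
max(abs(abs(e$vectors) - abs(p$rotation)))  # should be close to zero
@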
PCA is a rotation around the origin, and forward and inverse mappings exist. %
PCA may suffer from a scale problem, i.e.,\ one variable may dominate the
variance simply because it is on a larger scale; to remedy this, the data can
be scaled to zero mean and unit variance, depending on whether this is
necessary or desired for the use case. %
Base R implements PCA in the functions \code{prcomp} and \code{princomp}, but
several other implementations exist, e.g.,\ \BIOpkg{pcaMethods} from Bioconductor,
which implements versions of PCA that can deal with missing data. %
The \pkg{dimRed} package wraps \code{prcomp}.
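In \pkg{dimRed}, PCA is selected by name in \code{embed}, and the projection
matrix $U$ can be extracted with \code{getRotationMatrix}. A sketch, assuming
that the centering and scaling arguments are forwarded to \code{prcomp}: %
<<eval=FALSE>>=
dat <- loadDataSet("3D S Curve", n = 500)
## `center` and `scale.` are assumed to be forwarded to prcomp
emb <- embed(dat, "PCA", ndim = 2, center = TRUE, scale. = TRUE)
U <- getRotationMatrix(emb)  # the linear projection used in Y = XU
@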
\subsection{kPCA}
\label{sec:kpca}
Kernel Principal Component Analysis (kPCA) extends PCA to deal with nonlinear
dependencies among variables. %
The idea behind kPCA is to map the data into a high dimensional space using a
possibly non-linear function $\phi$ and then to perform a PCA in this high
dimensional space. %
Some mathematical tricks are used for efficient computation. %
If the columns of X are centered around $0$, then the principal components can
also be computed from the inner product matrix $K = XX^T$. %
Due to this way of calculating a PCA, we do not need to explicitly map all points
into the high dimensional space and do the calculations there; it is enough to
obtain the inner product matrix or kernel matrix $K \in \mathbb{R}^{n\times n}$
of the mapped points \citep{scholkopf_nonlinear_1998}. %
Here is an example calculating the kernel matrix using a Gaussian kernel:
\begin{equation}\label{eq:gauss}
K_{ij} = \phi(x_i)^T \phi(x_j) = \kappa(x_i, x_j) = \exp\left(
-\frac{\| x_i- x_j\|^2}{2 \sigma^2}
\right),
\end{equation}
where $\sigma$ is a length scale parameter accounting for the width of the
kernel. %
The other trick used is known as the ``representers theorem.'' The interested
reader is referred to \citet{scholkopf_generalized_2001}.
The kPCA method is very flexible and there exist many kernels for special
purposes. The most common kernel function is the Gaussian kernel
(Equation\ \ref{eq:gauss}). %
The flexibility comes at the price that the method has to be finely
tuned for the data set because some parameter combinations are simply
unsuitable for certain data. %
The method is not suitable for very large data sets, because memory
scales with $\mathcal{O}(n^2)$ and computation time with
$\mathcal{O}(n^3)$. %
Diffusion Maps, Isomap, Locally Linear Embedding, and some other techniques can
be seen as special cases of kPCA; in that case, an out-of-sample extension using the Nyström
formula can be applied \citep{bengio_learning_2004}. %
This also enables applications to larger data sets, where an embedding is
trained on a sub-sample of the data and the remaining points are embedded using
the Nyström formula.
Kernel PCA in R is implemented in the \CRANpkg{kernlab} package using the function
\code{kernlab::kpca}, and supports a number of kernels and
user defined functions. For details see the help page for \code{kernlab::kpca}.
The \pkg{dimRed} package wraps \code{kernlab::kpca} but additionally
provides forward and inverse methods \citep{bakir_learning_2004} which can be
used to fit out-of-sample data or to visualize the transformation of the data
space. %
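The forward and inverse mappings are exposed through the \code{predict} and
\code{inverse} generics. A sketch, assuming that the kernel arguments are
forwarded to \code{kernlab::kpca} and that both generics accept
\code{dimRedData} objects: %
<<eval=FALSE>>=
dat <- loadDataSet("3D S Curve", n = 500)
## kernel and kpar are assumed to be forwarded to kernlab::kpca
emb <- embed(dat, "kPCA", kernel = "rbfdot", kpar = list(sigma = 0.1))
## forward mapping: project some points into the existing embedding
y     <- predict(emb, dat[1:5])
## inverse mapping: reconstruct them from their embedded coordinates
x_hat <- inverse(emb, getDimRedData(emb)[1:5])
@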
\subsection{Classical Scaling}
\label{sec:classscale}
What today is called Classical Scaling was first introduced by
\citet{torgerson_multidimensional_1952}. It uses an eigenvalue decomposition of
a transformed distance matrix to find an embedding that maintains the distances
of the distance matrix. %
The method works because of the same reason that kPCA works, i.e.,\ classical
scaling can be seen as a kPCA with kernel $x^Ty$. %
A matrix of Euclidean distances can be transformed into an inner product matrix
by some simple transformations and therefore yields the same result as a
PCA\@. %
Classical scaling is conceptually more general than PCA in that arbitrary
distance matrices can be used, i.e.,\ the method does not even need the original
coordinates, just a distance matrix $D$. %
Then it tries to find an embedding $Y$ so that $\hat d_{ij}$ is as similar to
$d_{ij}$ as possible.
The disadvantage is that it is computationally much more demanding, i.e.,\
an eigenvalue decomposition of an $n\times n$ matrix has to be computed.
This step requires $\mathcal{O}(n^2)$ memory and $\mathcal{O}(n^3)$
computation time, while PCA requires only the eigenvalue decomposition
of a $p\times p$ matrix, and usually $n \gg p$. %
R implements classical scaling in the \code{cmdscale}
function. %
The \pkg{dimRed} package wraps \code{cmdscale} and allows the specification
of arbitrary distance functions for calculating the distance matrix. Additionally
a forward method is implemented.
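A sketch of classical scaling with a user-supplied distance function; that the
distance function is passed as the parameter \code{d} is an assumption here: %
<<eval=FALSE>>=
dat <- loadDataSet("3D S Curve", n = 500)
## `d` is assumed to be the name of the distance-function parameter
emb <- embed(dat, "MDS", ndim = 2, d = stats::dist)
@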
\subsection{Isomap}
\label{sec:isomap}
As Classical Scaling can deal with arbitrarily defined distances,
\citet{tenenbaum_global_2000} suggested to approximate the
structure of the manifold by using geodesic distances. %
In practice, a graph is created by either keeping only
the connections between every point and its $k$ nearest neighbors to
produce a $k$-nearest neighbor graph ($k$-NNG), or simply by keeping all
distances smaller than a value $\varepsilon$ producing an
$\varepsilon$-neighborhood graph ($\varepsilon$-NNG). %
Geodesic distances are obtained by recording the distance on the
graph and classical scaling is used to find an embedding in fewer
dimensions. This leads to an ``unfolding'' of possibly convoluted
structures (see Figure~\ref{fig:knn}).
Isomap's computational cost is dominated by the eigenvalue decomposition and
therefore scales with $\mathcal{O}(n^3)$. %
Other related techniques can use more efficient algorithms
because the distance matrix becomes sparse due to a different preprocessing.
In R, Isomap is implemented in the \CRANpkg{vegan} package. The
\code{vegan::isomap} calculates an Isomap embedding and \code{vegan::isomapdist}
calculates a geodesic distance matrix. %
The \pkg{dimRed} package uses its own implementation. This implementation is
faster mainly due to using a KD-tree for the nearest neighbor search (from the
\CRANpkg{RANN} package) and to a faster implementation for the shortest path
search in the $k$-NNG (from the \CRANpkg{igraph} package). %
The implementation in \pkg{dimRed} also includes a forward method that can be
used to train the embedding on a subset of data points and then use these points
to approximate an embedding for the remaining points. This technique is
generally referred to as landmark Isomap \citep{de_silva_sparse_2004}. %
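The neighborhood size is controlled by the \code{knn} parameter, as in the
examples later in this vignette. A landmark-style sketch that trains on a
subset and extends the embedding to the remaining points (the \code{predict}
call for out-of-sample points is an assumed interface): %
<<eval=FALSE>>=
dat <- loadDataSet("3D S Curve", n = 500)
emb <- embed(dat, "Isomap", knn = 8, ndim = 2)
## train on a subset, then embed the remaining points (assumed interface)
emb_sub <- embed(dat[1:300], "Isomap", knn = 8)
y_rest  <- predict(emb_sub, dat[301:500])
@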
\subsection{Locally Linear Embedding}
\label{sec:lle}
Points that lie on a manifold in a high dimensional space can be
reconstructed through linear combinations of their neighborhoods if the
manifold is well sampled and the neighborhoods lie on a locally linear
patch. %
These reconstruction weights, $W$, are the same in the high dimensional
space as the internal coordinates of the manifold. %
Locally Linear Embedding \citep[LLE; ][]{roweis_nonlinear_2000} is a
technique that constructs a weight matrix
$W \in \mathbb{R}^{n\times n}$ with elements $w_{ij}$ so that
\begin{equation}
\sum_{i=1}^n \bigg\| x_i-
\sum_{j=1}^{n} w_{ij}x_j \bigg\|^2\label{eq:lle}
\end{equation}
is minimized under the constraint that $w_{ij} = 0 $ if $x_j$ does not belong
to the neighborhood and the constraint that $\sum_{j=1}^n w_{ij} = 1$. %
Finally the embedding is made in such a way that the following cost function is
minimized for $Y$,
\begin{equation}
\sum_{i=1}^n\bigg\| y_i - \sum_{j=1}^n w_{ij}y_j
\bigg\|^2.\label{eq:lle2}
\end{equation}
This can be solved using an eigenvalue decomposition.
Conceptually the method is similar to Isomap but it is
computationally cheaper, because the weight matrix is
sparse and efficient solvers exist. %
In R, LLE is implemented by the package \CRANpkg{lle}; the embedding can be
calculated with \code{lle::lle}.
Unfortunately the implementation does not make use of the sparsity of the weight matrix
$W$. %
The manifold must be well sampled and the neighborhood size must be
chosen appropriately for LLE to give good results. %
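A sketch of an LLE embedding through the common interface; that the
neighborhood size is named \code{knn}, as for Isomap, is an assumption: %
<<eval=FALSE>>=
dat <- loadDataSet("3D S Curve", n = 500)
## the parameter name knn is an assumption
emb <- embed(dat, "LLE", knn = 25, ndim = 2)
plot(emb, type = "2vars")
@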
\subsection{Laplacian Eigenmaps}
\label{sec:laplaceigenmaps}
Laplacian Eigenmaps were originally developed under the name spectral clustering
to separate non-convex clusters. %
Later it was also used for graph embedding and DR
\citep{belkin_laplacian_2003}. %
A number of variants have been proposed. %
First, a graph is constructed, usually from a distance matrix; the graph can be
made sparse by keeping only the $k$ nearest neighbors or by specifying an
$\varepsilon$ neighborhood. %
Then, a similarity matrix $W$ is calculated using a Gaussian kernel (see
Equation~\ref{eq:gauss}); if $c = 2 \sigma^2 = \infty$, all distances are
treated equally, while smaller values of $c$ give more emphasis to differences
in distance. %
The degree of vertex $i$ is $d_i = \sum_{j=1}^n w_{ij}$ and the degree
matrix, $D$, is the diagonal matrix with entries $d_i$. %
From these we can form the graph Laplacian $L = D - W$; there are several ways
to proceed from here, an overview of which can be found in \citet{luxburg_tutorial_2007}. %
The \pkg{dimRed} package implements the algorithm from
\citet{belkin_laplacian_2003}. Analogously to LLE, Laplacian Eigenmaps
reduce the computational cost by creating a sparse matrix and avoiding the
estimation of distances between all pairs of points. %
Then the eigenvectors corresponding to the lowest eigenvalues larger
than $0$ of either the matrix $L$ or the normalized Laplacian
$D^{-1/2}LD^{-1/2}$ are computed and form the embedding.
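The following self-contained sketch illustrates the algorithm directly from
the formulas above, using dense matrices for brevity (the implementation in
\pkg{dimRed} instead uses a sparse neighborhood graph): %
<<eval=FALSE>>=
x  <- as.matrix(loadDataSet("3D S Curve", n = 300)@data)
d2 <- as.matrix(dist(x))^2        # squared pairwise distances
W  <- exp(-d2 / 2)                # Gaussian similarities, c = 2 sigma^2 = 2
D  <- diag(rowSums(W))            # degree matrix
L  <- D - W                       # graph Laplacian
e  <- eigen(L, symmetric = TRUE)  # eigenvalues in decreasing order
n  <- nrow(W)
## the constant eigenvector (eigenvalue 0) is in the last column; use the
## eigenvectors of the two smallest non-zero eigenvalues as the embedding
Y <- e$vectors[, c(n - 1, n - 2)]
@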
\subsection{Diffusion Maps}
\label{sec:isodiffmaplle}
Diffusion Maps \citep{coifman_diffusion_2006} take a distance matrix
as input and calculate the transition probability matrix $P$ of a
diffusion process between the points to approximate the manifold. %
Then the embedding is done by an eigenvalue decomposition of $P$ to
calculate the coordinates of the embedding. %
The algorithm for calculating Diffusion Maps shares some elements with
the way Laplacian Eigenmaps are calculated. %
Both algorithms depart from the same weight matrix; Diffusion Maps
calculate the transition probability on the graph after $t$ time steps
and do the embedding on this probability matrix.
The idea is to simulate a diffusion process between the nodes of the
graph, which is more robust to short-circuiting than the $k$-NNG from
Isomap (see bottom right Figure \ref{fig:knn}). %
Diffusion maps in R are accessible via the
\code{diffusionMap::diffuse()} function, which is available in the
\CRANpkg{diffusionMap} package. %
Additional points can be approximated into an existing embedding using
the Nyström formula \citep{bengio_learning_2004}. %
The implementation in \pkg{dimRed} is based on the
\code{diffusionMap::diffuse} function.
% , which does not contain an
% approximation for unequally sampled manifolds
% \citep{coifman_geometric_2005}. %
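A minimal sketch through the common interface, relying on the default
parameters of the wrapped \code{diffusionMap::diffuse}: %
<<eval=FALSE>>=
dat <- loadDataSet("3D S Curve", n = 500)
emb <- embed(dat, "DiffusionMaps", ndim = 2)
plot(emb, type = "2vars")
@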
\subsection{non-Metric Dimensional Scaling}
\label{sec:nmds}
While Classical Scaling and derived methods (see section
\nameref{sec:classscale}) use eigenvector decomposition to embed the data in
such a way that the given distances are maintained, non-Metric Dimensional
Scaling \citep[nMDS, ][]{kruskal_multidimensional_1964,kruskal_nonmetric_1964}
uses optimization methods to reach the same goal. %
Therefore a stress function,
\begin{equation}
\label{eq:stress}
S = \sqrt{\frac{\sum_{i<j} \left( d_{ij} - \hat{d}_{ij} \right)^2}{\sum_{i<j} d_{ij}^2}},
\end{equation}
is minimized with respect to the embedding $Y$ by iterative optimization. %
In R, nMDS is available via \code{vegan::monoMDS}, which the \pkg{dimRed}
package wraps. %
The following example compares PCA and Isomap embeddings of an S-shaped
manifold; the results are shown in Figure~\ref{fig:plotexample}.
\begin{figure}[htbp]
\centering
<<"pca_isomap_example",include=FALSE,fig.width=4,fig.height=4>>=
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
library(dimRed); library(ggplot2); #library(dplyr); library(tidyr)
## define which methods to apply
embed_methods <- c("Isomap", "PCA")
## load test data set
data_set <- loadDataSet("3D S Curve", n = 1000)
## apply dimensionality reduction
data_emb <- lapply(embed_methods, function(x) embed(data_set, x))
names(data_emb) <- embed_methods
## plot data set, embeddings, and quality analysis
## plot(data_set, type = "3vars")
## lapply(data_emb, plot, type = "2vars")
## plot_R_NX(data_emb)
add_label <- function(label)
grid::grid.text(label, 0.2, 1, hjust = 0, vjust = 1,
gp = grid::gpar(fontface = "bold",
cex = 1.5))
## pdf('~/phd/text/dimRedPackage/plots/plot_example.pdf', width = 4, height = 4)
## plot the results
plot(data_set, type = "3vars", angle = 15, mar = c(3, 3, 0, 0), box = FALSE, grid = FALSE, pch = 16)
add_label("a")
par(mar = c(4, 4, 0, 0) + 0.1, bty = "n", las = 1)
plot(data_emb$Isomap, type = "2vars", pch = 16)
add_label("b")
plot(data_emb$PCA, type = "2vars", pch = 16)
add_label("d")
## calculate quality scores
print(
plot_R_NX(data_emb) +
theme(legend.title = element_blank(),
legend.position = c(0.5, 0.1),
legend.justification = c(0.5, 0.1))
)
add_label("c")
} else {
# These cannot all be plot(1:10)!!! It's a mystery to me.
plot(1:10)
barplot(1:10)
hist(1:10)
plot(1:10)
}
@
\includegraphics[page=1,width=.45\textwidth]{figure/pca_isomap_example-1.pdf}
\includegraphics[page=1,width=.45\textwidth]{figure/pca_isomap_example-2.pdf}
\includegraphics[page=1,width=.45\textwidth]{figure/pca_isomap_example-3.pdf}
\includegraphics[page=1,width=.45\textwidth]{figure/pca_isomap_example-4.pdf}
\caption[dimRed example]{%
Comparing PCA and Isomap: %
(a) An S-shaped manifold, colors represent the internal coordinates of the
manifold. %
(b) Isomap embedding, the S-shaped manifold is unfolded. %
(c) $R_{NX}$ plotted against neighborhood sizes; Isomap is much better at
preserving local distances and PCA is better at preserving global Euclidean
distances. %
The numbers on the legend are the $\text{AUC}_{1 / K}$.
(d) PCA projection of the data, the directions of maximum variance are preserved. %
}\label{fig:plotexample}
\end{figure}
<>=
## define which methods to apply
embed_methods <- c("Isomap", "PCA")
## load test data set
data_set <- loadDataSet("3D S Curve", n = 1000)
## apply dimensionality reduction
data_emb <- lapply(embed_methods, function(x) embed(data_set, x))
names(data_emb) <- embed_methods
## figure \ref{fig:plotexample}a, the data set
plot(data_set, type = "3vars")
## figures \ref{fig:plotexample}b (Isomap) and \ref{fig:plotexample}d (PCA)
lapply(data_emb, plot, type = "2vars")
## figure \ref{fig:plotexample}c, quality analysis
plot_R_NX(data_emb)
@
The function \code{plot\_R\_NX} produces a figure that plots the neighborhood
size ($k$, on a log scale) against the quality measure $\text{R}_{NX}(k)$ (see
Equation \ref{eq:rnx}). %
This gives an overview of the general behavior of methods: if $\text{R}_{NX}$ is
high for low values of $K$, then local neighborhoods are maintained well; if
$\text{R}_{NX}$ is high for large values of $K$, then global gradients are
maintained well. %
It also provides a way to directly compare methods by plotting more than one
$\text{R}_{NX}$ curve; the area under the curve (see Equation~\ref{eq:auclnk}),
shown as a number in the legend, serves as an indicator for the overall quality
of the embedding.
Therefore we can see from Figure~\ref{fig:plotexample}c that Isomap is very good
at maintaining close and medium distances for the given data set, whereas PCA is
only better at maintaining the very large distances. %
The large distances are dominated by the overall bent shape of the S in 3D
space, while the close distances are not affected by this bending. %
This is reflected in the properties recovered by the different methods: the PCA
embedding recovers the S-shape, while Isomap ignores the S-shape and recovers
the inner structure of the manifold.
% Example 2:
Often the quality of an embedding strongly depends on the choice of parameters;
the interface of \pkg{dimRed} can be used to facilitate searching the
parameter space.
Isomap has one parameter $k$ which determines
the number of neighbors used to construct the $k$-NNG\@. %
If this number is too large, then Isomap will resemble an MDS
(Figure~\ref{fig:knn}e); if the number is too small, the resulting embedding
contains holes (Figure~\ref{fig:knn}c). %
The following code finds the optimal value, $k_{\text{max}}$, for $k$ using the
$Q_{\text{local}}$ criterion, the results are visualized in Figure~\ref{fig:knn}
a:
\begin{figure}[htp]
\centering
<>=
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
library(dimRed)
library(cccd)
## Load data
ss <- loadDataSet("3D S Curve", n = 500)
## Parameter space
kk <- floor(seq(5, 100, length.out = 40))
## Embedding over parameter space
emb <- lapply(kk, function(x) embed(ss, "Isomap", knn = x))
## Quality over embeddings
qual <- sapply(emb, function(x) quality(x, "Q_local"))
## Find best value for K
ind_max <- which.max(qual)
k_max <- kk[ind_max]
add_label <- function(label){
par(xpd = TRUE)
b = par("usr")
text(b[1], b[4], label, adj = c(0, 1), cex = 1.5, font = 2)
par(xpd = FALSE)
}
names(qual) <- kk
}
@
<<"select_k",include=FALSE,fig.width=11,fig.height=5>>=
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
par(mfrow = c(1, 2),
mar = c(5, 4, 0, 0) + 0.1,
oma = c(0, 0, 0, 0))
plot(kk, qual, type = "l", xlab = "k", ylab = expression(Q[local]), bty = "n")
abline(v = k_max, col = "red")
add_label("a")
plot(ss, type = "3vars", angle = 15, mar = c(3, 3, 0, 0), box = FALSE, grid = FALSE, pch = 16)
add_label("b")
} else {
plot(1:10)
plot(1:10)
}
@
<<"knngraphs",include=FALSE,fig.width=8,fig.height=3>>=
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
par(mfrow = c(1, 3),
mar = c(5, 4, 0, 0) + 0.1,
oma = c(0, 0, 0, 0))
add_knn_graph <- function(ind) {
nn1 <- nng(ss@data, k = kk[ind])
el <- get.edgelist(nn1)
segments(x0 = emb[[ind]]@data@data[el[, 1], 1],
y0 = emb[[ind]]@data@data[el[, 1], 2],
x1 = emb[[ind]]@data@data[el[, 2], 1],
y1 = emb[[ind]]@data@data[el[, 2], 2],
col = "#00000010")
}
plot(emb[[2]]@data@data, type = "n", bty = "n")
add_knn_graph(2)
points(emb[[2]]@data@data, col = dimRed:::colorize(ss@meta),
pch = 16)
add_label("c")
plot(emb[[ind_max]]@data@data, type = "n", bty = "n")
add_knn_graph(ind_max)
points(emb[[ind_max]]@data@data, col = dimRed:::colorize(ss@meta),
pch = 16)
add_label("d")
plot(emb[[length(emb)]]@data@data, type = "n", bty = "n")
add_knn_graph(length(emb))
points(emb[[length(emb)]]@data@data, col = dimRed:::colorize(ss@meta),
pch = 16)
add_label("e")
} else {
plot(1:10)
plot(1:10)
plot(1:10)
}
@
\includegraphics[width=.95\textwidth]{figure/select_k-1.pdf}
\includegraphics[width=.95\textwidth]{figure/knngraphs-1.pdf}
\caption[Estimating $k$ using $Q_\text{local}$]{%
Using \pkg{dimRed} and the $Q_\text{local}$ indicator to estimate a
good value for the parameter $k$ in Isomap. %
(a) $Q_\text{local}$ for different values of $k$, the vertical red
line indicates the maximum $k_{\text{max}}$. %
(b) The original data set, a 2 dimensional manifold bent in an
S-shape in 3 dimensional space. %
Bottom row: Embeddings and $k$-NNG for different values of $k$. %
(c) When $k = 5$, the value for $k$ is too small, resulting in holes in the
embedding; the manifold itself is still unfolded correctly. %
(d) Choosing $k = k_\text{max}$ gives the best representation of the original
manifold in two dimensions achievable with Isomap. %
(e) When $k = 100$, the value is too large and the $k$-NNG does not approximate
the manifold any more. %
}\label{fig:knn}
\end{figure}
<>=
## Load data
ss <- loadDataSet("3D S Curve", n = 500)
## Parameter space
kk <- floor(seq(5, 100, length.out = 40))
## Embedding over parameter space
emb <- lapply(kk, function(x) embed(ss, "Isomap", knn = x))
## Quality over embeddings
qual <- sapply(emb, function(x) quality(x, "Q_local"))
## Find best value for K
ind_max <- which.max(qual)
k_max <- kk[ind_max]
@
Figure~\ref{fig:knn}a shows how the $Q_{\text{local}}$ criterion changes when
varying the neighborhood size $k$ for Isomap, the gray lines in
Figure~\ref{fig:knn} represent the edges of the $k$-NN Graph. %
If the value for $k$ is too low, the inner structure of the manifold will still
be recovered, but it will be imperfect (Figure~\ref{fig:knn}c, note that the holes
appear in places that are not covered by the edges of the $k$-NN Graph);
therefore the $Q_{\text{local}}$ score is lower than optimal. %
If $k$ is too large, the error of the embedding is much larger due to short
circuiting and we observe a very steep drop in the $Q_{\text{local}}$ score. %
The short circuiting can be observed in Figure~\ref{fig:knn}e with the edges that
cross the gap between the tips and the center of the S-shape. %
% Example 3:
It is also very easy to compare across methods and quality scores. %
The following code produces a matrix of quality scores and methods,
where \code{dimRedMethodList} returns a character vector with all methods. A
visualization of the matrix can be found in Figure~\ref{fig:qualityexample}. %
\begin{figure}[htp]
\centering
<<"plot_quality",include=FALSE>>=
if(Sys.getenv("BNET_BUILD_VIGNETTE") != "") {
embed_methods <- dimRedMethodList()
quality_methods <- c("Q_local", "Q_global", "AUC_lnK_R_NX",
"cophenetic_correlation")
iris_data <- loadDataSet("Iris")
quality_results <- matrix(
NA, length(embed_methods), length(quality_methods),
dimnames = list(embed_methods, quality_methods)
)
embedded_data <- list()
for (e in embed_methods) {
try(embedded_data[[e]] <- embed(iris_data, e))
for (q in quality_methods)
try(quality_results[e,q] <- quality(embedded_data[[e]], q))
}
quality_results <- quality_results[order(rowMeans(quality_results)), ]
palette(c("#1b9e77", "#d95f02", "#7570b3", "#e7298a", "#66a61e"))
col_hsv <- rgb2hsv(col2rgb(palette()))
## col_hsv["v", ] <- col_hsv["v", ] * 3 / 1
palette(hsv(col_hsv["h",], col_hsv["s",], col_hsv["v",]))
par(mar = c(2, 8, 0, 0) + 0.1)
barplot(t(quality_results), beside = TRUE, col = 1:4,
legend.text = quality_methods, horiz = TRUE, las = 1,
cex.names = 0.85,
args.legend = list(x = "topleft", bg = "white", cex = 0.8))
} else {
plot(1:10)
}
@
\includegraphics[width=.5\textwidth]{figure/plot_quality-1.pdf}
\caption[Quality comparison]{%
A visualization of the \code{quality\_results} matrix. %
The methods are ordered by mean quality score. %
The reconstruction error was omitted, because for it a higher value means
a worse embedding, while for the scores shown a higher value
means a better embedding. %
Parameters were not tuned for the example; therefore it should not
be seen as a general quality assessment of the methods. %
}\label{fig:qualityexample}
\end{figure}
<>=
embed_methods <- dimRedMethodList()
quality_methods <- c("Q_local", "Q_global", "AUC_lnK_R_NX",
"cophenetic_correlation")
scurve <- loadDataSet("3D S Curve", n = 2000)
quality_results <- matrix(
NA, length(embed_methods), length(quality_methods),
dimnames = list(embed_methods, quality_methods)
)
embedded_data <- list()
for (e in embed_methods) {
embedded_data[[e]] <- embed(scurve, e)
for (q in quality_methods)
try(quality_results[e, q] <- quality(embedded_data[[e]], q))
}
@
This example showcases the simplicity with which different methods and quality criteria
can be combined. %
Because of the strong dependencies on parameters it is not advised to apply this
kind of analysis without tuning the parameters for each method separately. %
There is no automated way to tune parameters in \pkg{dimRed}. %
\section{Conclusion}
\label{sec:conc}
This paper presents the \pkg{dimRed} and \pkg{coRanking} packages and
it provides a brief overview of the methods implemented therein. %
The \pkg{dimRed} package is written in the R language, one of the most popular
languages for data analysis, and is freely available from CRAN. %
The package is object oriented and completely open source, and therefore easy
to obtain and to extend. %
Although most of the DR methods already had implementations in R,
\pkg{dimRed} adds some new methods for dimensionality reduction, and
\pkg{coRanking} adds methods for an independent quality control of DR
methods to the R ecosystem. %
DR is a widely used technique. However, due to the lack of easily usable tools,
choosing the right method for DR is complex and depends upon a variety of factors. %
The \pkg{dimRed} package aims to facilitate experimentation with different
techniques, parameters, and quality measures so that choosing the right method
becomes easier. %
The \pkg{dimRed} package aims to enable the user to objectively compare methods that
rely on very different algorithmic approaches. %
It also makes the programmer's life easier, because all methods are aggregated
in one place, and a single interface and standardized classes provide access to
the functionality. %
\section{Acknowledgments}
\label{sec:ack}
We thank Dr.\ G.\ Camps-Valls and an anonymous reviewer for many useful
comments. %
This study was supported by the European Space Agency (ESA) via the Earth System
Data Lab project (\url{http://earthsystemdatacube.org}) and the EU via the H2020
project BACI, grant agreement No 640176. %
\bibliographystyle{abbrvnat}
\bibliography{bibliography}
\end{document}
dimRed/inst/doc/dimensionality-reduction.pdf 0000644 0001762 0000144 00003441270 14153220135 020724 0 ustar ligges users [binary PDF content omitted]