tm/0000755000175100001440000000000013204066220010703 5ustar hornikuserstm/inst/0000755000175100001440000000000013204065716011671 5ustar hornikuserstm/inst/CITATION0000644000175100001440000000144512074065307013032 0ustar hornikuserscitation(auto = meta) bibentry(bibtype = "Article", title = "Text Mining Infrastructure in R", author = {c(person("Ingo", "Feinerer", email = "feinerer@logic.at"), person("Kurt", "Hornik", email = "Kurt.Hornik@R-project.org"), person("David", "Meyer", email = "David.Meyer@wu.ac.at"))}, year = 2008, journal = "Journal of Statistical Software", volume = 25, number = 5, pages = "1--54", url = "http://www.jstatsoft.org/v25/i05/", month = "March", textVersion = paste("Ingo Feinerer, Kurt Hornik, and David Meyer (2008).", "Text Mining Infrastructure in R.", "Journal of Statistical Software 25(5): 1-54.", "URL: http://www.jstatsoft.org/v25/i05/.")) tm/inst/NEWS.Rd0000644000175100001440000004661713202011132012727 0ustar hornikusers\name{NEWS} \title{News for Package 'tm'} \encoding{UTF-8} \section{Changes in tm version 0.7-2}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item \code{DataframeSource} now only processes data frames with the two mandatory columns \code{"doc_id"} and \code{"text"}. Additional columns are used as document level metadata. This implements compatibility with \emph{Text Interchange Formats} corpora (\url{https://github.com/ropensci/tif}). \item \code{readTabular()} has been removed. Use \code{DataframeSource} instead. \item \code{removeNumbers()} and \code{removePunctuation()} now have an argument \code{ucp} to check for Unicode general categories \code{Nd} (decimal digits) and \code{P} (punctuation), respectively. Contributed by Kurt Hornik. \item The package \pkg{xml2} is now imported for \acronym{XML} functionality instead of the (\acronym{CRAN} maintainer orphaned) package \pkg{XML}. 
} } \subsection{NEW FEATURES}{ \itemize{ \item \code{Boost_tokenizer} provides a tokenizer based on the Boost (\url{http://www.boost.org}) Tokenizer. } } \subsection{BUG FIXES}{ \itemize{ \item Correctly handle the \code{dictionary} argument when constructing a term-document matrix from a \code{SimpleCorpus} (reported by Joe Corrigan) or from a \code{VCorpus} (reported by Mark Rosenstein). } } } \section{Changes in tm version 0.7-1}{ \subsection{BUG FIXES}{ \itemize{ \item Compilation fixes for Clang's libc++. } } } \section{Changes in tm version 0.7}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item \code{inspect.TermDocumentMatrix()} now displays a sample instead of the full matrix. The full dense representation is available via \code{as.matrix()}. } } \subsection{NEW FEATURES}{ \itemize{ \item \code{SimpleCorpus} provides a corpus which is optimized for the most common usage scenario: importing plain texts from files in a directory or directly from a vector in \R, preprocessing and transforming the texts, and finally exporting them to a term-document matrix. The aim is to boost performance and minimize memory pressure. It loads all documents into memory, and is designed for medium-sized to large data sets. \item \code{inspect()} on text documents as a shorthand for \code{writeLines(as.character())}. \item \code{findMostFreqTerms()} finds most frequent terms in a document-term or term-document matrix, or a vector of term frequencies. \item \code{tm_parLapply()} is now internally used for the parallelization of transformations, filters, and term-document matrix construction. The preferred parallelization engine can be registered via \code{tm_parLapply_engine()}. The default is to use no parallelization (instead of \code{\link[parallel]{mclapply}} (package \pkg{parallel}) in previous versions). 
} } } \section{Changes in tm version 0.6-2}{ \subsection{BUG FIXES}{ \itemize{ \item \code{format.PlainTextDocument()} now reports only one character count for a whole document. } } } \section{Changes in tm version 0.6-1}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item \code{format.PlainTextDocument()} now displays a compact representation instead of the content. Use \code{as.character()} to obtain the character content (which in turn can be applied to a corpus via \code{lapply()}). } } \subsection{NEW FEATURES}{ \itemize{ \item \code{ZipSource()} for processing ZIP files. \item Sources now provide \code{open()} and \code{close()}. \item \code{termFreq()} now accepts \code{Span_Tokenizer} and \code{Token_Tokenizer} (both from package \pkg{NLP}) objects as tokenizers. \item \code{readTagged()}, a reader for text documents containing POS-tagged words. } } \subsection{BUG FIXES}{ \itemize{ \item The function \code{removeWords()} now correctly processes words being truncations of others. Reported by Александр Труфанов. } } } \section{Changes in tm version 0.6}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item \code{DirSource()} and \code{URISource()} now use the argument \code{encoding} for conversion via \code{iconv()} to \code{"UTF-8"}. \item \code{termFreq()} now uses \code{words()} as the default tokenizer. \item Text documents now provide the functions \code{content()} and \code{as.character()} to access the (possibly raw) document content and the natural language text in a suitable (not necessarily structured) form. \item The internal representation of corpora, sources, and text documents changed. Saved objects created with older \pkg{tm} versions are incompatible and need to be rebuilt. } } \subsection{NEW FEATURES}{ \itemize{ \item \code{DirSource()} and \code{URISource()} now have a \code{mode} argument specifying how elements should be read (no read, binary, text). 
\item Improved high-level documentation on corpora (\code{?Corpus}), text documents (\code{?TextDocument}), sources (\code{?Source}), and readers (\code{?Reader}). \item Integration with package \pkg{NLP}. \item Romanian stopwords. Suggested by Cristian Chirita. \item \code{words.PlainTextDocument()} delivers word tokens in the document. } } \subsection{BUG FIXES}{ \itemize{ \item The function \code{stemCompletion()} now avoids spurious duplicate results. Reported by Seong-Hyeon Kim. } } \subsection{DEPRECATED & DEFUNCT}{ \itemize{ \item Following functions have been removed: \itemize{ \item \code{Author()}, \code{DateTimeStamp()}, \code{CMetaData()}, \code{content_meta()}, \code{DMetaData()}, \code{Description()}, \code{Heading()}, \code{ID()}, \code{Language()}, \code{LocalMetaData()}, \code{Origin()}, \code{prescindMeta()}, \code{sFilter()} (use \code{meta()} instead). \item \code{dissimilarity()} (use \code{proxy::dist()} instead). \item \code{makeChunks()} (use \code{[} and \code{[[} manually). \item \code{summary.Corpus()} and \code{summary.TextRepository()} (\code{print()} now gives a more informative but succinct overview). \item \code{TextRepository()} and \code{RepoMetaData()} (use e.g. a list to store multiple corpora instead). } } } } \section{Changes in tm version 0.5-10}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item License changed to GPL-3 (from GPL-2 | GPL-3). \item Following functions have been renamed: \itemize{ \item \code{tm_tag_score()} to \code{tm_term_score()}. 
} } } \subsection{DEPRECATED & DEFUNCT}{ \itemize{ \item Following functions have been removed: \itemize{ \item \code{Dictionary()} (use a character vector instead; use \code{Terms()} to extract terms from a document-term or term-document matrix), \item \code{GmaneSource()} (but still available via an example in \code{XMLSource()}), \item \code{preprocessReut21578XML()} (moved to package \pkg{tm.corpus.Reuters21578}), \item \code{readGmane()} (but still available via an example in \code{readXML()}), \item \code{searchFullText()} and \code{tm_intersect()} (use \code{grep()} instead). } \item Following S3 classes are no longer registered as S4 classes: \itemize{ \item \code{VCorpus} and \code{PlainTextDocument}. } } } } \section{Changes in tm version 0.5-9}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item Stemming functionality is now provided by the package \pkg{SnowballC} replacing packages \pkg{Snowball} and \pkg{RWeka}. \item All stopword lists (besides Catalan and SMART) available via \code{stopwords()} now come from the Snowball stemmer project. \item Transformations, filters, and term-document matrix construction now use \code{\link[parallel]{mclapply}} (package \pkg{parallel}). Packages \pkg{snow} and \pkg{Rmpi} are no longer used. } } \subsection{DEPRECATED & DEFUNCT}{ \itemize{ \item Following functions have been removed: \itemize{ \item \code{tm_startCluster()} and \code{tm_stopCluster()}. } } } } \section{Changes in tm version 0.5-8}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item The function \code{termFreq()} now processes the \code{tolower} and \code{tokenize} options first. } } \subsection{NEW FEATURES}{ \itemize{ \item Catalan stopwords. Requested by Xavier Fernández i Marín. } } \subsection{BUG FIXES}{ \itemize{ \item The function \code{termFreq()} now correctly accepts user-provided stopwords. Reported by Bettina Grün. 
\item The function \code{termFreq()} now correctly handles the lower bound of the option \code{wordLength}. Reported by Steven C. Bagley. } } } \section{Changes in tm version 0.5-7}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item The function \code{termFreq()} provides two new arguments for generalized bounds checking of term frequencies and word lengths. This replaces the arguments minDocFreq and minWordLength. \item The function \code{termFreq()} is now sensitive to the order of control options. } } \subsection{NEW FEATURES}{ \itemize{ \item Weighting schemata for term-document matrices in SMART notation. \item Local and global options for term-document matrix construction. \item SMART stopword list was added. } } } \section{Changes in tm version 0.5-5}{ \subsection{NEW FEATURES}{ \itemize{ \item Access documents in a corpus by names (fallback to IDs if names are not set), i.e., allow a string for the corpus operator `[[`. } } \subsection{BUG FIXES}{ \itemize{ \item The function \code{findFreqTerms()} now checks bounds on a global level (to comply with the manual page) instead per document. Reported and fixed by Thomas Zapf-Schramm. } } } \section{Changes in tm version 0.5-4}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item Use IETF language tags for language codes (instead of ISO 639-2). } } \subsection{NEW FEATURES}{ \itemize{ \item The function \code{tm_tag_score()} provides functionality to score documents based on the number of tags found. This is useful for sentiment analysis. \item The weighting function for term frequency-inverse document frequency \code{weightTfIdf()} has now an option for term normalization. \item Plotting functions to test for Zipf's and Heaps' law on a term-document matrix were added: \code{Zipf_plot()} and \code{Heaps_plot()}. Contributed by Kurt Hornik. 
} } } \section{Changes in tm version 0.5-3}{ \subsection{NEW FEATURES}{ \itemize{ \item The reader function \code{readRCV1asPlain()} was added and combines the functionality of \code{readRCV1()} and \code{as.PlainTextDocument()}. \item The function \code{stemCompletion()} has a set of new heuristics. } } } \section{Changes in tm version 0.5-2}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item The function \code{termFreq()} which is used for building a term-document matrix now uses a whitespace oriented tokenizer as default. } } \subsection{NEW FEATURES}{ \itemize{ \item A combine method for merging multiple term-document matrices was added (\code{c.TermDocumentMatrix()}). \item The function \code{termFreq()} has now an option to remove punctuation characters. } } \subsection{DEPRECATED & DEFUNCT}{ \itemize{ \item Following functions have been removed: \itemize{ \item \code{CSVSource()} (use \code{DataframeSource(read.csv(..., stringsAsFactors = FALSE))} instead), and \item \code{TermDocMatrix()} (use \code{DocumentTermMatrix()} instead). } } } \subsection{BUG FIXES}{ \itemize{ \item \code{removeWords()} no longer skips words at the beginning or the end of a line. Reported by Mark Kimpel. } } } \section{Changes in tm version 0.5-1}{ \subsection{BUG FIXES}{ \itemize{ \item \code{preprocessReut21578XML()} no longer generates invalid file names. } } } \section{Changes in tm version 0.5}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item All classes, functions, and generics are reimplemented using the S3 class system. \item Following functions have been renamed: \itemize{ \item \code{activateCluster()} to \code{tm_startCluster()}, \item \code{asPlain()} to \code{as.PlainTextDocument()}, \item \code{deactivateCluster()} to \code{tm_stopCluster()}, \item \code{tmFilter()} to \code{tm_filter()}, \item \code{tmIndex()} to \code{tm_index()}, \item \code{tmIntersect()} to \code{tm_intersect()}, and \item \code{tmMap()} to \code{tm_map()}. 
} \item Mail handling functionality is factored out to the \pkg{tm.plugin.mail} package. } } \subsection{DEPRECATED & DEFUNCT}{ \itemize{ \item Following functions have been removed: \itemize{ \item \code{tmTolower()} (use \code{tolower()} instead), and \item \code{replacePatterns()} (use \code{gsub()} instead). } } } } \section{Changes in tm version 0.4}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item The Corpus class is now virtual providing an abstract interface. \item VCorpus, the default implementation of the abstract corpus interface (by subclassing), provides a corpus with volatile (= standard \R object) semantics. It loads all documents into memory, and is designed for small to medium-sized data sets. \item PCorpus, an implementation of the abstract corpus interface (by subclassing), provides a corpus with permanent storage semantics. The actual data is stored in an external database (file) object (as supported by the \pkg{filehash} package), with automatic (un-)loading into memory. It is designed for systems with small memory. \item Language codes are now in ISO 639-2 (instead of ISO 639-1). \item Reader functions no longer have a load argument for lazy loading. } } \subsection{NEW FEATURES}{ \itemize{ \item The reader function \code{readReut21578XMLasPlain()} was added and combines the functionality of \code{readReut21578XML()} and \code{asPlain()}. } } \subsection{BUG FIXES}{ \itemize{ \item \code{weightTfIdf()} no longer applies a binary weighting to an input matrix in term frequency format (which happened only in 0.3-4). } } } \section{Changes in tm version 0.3-4}{ \subsection{SIGNIFICANT USER-VISIBLE CHANGES}{ \itemize{ \item \code{.onLoad()} no longer tries to start a MPI cluster (which often failed in misconfigured environments). Use \code{activateCluster()} and \code{deactivateCluster()} instead. \item DocumentTermMatrix (the improved reimplementation of defunct TermDocMatrix) does not use the \pkg{Matrix} package anymore. 
} } \subsection{NEW FEATURES}{ \itemize{ \item The \code{DirSource()} constructor now accepts the two new (optional) arguments pattern and ignore.case. With pattern one can define a regular expression for selecting only matching files, and ignore.case specifies whether pattern-matching is case-sensitive. \item The \code{readNewsgroup()} reader function can now be configured for custom date formats (via the DateFormat argument). \item The \code{readPDF()} reader function can now be configured (via the PdfinfoOptions and PdftotextOptions arguments). \item The \code{readDOC()} reader function can now be configured (via the AntiwordOptions argument). \item Sources now can be vectorized. This allows faster corpus construction. \item New XMLSource class for arbitrary XML files. \item The new \code{readTabular()} reader function allows to create a custom tailor-made reader configured via mappings from a tabular data structure. \item The new \code{readXML()} reader function allows to read in arbitrary XML files which are described with a specification. \item The new \code{tmReduce()} transformation allows to combine multiple maps into one transformation. } } \subsection{DEPRECATED & DEFUNCT}{ \itemize{ \item CSVSource is defunct (use DataframeSource instead). \item weightLogical is defunct. \item TermDocMatrix is defunct (use DocumentTermMatrix or TermDocumentMatrix instead). } } } \section{Changes in tm version 0.3-3}{ \subsection{NEW FEATURES}{ \itemize{ \item The abstract Source class gets a default implementation for the \code{stepNext()} method. It increments the position counter by one, a reasonable value for most sources. For special purposes custom methods can be created via overloading \code{stepNext()} of the subclass. \item New URISource class for a single document identified by a Uniform Resource Identifier. \item New DataframeSource for documents stored in a data frame. Each row is interpreted as a single document. 
} } \subsection{BUG FIXES}{ \itemize{ \item Fix off-by-one error in \code{convertMboxEml()} function. Reported by Angela Bohn. \item Sort row indices in sparse term-document matrices. Kudos to Martin Mächler for his suggestions. \item Sources and readers no longer evaluate calls in a non-standard way. } } } \section{Changes in tm version 0.3-2}{ \subsection{NEW FEATURES}{ \itemize{ \item Weighting functions now have an Acronym slot containing abbreviations of the weighting functions' names. This is highly useful when generating tables with indications which weighting scheme was actually used for your experiments. \item The functions \code{tmFilter()}, \code{tmIndex()}, \code{tmMap()} and \code{TermDocMatrix()} now can use a MPI cluster (via the \pkg{snow} and \pkg{Rmpi} packages) if available. Use \code{(de)activateCluster()} to manually override cluster usage settings. Special thanks to Stefan Theussl for his constructive comments. \item The Source class receives a new Length slot. It contains the number of elements provided by the source (although there might be rare cases where the number cannot be determined in advance---then it should be set to zero). } } } tm/inst/ghostscript/0000755000175100001440000000000012213264557014245 5ustar hornikuserstm/inst/ghostscript/pdf_info.ps0000644000175100001440000001604212200717467016377 0ustar hornikusers%!PS % Copyright (C) 2007 Artifex Software, Inc. All rights reserved. % % This software is provided AS-IS with no warranty, either express or % implied. % % This software is distributed under license and may not be copied, % modified or distributed except as expressly authorized under the terms % of the license contained in the file LICENSE in this distribution. % % For more information about licensing, please refer to % http://www.ghostscript.com/licensing/. 
For information on % commercial licensing, go to http://www.artifex.com/licensing/ or % contact Artifex Software, Inc., 101 Lucas Valley Road #110, % San Rafael, CA 94903, U.S.A., +1(415)492-9861. % % $Id: pdf_info.ps 6300 2005-12-28 19:56:24Z alexcher $ % Dump some info from a PDF file % usage: gs -dNODISPLAY -q -sFile=____.pdf [-dDumpMediaSizes] [-dDumpFontsUsed [-dShowEmbeddedFonts] ] toolbin/pdf_info.ps /showoptions { ( where "options" are:) = ( -dDumpMediaSizes=false (default true) MediaBox and CropBox for each page) = ( -dDumpFontsNeeded=false (default true)Fonts used, but not embedded) = ( -dDumpFontsUsed List all fonts used) = ( -dShowEmbeddedFonts only meaningful with -dDumpFontsUsed) = (\n If no options are given, the default is -dDumpMediaSizes -dDumpFontsNeeded) = () = flush } bind def /DumpMediaSizes where { pop } { /DumpMediaSizes true def } ifelse /DumpFontsNeeded where { pop } { /DumpFontsNeeded true def } ifelse [ shellarguments { counttomark 1 eq { dup 0 get (-) 0 get ne { % File specified on the command line using: -- toolbin/pdf_info.ps infile.pdf /File exch def false % don't show usage } { true % show usage and quit } ifelse } { true } ifelse { (\n*** Usage: gs [options] -- toolbin/pdf_info.ps infile.pdf ***\n\n) print showoptions quit } if } if /File where not { (\n *** Missing input file name \(use -sFile=____.pdf\)\n) = ( usage: gs -dNODISPLAY -q -sFile=____.pdf [ options ] toolbin/pdf_info.ps\n) = showoptions quit } if pop % discard the dict from where /QUIET true def % in case they forgot () = File dup (r) file runpdfbegin /PDFPageCount pdfpagecount def ( ) print print ( has ) print PDFPageCount =print ( pages.\n) = flush % Print out the "Info" dictionary if present Trailer /Info knownoget { dup /Title knownoget { (Title: ) print = flush } if dup /Author knownoget { (Author: ) print = flush } if dup /Subject knownoget { (Subject: ) print = flush } if dup /Keywords knownoget { (Keywords: ) print = flush } if dup /Creator knownoget { 
(Creator: ) print = flush } if dup /Producer knownoget { (Producer: ) print = flush } if dup /CreationDate knownoget { (CreationDate: ) print = flush } if dup /ModDate knownoget { (ModDate: ) print = flush } if dup /Trapped knownoget { (Trapped: ) print = flush } if } if % if Info known DumpMediaSizes { () = % Print out the Page Size info for each page. 1 1 PDFPageCount { dup (Page ) print =print pdfgetpage dup /MediaBox pget { ( MediaBox: ) print oforce_array ==only } if dup /CropBox pget { ( CropBox: ) print oforce_array ==only } if dup /Rotate pget { ( Rotate = ) print =print } if pageusestransparency { ( Page uses transparency features) print } if () = flush } for } if % List of standard font names for use when we are showing the FontsNeeded /StdFontNames [ /Times-Roman /Helvetica /Courier /Symbol /Times-Bold /Helvetica-Bold /Courier-Bold /ZapfDingbats /Times-Italic /Helvetica-Oblique /Courier-Oblique /Times-BoldItalic /Helvetica-BoldOblique /Courier-BoldOblique ] def /res-type-dict 10 dict begin /Font { { exch pop oforce dup //null ne { dup /DescendantFonts knownoget { exch pop 0 get oforce } if dup /FontDescriptor knownoget { dup /FontFile known 1 index /FontFile2 known or exch /FontFile3 known or /ShowEmbeddedFonts where { pop pop //false } if { pop % skip embedded fonts } { /BaseFont knownoget { % not embedded FontsUsed exch //null put } if } ifelse } { /BaseFont knownoget { % no FontDescriptor, not embedded FontsUsed exch //null put } if } ifelse } { pop } ifelse } forall % traverse the dictionary } bind def /XObject { { exch pop oforce dup //null ne { dup /Subtype knownoget { /Form eq { /Resources knownoget { get-fonts-from-res } if } { pop } ifelse } { pop } ifelse } { pop } ifelse } forall } bind def /Pattern { { exch pop oforce dup //null ne { /Resources knownoget { get-fonts-from-res } if } { pop } ifelse } forall } bind def currentdict end readonly def % <> get-fonts-from-res - /get-fonts-from-res { oforce dup //null ne { { oforce dup //null ne { 
//res-type-dict 3 -1 roll .knownget { exec } { pop } ifelse } { pop pop } ifelse } forall } { pop } ifelse } bind def currentdict /res-type-dict undef /getPDFfonts { % (filename) getPDFfonts array_of_font_names /FontsUsed 1000 dict def % this will increase if needed mark 1 1 PDFPageCount { pdfgetpage % get pagedict dup /Resources pget { get-fonts-from-res } if /Annots knownoget { { oforce dup //null ne { /AP knownoget { { exch pop oforce dup //null ne { dup /Resources knownoget { get-fonts-from-res } if { exch pop oforce dup type /dicttype eq { /Resources knownoget { get-fonts-from-res } if } { pop } ifelse } forall } { pop } ifelse } forall } if } { pop } ifelse } forall } if } for % If DumpFontsUsed is not true, then remove the 'standard' fonts from the list systemdict /DumpFontsUsed known not { StdFontNames { FontsUsed 1 index known { FontsUsed 1 index undef } if pop } forall } if % Now dump the FontsUsed dict into an array so we can sort it. [ FontsUsed { pop } forall ] { 100 string cvs exch 100 string cvs exch lt } .sort } bind def systemdict /DumpFontsUsed known { (\nFont or CIDFont resources used:) = getPDFfonts { = } forall } { DumpFontsNeeded { getPDFfonts dup length 0 gt { (\nFonts Needed that are not embedded \(system fonts required\):) = { ( ) print = } forall } { pop (\nNo system fonts are needed.) = } ifelse } if } ifelse quit tm/inst/doc/0000755000175100001440000000000013204065716012436 5ustar hornikuserstm/inst/doc/tm.pdf0000644000175100001440000053274413204065715013567 0ustar hornikusers%PDF-1.5 % 3 0 obj << /Length 4592 /Filter /FlateDecode >> stream xڭ;Is֙w U.j,$U]T,s&!i.*S[R;D-߾>{o)LMնMcma~}ߋ?Vivi𷂿@,~ = ;`O;CaZ| Gt|x-==7˪Eךjym-_>诛=p35boMрȈk^RQ-OjvK\#!vMFKوFoߝ ?!c䕪U MwFL8L@zzuJ>ǠAH$Snܗ @+=Q iSœ\uݭ gobk ެI,axbxm "YG9xV*EFeOeeq P>EU7t~dĞ:VE_O=ӳ;[ш)-9#uq.ZRN=Vfu:RaKkߓ}z^՜>{D`Ϧ,[X]販=P}mI$J8!U| 87$Vm ^Ikɑ *;xٸxYK`A\״gq)fpgoz!>Qfg8ȱFe~Tc!6c']d`qf? 
+ljvS;arE¾e徔ڦk =~&Ȁato9H&1WPUjs\L)v,vd=u$0j3_n -q@4xzԒ~ua @ASNXA鶄IT 1?(""CW=J}>[Vп&|d-e:ǡrz) aT=x ᫚pDޅB:S2>=>1 v́m<$}Zw uS4!D—'vC ҇ bbD>NϏID(Eu]圥p(bwO3w]a}ݡ8|Ի?c% M1]%jq9↺Syspm!OW ~f28LYJړ@^V٤~b5C|pYy$Ȳ+ܹ:wi5ʀѠ Fc[(uĬ|+w4mȹp09ôi]AO<)زT©l4V@EL$RyC9J{RK I30m3%f=-"-*5:01=DQ>*uaX4X>/*@c"VrlV?c!ʯc1{%+%{eZgեe\*> a#krߟ8R *a@ q+ά)BctpƂt ov !`.`tԏdK-@oF|S\pg<'$&Tcm`7Y<o@'߇dY|/^IIPµJp|@#!A`LT&eR4TV>PD,?P3)4>pjS2.;oql:MGAd _N]x1țfh#\4";g-W1q3!cv*{VB]eBDYHEHآ|ͨngqnPeQc d,iF46.B4jDYs9K-=; qU_fjP錾!8 iO;9ht%|{yKiy2/Ef%m&D1ZhA˷z &o.cC]ۘ RTv_#^*b?iMiͳH`='ɷuw.8 8m[Y˵B u . "CԆkcj oTE;9"Csy*#B7`L9!Q;,uQz)ϐ5&<\sI`skWw8ƹ_NOT#!W ҕl (ۯ~)сU)821j=]h8e_ŷܳ+(56gڿ=.sǠ0L:Ucm&r^&O"!ԴRN n&8E_#Ixy H,RT$fA:e.k$2/J Ir 7yTPSۭ1>pST 8$ +Iэ:@q9{L$$83`lL&i)A^1g1O;u0QN A낀|U Dvzh;dC&H(=#i6#{RXp(ǖ;u\* c|Dއju͠$cb9Kwu/\z\0U=0(Ye}Šf]Ñߓrq ~Ɨ?ףk* n$eoip?L*!sixC]Ejq3LYTMŸ) sOf\vEXyUףD fuyi3M+SrK6R۠yTG.\,M-kɠ0Mu$:Xm'75 ˖orޕ줔+W5H$y͊Bc\JB+RLME8SL(}u ۰^y'gtmt15U4'߰S-Tvbs"\겛v|і{57II\rB58XkJB=WCp2_}QFc6y33~-3uxY.I 4KѮ&ㅣfrqs\~i$ލ\f!t$f^j( ߱n^)7ۤQ/fTԥJJJAxBf,JXf"{ R5~E)vPFcUE.W"Vӂ b7ݼ x4"ZcrdE=Wœ6*kTY$_2&xM!x}tuT?۹롸u]={2;\ZaήKrk6T\K m@БzҚ[0*q~I}r=!T3E^#9٫mYXXfh7$irUr0)HV:'Y$Q"_ov«~$ۙ\7-rd W# p}jv18ALc7z/[݄Ufv*?УDg:)'xA뷓VAL M]{g\ѹ3S3Yf} ʅVu1Mߴy8qN\Ӝo|abˤ+ҴV/^\ :XԤKcF9N:)fH7,D"1;-1E񝺃l)JQo(7 Jv&{v 1bY})L.Q:#G MǤbDv"*2.7|ĻW:E_'+Uk?db'qT?L'ŕ\B&8ahSTMtRMg4gSg`f 8G_ ҡO]Vk7?6+!Ch8,^f+h'QT[~kߙ<YhNAz!#SU3ζҹhc_|{R endstream endobj 19 0 obj << /Length 1873 /Filter /FlateDecode >> stream xY[o6~0dc5+Q+ aJն<ˎ? )b "s!ūt,hv3J|(I"!p4>&i8iq]Nj, )!݊zi8v|ȣ 2rvUmu{Αwsy# fO ?ymÅݾ w/iT9!8Li[Pٝ Xi5jetP0 ;CQjB[ss%qIf2d0rR )uayBn PQCt>#(]3/;faP3惸ږSYԖ o4W:cF~:ޫ;?%@c6 ڏ0|_N&YcTKCFn y\qa^ɒwIìgͺmfA o>#iϐ}LCaNWqdlz`;LdbdcB <_ vW$CQ5+BFea`5 Ė۳>n`HD5j4o[q[!w A{︔CX/ܿ%F;fiLC2πw,Pޭ+i>(g'o W`D9Z:HҢ=Quko.R\t?)G=܎$@Aƒ;:5@; p; 8BПzir[0b~j=@W{0*@N8[CqI*/aeZYt(%HSˬV-0۸DZR%)/s$'|7e@&B2avS^6L6XU{ QD&Sؔy{ɕxƟv意I*Lٴ+_2$K6>"̉R`ˋmŔz(9*\Eŀw0==LDAdߖ. 
gbԓ$>pܶh,1dpc@2^`L*핻T$ג5[}ӹ;Xd}t$ƹ]u%׈.h@[>jY^~f乚(bZ|ɉOhH{DCۑJ".MK6&]+H'=~4od'w/K[O\nޙ^ U]:E_U@>Sͻ{ҼV˱8lZ"IuY L, oBȐ]ܱt̞;~褸y +֧ |i24>Ka3~E'L'DϙZjgZ#֤WcA4_A~ɡOEeü? [)cs2*EYS;R`XtE1'm}EayUJ4iDZs\ 3k+Na:9 Q<g/ endstream endobj 22 0 obj << /Length 1536 /Filter /FlateDecode >> stream xYoF~@$a I鵧SJF{;ba{kf݇Iқ$JzU/Nd>Mq'E{t6r/d-$RYɅ EH%d) oQo5ꉒP]? QDJGRvz+_5VK)aeؠWI,78h,2S)w<P ~KܧgY^'Tn"切+,'d| <͓2US1ɗ eÖPB*̞': KHqzTj?(-ri;)=R7M6PW@Y C{`u<)|!%=/=D]2ͥ*xsu4c`v~ x@sؓQ++ИwZ]a(]:ĮJ)ݠ A &޷sc7Et.hb\;Fì:lT4%;EJOS@P'cѥB{?7q*]<Ĺŷ#tlfĪv) 'U֞a?TukCOYtoagZv I; Fk-IˍMҲ.6c=j"*gR[w!Ӄ9GBMR Ch !sÈLjɈ4ʑ(1kXYR?J#߈`&Xu؉"DгⰰGATV5큐AA'@5m-Ol"v4:p+^6nfx1 >h2A`k{_9a%$1L܄xYÖ֭+O`kφBlh~s5x8x\"!O{cu¡t7n6vK݋_`vÖ^>BnL4+է-.L "bHc睌W2-YN?-/ʈ524jP͈{Rþ&J0"FS8E$BvPj;.pFBU7Z@A=Y憤1tl `nM,/p>~GcƷ }KJ&uu2+~2j.o@ PxX}5q㿁O yarX\N[F֏)HΏÏk.B|F,ēHA0ҹw. - endstream endobj 25 0 obj << /Length 2452 /Filter /FlateDecode >> stream xY[~_1CAcFA`3M7h}H@ckZ#;;>Ƌl'h AEs9o/x 8"TVy*'eϺK=sOHװ~Z|ūTef2]̍VET078 ?v,'nmHV+pY^P8"J߲/4ӷrw̍X8w-ˁ:[(»-mAc5M[0A* ,SH#$k?2P+n9i<I_ g}눿E{[_ Z.1NFBnLv' a/|__( ,WQß'H1$>&x*9V:RIxI) _ XːSڛ4@iPQqgzCa1qdB%)^HTN ^UO R endstream endobj 29 0 obj << /Length 2347 /Filter /FlateDecode >> stream xڭks6{~'L$L&}\ӻIt3iiWIR' J",/߸8>X^_82sXDoå*xj9T,[fe-MqQƥO=M. - ;fojyX$wռ#ͣ"6˴ (93MЬp~h/ +^%&BZ' #ůݵVD|pFK0pSuD&_<@Qĥs,CMʨBW^9BHKj2I(>b{IYuABhn8axeNq}\5R7E-\dt0haůNAY=g$kM沰dv}zG 0y|`KeB{j:ʌ ';> f,I6X ZO&2 䰷鋤\+:'Y{yVD/7MHN@#B[O |v9"r!NnOz2MW,c(yYѣۢdzA$tk;b7Nn %WY{Ďb730CJ_<'&͚%oViq/M`k /d-}#eJ5e?r:5,:|hp]sJ|Q=]G.S\p=p hb4J $'"C>O*z-KcM`R,HE1?W bRHiy'R Hh\ n;NI.&$&A|?ԁ" 5̔$]R9dDƎ@fUr|oILbs&0:05]2VQ /cH.X~' hz`|h0<7 ϽoԚ{mֆ?o WfX)hEi=>}Q9 ״G_F'F ^ը71P0h-:rfeD mWR0^sPQ7Q|sV/ШFǃeE{KYu2m tAwa@fNF A:uO_PS]N@=uO? 
}m('>ƧM2Zs'3&ZODť:6^ͫ"&x^͈uݪP6OVq-t&@}Z+o_IŶZL<4y)&kT3Y\MET+N'Ԣ\S,LTNC~> f yN"΂~˸( $={|?[T endstream endobj 32 0 obj << /Length 2512 /Filter /FlateDecode >> stream xڵYY6~_h 5+Hv2@]`c`&yPXVDzgú(Z'݇nTu}UŢXy(.tqXEZ,T{X}JL%lR%?Ri#AJ&A[5 hG1 }i[V 8d52O5F%-6@m0Mr>,O; : ĵ xjVYE D S'żLzoAMQGLo)AZ7$1aI~ɘ=P`=s 0x<DnтaKDKI.` CvA.%~3)a 5%M vʷxjv;&fWI=)w|R^ϼV,l0S6U!7Bc< !W-{VGNd8g-}[ ȃ4L裮Ķԩ|Ye@gIF'|^sZ2pY֊IqY֌6z tQ-,9W,x,~"#Y:e \[4>"Tfke;>7j,/j|K+ O^%xoQR9 _td!P oiP>Ѫfy+QT%<8"Ƹ0]$b'^/e3Nofc4*cvng M}"g$K{DiF*т k8vE@XTeRFH pUe|>)+Of4xq82N=lS{?a5Do<KJFyw3@s .*pJ8>LN~șַ׼o# \*M}^qCb7*6 ~Nү9ԟ{>Ps|`g9ZAk03kv@:׌B4z n!<#juA#m#-=W{z'n&2dN{lAK&8m3!}WwNxʬd}kab7tt޿]@|yzɾ ~=DvCJE]>R ZC:.K.@'B8>ގWXr4mD Q;b J+y:h;.|0 p#/HN3 66A~\O Qdn֝Ym+>/@ś?䳿T endstream endobj 35 0 obj << /Length 2362 /Filter /FlateDecode >> stream xڭZK6ϯ0 Ѝ5DQR ! Acmo,k!A) D,+w,6Y}9JZ7؎D5~XRI]vp;ؑx+JZ= r'dN[yMݴ}T[GZaG21_s;'Uh{q$t-B]R~%;K;7"l>u㹡dE+y4tW̨W1 L<.3r$HQIaK|:\U{'-8 O]O47èQ^gF17S_3kӍ5|nVſ[}$5Q^Asɭ+E~3vFw[}9yB6s5#+zVӘ{kȫ9˽JDfP*P,J[Yi%ߛ}VY` aJqnĢpIDEJjmidȺujwi|`rҷHn \aZhv΢V7 )D)aܚ$Okk0SeIiJH) T͇n>J;Ge]򚄅qUTUƋ9An-t:vgw ѩ.U0MG%\;4Da%F\uR֎e7šW9ǩ[)$а)6$PA<p挑_[8$wU}q&=fMU2Pspdu >!_[x[LU-.|BupaF Iu,isX孛 fG|AFR.} sӤr&uB * |X"Q%Pplm~pV%B!9fKJe'9Yc1am=C񟉤pэ?ڽZ8?i.m Q`h/,(nHiMcFF83^p95ۣ=YJQ-24 [ip@ C|#0Su\f `c+J;WRʅaF=!ieDdi4@GD\D,IGDꈨFw% fcvM\_S6gTv((mK[Y~k.8m>&hgqc32bor#) ߣD<~H'ە^tVmPB'$tpيXȖ{V7g6:\c1‚\ jYD@GL}ƳrRTu5^ h ZC(vYs ^"/Fdp2pPK)sGv HqhS3W-|$'ZR/*riD5 do.;w 0xBʎt$#~@ uanJ`),  WcTZ&yoVęŅ= Nq ODZh U [g$w_εk\<6t+E"<Źwy9́v?P|d8etݨu?qXkIfR/-L;#\h5=wOȪG6>.ϧ1[,F$1h}ڌ:UՑ{|z&, ؁ \OsC:ࣚA|˥g^m0y;1X.iU,w.Qsֿ8\i`co\ڔ?ilIV#9[ߕik3bx:lg">]R&q[اX endstream endobj 40 0 obj << /Length 2177 /Filter /FlateDecode >> stream xڝY[~˹bҺFNV(vEnu?s$&]aVCrΜwiMYΚ)m9[BUUȭzcx cqa`,y'onie?qSX"K~&qq=7PmaӪankؾϸb [Q%x27잟wV 7,k+{YSa̜J'fr5̝sKKZp'gameiZ|_ (Z4svM`d5c-|Xb fdK<5>o@C") SQa e;Hd p*P$ .=ʇǻ@z/ŜH1<$e>uIb/|V#)ʲ=٩a8`Ūj5u؝U6K,vEѦcOaYx{;<3}o+z*pG^DzQnBˬrm?`żϢ:gbKګ' 0/c<RHQcP8> HqZq' Q_Rӛ֊{A3j-ݓ^c/^2VOSL^GpStZHUx#-[MK 
7Ǎ9F%ۜo6V{$Ŷr,|z.{{`pBZ-Bbr-ǃxwcމDZT\R2L$}ˈڊRܲGee– ؙ': 9u5\axG4f@'N\ 1RqWݍu53 i7_3`fE@3[JəW v0K[ƌ?C3,C\ (/G&?_sa S,Z>ۢDfp񮮹km/dm^(A"' ]M$<1a媓7Pkn bNr-sv,ƢƔj-rR[*` ,iE5xrI>7a"O[*RŃ_TXEAWrQp)y JM ɀ]t#wQ}z{jE",%f//h~'B|O#۴;9֮~F7v쮦 Mny%Sf|{^QԦ$o8UhQ/bUcn 5y]]ޞy%/ro.Z՛ A>yw{gFaW,ͼvԉ3W=.k9p-J KT/.Xqs^ڥ?\Wލ'^dP M4I6TY-bWZj!FEQ)6enƳ^\p{Zxwt+MRlIc{]"Бx!M|ŀdav.b7hUM>r yVVs CS05U)Ӡq*W>$7_2҅n&Bb{Ŗ;~o?UYre2kC><'K0 wYw:FGÄ&/6'`' ڇJTY endstream endobj 57 0 obj << /Length1 1466 /Length2 7227 /Length3 0 /Length 8213 /Filter /FlateDecode >> stream xڍTZ6,ҠH 0!Hw 14HHJ7ttHIwȇzZ߷f}>g %*D8A\@a4y@\&&m ˌˤ ur! 8A[,uSE".p  P?D0@ r3. fm?K : `@ZH W QA͍ l̅tgP6M3j 093.\& ] ir;A8 E8F ,NZJ*5(Y@\+W"w0A;05 UP(a;#o`lqK]9 /6W{'ʙ"4,AC(g_œcsv/`CXZjŁ[st*E5m|@ P:_=ͷx9 VM@}`V\/g+rrxo ,aj C o/ 0jwer+/K$7rki>SQ3d}Hw'?o];:WUBX!B=TXVs=Gު `[@> ,! &-Hf0_[Ѻn@y;A 4n>%v "YT 6Ǯk0T 8A@n bwp8 z;7RAZ0>~ {{ŷDKo Hm9 } Lѭ4п! S? (no!.NN[_~-Pw(wj  }p^)Eƹ: 6ʴ5rIx/mzөTbw+ˉ4ݵV]սOW?b5Wq':>|šԖ}o^֤̔"HKr֮˛Uox3a_4Ɣc1;胬:Xv\H/oO*1 M#]Kjo7>[KIG%w\<:9 6: !TW=r/W 9W ,QO&Zgҁm OL{ @<a'x| Gj"oT_?}0~W5rP\c}pbxPg~ jbUKLTVڽ8̜-<1b]szaҴďYmlwҦY CvbzcJYRLp_%(Q.v)m2FZ&F;)eh]rHU{6׹8/A&}&tsY.+Vܧ? x_9'/ݸп0X"" d+טgn|6|ՙn]a8Yd k(\/Ywv'W-M~A 6r|ĕ#'tѩ:m$7'&]afp$fM^!E{a4#:'!w^H0Y}Л!Ew .N{Q)^۹,y)U` ƅQJϥʃcxP6;1v$6" P&F7o.HQ)f}굇(_v=! 6U|^Qw_bZm>b$p@/U#^{qr`e }c)ANcL eAX:3/H-iq۫V+/8E7oa;Ylo(lV*&3jx4h74+j zeQY߅6xV즌^<%91SoRVmˋwv"C2AHfS(gݏ{2GD#@Q _j|G.%MB~"kvv^{/!aZ7Nq#R .|tà˦~Q o2NjlN|E3.lbEتD ;^f~'cc|S!{5RlZ@+\mFEӂX"V_ͼʝرQ4r&Acyгû]s'Y<|Ns?9*)Hǩh뮘z& cL2uӴZ PPgNXϳy3,, f+ |HyγUf({MX]ĉsi/o"3m7}1 '5R:%-L婢܆xg[V*΋kC␙S1upX̙JJ[|6Nw3 _akS\>&rf[QԮ_Ȧeƿ҆'  %ݿCz n[L٧W(%;ߚS2wKHVsS-r5͆=!>H\6qֵ7pB>vRkP3 f.xء#z;I8 6!q}7OEB9Nϭ"'~†(!0җ.my\in$DaOEOQ?tucR@I!oG1d "/|cI^O|gXOuT o9Lۼ? 
!Up[5!5Z*S`tԁ ?6 9P=l L[1UU_}+3p+_: 9%sN6MQ=Z<3‡է/TX_zJoƋzn򜜀{=zDx2^Er6&^Ԭ5m -Le~7@|[>w.Ioj"Ȃyux׼GTngo vk;9VOJ'(ɽ[lš[?dYZN;P;&\Qc" 5%jpE{~oY&l#z9>t-.]k!&69s]]M{*6t-H܏p=?QYAD诊u'  ;5]aiSA;Q7NwK&kb VrԺFTi9;#}_$#Ug61$?BF d7j f]6RUk?Ǹt#8+텪@\H_ܥ>V>9m:7FC`,8Wz!2fff;d.P!ݻ+4]u/V˂8E={*&F}Icne¡#>&( B|PʱWታ^tg58ό[Vhq2S?]o W`F{\$BchrgB"d)6ɋ 'N$Bd5h$)qz5c~[)ϯ,NâTM%mJZ֐2a!q?7ӦJ &iGS<`6pq{&L4bK^eb7*]SurA6c/=Y#?jZH <6J%=|`[` `a]X΄'lR Ea.IYƝ Q6ipuBLGa֩ ! jE/5_km`xΪ)^:@HאcƙaoNWf\<ڛH#n^PtFߤ1pAj}7y ?Y} %jL[ٚ3LYK %}sLTr+ a|06op?9LT ?ɵuVw+5Zpv(2j_/Q]xbץ u˒IE=Bd; [^xLČ_(-k[`y_W0[A3nh.lUvR9nR>{%qc9 &ndґ8C"|DqߪZݎjicZ ezjl [[%,>sIzH+7õcU=4M;r'bd4˜ļ|/284A"H+]QC,ztYSؒMshop?jS-0"L`,?F]uCh琰BsERVk Ф#E廴TwUݟ%BhBSwq{ƨaks6iY:{<]reTEbf<~c1i <mS_B  MwPp ;߾qN)1ITfxI1iޕzH1SQT9p& ,ݼeO''c[`rֳ#o<}\Am0[vGCYg!Z~E}cc{h j!g8UdHe`_HLOn0zCWDOB 9e(Yf"l}!c~'I}nj5ƇQ6u_- 6"RQD=R_֡fàp^ A3FyU`=ʸQ vUٳ1Zh32JT^,ϔf풅~ql +5j\ ꁭq"9[{$u;__^R*)ymTϊL޲8,]`QO0=dH6Nv_LKiq ~\jak "`0P'{0%Cnh) ?p ɷ5JZxZWzS[=ΝpOݐk]"s`m5z+3q 6lDJ@W(C]|%Y 9R^RzRfb<1kw ݽ ckjk\BI>6QT U ۖgRWu켾/3p)|T"Z^WmǾЊT6zX%s=,~ w~uH 9ҾS!ЯfY|Q"6t$A[avYo;[; t1ax'-qʮkayb{(WXgQ `chfAԗ7YP\'zKN7 ǻ=jKĹ dK=?Gƛ[,5G30R! Uݗj0Dom&4g!&FMsɑSos@Yf$vx~&:_|0Pg}23'M d˿hZ&9poEmK4TEkzJjdY}Of"<$e+e4|f*Xt^tqVB[wFGk׫h-f\WRBE*"DУbEvΒo{I0V㑨F$sv 1Q3?iu0§]§ªc4(Ͽ4rj]ԓhWm|ro&gU,ޔ_vbqn͝85կCo }j5 qNXKLdOfWw}Yݍ endstream endobj 59 0 obj << /Length1 1869 /Length2 12130 /Length3 0 /Length 13285 /Filter /FlateDecode >> stream xڍP ŊS[w+^Sw{圽}3Nfֳ3 5&H&Tcqq QSk]@#Q뀜]iH:2)뫢f` 8,-,Zle?_tv~~^? g9PZ_#`vuu`e`ڻ88[3< dd wi,H-k_@gU`6A\^M  gkt@KY/&8C4;;!^`l(z2?v.@w hg@:Z;]]X\v6KC,$AW?;_õ8x@|C`eX9jCNn yu^EHȬ@n666^~ 4f##C?į58:8,_-AH>.@w %$vv`C*Yů 0d{?v~3z0 ?>bV ] ]MƿK'`gr׏wlC,ڧ : ?io:7#7;?R9ڃx\7-PvxUvL +"Y]ͭ,Rspqv۾^.3uy74-^Hl a]G S `e8^X:8#Dyx">?`/Xe!N?`U^}*C^TK|^!?SjC>uK^#"ſ ƴ/rq%ƫ /|ݿ55پn>ÿ5klk k]p/|MxM_ן?dzskL!-L}/8 }K¬%hK:ɍM-Σ_ֳnly$lnG}Gl9Z;}` uY,+3)"֙'4V B<#Q4gGjK7!kIdyX"KR&tW yעP0IddXЁfv'VNo3I>z3wdLs7[c5Ծw!t< yh^8r] {|ǝ0w~,+-c (!JvklAx,oUX/"ĥѓ06#5 ]h/}*YKBb(0NZ;n! 
eɘ4'JͼfX#A!%@`ArD-QsUXdGj IP9cU J)SrN,#] ,x~ JLXA{1<ũ'*3+uQa^ Ҍ8#CoP(aꏷD ̝ \E8T:R=U3KɎ ('y͍Zngsu$t^=&V2|]ίÄlLp!q3]gņT`sܹTgGe#UJ? 6*;G7~`+-و&7QU(SPzS)ԘȨ@p wOhI"$$_e\>-R평Oˏ,͌c8>=> {?\Tzt5 bP~2&:3ʷfmOiRL+Jx_.l-mG+.L.x߂{N(oM < EO2;& 3Fz8ҟgFIEHxoZ/d}nw}] "`T5`E΄c/:M} SЙm7w([и E.Hĸ[~:P ~vֆybv)#؀T GWwauGJ#8vYW='X/$6It鍡tvh۶)$ZR^`*5cqطG7^rKAW,\Gb">RU%;y`etgM}?et &CiV{9(8p@`ّыA&,lߤ s6P'6VcSvC;䦤>&l0dY2H^TY ۥM4YyCY'ob4۽a|^HwSw,Ra$$ITI8oSLiS.}Rm/ {P? sS>yIW[8Snt-6~zDk?^OSИҒZLG DfAj+.VxW@W?A1eW(TTHגAb8.b[qCGӥD7mVmf>uxLOhʁZS׎ft8dG]hxEiቴnOO1'w= u1[\j8h.5Wr wo(W>F%rЊU|zdyi)3Z,516b%YAݒjt?b-Q5ՙpO$o*}^#`MctPoЭ6q:f~ȹZtP-2> Ҹ9.fY-_4mRn,rX&v*g69ڇNeUj)au`ZkMWU:U*.S1KQT$L"ٻD_(2ǫfsc%ڣ 5tm'/*W6oYrgmdmKOڼ8jt?)3+6|nD_-x̛S)$z_~M9<4Viy]n v,Սp'hk-P6ƯE*mm y@j]!?@{j)6I89(`]ݾ*gxZSU]l*|7El;|Ac?}ݴ]@t{e'4gg[6{. QV[]À poSuJeەf{Ja3KHx Aõs}zJ11 ao97c Wta0dYrT& !6^3b)4-% Ίd?Xݺ0b|zfM?s Z9qB]!IdQ]I췰\2ș-e;hm>Bd\"Nly c2jn{>yG1{pǝHKZNMIEys?|o.)r$jO8CPB,Ic[;S/jdԐFY UV(eTʀ!=K%2|^h UQV:gpـc^28ڕ =Yj~'"Pg*~Isvz+( jfo9_xgXYB"a?2Z)q"y/=Ri:]*U$!E۶m[ѰR_s6WyXWb>~?oȫp=gO]EY;4Rf4eJA;1a]^:bL5bW7X!y%lOs6oKӕ|CiLYSG xh*(zg'~O mF~JkB8\l??M_{ϖ b0\'}G Bm)u4;֜;<.:@xCߗ *W=6y[\%8f/;-Ю2^u;4>ꆌ򜚑[6ŐO!L6iKy{ Yjmz}j޴;PLEOFe*p?΋v ~ݡb5ۦ6ר ۄ2o_}"[QAw3ވBFs*P1)va|c+m@@D8,V 88F>iHɆzmwFBr7m}Ϩ տ- l{_QwP;;&/JfX <`t~c'Wi j'3;y7S.C[׼9v 4?J4W{* W-nje l}zJ.`)`{&*H!ŏlQEidZ;(w7+&shhO?h+\]EB*x?R/}eo,xc&4"u]P+ˮ[g &c I$$I)^d&4% mZC>w#;>W&u$^.ݰ:eEb;4.^0zv7 >n챣';u|AACwxg-wKkጭ|tLy{eDSagYP ~XSʆ3~S7簀i8dnJOq~WORko>l1-{_YVFIB+DA"m=яt(<[azyDIMoaRRzb)ֳ{c @|X?Eg. hvn9tI^Xz2(ީ(c5c'DވkM?M4-[ \a m_|\ܖ̴o1!:zf{HJvc ͲKa F$4.-Oxɣ)2$i y_Xe:d&x?*5H(Mj֚xcK-i:hrz#3n iB=[,oRi*&B) 3:amtH/H]o~L8Y7nC:V=Sܼ*|{dCaچ={LJꝩ,#&<VW)h[x⯟{e-ZHyk)9n3mC{)J1Е婪bG0+AS-?ev碀'+\.b'm CJ)B8M{y^?'FEzG3Ryјva:k^3@~CI?+lY`&wUVwlqό{Cl':FUR䒫+t#HXhtz%34ecC; }xd~zCv۬U~ptI:eSff#A8T]~G>#{c)naUgx@U]*68 |p/=Ъ\=A{#lj#1OH_ L@=|G$mF*4N#\Q *) :Qfw6wNR Ji\rfkEIw8}Ȉ/qE}=-"-YoC>5Pq~'+(3ޫy+ <;! 
פ#)geoos5]^6Gj@؈6jQn|.vd-r?&^/PPrGRiv>% U%sRmn\U]OjFA֖Qm=Zق~EmtFh2s# f$&(MQORI۷-EAͲ#ƧǯQE_dg=)^<:2 3,u,PS,R֌bHz?Ј2L*j'法e_V8 m[)Fߑ/GBILl g@5L'2pd %E|(d tR.$8?.RR37]!KpWVzesôڼ7diDi/#OC+lD{}U'!vOOӜ8ċv8Coub~Y[Yq3]f"aj|&Oc\' R⼕#LBΞj +n#K"A/}`j%"-fBF(zaח!K4bp5 *|pu o_E=6f7lR]!ӗǘz&`m-D[E!_hulyGȶ,/1 F2fכoX|-k R"4 Ń7iZq5zooln W7H]X< ct!^kתWHT z,1YGmN6, EQg#~$f'8qB +vQ/[_Ȗt ذWu\t5FKr$|6,J4"yE{M6'$w3ِBd|"6zUa#J&e2I"kOv7 TA{խ5F6pӭa7hCqn|7Q.(B-4%m ziwNt(JS5GzCT>("d[8]p=Q2G0o|R;Y2ى( +]%Lml JT.^x-ͯ/0wp3q~9wQydrcY>c%=|T<9T L&/A_}< z[Z<feyȚj*i] O63EQ u52_`qPTpGĞ EeTڮHn(u5GNƸBA-܄&FMη ߞu^ N⭃sQ{x%oF_C61Yiy4z]2yc54Rź7eW2+  }wOR(gxdj[|ſz(п|Ύ-E+zwI]W{-t.N4p0dٛQjb8k[=3ыdz*وO{6O<ԙ7;`c4ZTRCӪd8I:}w'?yl'4c~j 0nytV⧊.NVZcF>%x;c_NH?)>|ƎUۋYʥȸW,akD7PZ&OzYOƷ|"i{hK'Bv7̊T{Gvjx;36iZ.OWՑ0z8d4fSs,3j 96+>_zĎ^hTNM\og3g/nao eOa!Zk|d=Ċjb:m\N 6 D)Տd5®OXi_VLpM=P뿽CaJ4k CWtf)pp$.raCb/G7W4Ue8/wK&e7I`ZR3̜PRd'"YIbىU 7WS59Hkn}fԐ l/ιq~H᪊`Y>}$/>!7eQ&lȵ 6b\T6T(|;8gύXl'`ԔYʶ#;R& L*c)x+#oܮ#4~![q(|@0?fh$qyøOt6,ߘtę_tם [蕸RE`1ܑ{kX5G{Ʒ=4Yu #fՇМn޴ҘxH\q]Nc2l3ِD\mGژgO8 (ly4YI0LƢ# D!D=ioH/ԛP/'Xʄa Qm6=bnX̛D|ᢢ)jũak(Z4Zޜ'6XRC9 @DSxr%Sp[([ W{ 'ts.XK)dYHW*c79Mp؏\~΅B-0@G+9:掂S93İ"=~#SB܍OZ#?#%1cܮtXZ6`㊄gv?ˏgzڜvz~ọiGI0x$zϞt~%a%fak?֞t1wː]EW6>٢o!Uȸ Ghe?|N 5ᡮY)} 7PSJ:=0=B@\R pR4?9Nhm%| kX>񐛯^٫ W"v2ɷ4~&v )SﰫkHݠ[cdF,SfVOLOwv|RNe3A /CBFz{cBe7)ˮ xa(W[WV'VR>V^[ǬdK{$dߘHn|"K{{`]-gN&}7eyp o~M?r;@ȶ51{w sƶFfb}gNǮF%b9q^GlnX-YTSx~]dG}'%*ů;Ȋx$≃UVJNtab/ 3Lg^ͽypCՒȼ'IUoz>nOn9S񁉣q*XQr`:Ya rSovz03')n'hLIk~iVZ8oDnf啻[9i |_6&F=Ј endstream endobj 61 0 obj << /Length1 1451 /Length2 6792 /Length3 0 /Length 7766 /Filter /FlateDecode >> stream xڍtTm>- KtK7.K(-4*) RJ9w3sܜF0-\ D  :FB $" rr# pw )ew8թ@X tĥ$A 0$. 
P`0 @ {r*\}hEEJMB+QtR[K跶=+b\bCc"St6&xUR.vQu0FiaWv9T2=**Mnga6өli6a%4+p]Ӟ3iy0մ}Bj_ A]],̈́6k ;M32 O=}!.)v:jBy zBU6!],X$pZK0&Z@ p-SiBTP)ruM,tb9OfbBZC݁l︤X5GsҬC3{GR} JUɿ\1'9]P +uCERm../,HLC4}ӐCJG9NJ->^{Ys}ghdz9W盙==_'R;ZX| {> v{h4/PGr"*9ߵfm\u‡Qò㠭vQIȂ|K2 gD3heZ%"sL_|hub:6sv:Y,Uy?g麉?>V?UDOHzx/FQ?vwk;FO7'UN\0"- |pw45FxeunO ۗf\i#a My= vlMNM:٬+>>&,Й;`'㍽4-LbXu]IVbbQ?3,ztZ lʞ5<$%C 0٧q =pY4׫;m0ʰ6 eQj R?NSr-|7vy*WKE>v1@{tH99h\f=,Hd &'^?N1ԥ(}Q|W3 2 =xİfIz~_DܦI@gQhHXv =\`5ƊIG]3:`8-e%&wLFjyH=o/12 fs}`p%bἮ!<k.;mG#]~Oй7|sY/4ez]PYz!2slqqӓmQbǏVS x2LH8i*3Øߩf՜U1Lgx {j"}@PUy+%^w 䝣d,7=׾FJ/]ڑWRޞwq1({K|l&:@]%^9f N7 _xz&oMŪW:GAwB-8RvY8aC\ѧ}{4vwϫݞyxuuW62U{2F];>yuIn RU]9F'|: [3~@8wA@W 6#5gs6Z|lIGw?02j<$;#71[5uLJMdoiU[^)SDݕ, i۝ЮeѮ(v|@NVq1%Ǩƛ Bю-^"h#OP\1f+@KR^d~ rc v OW=Z?J$thIlA[q>uVMG ,u9^W[wlk6z%$D߲ljxPR 8̇! 1J<"5&Ih1lL\{h7D߶]V>~i*hn-Ӎs2mМJfQ/GSO;Y'h"J ,YCl48cYygRoL..W@%Flt?_c[puE!&dv-9 hetf0Q%92"XG>PP4C^h mE%PR=swͲ}iFߛR{,&"⌼DӇp {<>-Iz}?T 6~Gi\,v'\mgGX Xuk rgHhYVUbFp)hg|lF&OqRxOᱺ\ъq4J=E;5<'y,f,5 z!ψF]%''t{UG3PL\ AV* &ZKe$J{jxmG/kŘsDHpZB/JCURmEMwn*u۾$h9sQDH!Pl[j`NI(rC,?BqֲNׇd*Mp Nr-l'_r|}yh8x2|tw e›|囖^N[G_ X*k~[-ùj }5ϛ\6 2vLvm`@gTMɨk|.t#IoFZJE@y]:b; qyU4jLrl>___?k?j{{|Jk)x TLFu>X~Z6ĶX%zLaޖl"H]P Ù䚧'WlDD!d99kԥ:V# b7LMbce8ݢo1nބ=_(]IlUI_srl8. 
= %3֊jxlXmcB$a|WmݬBx|3@&my u~Q`00aCD>I,R>a劵vId; ם{o=M蚞uи7Yy䒢/n%EP]{~(|jot]V[>Mw)^F{hEC¤nEZKj$q82l(Z'sዼjc{NC;U_byi<ȑlʂEDM55כ~ۘ‹J;8;QKY-DV?{_ds,͛KOޭ.momv7o E>ra~+6Sm5wp1[ u(f\}66=ż Yɚu^UCh"ko|dɇloټ~& T9w~Bތ5[ZN%)rygHp 7ǔiˉKĶ XE~@OLPiK` ggVon骝6'lyF4"Eل۝f<^N$Kp<7e Myk$`j o3fm""NOl3IBNӨF%[侹u!gL~CYQ%1ɭv~#ۊ)N.cSҬ$y K!QZі#IձI6glƄSk҄zPQ$,r::̷i L6@K[4$duEN|/2PrW ^0L$WSnU+ц+fb2St̰5TS!NhfJZ_ҖBRۆz@3~&uJs08&w0tu3#T<^׼I$N9>8J!ca Y?[0,ӃvEv:Nmo UK jѤ %\x-Eyrf$T|:(RYi%`ypY1*Sԓc1&i 7B s_Mj!w:q|vDMf3;{~iٜU#ʑs q&uaڲaCJgW{+Y6$\aѽ..J 'V,o~RIve~v5u}>_̲]$W;ma{nG >gMNjMTչʎm}¯F V0f<%M&m M|bfԺ[(7rߝz-JZv1z\ 4) PkADO88[dpGpϙ˘1^U p^4:|j>]wG/ JhR47.#h*l+z[;-FȁejD`g so,"d~5B_Ȏ?ϹwEV:2H8q;et?/(gGkP¨(п!C2ihDNT`rZaG8pڧ[vn"x=W_Cnr'@wUFHUR$ĬJNv~ :$)-K8i48hAfu4\Vn1xUFn]B@sN,dvUoOQ7ՠ_.8{U[%9$oN _KDW2RI"?*j 0l ESW^stz'+ɗcq;!Un9GM%]Yz>=QHsu\[{b#rN'%UGvA 늡4AmL>I|)7#Əbo98f=Uq={X&B PeJR/yò! k%lENoFX=KеA}}Bj"ee X"O)}4޵dYEOS7SJV}~tNq:v/-{ 7`RVB#=xw X.v rZA4*6ƠTp ĥ\TQG A -(m9enʝ6is\⑇ˬX41 endstream endobj 63 0 obj << /Length1 1398 /Length2 5888 /Length3 0 /Length 6843 /Filter /FlateDecode >> stream xڍwT6 RDJD`Ih"^U@IP )7*(U@t"EEEus]Y+yg3}DH 8!0HA 1aHm, tB]`1 XR|K@0@Ujp,[ q<P> [܁Jp  A 8G+!#4AAp?B:phiOOOa+Vqz"p@c8À-!? o    Gb .H$dh poo } BQht#\@u]aNA΁, \ v!@u%# P  c.=!\ru#qXy} Jwo?uF<>K$ ;Z pskT8Hꖘ{AEzῌs5?4 'C ?,a~>i0; G'e1/%@?0tɚ0 oik i_FeeGHL($*AR[?B_-= x^E]>?ԅyt+JG(+"uw_v߀qExA# ZpwoBX%B`qao= 8o֛/  7Da_6–A ,M,apy. K:ԐP|D%$  I}5~("D.@B~@{p>h )0\Gl;CH˿C(LSupA,fQ.axIg`Nmnٳ}j8j8W8b.&q4=HV.]"2dmT9tpU1DvH~k{eI.#m/qMv;L|r}mΩ#z띦dp;W(NMMU|:Ϝbtz;+s|[K[7xo9&G ʩHTvIC|>ּ# zvXhOw-wzrK__x؇7UL&ɷg52j!j)b*_4};9"H»{>xӼVeI(d }4~f7&XM-4*Z麰[`*+÷J|@(% ~$Y6{8FTh'm7H Q}? NеxgWė${򺨈'~7h*NǛz}Zo- ܗ%.(8 ]}D7h-qtNSNHK-%r·~s`eΔajqfw]ͼ`a?y'Wf5=uA_pb&Lt3ZEوwI[V9\&k۵6ej?ڶ:={y7hjrJ^}J#ICH?z} x"7_^EoϷ+jqI|z2CMa=~+x?.>;B"nL~~{)04I7rE]hzcgrŞ,2 v@ߝۋ}EGL.՚~E}3LY]π7mD(#W^11zm[y+` 8k1WH`8"p!gPdaO|Ln-&텇T}W:UV;Fx^c~VxZe$2Z_,SQ? 
7JS"Œ-gSowe?iލA_Wg :NSAcg3Ւ>xa\pѳjJޢ2Ff*hշ2*Dk<ꖼQ!E L1ȯ-6;!tKrJA3q1"< ֍} ӽk~;ٖ 'FyQwuc[ęTe,\L5zçpә1A_k`u +)pF'%Qc X*,rXgH;u|"WQkGb^nހjO"5GSvI ?Vy;Kʯ6yJb]LJМmP!G/3$$ג?=`Z@^ҞYXQlrgmVBnD3_hab؍$2J8aڗ338(3&t<3F]Ck*+&z{c1It×/-tӢ#Ku]j5kkw+Fz\@d)ڤn(Y[9n/G&FWc+ږL$K|f>Rxf"H~DA=45pQB;܋\MXo_V)ۙ*3ǚ2܎Rk".0pLMS^a $Ia/8yM+2Z@*a+s^mF mwܹ=zj7g([f Z˻L1dUOǛ,HСdyAJzKh(ic $U̼{-ǶSr/Ȍ:}: w6D?~ر ynK69i@CqgUjP0|9mR֢(+Ei1 /Wf2BjC,! vf$*~MlEMp ,c-pP܊ZbEsyLMt/|)|up`fTbcχ'~kB?X[_lvIQ&"BLcy˷YG+ [ {R \[ĮT׎n.nr>?hju:#QH9WbiX.JZhVpi2/܀SO >Ɇ&%L8,̑ %2s?~MPJ.=fAH5$`UƳfPRIŐK#7d.=xU{ȞsRA{^\m5Kg5TEx NAu?&u2U:[C393֌ |(%hqX+ƧfOJ݂Hms1JTe fR=]X~4!Zឋs"H`{@8.!.1xU:A7K,Dg'դ[=ՠT 9rZk=$3ڜ_%t,IѬɼ3%jb.ڸX(v[gT]FY[(G옊`N8*6:[zk\J];t?sU֊$39;>|g;VY=Te7 2N %_ivM}ݫRtSU-(t)Fq=vʆ9r,ݤĤq8+R}ydvI9M9Le3.jL=xeAVsE8.U\+JNp{cWz%nP(/zu9˴I8d|9.ҨIE6+]rg ')CXQ-xߧ~܌'aNʓR ! UI_űY>VMh#w(o9KӲ)@@ nK!a,S4X"ϊ˯F'DXwooj90a{` L/L9I`5U-зݼe{/xQez_n7<4mtonzC A9*6's&lxE+T եSkz.޿8U%ha誹ȶuF ~@[ϥLnsIz-qeqCѫ\<^3e~F9_\4 t8Driج}5o$*'?x"T1UsH-~^+%U{p#Ywq&aP⤉%ŲvTh4avgja`6]\+3ݏGwsKt4ljnvEA=d-[StjZ~^fZP3#mq7%-N&FCAx,6fD_ZI>_׊(fF'Q ԋ=|O{xJ_uN+3s0j?oՠjO޳* 2]Z d+_?xD9\WT"ܱeuWKXaC>OSzx'H0%/ ~Ƞ7W gUxkWB=p:6hn?9G$9%jJ tn[(k:b\a1=RZ߭մ)y xe+,t;c"[d4im35DcF5AH%DQtD!|  =Nݔ#0z#qЦ-//hrC~6~;G5ֺFMzIBg#WOU˙i.&?pg6сJV-Lt8<\]p_Ĭos&,yxVF Gͼ/ 9Xh%k?R+3y5;)y0\A=)d-*D/RdL}T16SI"wV$Av3/QQ | 嚟\8ҊA+}hPnչ[|2}u^c(h>H\I-uff[`9£xUCαw6z5tz0#Kiֲx̯0H38MxmL}Gv endstream endobj 65 0 obj << /Length1 2407 /Length2 20591 /Length3 0 /Length 21989 /Filter /FlateDecode >> stream xڌP\<Ӹwww 4.,Np `A.of߫^s"SQg5w0J9؃YXj,v&6D** o3"`W3j0):\mV.>Vn> &n s"@H% #֌_Q;3hڽ2@@ݙL\-A`+4dbg2&D*x5؂̀.@g+9@]V'Xៀwk`ebog.W'`w{߁&.&n& [׀unU\̜A`&yUY\hvAݟh*'?'kcno`7=#=(+W% daae@3+5<r6Nx Av1qή@_YY 30h GS 3{?um=|tTu>117# {ɸ_?oп+WO2c//k)9n-@g X8Y^^Uߖ6$jk/7?n;^z^ si W^Q{KryU@`3[f 8~?+ף?׫eft\כ󿔒f濯'_'.=f&{k u<_306^0A<f?,_`2;Yz^W>?OzS/yeP^ ʧ2h"?z3/|9ؾ,-vv_ SuVl%v&.V^Gy=k vаnxYl;:U5/nW<E@_ j(j_guk_q~-]fo 2뿭#>k׎_[*BW6'WחOUc}UI.@; _"rqy}y3 r>ÁJx/_v5/2W(Z <5fί:ej=f f!u!wE 'g;]Pj6oDGzv%i>y6%y4WoG\,: ~CĨ!rh E*ɕUν_ڣ~|u,la_K|1F3 
d*4{H@q6f#wT.$[o-~kRͥ_'4QwiG@1i.ʻ5Ft#Z}s_r^^2 ps/ș\( \n$>[거N%Zᝩ̡ZxjpZd Kmi.&tgҏ\[$N-Cuke߭%<š x]=Aq\V5^(umͽ]ux̥b%ߊ]8+6?SCGU:`\p9VD'Be1tbHA=o!әVf{g*IQcXph?ME!7*m ]j.ҢQPnwޘRj5D|\VxYq@@%>"8~6u*B gReBSu4k]>kPxmA-Ό}ޥ2(osFCS-d\o FG>[Dlh6f 7O`(;Y&NdEe4Y X/ 9;^"[] 6~ VtBRnJGh~3uȕ;!j?TAZH![FT`xc؟,"T'mĮ< pǔK5Lv|Fڷ4oDQj`& }pq\ZS}S 2QeRRI/#(}j^T&'RIXY|_ڿٝ74/emX]llV} @hl9SxX;h,V?CEuZ6eYj.YJJ_*[q#QwCclTI5^rc\+6\3? TL~(/O1:ympa .ۍNؽ32\@%hbEfaRX̓ljL|zoU9zXLP)KBn=PU՗lm}j'aYhGbvgRp|K)eÞ\A+b)<{N+w259^wihǞ:d1i nfvذO'EêLX`37-~evRHhҩ)0qbnfW{Fh[ҁK GHV[rHxӯKg}e*/㠈d0v(sO+"ְC zϜ !uIIBk8%=mW5-öWN%Q[P.fM}x.*2 :{ Re9_EEVfRX > 4!z7ߥkKkkYݓb:SNrT>2`ބCwIdbhsZbi߃_UxXķ@>E3RdqAyDW47/:Qtɒpޖ2\G)+7*Eʱxna3ԇw 4+S,g| f^qD~/Q\vHDO+<֝u0kUy6$>G{\Vt$"[G O'#۫gv4(rݑF4\甬_;J 5ZkMnݾ@K"lی9}jtpLp-ka ѡf3z%%0>6Q&.>Cgl^~x%nW=(a7KLb([tūY"<S6JsQ!f %J=!-E+]G~.6u PI)0)?yw3ڥ@GqIPoV[#TتR*;o):ɧ;nUxyOb\x!iwHPNWO]f%5Y#gkRqcz!Pαicֹ(+xس.ܪIoF%f:}2B>mbaש ĪF%boy CLӍ^~8 5VM4![#0v6 8Qc:ۉ$c^_mBӧ ݎp2\@B '5C`k:9'9`G2u tQI3d6ˤVpтfs4ҟ`11YLmT'U=Mmu>HB2";ĔpUMO d~j_vAމ@Tb/E5d p1xN264_3n<5&SN{bBBuR0 |8"0rw\ >}>%6ŞVX:*y#JԑQaz8lˈKpAC OۥJ ]!9obK4txM_[~7ًsmʛNy\o)zya{Q*1^xa3tJH{=J22-##3HqIDȚN@xO#Be`ou%!7֦;]"9;1,:|dVI>!3K1׌*u`pHtk8MLğ@ ?EuEui*&[}ؔ߃%T}*U X.Yh4_,Yk#V!ܞМ]SRKi0AeJ=W2s?H5NEq^uzYj*ڭ;9,Z$>2-k@N3ڻ>CSjĞQs lꝻ`1@lZ>?nsĕ3 Ih A\@E2 M9\z25"4^gр{ ]skruO*zl(f(d\ZУfqփ;]%fWl9-sk֚5+x!VDyhiU>SP+/ ~uby^Q Hl KrYk8$UDW8f'+xh>p_]֬ӃϑcrnI} \5N&kme!=f4mh 1Pmվ9Uj_S )![Ϫ<ֱghZ=J[GO v;ÎN 5R"s*(uN5'4fEd@ry|B{̒ g_\.VW- _{~Q:+Jӝf tg5-ݡE~$<~QOS=jU4>I2gNkEL6uX6 ɟ>3§!I/4%|.ӓzq?o+yFpDgxzqERmiqw޸6OqA7UHQQO›3,$QSQ ek))f-iy"0?M##ZrB1;dTUTzi׉R@D3ا7}ɛG.cD-aaɴ#7i ,kKbNΈ{7m8,Pp nJ h>T0V-l4&E>~]# aU8?2x 0B(4%/; %64qfM`ͤrv𖑲 -=/ƒż;0FW K *Ox pG]?Pq)2uñ%՛1w1k d|o&B~]6lA,(YUUwq%!%V&g)|)k0IQOHo9MY,Neqlf*ˏE~T/> ;7 <H:)#7ӆc-WY7ѽ֊TE(ѣ8˜` T=s$$<|yO2 KB^35w9wm p%{N9vSi̤?UxYYS۰F W؈O$̘`=2bxprFBE5hԍ!S]]ϾJM&<(;Ӹma :ʪ;^5w0yB78 ۺPD2A4K:!Ւ]b@" '5(ɒg2w Kz. "v0hɖ!YtzLPw-L8@u=1cDVrH%%P&q*\h\! x:DK\DĪ[!ܢ/h:](,ů8hDT*Aݏyx!" 
l ,>wƊCE254f )] _)]]:"̲LX|$ TXsǭY56ԕ L < s:($y7‡ihWjuB*k7}qbҰ 3i"pI8zDvs uM5O*)bX?"5w|BAr~, pt{a{6ʕϯ\he,qAyn(-д{CcO1(8: QO2IjԀ lt~#L$t0lV"$Lϑm:#sIKŇ@"{ɪ6GuF'{g o(>V^+#[pO?2E|hGNQLݱI{ubofE^6IŁN.Sacb$ٽ+;+i~Ԡ{sC%*JapEb}]lRR3_dFI PveSƏ/M.wQBk tD\]zR>Xt N>4ss~Q$ N"! RH]n}tYRZbz1iL+Åt%-qǤV7Eynxat Qj1DuNNs͋%Zx᣸f~D w(ǗZdFՖdwJFa;vZ4ـGYB|K8HjsQh㑩AF[r_š3"Y!%&8sov6sOls-/)/**<'"M!tCy^솔fyH9_XȂTUyi2=wI8vI:_[(̵SGg j;^ 0Π$iZG.3L !3K_QHفJ hzz^rrʼnNL]=ce}C 2 Шevߋ#c<R;Xl}t&g?*gUSll@Gΐ89v/xh_[q\r,{U.TiHnUy\C"UceϺ~#ݝ/rc29N AadeFE MQ;j CJM ;Waנ, A'Ʒ"@feͪbEw4h8Ŷ dTVS3I7: q[[KS"Y{Sj2*|n*QN6֝mWsb-m[0k,^u 1D;I%RIҏpRp]K% qEXhO+zs)piL㿄@yFSlPt8Eʹچ1fF;Ws-oh,85kw& nے -3d|&z 9WWfT9:e5>}prW{=_tuO/52LD.ɭN\.*.I;ݧMeľcw_=w#rP}Q 2ٌ;K"/ n3¨aJU6id)d3+_hf9UTD1"R'G{SE7\Ʋ"C w(W=$SQ\JY^m9 wŕhV#^JrGN^lr*%YkTf]TFYcI7xh|>Ȇ`nw ,&!yCnȟ6~`/d8ir狿nXHÚމ3;c+gcSteba<6):$CFxTR=ݽVPͪ.47);}0JΠPa pB%Eeg80p3agkfTl"uqVoeLe'RZ&y8S%@1)ݣWKs3KDmmb5"ϛ%EEսb? un#=>TaдiZAK'd0%$wŶzQŒMH/G՝fw!N3^sU`u Z)/b ِM2gمhiXT-KI79 <Ȼ'o%Or oC vλ55{К6p@z]Oۣmnsj7Dd|4qč]b303:YG m]dPaAxl:_$ߡnLc%4CX_Fm]X ClrA=G|e|!or?`tf.ULYQ݃+tXl 2(U62"7޶H{\;i_^5rV{+ z"x@ɥQ\cLlIYua%58qKF-fL, tZpsфh2vֱs3]]A/B0"/G( .hR0je #[{l@9ڬ|#A-n!.7qKSY~G _*=&!LŽ)d7O#{ g^0 dЀߢV<"IOǍNC:ؐ V{8\h[s|]Kulv`*۷nYP곅7!|~tuIst2"3G[cgPa;pT7gsO>fL^}?oCyQKC.07ѫMp$X2;=ϟSYȓk骾rOK"3V;v(|or5f p@Lp.p7\[FkP&4:IQ~:CuoٲNì _^~5nAlև"EG󤀬(N 4%{ԍ fmes1 1{N"cpo0/qڊ7)2"|ŽU:R*Bsf5&npMƝofbV+;@vqWxc';ٶ$jג=)2߰~&tޖ*zFؔ!oc()#%qcq6u]G ±] k>'}MhȕhjkyGg3dD8}k7x|ӗF}!vj< U9K▔E M 7l—ꯛ|BA֥挲ZDGi(Y1MCчX8!ן/NB{!*)F}_R8}kpJ#la1%00.=Sɼq+fZr(ljMzk,<[|`T\Z^[1;hKjҴwd8}m^19ŨLUkgXJxH&F6J<g¢4mAd\R^3{HeɺZ=3H-'F8mUmܜ-| 1HʒU۶B9t98= '&DWueXM# )`TB*l|* 0$ҧϵ_N{KrZ+dgo~`&nj~#jGcYNjG$ϕ,љ9M*XehX3ѣ Aq DY<3/L]b\q`abԋ-rh(xP˺Db>>J]hʘwMWn}GDh[&0d~HLXiaY^t~ snɣ8@R+b)Û/QoPМ[b)nQNxA-JP/ 6yBU:bkio䴭 w"%1OPP LA =<[2 orQz)Wġ`NC ^)&q$j}I'o*״ܞ3{|;Dgl[|Qj-dw{{f7nGΑ}7F8%Fx^i˞8`.ΛwIBFPpseb:Qp"lܖw3mkbeC}2o> 8PTHNsuх0%IsC_'kIVbo\yޢ4dE=iÅqq̹Fߡ5¦ax|uf`Qn3 ]tm{Ն|{zPwjRr9;C,  \tH]Y3K1jvs'!@o,R)޺iA"o7N9_AI)0DZa-nJ1҅QڿJH]@^ ~{tr &Kayɔ>|AмRU-kIEWe<xl 
KęDd%Xa4ʇIUrʲmj\=qӂ5bSԋϒOYY.oG@UC_,!\c~. 4YP CuTڿ6 t9`(>8$|L3$<="q1F|GpPv[,bmmg#<1dYv^tv"Gq:"q $?I8>k5YC,Eu;;&IEn[F R.mCQۜ q/is!9gA< Mffb&ƅ(kֲ")LH`\[\x[;0 ;?o[S<N Y&8bƗ>iwK87쏷rQ!N値2SBSDǐv#K=<,9iOZ:i_m]}YddP܃p$>p9pyj$ExP~1A! }|^&(c( BYrY51yAVBˇ?3ۨK)uU eXe·eJ[Vp[KRC`ܵC1TtGG.,V! c#׀d(ɔ&&JR/;i8xLkܑfrkJϱvEJ[2U%6f];K˗Q̙fG|bN2&eOqjLۢ7p-BﰚGh$b&hRyiX r u(eC) +f:{fJg /XO^6 zt!̋f1Y0ݪbwѠ-ۉUf;Egiŭ؝|Qv=IB uFCTOa/xS W:EE?(EPЃwΆ$VxWRV. w[I(QytY߇j>ͮvm>a٤X?+sdCM#]>&J -x`:]Wa{"i.J-<%!sǞTOW.3+v{?U?y+[e@QJuErėnL*R>v]i ̨?8"&-e$w HT( ,C&/t/VNBы9sZŴɪw逻:10](QR Ri'H% hɹs. wqSr{#~F B?7TLOGG]B'{W Y1)Cf.`eM (fMı1,28?6{SSv yD>*` VkBnE&h*aѡ9}Ck)c x4‚#Y}UG4C׫t-AcFFHت5_f}LI/2z1kvl=o 7 |'(lC7u&4k=m{.<2Ws}w Ӧ14wu>vF 1IvPl[_sJ:?X]v줸N3_wرGG(\2$*<9',PLiBXy/oӛֿ!հ t0Ft V0zu{3$#0p/PK\~%>XEh3f\()? ik9<қ3pV1JP.(D,ECwe|оQtw2H3!z/`Г&f 莉 ;vq~c|X.M̡O nNo Q_%69QSM r$#|%`3A?dȭlu(iLM;| πJS/%pG@7נ_Xob Kw`݄X=t%U'"-j)F|;fϵF3Rsrlucݰ%Qxk-/tJf8Pcy2ʛG2v>/b@ފlj%&Oy|ݷ{/H&&!p}Vh(֫VLp,31ҊSԅx̥2CRr-QE83V9sHSe0h85[j40g.=_$eo7^#وE=%d\ MNm~|WΪ =hN]EJښ t:WCkaߧFmDX3ǁ1x^Q\ ۑogVHuavf:d#*s';B1M#@1G%?40À]+c7З{+0[5CZ\,^tMV|D{P6^XY 1i X=.^k a1e $$2q.%ˇzaL8eMg9-ThH5hKQ h79(Y:,v*`zu]cZ^me#LpmxW_b(isM"?H&a;ޏ0d -p?_![ZY~}^n!J%{TЊB#K@暤+䰀A<%E2|V x%( ̒0<1UT&QOZ eV]hPKawVlU1@SC$Xo—A] YZiZlcJ4#WAkW'R.7@@ =,j;p),QϠ!nb8BS2ҢPeNMdj;\*lRԁ/"/. 
$hg{ePKH( |D؁~9B*;{UUUKI$+0]NAl.]TRYd7v'.ʑY/i詚Jk΍/Q)H:DNb\c\ڴRMs1 (=qu$by0|==0Y\t6%T`Bu(og`xcBĖZ]1|,8n$6Jtl6nQjXR5O: _>IJV&DgBiq ,{sW]T9aTV5YKiN8UA> stream xڍTk6L#]9tw R҂ 0 )݂tJ44Ht ssZ߷fgkfFm]n9%D 4t@ȏ̬E8@c3@P' AB88c(ܡ`@ qfVprCmli `bpsV @8>d9t`E yy=<Q9,zh\Pg++w#o ;y!Çjx?mA1?0ؿCp?>ܨR?'CdW;"<Ѝ?7k?0l O¬D]]Hu')m7> [|ꬠU\/Jl |4`&=oѰ;p~}я~X2͜o=CMؓ`r8wA-o?Fx?f{BߵʘxtG~7UbΧx>6C1潭s ;5cagE%U}4ѱ1C0ڪH+V"^A^HGl6{Έ#\ǖ,+N[ςQm33b2ic]KGMVG޲!5{VN];c`G*3 ݚ֔\ENtnb-41OR2ZqM붿gV{`_G$-/"*z{ӄ>F2.B{VpfXlȴ(Sf+OLb~4+Dc&Kz'^P;(ާ7P6ĵJåBh=yQށ^hd; wXxXьYB.}_;+=_A _ܠ2h7/1q0zFj#4 5(u &FL%6 4$%^(EiBޅE3M覤+_k{,]XaQ"TlI aKMчlH>2v].jRq< \[D`]'%u.gd݌cp%IEesBQ>M V 3L#ȡ33ܞt4/da!D3fO+⤎t|J"&>*QfUB|`,<iθ6^6K%'3hSz$nl#B3݅Dfw2B mۢ^{Ղ]Ri$϶&X  kھǭstR, joB Gѽb}}t brFDT1)QF,aƽ#+"#{DM-G 0`ٖչ&g_XQ"Y"E: qS,-' ϘBT83[L{]m33Q.b,ڶyMQQ$+RUm2I{6j2IMi:O)ۄ:+0f渔i}8GTD#)5uq'LY*Ii}gQ:kBk^j>Z0P<;P hc4qqn~v-Fa53u VAjHN|te0?8N$qFQ+SSdHdÂͱ j!G_fja%U'SIH`p<ƜVYؒEĸEK9RT2װ5!;U?[ @f.&zjCșqVpn.LvƚJ}B={w9B:&=lJrjo_ Gqэʆ\4'&Nxxnq?DLfI? giI> {fPZcbȽ+X% 3'ϥqAIF@pB!ypPӦ̣1Cn]"CLq#yS-m`_%i?G$'W50sp]rp\lf7:E$ƌfJ9χMPAA^*}VKgK;NNj˵l3ۥJs8h0YUSU`'GiAS4; ϼ^П`LF<-< ݪ~E0|/u%Sͭ`X"UVT3RK Suj}ȡ!,;>LSOֈ^#m6&F&N/URFj/s<ݐ D"s%)7nUTÿڌUk3'Tuv{Mfu< 2[xεS "@NV2$:VW@K4}X9`2<)M$MLrh)a R-;wј#LS29+df |#ekֈŚi_a:Q9da`:̌ x!ϺÎK]M<%5Qr]D\pًjF𬴙Պkbk) {b q y:dz~(jKAHU/͚a edO֪[L!zYEy٢UsTs11FѨIs׽_"ߜL>ڲH0N9aQRB yoEH?{M6."Rm1V]ᾊo}(k_'*a#>'0xIVH -4pUbď)VQK;CBeq^iVf4HUKxek}%iV؁x|IB^ 4% %-Aلf~ Odpivb1~Jv}ewHٝ`x%닂ˁs㠨\ȈYb1̬~k`Mu&Q(&IAZ:n*}8Hv &,V4]9:`Ee2~9'|bZ2 1Q[xTJLt ~4?z`ډɽWT.'QR}LHnMdj:*i# _FW)/nzr?VHi+bep) = H\"zjutuok%Vգ/lDl<,@@B.>}J%iU 3[0[3Ǜ~tn ЗtWsLVSMw7W}[7+5E|I0fϨ1bBjÇ _͈ud+z8?.;Cd 4Zkj췽H_=}&?)Rp'XUDZMjXS'PJ x sS`~?Yf (]#o[jr15'#h` YrN%dF giΩC1+Vlœ|¦ ҁOǤ>6vL{f6ljlV[s ZhQűA0qsdP.U&oGN- &gA[P<WآܳsB;He3)FeahehWY9OG[xGܱffExFqf$b[v:rԌnQ\}*{3WN$/ƕBv{ IHlQjIHTĹ}Jsz"n&rWvӞ{[EPg'i@.4'̦/T74#RE3 OC2x\Ʋ3ؖ":[ o"0#w)ָyE,P8UCcdYvԈdAx&31z*PW^B\&$yuSx/,[宽NRss[ڔ9N ײI`.gO,=<^6|nݙ!)Rv'7j2gpS0 @g{YК* ZqNF4s4Vmk[î^ a8 0,Ş9`*3i?(\c;>Iq!?s;x6Q"Z4 
ucá~_:$IsȍvYK/YhlFߠ篬أFmeZ{YC綉"fƭonͫӀ2ZS6{@t%}΀hsbiiA N % o;27w Y2mMS!JSZ'!nvR ¹oߤ0{P#JTM|y:}M]_#t nL5\r}ˋ:w嫝a&+gȊ~_JϰMżӮa{eǬ]<4,Mݨb萢8)x|m9KOKratP4mKm*lWp7n~]4w|&DEM|@L\f4*=b1Oe1g =Ȯܽκ= uy.Abb+ S)oZV/VY5'?-2Yhwt'"Rٯm䖓X=-՛fc zgqj9$jyuP>Hiљ,sj?pGRRH49!4eHC+-%Yt/B_=[% l-%[Aώ94yܞ|UAm(:%29d,݅\ DdsUQm9`cdf`pQȲtD%4ζ4\g,\)v^ amSJ1 Θ p3:hR[H ن4> MMw4&UQƫeDv /P 2 (HC)o83K3H0gK/:jְp_o߉rK ^Ej?t cc&o )-ї|^Ypu^^z(G u} d.6gHV(wTI&T =PؓhCŒ= /'Nr.+ oHZϦRNI?aO& 8g:wlq9WܬQ8 d4IP43p7R7tXZcN'U߱N.cb-`){w&ɴ?މ)ݦ3G<ΪGc=7h \/sF;}C}ҟ|~ +"m CW*ʣZoʅG]YOzR, }7-2GUi.$ *[OGD^\ɵaأ.)o^,( 7a7*mӪ!nKX (֡;B䊗/yEO8s ~H蟙n\ hZ toKycN{QB0V8#,S3ܽI·k ڒ :X_nJsj*$R;'{*jrWgnIlr=X %+BOqJy߁Fe^A//panʶxp65]^@eG ;گU2`DO k ~'KaMYgIdŘVQv[N& DS;3dE\/5vy ɰΥY<m%2;Åj%bAcv+Lv?1?^`IBM 'Tljm-ꮑqE_eQԚeP޺uhx ]03ELDWqa;H29x ggMJ<H(F :N0q,W j꺞2jo~r5uiLZoGذ>!]f A6|-/-QC8eMQV yyy6UL-^k8vyB'c3VAWjc\>3dļ0̠-EC aO][晩[26'ܿbBN+ZK_5}ٚ&jUe|=仸?IZi*f!o?eXkC]2l:>.NfO B9\œSrA~{H%sLWWs*:R A,]#mbilP;F̗ZrNM_׉!7 :VNd#3XۆףFݓ//4w20&Ppw/v̬)M1A\wT1d&6zh> URt KIc~7:>ӨT҈G#)2zHPMmv ]7Ӟ|47pIԪvx0ɚ7Ulm4pU(';t)-cD<,F<wTCqYHy9o`-R0̀_W̏n~OY>]QbS^KD'UϦ47y_3]>QN\2`$NVo;Taea{j-f۬1y43%|.*Y#L >^-._ެjHxū̘XEkGƳ hoI4糔Md;htޓ0c}%s(85ooiNnH5;frD&ӹV@ ?aci0 n_^\Vќ$Y,EOu zU=@`aƣa+ĥ[71@o:E97)^%tJIIWZRݘ^NTci oO(D!t`;bm:#xf/:vQ ~tq@Qt*hZF~E.,YA9[ 0_Gn!a9k:_S{Q@kYL>ew6OϪ1_XU}-̖RmgkĬcy¾uEw#H4ƕ(Z匩Cud! ΔAhTR{Yf>gPxq_{s6[ ԾIwJN?H":M ~)#t?.y50YxQunY244\XXZ2;G /OֺVimCŧ <щMǗ* BRhU:<A\|DgY9uJO@2|,߯Wl0ёfzk>?pxe*Rh> stream xڍP\.L7 iFw #3sgz|{P3CLAR{(3  9X@vd 0F +@ d}I@!9[-7`yqH,9=F>/W=t lfbP4ZW43C Q XA|nnn,&v,'K!z&jP9\AL@VƂLа;iWX@L@g- dbor < ?4 `e@S, NȿO*'*xV'Uoi~j/` >/i/k/` >cd{f|f >S9 B\Tj䟻̐ f5m7Un4)^N.wh+37D?`J]?z6!${?ũM"/M L"a`S&хM%ƭOڽdu,ta_K[y hrM3 (̤H X?.f'}~䥷vsLݹPklط$9EQM,yr?"( {'%~\mEq~A@)&^E;DWڽ[N8bCfHn0 l:h딌LǧU⮾T2O?©^LS`SeVJ^J*wzzd!(#VBhv7 R;Ze^nS-N{I'FShTl'_X8f`@ŋ*d  U 9Iy=p}jm3T-u$^ Kg.7״GY2zfTo`vGt)F< ˈ;L:;Y5^_L$] uٖ&+jǔ ?3HfkщzDa+%X$-MPqkgY]ٖ1p.xGm+bѬdc̤ۓ i3잠Ѭ`{v+Yʏ7ɨ&߀HAbryw("G"o]Qyq|E0R 6 F? 
D9]r\$A-LcS348 s&Ȝ@Ƙd0ڂkGN+&[&7_[Ȓ-JvP٪ɋ~۷Qn2X #3/s]Ű/+- |̧c"Vj9e.$J>{!~y/ZxMh&ˌ䫌9DL%3 v }Ь(ȅ xzP .de5&!d̯ ȑ~* '!tkdeq[p8S8FB$ D/f֛;a!RBA9נ5n<&-`1Z쎊m:FC4ȭP[Wgc hJq=7a5Ĉ4ċcUgA *xۚI}6r-E%iCWU'_"Tŏ3-+IbŃ{aamxuse+;.e~ #Ld!EUǕ=R6HVdʾ="^P3 igG%B5RKQjWF4Nln{s?Ul.F_ 9-n!R* |U=XuxU:&A玼feղDFj|Cw*'(IV B\v`1Vn7:׾#ZQՀ k+ Y]w'(\H[>[l'lUӪ{@VK9I/ѱ=h'*ZbwYh+(eqr\\Uܱ2tgWBXse #xZ%6Ljߴ  na<,z 6V "n lXN%>9Et9ZyjFl({n,rZp<ۙ>gۂ"QϹԣ^u I@6t,EKBl]z@kN,˒˘(_WY.:j9BįZDY<Li"0EP!8w{:XVf Ei4KwChG"WC% *u 0緃nmxs z@: ;-]TA];WZh7%c&vaDj^Xi vJ,h>k_ I&@*!+RΓxZvq.ePqz.,xlQэHw:б bmڢ6:i\J9c@K^ХN}sMT >LP}fDZ|wLG+&B]J>m[B'BqAyT2v6Ka%>706Zk2e${>ҭ׻})16d!`i] b'RJ>UsUnks}xܵ505E: ыzm>G5Ĕkx ukWL>?CvP{ ]=FuBGǠ+Xok3.^bzC.[3 &%jMt隯*$6rEMq,ġb$I剒e޷dnV@9*Z{T>5&5mПwE5s o[2H6~jAL)׬k 'b}thmlt_+K3N#[.?n:/GUvB[xo:UET/E הIʃj s̕t0~bk0!z\CɷRG|dIY"ˑUiRlI֠ob%%U7 \" '#DŽ*vU\~ ^A/'34-@$im^Vk3,ANeXʱ×Ґ<˝|ݲ\12$Xx~nqndfl_]؛WK)!S.FYf:L}#9>/`3ᓎjz^aKm S[?mh=~IھG65 Cqh@k}{'S6{aYs ̗B,w-NP{L^2,BKİ Yuw*Xylj~ݪUBK%Mhw_/2ą뛷|_IF՛K  (mnCp0.47-|,Xg}z+HU| |Q%\N~-}%&R|="dC:(e=ܫCLT j[ ÐXOPzc7wK۹OXcǰ$ y4݀a00_*^ {P yǫ۽L"W:/-›#LJXO?E4v.=#QPV"\dlX"pSKTʧݡPQ`XqQˇ4.}M0Jd;2Lh ~3qH =S),%!At O]NTK#*d B58xӇNC=OY'VVC'φc$Ƿ9oHO6zIj۲0j"AeA>ىR1Ώ_\ʪ/`-GT֕\73 l/^Ĉ&Ô ?]:g vGn|瓵Ww&50߬M Cm_f/T0^ɌΘ/;yA拻Uz&w0sTu(rHj*Ym-{/B 3#x,FFapJa qnQWc7in帼GlDxz~0:t+u]|_a~\<[0r-qf~bW57vE&{W(j~%QlKQT<)zuz63Pt|51A_;a|#}*ys %d\D$ޕrJ/GCW[}YX IJ;J=}BQvmwTAyXjv's$Vo(J:Gׯ^{K#k0)~.l%® zktdKצd;ޣ T2ו KL˯괎::<-k~<% )'z<*@\5/zqAŠdXU"t$&@68_ yeEa};wSa_nʫ-CB̙'+λw^ۘt| I2gh>VVV$hm74=H.<`+.YQ\g th5Ay%J+vw - @A8FE`2AlX-5#X;m;/Kb8\-(b Lf> 1EѾ^n QԌE "ƓkKdsK\F-R9ŷ%bly_#6p3/SR7j/['&>ΉSlv#sy("yƴz¯ o#)_P0X^\y@".ĸ=l##rs\ۑH=*!঎9n4J]qtN`N-ޫXl[,glsb_60Q+`*tNq2jPg8G=roJAA?Yɉ֑f%cҪ-GAt%3lN TԾrdSl-DV(/E r-K5J}LbcF2{2viA5kDŽ%-n:f00S u sQ% znS!@M=8v3: K6J=X`(?E}X q9?KKT~= W(ԡY0|8wy-Xr#/="8x~?QFq_fx^rc|1:;Fba]ʞ ZX*ł'@0*ʗDHkN})32ω9i;Z\w9)`B/߽t 8)]bOGoߴ#=_bDT)5)lM[YQ퐙4v!8veU4u*%̿++gTE=]wBٳӴ_nŬ(N~Y,X`z0M<,M^-b$w(tPf4VH@r9bJmͪybRL.`%ir Abd JH%UIZ(sx%IM_:S IVRH /`gHAT, >'F.#`% v.X. 
*^~x-8 }LVAQaF\9f*Őa )&,~s|eIE7~Hk> ؁R5X&F5aP/ V 8}oGٝ./;,/Cp˰9׉|gQvvK/ci%bh5ÝF17ltܕ&4mdžZ$EmȷTHj'\PEF'YyEZ: /=evCJxDJ}'T0hIEP8w~0["% ,skDW֡e>ܢF#@.liDZSU"u.UYd d9:j"0"nQ',JI&cmRM~YlW x96Ȗvj^^bR@J+fBDٶ"WxA]FyE^4 NߪH!abiy lv!c??٥C\ɧh\JvrnOmmgw2 2D_̾9udo"1k3H t ^)m6[ \.u<])fMa)E[.ٗ/l>v4]RȉJH"' ~&2Ch]݌4_iAϬ=3Jaj IעOýui wkS QMtjw *ϼb!*=Nz^ڢN0GGFĜJ#Gsu*9–xæSwfW`xHSFqhf\0eTv-f1迸RSMShN\'4u[+(jTjLKɁTFMplub:>;3mGT3#-ڸ2?u6+s+[Iz (va_:0'$yxU=U߽WLlWR2ZjMk/\{F#[(Rm}U¡tTUM')YαZxEĜȜJdHwiCZQvUjAêe >AF}arݭov ݱ fY~~CGp(8CZ: GTat8ə LjN<4

%FE 0'ǚ+*9 t!+Œ endstream endobj 71 0 obj << /Length1 2455 /Length2 15888 /Length3 0 /Length 17326 /Filter /FlateDecode >> stream xڌeTspwwwwzp ';nnNo,/Pэ `e`feeGҴq Pi]\me#4u$M@JNyw{_C'~@ tEprvveZs:_1#@hnjp2yOZAk77g~OOOfSWf'+a:F5@ tZ~ P6u3@o ۘ]AN@(?@CN tXoF`cf'q `icH+2y1L-~ڻ:M=LmM@o SzO.6n̮6d4h)G '+$m\{svNV,ܝYm޻$c!Y\̭Y~'vd-  zn.@+!,lf@+G?Ab/+l?g♅EOSE^W៦Q;y|YLn6779Z:4"gOK D`  YXA//&iw{,hk0u nuPr-5J@ ws3L|̜m\m6noﵳq:jLlG5s;u b_* h7cz#He-/>X@.PK'G`-X$ ^`X 6`8,r7ES@ P>/(ʠ2hA?ԟʮA >ҙ:8.$5@30s15K?r/? P .P0s'{+-qpSoX RD5 ܿA49?.,m<_ ?AzO&B 5_ Ϳ hv 7ArA˿b.V?A_jP _g ?GhY@gng䜠i8ۻ87?D⽻_CgI5/64D(2@' __!(_˿Gǿ hT `e7w]@sN}^@s%'s0ۆ:1BO)An7wRN8{]fenfX p hSFAe~ 8ڙKAXOG#dwzTN؁U`I*Up~TPY#j(2H#rzYݍa4$!paFW3˺[A$&"dr Ng } TRzlV.NSxz> )3ZgOOɗ yYOs6s_M(>)ĉ(Xщ: ko=ſ{OڗNzg܄:궍0)]y~dDvT7Q05(":c͞8TY 4bk7JݥY-Z70ҿQ84ƐTs:H1a"VI)&f4~PHr^XKms}Pix^ܿ]B+wp yFȏ9(uj,C͗#4 ^iV]5EU['8~4Ig%ҵ8Up(xа?05ꪼ 3FHCKȚi֣QP# E{N`&b0o%=&u*$gDT4W1w4͋{}0G>8ڳ1}X#gt$fާub40ƌOl`:Hӓ͉zCFR9Au[cuEnI,L2d~pQS Ʌ_z "3ƯOKHB%&'JcMAbm ] 3C޻yj-{e{lݮA1qײV;w7 L͓4G,Xbgzhnb P򮠨cCġ ӫŐ& s-mZB',j.Qm'& 1K#GYY۵rw{QHkE?\iT+EKeW(.ˎϨ_04YI UMσ2 \ta,RN®fK ZCz/v `}ٳ'nbPSYЫ-Pc0/|Ja ~ٞHa059vUt=޹(*[9d==?=CuVeq# N 0DZ6 *җIcc rQ5l1һLq92V'ͰjUi[,G`]hI汖!A$|+ D6w~vk&ଗfw%z*Zŗr||VVмXozGLҦTwܘ*FphΧ.ynA'm!pc "it8:Uin>"Q}ļ^VWv3N{>D2zBJ\|,M;P< l *x!=f@ܦA۸D0l鑔ROluU∸EVIj̋nEAfu gӋ%;Vd/ʸFR~_25U'ޱ/g͜G+r®3+6}izQ@eĂ5Bksg Z|vwB"ju"g*53Z:/v60_Q\CQR13tD1Pdau3hXgh]ÔxM=c'p0l|_;guCYp/453x ZW%šmyHK%N*WpVd-~ 'c oYך\.Ռ~6eM_ nJ հi1HT¨_#8LlZ+ "s3`zfHgp^=iA-@E4ARbsD'c縂3! 
ch9Tx 9arZ/̴03G 57sQstqVq,+sRDÄӗ7tγ <$[NFkvWƬ,nuNL{Fn=+<䭬otv󎈋49[ ia[,# .|Ly,!IdNwȮ {h!q27sQ𽐊3c#nAb.<"Oŝ0"8:v0$sNVvC60ZNxI}0~glƬ7]:f0q4g"< K^jY:mIne[.<'ijA:[ }#*C_s+jJn"IV<<:IVeWBRƕYѩS0B=kUkaD|Wefo2!zGtZL[3=,E*B}QF_^܈cBM1$WHn% ~0˦n*MI'0w!QHG޾ ôMŒIY|#373 Qx$DQQ ;*Y)KrB+m1L೿(ֆ8WYrxE$Zg=OPU_o?,x.9:olh{^XE;aP3!f6SDfR S{`=VШ|Q#6%~Uy::ݞx&̄#(R3bCbsaѬ8K m@k̺ -%e \8e[:Y=+8 7^HgFuG}5T[0]ҵUA ϖH[[iz/mjgN<]qKN[>E˄`?v} u/_G2 c{S[&6Yv}_"7*8ffvr+H6R `5Ỽ56X_T9J4VrI>^5|0"_{r@t r-fU\IRVIl'mn>I.^Y3BrTU >Ŧ`[-=rh&ּW,- )3 AQ7<^#<鏅`c[9gYHcrq1TfT]fȈ̩>< 7Շ!X7z/yljFna۬P2>Dj@AT {J`gC>4|?i{D7Ϸq[3 5KǑboO(WQNfm"9 Y3^^,(=gkы S*+u o8/5V4.:(C |/2RpȖɎLzE<-y{١e%o[wC0ՙ W7pLB`%7sqh|kSF8Q*2')_k19ډB% *>&aKybyOu%"b[,`?TGu XaL1^ZPx!ib.[Krt" V,ٲE#E/}ɵt;cM{W˙r{< Y[I1c܅8QX 45P2-=ۆk2\i|0eɉ@:<!p}Z\!h*jh'JPkm&Wd_5 l|1f҉ Ji&9T 8WW>④bq=.#@'?4VCT;4sƛ]~7ʓ2kgZI.X~t23"*ҘѨZid N8x uG>߆yң!2H"_ x\M?ܹϺ:ɞ} ^`< ᝬX8 t4dB4rvis]HR l#J h]1Ƅ釸ouB'~[|iKHdN'=E>C9+X!ňեh%1Xe#Iڞ0dɺ,=i?֪#|y o$lP!p1]o:ø Ȟ'{{-GrdARr`brJ5:~5$bֻDcֱ(OpD}-l"hi:˚tPolI)K\QL j$f* ^,Kz)6>X36áfº@(j?V7{$ AHBk F GڪpP9XoS(DZsw̹@g})X/ﲡ^ Bn7\{8R6769k63oc(u\on5~K[U^_ Bޫ)YV_ޣ%}KcOeY\ٔmvPgy 6Fg[,d1x(e2 E[*'o%N??d$5Sғ%JWcڇ'T 06m-ޒ"_;ToNMIT[5_S4d[\6>(+-TdX#\&5A}' :vF&u`КB Ȝ<Rl>X{h:a`щƑ 2 vh #x(^ڛ}dRaT~Y?iMEAeg"wOHu NT24i𩝨Y TYPEtf/~r }i qz'a83%rgm)''  qe܄NM|ޏ.g_zl #VI>F֚1_p174F6aX{":]C:/îq06)Q9jR:65ͳ Xu]ާGZm/Ի6_8Z]<``tnui0^,צ6vwtz$+_U})bÈ?&X$%|uz7J6˟H.uh#2nHV7*ka#×ÑU~j@T/4 IVY`m? 
SQ>qWaM??o~kr\PHt̗zgwQl|dv:؊L|&X\:L8]s2[׹*Ȍ#%˜Z3k]'TtW:+?45*\Es&8f<.]zO"5~EK-BIr:nq/$=MG+ˊ()\wƐ6&[\"K2 F߇ /y/gc܎eVҘ%chn 9#ȘLhD]n2x_ + CKD e^X Hdu^HJ21"6-Iy;0>Av.v^-;$K`5a_ Y!JLu%@-Z˘=ٿq=dܣp%.JY6VR*)޶߸3(/:L@f.`&$T؂&u TJ­[`;߄4כtC+RZY80xx)^Pi{=F MSE|-#Ԧ`"1š֛ZGL8`?Q'0Y:gzޤ6qn,a-vdR:Eє!+ F(PC2t6h ,]3dfx;*{+gs2JH(:(c/f`JYTS2ע脉͉P*eO3~R",7}&񗍁R8YپZnۈeL33_v!I{6(6?KShٯ]UF)29dbY.O_Q9x4f`(t݊Xb9@X+zUV-1iTLĠޅP"Z!"0olЅUgfHS`q_~[z[QbK1Q[~_/^[NnзD{o=m m,3eiuC6#jp+1Z>j($$Z%XlY+ƋIPm#(!jVpHTpKO4¤A7G Wj|^Jm#/|7L#:C\0 I xbNpvI͵G~5 Н,y|9||+^XzF5ߋNοưل|ұg]R2_PTC G_`?nRSgAP3KOqcT9YgMLۺzr] |{VpN^3J^V^R6iOAL<& 2&L$MY|G;M{:K d<ӛYиyޓSX`2D? aeYO f*vjkz$=6fbF3 {i'1 ,鉅Hغao$YOcLS*0$>8?{BɒJo'H]q ngD[r=gipia3i+f;A`ev#g7-b?#y2tg/ zRm3 @/>]nnK {GE*fz{X\+ %UBdgB' M/2}' P)eX 8`Y;N7ɓ}**:bùuN4$? $jO}[)h{&V6v`(/%&Sݞm*Osv)v}ъ^psއ*ɇ(44\w5džዤOC<Ìn=VpY& WWz1*e +R_ޜA>HA+Nww =껚;43@U*57;r0;i1; 8Ocӛ,e|9D',VR$[秆Zƍ}mjAI/B;7 KCƻ<#rn0P-9-$ܫLeJ1( )k^_QjO& =0`J&k옣yfxQxlWzcp`&Mǯxo?tL(3& $دwRV)3`zK53iUҿ-1a(-s!c!O R'򎾤BYD 2C+˴4FZ@Vo}BaXwE:X\cBzL0%'KlG)yGcqe|Ըhpy`,nƞE[~dyhRtI^{aӌ210}) A5-+]c,Ȝ[R%;kn^w_zV5LD;u(wH(%]*fL;Y;+纁}y"k9ɍrF PZppSbV/`>kiItV+"_Vi6~+y -eclr&yZ׳ӷj2HHM=&"z!Y\nj0iiX~ :`;bĔ56Kvᢸ)qLؠmY8K1VRHmUZX쎢~eEMS3顽($:7z6OU͙eDI5tnD^6 632O\] _ >5M{y?HEהDM.!,{erOFD49ڒu_Ѧu%˧f,O̐"wc߷ƒ#9k4k~fl9yG#RCj]cW)U~8O}Zs "}5`AkK*g #jm ]K<L`TuB.茎i(HJh%}Its+wi!ڀfobSaTRV7WCkT_OU ˼wÃy\q/E% LqUS:QJc ^Rz"? ȷ'ҩSг! 
=Os~B%O5t6 :ydJPVEf̛Ȑu{JUeA &ND?IEĨ܏5iβA2j3X(ecTo/zs=q'S;~$An6*P`A돯%'wnN Ľ6&H%"brR1S`0:;7 J0KrAotꂝ qry[x۲]=;?$z@:5'f%:]MHUYHw AޅIy25~¹}A=k2.ua7c΍ژl$3>:T;7v2mIBҐuTCxtRi򚃬;Ёz F%_M4]SoLt['v)^Y!/Q gpLJ x۠q=3J 10vɅlv-S~ H=dzpV辸ϖjvxAn;M_mD +,ƭ2>V^ ~6Q( /h`;}'<*xY{Bl`~IcHN?1Ա熔(4&g8*z ;.qHuP4a -Lt0ӳ7(Le2BF,Py|'u]_N= "e;D4- 88_CO i`b2)pIa7Bdɝ;bz IN+=ʽ/T+Pd"<4Dz76wwt Ҋc8Sk"BIx0ϚIYT^7\xjQN5UIgw8f4.,BzfؾLh_L给MeYnV2'Z%;;*[WSaɟCzԓ#`efsyK2LAOi9 5ftX<&Jd''#'@m5z}I51> PX|Tg8r6*uޜ 䏃0y*ꀋv\~#mQͪ Os zv NilYZc`}] :_gAFe|pT)g%/M),b&H)PYb8WBOliX^& {m~-sIգ,!` <%1!WgM"=%BCA*E4iem3^Wlzd.nub\|$XhQn7RV*s$!pAZav>hmn_bV %4jO0ґBd 2+GzPINdg_(F-VNݯuݕW> 1?ɿk1!p(wWҤiϟfC{";jVwC'((glj}#$ۻ~ f*Zl4=$f,v +`5R7WU[Vl)+F?nz>vӔM+K2OqW_Q>rx"vX5Ί gǻ`zÃ)x&PmR`㨈TŮX,~jlZ21S.)1ELc%DCuZFZ:? BB#8p}&0t)GA< =hr[nw]BZZsхsCTw"2l]ݗ~~Ќ=*b5v'suaT.rqωƦ%XDv'2HeȍxŁC8+ss\nkѕϡ .R" ި1(B ļTR'8$?|{ʮu~_|f3nQ c|W-[Hv^ Sw`}/<ӗq 8/<ۉi8aӣI O!WR3 ,?{ء*b9Tכyb?uܤ|jn3IO)5,/[?f%YH4}%BbiԷJڙ( {̛C) !Flg7~;h`9xc?7bKsIj'w|eu*L:'ƫ={< ^Nx Ɛtof-ӕ ]a#PF\Z H\DUTt |dR/ҽmr}$BͻKTfz1mJu #J7C_oUDx3LDj[~Xt<{ V`Wc2ϒX ^֐pPȅҚO?mcKPD%̣`&Y;1xO 5*iem_WGuDT8wh  ^?Uf=q>:pfŭ`{h,~9CeEtYL^4{خF=/h&?~rۼRaF;Cu.r \ry4q(= Qa^v(x Z@\g#8P^s.ѰK]OcGLGs~|P&Cj;E֒0k`n:Mww],ҋ!(lg.' ˽`%p|</Y< k 5ʨe[(_j\ VH{Pvm4)L |?Je-w_A S y_B6@X֪BY!$.u$mR?oxrsM- \ؗStLjt U^za +SmY endstream endobj 73 0 obj << /Length1 1691 /Length2 8409 /Length3 0 /Length 9485 /Filter /FlateDecode >> stream xڍTk6LH C7H*]03 ]ҍ HJ)RRRR HItKYkv}_^ϰ2l*0(W( P@!> PpGjC`PY(zAL @j 5O@@TR@L1yH@^;&@ qYanG2 1?`- !Ȍ >FWiGMۛ y8r!G~ j`Ї#A`RCHOWhkik8>;3B{ A@P߆ 8 A\@6H?JTu d!|pAYjsuCp)A<ȹuP7?݆! 
VU)GFD@8Pv}l'0u-Fs#B\8 @xxo+ "6`H1O<%av0?11_-TPyE"@@LDat@*ZS_k?8K $.-"@[3p#(WoE*..94A߿,D @\\Mr.Wt [?@`n C3#I ܞN ^3AQ DrIPD/G;4Aa ^ LEE"qH : A?HDF# dL?Ho7@R/a/( "kuDr"W/ DFvD/LD6DvD&׹zzx ?vyh܅`wnf+TzU-Oͻ1d-`wz2̮r0]g4rl:jǢuR<rނN0aT,=fQ.3s s.Ÿm-3=+肘1 ۶gȲbjAHaij8-xOuf۷}B'oԻD+"Q1LEZ#MAB4Eh% >~nnNz :v :QC!Fc<$L[ƆقQ7לXpHJ{MĆ$ 6~Op S - ~q+9kC o@*y`)=`u͏_z{×-j,4a׎,Eas퍷4ٶ%?+DŽ)vQEtQx hS}xH"HH4>-vnidr3MNyɶ/V:Ӂ16>{o[={A:gt dA]lի\d|^ޘ3qwpTװ^آ[¶$;F'1S9YqLp\S?.j0HhhbYӔY(j|wҞù9ʝ^IBY-!}pgiT%B>mh먶'%}S=/a5^6s۞dv持+!P디$OV)zNj%ue\B$]㹁:n@CO~x}YPa1Z4s$;o} < ^F2gIAohqsgM^XoB{bMS0s9m%hCRS2$TL ? dzȭAE8>X}`ͧ֐~H0"m-xIe7$IEǖr4fdſ%#!S/\{*9eէ-oMC?kkXO5D&RL3bn{/;)^$G< -S)ط`Gɬ. <SS}#~~ GsV|v>GKmҐ֗a<;SV.|ΝzCJ° 52&(w@)gU b%vJlx`fL1/(MdCdZ)o1&ZV(= Wt ":A W_u_QyکP'iij4Bwۇ:  6%M\PumNtɣL]Ula$̊[=*TuL ,B5ھű}OWg*nʡɅW,LP-f%DÂ8@1,efrGװ{!Մ+ą&LwE㵤e̿6H˩v,S?jd 4uL~6rvޓÿ^s}׷Q4z9_y3'Lb+&X E C7p&p,֏Sf#&A磥@Y !t 4|~FTo;iATia6 ft5q"կ,0(Bƙ ǘ1&(䨧)XH͙͞Sk ͤ+Iq  #8(`TWYJ唒IwmEِ*(!!ڳ#VW̹-m6vmF&<)c4V&UHJ<aYĘr/nǘP )8ρod.$RĥrCdbE#Ɨԩ| 7 wkepx&=0DBiyn/ex{!%G z_ǧ c<)wllnrHB[0AxK('G?,%6njD?4 _Nq)?ο֕ZQ9jLM}z*Eb\.-Q0q ڇw vן˭6Қnd (*(MY Id .i8Ԧ['<"Xu2J8e(Hp;4d׽);:t_NFɿZ/Ȝ:;d,[$_Yߜdk{)W ݧLne/$Q'c7'Pkn4#71$9%\>';,>Ի=iYRӹ*~TN*~7.S84[Օ .'M]ŮBmTNu-+b[5x-/Bbh u;zr˳`5"^!GUG~.=Ԇ:,=fZj}<׹qޕspvi1Gre4JޞuE`uHU?LN&fEq "7[t6+`8[20DRqT0,SȲYMjcLX8 >I<ň w{p6e RfX(ÉgY8ΛnMĨO~psWD~"Y\'ڙ^,H;ry>fy%B("J[hYRX-p﫠xj9Q7=+d3k=ݟMPӰ*:i)>rQjt㈹0ֿ$\/gTR5T9L۾,ܜ&sbNm}AA;`=ۊbNŔؤ1TR"w쨽|L0_au]Z򦅩B,&.ldUG@&Z&) Ch<͘R/W `;^_SO5ԤqMɳsfDNUF -ֿB*}~迦P_wh g!)QèiHX'/Dj药b//թqc/j82ev taF:Rbɷ; #cB,֘?NʪP)f&ͬلt/^= avc;h~8Mp gH$Gd-u2+UwctGV$9Y3κ.C4}/ ٭y00$:8hnaƔ.2MĚm"qG7n꫾+7;gWE:%kKf^BRgQ2lm,,]vo _1RÚK9StgY) cP⛏5DO뗊JxslW1pE!a+YzN8 7G8oEt07'ΐ-R\|klWKwrF`Jc`IF&h?e92N_BMofϹX3ػ},=ydw0f.)+'|="Α#CO^Dp%樋TbS9iX,5})6L$J6:+)&Ԏ7Td_:2<Ұ[;O2I4*}}}o8,CqjuuGuOzK!W,~5R4x0&- EO&æ PL/ I-w{Os"#kךvGeYՠ*1Nsi* 3=Xc۔hˤ uGw;HU|Ӌ$tjJ&>W[;}L;=A8;"XX?'hZ_Ɇ4[}[M0$VP M""<#ۮ?;A✶+Gshή7ɛ8+H-cpbW*s6Yy|o&tpřR>^kد"dP(gn6Bmx?~9BR/u]htG3,z2=gJM fHZwo1$?,8iNS% LWξ _w>}Ӕ)>qf"UxF 78tB'ωdy ن)~GJ|Vv)qy5K_j ,(豋R&o?ZZq_ܠ/ XsG!%ˎ& ] 
:C"%mla`r6{Fȅ2ۣ5)lƕY,ۃдO)]\fx#*+øAS5C&A9/*gЊV-6T=F؏BD ڸiWFw( !Z=w^ޢ3O͛o, "MYr;`0`WUQS1?6%>^!gּe>dC,/XWUȝD!#T,υ8 :v9w]mT]= pe!{;gREm8E,RJf̏yGS[[ f(Ad N/*%{gke$eSۅj#_bhNDI89 ٌ`tv|\{oJh]keň-M4#3 P۴>5dQb+UpY3}9SG}Z/QA2a(p;zڠ؏l[tĩk>@F r:# Gi6ݾs1RUˤlsW @\:i&ڢHYyMx#O([ZY-PXYyq9>Tۃi-5~՞ 3Pm;DjfnC0bu3:!$1sdD}РƖ>/5=HR8lMx~ 6S@qTlFC?"x. :~BeOBzy17Cx>F.讈(xa($h3s60ayn3kF`ߘxq4~1A̔TPO>vBaޑ!c$.Da/bk ,ZM:oM gWd&w++DQ}W- 3T=1;TRR2qc~@ )g73f0*4x͞wi_8 pz4ocLQ|95>IJ|y󞿋l"zCAǪt){jP)M,n2  c}B>kXH+[#̵\1d|u,V>b#r7K6xmn]y?|@-&0q}˜{HEa5=cL=yJ+GV7ZCR;yS/+Yu*qFzK[{|f)ޕ5b+KW7^_΢'̐R?8*^5'|C붢=(Boʋ%W x;.~eKkrF^6C}xT\oZSh`p endstream endobj 75 0 obj << /Length1 1370 /Length2 5964 /Length3 0 /Length 6904 /Filter /FlateDecode >> stream xڍtTS[׭("]JRz &!$$ AD( (# H RIxod֚=ag5CTPH,/$T20A A>Hn:؍a ){S`Z($PE$ PH Ov@->: s+߯@N(,..;( $P u9]uB((W N),-qrCaexnP< Ԇ8`: \ X7r8"0U+^uiu0_`Ϳ{( @@3u r_F5`>8yb10v(h G TrWA-@WA~yb ** (wW@^>. AO51=?.ki rsK0zw俪?y::s?'q]W"B]IP_Ղ!\;\AiD av,/74GrAy Ů}|p"J=n~L@X` ^+=χDaRW( ם * u`zm2 &FQPJu]9Pʬ-H1堉`YƮ'Nw<]>w˦wkmK ]'M0F;}h\NŽѳ@fՌz5sM0됔9.6\>~z3̒4cjf~=qPF{d{f]{[^FKH;I=Q+]q(*uYx|/y:͠& IKKMq{ޒTk)]Ķ<ᴰ# m17o},Wu16 c'T{p%!bkˇ؜ym]fƈ֎.C]h,C@BczAE!U!(z g2žeCk8Mhr-k.\IvxG|=O'A3d Fy"ݫy *r\ލPwG/mbIːS!%fF6׶<s`e'(9c)Gk8tҲR~6$.Ӗ6'"-p#ƶ]6j}oE96I`Σ7HZB83%+Gr kΗ_Uc_3lyEaOВ @vy ck/tD~1<(y}X%#Vv?szҬT6^SwY&sxᕓI4)! R{̚ܣCkk.-xboD TH'hw ~A}Z M>`L25QYq1(Mq7mRoxrxs;=C3X5+p*kffgGGLw9.٨{~lW+X+t̄@$4'B =4DNsX pkk;Ӓ]I;ܫtY6aLiy8/~~~Kr{ZD]Xx*Z#!3fHrMb!_]vL׀KW-5-DNUfF^@i"UFJz0#V'AfK&||x#=/~R S?^.fJW~|؁I̽sq"{vԍ4tX's8aC86&rK*]H`&>Aoy3^eA$!2hK}y== -.5`&!yc8R>u'y__,[$$RD;_sCfe2nXzm6A'Azx˖%%I$O?jz4oí? 
izX6ỹcy/x?u `WH`IcmyN [i#Z{jLX"sJ@ i~YhIƥ1ՒOE7< Le!V@]ɾ蘺_KC%ߋ-sҢ<"dV5̂Ɯ\ >b<=3K*| Clsm?/N a1E85S5y5!MP:XѳzkЌsE:Y\Gy؛bݖzZ\7զ{xu ᣇe79^w3]HRnq6ݎ>b[O૎*nPps7T3I|(iil&qO5:[2,i`(UX{G&E)?KGi$ 1{ fj7k=yWLbtxLnކT/>WrJ}o[I}zXGy`U6-(3"[=yYr/@>膳y]oීȹK"yQÒqݽ}[aU7o#duTT~ҧ,'GEnPm;WtpH>|csmе~x OK#*ód[sKٞjm{POwPϭIkJo?I+.MYy6#H^!]CՐuBd\wp|4q姇Uԡ΀rӽS7ܩ`c29[0&`TF@>@nXip\TydgBuu_2$G=3ᲀ z>ҨX(02mڑz<`2hRb+Qgu3B; GR*&M"~HcU7B}N'DGeGE9܁+{P*h:^ Jzv/Qfa5_l|)t_9}(W:Rſy&zkťyS3e֌WhB)H{ުeS9vMY҉Z[ÉJ0iu]tTe=UUD#/j>Y%xG-lZz*89Pܻ'eҼ )tr` (aI*}VeĖ6 vOA{C̷,!H⍲]SV5-Y謔򷋏2yюK:=rBΟy:[.டD3CxF+sH<uDLEzڎ_gyҟ(oRxQN 96vpF~$^,{^8vcY"d=uaգ'yW},ɢwjB3bz\qm}W[w&7\D(N^*Ry3t}ͿX~n]@Mӗ M$Yp-c+$%'qNw*vLgb1Fb0KmiSzj%A.yO傇vl8%SZdc%pbyMkyW~d"LhzO!al|J}u6VD\L>Jʆ'DVqB #"i7jn~hl 'tQÅxTvpX79COm8N;RDm"B"O5і1OD^ЧI;sM>Q }OVSn1ox(O{˪_VRz@X<CDz(E2i%+2; bߊE=I ikV"H|y+"R+pY:MhvJI67b[LʃkTazo5QQ N eأ߾)dI. 03Q xǗDE>5R]'mNHt]: 1S[܂T-ʍV^B7QL㻖m3 ,ߦOz~H,i>WK]9{3>x,;hJafC5;Ɛ#'s8^v*p8Rۃ_ld?dBG A 8lqf&Pmo?Wu9; 9zO؜,W$Vj.z9T. Ul *ȅ\hGcO u/چwſ>!EBJ\J嘳a@>[Z_]|^ySsKqW|<9Pw>] gœ[MFGDj}/} (#4yi#F[ri\AU~oʃkpeyj'#[%<ݯ!fg> bsVuKfx9v_W`Y;޴ `+|n;|{m癮hݗ<Ϙsk? 7.vGrbyWJ79keۦ"ntvuYko!Fs(e-_S!zv%{t疍dU;n-Z(+{*K q,oxb0zG)MBYӠΓ0Cɍ~iTѢm:M+ȡgDs- ߃bRN{|!\Lqeo{ }N6439SW-8i?\B)or͢9cオy璃Vm%)|e\IN;ܶ㍇cFotZ[4PAݵb򽎋\KG!an cw*2L힢nJUHh~ 3>Z endstream endobj 77 0 obj << /Length1 1727 /Length2 11869 /Length3 0 /Length 12966 /Filter /FlateDecode >> stream xڍPҀ n58 0@`p wN_ޢjx^{PiHX:eVvA<;F GBtv9e! 4ʤ ʎ`= +'dg @ d Pf(8.(4N kk|-|$ 30@ btxhaft!^qdcswwg5spaute` 6 h eXQhZ6 Vw3g U`]^]\@gkv@ X/f߇`olfadV { @UFa-04wq|7s3ٛY@FB`X8 .. 
?zd#1K-%` Iځ!+6,]ش@ym^E(Ȭ;;;?'?a#O%|Vm}AV(.fn@o dA`V;<_Ǐ^'lW̦a'wU{fp888|G w[9*S3@07 !;qߔuoE22ћ9=x\W(;Z]e%j!f ~hnVv R kjko 0P tKfa*o^i 0sv6DyWxsn%a!.}V(\,/Mџ `Sl*M`S8_iC6k>5,65տk/|___z D.~ ׆\e _p,\_?J@e~B(ض:Rԝe{<IwKmW=PR+7bNsj'n 琳Nx'%B`|g GEɕsG liMpҨR|}Ya2D߬h0#D\h0mUILNSſٿK[(a;)^~;Kh:&zl Ho2)V(]~k\|+R^úQ 7Vr-t Y"ÜnXC9|>Q+ߋTWDgYfwp5k&RxKbdJ>`n,Ǹ`Ds]ۛM HDY{QHv^k7* %f@<2!YP6~[+[O8Ccn,=3(]aUfhnJTNfI,ED9ֻuqob1ɫt0y qS&`Kp0g"Ǫߠ$SOobQK}G?D[0fdU,\A:TCE}V%'"Sg^=Vjy;okt/N_nu+yfnUvwae"D+Vsɟ$-h ſ>fOQ)R,gS UjNճazdIp GHuD=σˑmHY{('x`SvP;1l {ެ"L+ٔ[ Ų]e}SjBs7qtDP1QLc&hGɾ/#Sp#@(2[I%#As(c5Av1aSja}1s@!f5լĕlku1Efl4Q#:Dh;K}ՃqmYuI7 OOl@*T4'UL%x(y&95G0JL3-g'd( _nCx1B Z,rR hTzfkrL-)dc5E?|2V<;C]ûfŕ\$tJ"1X.++Xw) q &s!*N=~)pߣ8(O!m@N t:t| G'yԁT :jY {t0ե1 A ro&`?YwMMyAo<^fgЭ@8' QKĬd|hn'OZCCzo6U(횾P5ٰMΚWɾae f~leb'@jڹ"}֬ګ|Ցx+g#~Ԝҗjg6=Psm^F2Xs^5{;J#>Tҕvs:]}VtWvs븞8Be &0{R")\VZvv9!Hџnv;@{e<~F!CØ`d:Ļ/ͦaРe?o[ċGpViPZE}%IaEcy@ .{KQJőC#l:Ӗ5H]Lv`+tiN䯜% qm̺{ؤ&yvE3ۑ&ab-Bùz'K /I*zٜUKGC,s~|eFXW ig߷K(eXM{GnSvNڢ'sh~5>яwj ⡛q(ֺB>ȕ>߼d1!!D,Zf@ъ %ɻJ9ɐZNp(Š wd^(muھ!ڶ #c=?ƎE+a+]JKT3F^LVMݢH> &F2 9(U@~ZH/!KaIS4F3RZ8aNF;R?)pLxr!mBsбB(Aɢ?Zw098'pT3A zJY3XE隸ᢆ{l€[~5ܩh; Ѱ+nF6IgVw=@BiKkƾ4TƖ_j*ʜ߲[v%+Nr2'֋Ƽu uzu :)%R ,oS".m+ݻ(4ւ Pd[P0D]}I<5?>;wT%#Gv307?e]`R>ӈ{$H]^-~P[# RX܄Wr)ʺNrF$ܦyvc[i0zݢ@aB0q$קүž ۏZ O.}[e YiL֔v15ʴ9>|(cg |;"TX>flL\ ߍܐ/?+t3NfEj8EV".'Úk+2qBfI\_܉Rߨ @-|||lʴhE%E=Q*uBbb#,8=ȸg]  zե)qGAu?RB{qk7f70D[BpoeVFVr&OB*rqloȧ_ 'Ńŝi 07hh>cDeX -)BU|v?_59۾n) y-v&ZJU#|3\LmgX?U +4 y@~$I t/H5s+qE:`y9}oY@*57^1Ŷd~. N_;˸Q?<Ru6UWˠ/*D?qNBFzD kF;2) /yYi*Dbch/盚 j!V} *CJ뗛MOJgy7 :ՠa9[m$U+ƈ1SH#vFNq;KQl6/y|5 mrCU&y79yjR 6L0ܮS,H+^@Ya?>B;s92JU>)2?ZX$7Ϥ[*癳% v`G̞Vk)DdKPe7$zlos93)9`bHV ͗a`uI0To>gE*q@9(GL~NT?:UʔP\/&z{bgcܢ%kuqJ7|޹+(QoR_y?Ytl57x5=ۏ^= X1y+`l4ρFFȲ說uU *VMkcB%Q%[2/&NH45GI a%Z"l.?{z;)"!b5jV2שKfGIsR?Ȇ=HmizNu8"Wv Y3.ݝN Bg, OV ަ-uZVн2?eAG|kyRH(sIـp~2jL07NKS[>,f8Cm`St6.( (*#OZњ2U4|+?9$n_a3GWN'&G^#ooBH6sw&RX]PfU1!$m*;NM XolEqR(Pr\q諻 K]~΍Jr/d'ꃇUOʾį[얭 P! 
!Uq.>)z+ +p޸3fr8EM4"R۠7ŋ7J-1K125,*s*v'Ғ/Mf8J!n@շopSb`2Ӽ%~`38kX?&KҜCB :-Cl.2':T`N@S3+ZbQ\@ vҬyd/GcA:%fɝ~_U"{=- ), )` Mu^oVBr8o \}`n:Oݖ"byplL};ũ.0=7^B->jV͍.gY%<npU('ge pa<:N|/^?]NQGItQJGRK q<'`{_I7-~4GmpU%~$8*YvGoõ~V6=8SeMT^Wg"\G;|HV# V FT xl՞IN34L+?{Nms0uSρ _ty|t­H0U&ׅ٘RB%%dOT3u>N ܙwʆT^O߰r[oCf(25x˩#p6M[I?wQTb엚:b&kVl;c ]7ṭ?so.%GۛPOvPjh%sHrߔ$}T!l(m^i;UB - FWLNB=.c=w/[x3Rzn*M+>&h3O 8f~`jIѿ NsCP+BRF&kݠcQe"ڲ(yV@&&$)ΌKBW6|iЪM3.ւ| ʞnWAVL(԰$ _%)cݵץyJpP10,zA˜+2yŨ8e-/)~Ypы܉ןǾ7ON pV7GM9թҁ 6`&A7[3ߟ[ 㓲2TO5 pS|JR Usʨeuh | P+۫2/~CdǒXC-l.?ὌcC?~w})5z݉]gzOE%B.k30Cf)8rmL?5Pknd|RQc^ ϰLe s=KZg2C'x135x039G#)C`\]^4VA$f?'? ߱}ir'MEQ+gU<ыϡrdf8EO"&HL}PGR=%=KfUN_ sNҼc1!_GkR:mdFaNV])򓉏[Y^'vBW$pD:RIdwU[\8aw718h4vi-(j*=)E7zgֿK}WL % tT=!VWC2p8DeHhhzJwq=мrc)JʬtQ;^1zlq^م8u d&QF+M S/5_؝4İzcKM Yc7YӲo哾t>RO }fsf,vLcQ,rNdqMw RljY:R&X[O!m{>^wie2zV87804ԪlŃ뗝#L'vz=fJ6ejÏN hEv:%5 e"e1-2phfnT@Zqx&Vr !.~W\nڽ!nפ)0Qm 3ɺ#Ygp#MXUHN$ů#tk&?SRՙGO8S_@0Y@=O ҇H{F?4``+EmzUL:iK;%5{C/80x` q!C|h͘}4ghţDGeҖZ` tž>"o&ĒKY.JteÚ`3]/K3`ʋށܺSTsx^E;¼sߠa5>2(tMMAf/H;d뚱G.a!sjm~9_>J2bS ۦ }r .Y<@?;&V/[`2pҫLI`Ƥ0غccy4(zO_ܖvRN]"JK l)xMg%VY99?#ߤq?C7mث:a6`=]r($o8 ,ڐ%ʆ6< V}٨#+O!f6XP+9{ #r֛WДVyYuqk̼ѮH&CgSoG\dTyV}3E$!G];OȰK###H跦5֯:xS`M1t;Y9ȦkqFv~8$C K^0ߞ3*D9zg۟&0e5Uw@&ƪ͘`@ٷI/gZ.EˆJA_:7>'JDXo(% $x9C(".b_C+7Z<=R[cso T D~J􂆣K ~TaB p:E')BM(/(F19&{-+6^s|oTͦ˿/ 4d-"i-cd."aҜ,[MH44$Q( _/ߎFU{e'P#rl&\Cz44CT|U_ĻWŖL|ުmWQv|n V^9PMbXgb+&c)OSaA ֶ0 G`54}+g_nzDbQ\f"NN2mB8F IŮ+I@ഁw۵8ݽ&p,|&7&|?` QBTTT*LӬGL axڻ+g1' h^j OzkQT- ~6;|,_ncrPLSGX'bd/BM.5Ĥ&Y⛉*qQ0r^q"NșY6k5X@|ހOv[?yr08s] hG~F}Q[Q#J٨Gwg02}>N55+lyUŠ˻#$L(W#$O-Ua<\Eh%QOIH8fuEZ׷k>V1cT`LJ6H橈~!k3 NwխG΂󥛊NGMcTbl E]m R TnIn5%f"N?6"z{Tn[|Of vM m_KFΞnKRcV;.*o[1Ġ|-EU|Բ% AOІdY<5Mj> stream xڌP\k w 6KCp ݃ !Ýs^yf~DAA(nk UQaa03123SP;Y#P:8Bh8 emmRV6 '/ /33?Qsc,#@O!bk`njW ow5 kd1XḿNɎՕڑ3 = t: 34Fx ʶ&N@H`enq8 v @h/c #@6;ZظۘḼyqF'7'z_V s+Cߩą ]#_52f1c[kk#_;@}wgZغxU3PR6 ) nFfLV%igk067{:N@o*͍@Ss?Abɿ0̠c0o: 3rc3)HJi*m V66Q ?<%mLl:E)|,Y\i;3 #hH)$:(wK F"\殲m1.D|ca`}K ԭiǰ66jk7, wWEF ygĉ݌۞r[ؙ32U?j8|5bؐN.wCiy_? 
?7/_l@{d2'J`)7L2 ICvy&UxRtINX,B@c;JLY۟ڣ= K:|RGf݇+#C)Ef2(vHyZ6:z n-^ǣrG׭E= 6кY7aiV`~>YEbt(3d,>9bl7d|(RZ%= iiuJ6IԐwj-q06y.m-V\G;K[KhUY^9/z-60yFw>{}b Υĭ?8ZVb%[ rÕ/%9\vmQF_f/O\5zӿfj;MN '/0I o4z]" 7/'(!e\DxNGkg.;SaNv#[`_\+GS,4a ibnT̓?8Bō|?Xnaĺ F<fKdw7VFF2-Y=H=,|.61=}7$',Av-}SSfPq_LoװNwy݋ D5ǝbBvif /S%Ŗ#{Nle??Lqe+X - { 0&gUAVpNQYνRO(JW-:c#j"ju=z7>x *Isؚ~P(߄- )Q}KH3/[47J?MR'PX="HSHo Q}FT= Wf`UB)p1Erp+ho}}l`utIBnHf;] 鴡+v hct"Xh'a=ykKA~ l$n.L7Jd͗0<}T5U1]2'RMҰՎb'hx }oԸ}"'c5f`=mKFjnGb=U{ۢIP'{ }t!!~CBDC/֝^.~^fpɖxY%QtO{bNzЃ[zRM"% #p![|¥zbt;S[ Fb(26__[X* C΅.B=TNZ1l+߭UfF!(ؠ'D)h薼o(t+ }V3,s'PT c)GUHR]Nazn,r+g32Xob.#w]&X "煗H:)WbT#-b/Nq8Ѽ586 );IfԔ&>l+5$BaYR!~8?RHR(}ʛFeR{1N1Qp/'⥀ 7(I]r N3sh݅j,_3G9'9V:Ů6+y;{Wm `Dfez/{Ə8dlsV459}IPYyX$Mop?c768 y~2/#?yR=eq>sӼ0DSIe#Oېӌ̿'2)#?[Tyd9+x[OQŰN m'M-\- /]4;zb 6X*%ZK]!q.c{# *Wq:Pђ.l[I~N2[Ca[pTކkegy G`*>· l@Np/޵V?jx4wOeZ0T?n"+aL徆xVCU/`\X8ΪTԓȜ󗞋 v^X쀟fLin-NgTYLE0K15%aBgRϕFa,Uwc6CCӭFcT;imᗾ'V^)sԫū\9WUHvahwWgK?O}jǴ}7eNd `YW aMST(e*ol\Ѐ Y–ٷ B=:D\Vydύ_9q #%) JƴVr81C!%mz}Elz= 6|/ojMn/$Z0-~^GT}B"tn)2(3oP~-Hr%VϿN]j*[[Sp'>ӵ$:@ИH)`E1 t%jN%u#?3Ht=OM4`#焆Gln8[H˫ \dP{<|x0Mz\K(C|KNvQpVCwL"C4y>>l%אrw*$9Ϣ 5 H?8zFy* Ѹ~:r،2gsTBgPɰ3"9~?d}3NX 1+Ϲ̄JK<1{QQWL um) N~+A>*-qT3,@c ~ε%@-<m>ĸa{K4]7/MY`Y'a땬w[1D$~Pc4f r&DK)3ZR"ӯ2zDeG!aIȹY+%A].1RW EؔΗ'К4 `%{~(>YTvk n 'BvWD&/"y!1c\YbS0.? C! 
/uY44Iosg`,!dRz#a4prUqu`Y %M4CȆr}#=SYK-^55&9>8ݟg_BQsRwL-t ĸ1 S B`PǷYY!Fਙ{wD 7t 8w4n7CDc߅)iK Wn_bp# 1^M wvޗ gv$JxJgHL~mw3%bSj'@(z*AW~W c>b:UwbLP-#JE6N%>|d߭B7l Mhk~`y 㦹gޏZ6Ä=l{ݑQ*EBV'dž캷ղ'l*m)^#l&rul""|.؃5B<:fC gp=1dGZ(el }q WմG7 ɝJcY뽇f^*=gܯ6n`z߄?!]&NXuYǭ'4 pON%w?9vQO힟cm;ڭ/Bp|:-PF#e?cC9f]oUI \af2hq%E*btZOMR0{v}Dr''Pv}DM- fH|0*iAx6A?/@S=!iZL_ТQ[_no3ػY,6lFpRnQ=m`BX qRIN5j ׹C5q#˹MtKugXCy5Fjiو u?I Xh@+5 gs Z)PTjhͲY| J5ҏSB*Hjps4 2T8_uMH2wΏJ.# O *|ubR9.oڣNܺvpg?|-W@֖:9$-.iX摨I:ᆡm74e ߢ^LyHoTz&Jz 9'3z7bQuG]/9nIS;9dZmLsw\U;#R :LEbJ"ltZʱ$q`5+9J)Dx7ѱ]Xl<@5 %ߨ78"{L&>m3+XEyȎb:g<&")zpoj\YTH"KZoOfZjV5w9 cJR^=,IGD[+Q}ʎ<V86^ /uVŵFmbaT)u|NZR bGj!⿌XumL\*?R&`mr?D8j|DLR@3Z/ 8쐐!ȋ͂Qq;NӦZ|;p<>7$z4cJc='5lS*8MDcJOI0(H#1,JGK_\Jɱl0<-oP`%ٜ2z^j5QAdhk HxiQmDpT8I[)JTԯt| :t4'wXSrnR~FbZnS?@sU(d տ*2n].8jVۧS)TaVD˗504z1Y Yےt3=""K zҴE;̞!LZR aWVt$;O|Q⭓ODhdzLn̙մ@ݔH3҃ܙ% N]VM@pӆq|| [̉oT=p.R\6]>| 6buLI&-\<_d_y߅lf\uqy;"la<,;+5 >.6 Ÿ Ou&|jq/ӓ,L_F;N2శl'ۈ2R+70" 2 k7=[Cʍ OQ!u>li JX!!BI )J\V9N] wYBhtb*sSL,OI18昢G%{+ߴ CSEg9It3X=f0 -[B!y d^I!|N㨖cS67݆N`;aO*zYj>bvRֱ7YeOOU㼡SX2@|`wtm.:DK4vՉhIo]~9|8E|MY,u1O=)'=.ɱKNjznwzˢ* c;jQžX< qXfT8{6t$$v@ YqqdEB3^5@Зɜc#;;H3)/iYiguU`6SPoq9(+@>|(϶iɪQyw93N: Nܣd x}T^H_CW4_;Qډʴh͉Č8A 6v$_?A`sOjdx$+v(P!йm:bY[yGomb"[nZgA|R>*$w^lI&_CQϔb2Ȇi0 Ȗ`)Nh +ۮR9$'g:GBE#i3"Z NhCTIƢlyv4(ց绛 G`G*58ն#~DY܈&S9V묎rX±B>wv\="@rǿʣ\- ̵0{p" ShYͅ"UΉޔ5tL,Ph #O^Bs&c,li+tDS++%Biz6mB7V !U5Y4dx.ޛSRXkY<~{GgaN26rHLdsFo 56 gۊQD/U]ߘӮL_mJE7tjZgA^> álэtZȕ: ƾ?چǮ2}d$ˆ q'~GU)+W̘|4Q*Gњ'j1&lQ췃{!#]i:_oLBnq$ ?Î46Pϰh7h7**GB L^sp-oU~ۺGE2A%ݷk].Q|-tN4tc6!|Me4bwNZTݪnfK [|U:9وQn;C wƸ@Fg0tJ椐0G~#`|zBEC.A̎ޓKdZURTX<|.Ky}TyD 荈:i8!DpXN΢E_,-dYAW"ޤR,Q֫Aӌg䂵Yx#'M$e9q|ic'MM;tm]iT>jOF~ʘ.x;Xͩo.BE4qAb ݻ`Qې2npf2M0y.k@Ab+#;.ϙ /Y,IXX] O?n $MWotL8<~ ̵a.i`ff:ib`{6s캠rmKsX5%ؾy-MvW$]6}-ϡVW-q7l~B +lkXQA8o_<ë3E~^ii jKj9 kwPT6.Pn#,9ꝋS%}Kd7ˈ#p}~Pjj _ } -&=[\(ȕ'RJqx(-Lj>.zoTэoteC Ch)8fhʼn#pW!h%qEQhS|^"v8W%5G̹mu2RMu}%!IyB6%(kRB]ic#湂9s;'=&ty4UGkי/{zS@ZH@֡M 4HqM}ת"knATVFw68W dUe]8SSR*ɛ JkZh_d~#t[WlXѲ_d^g?RKle%9QU&YZ{)|d9]lC 6|6*BVתGhM8kݫk *nv ע%rrC̸fbcu: wϟ.4c 6V]8ϑ6rQ# 
yl\Ws)Hy+`$ՏѿIY ;3t~H/$prrpX3؎Ҁk4HH:MS`f\Bz}$n"b$Xd\;f{3$g{tbEMK҃X:#wfPoDCg'5#Nԏt:"hG\z߭E'\  򣂕k^- JG"gX:Yܰ>/uUܭ׉|wT8ѤgI6{ȣ+D m8E5*xvf@60&3unG+>O$GӞ)#CSPxR40kʼ]c̬z!W~#Ñ"v *;=/F^++.+Bn r׶5FI<=Te>,hnQf!wȭNƄRrb#؍F0ӴP$j&z2ֻV$p"tg ܘDkdc2FHFwX;)|Lܿ4 _3?L{Ӑ-}TjLҚ6aJRp+q`9"9ְO " 7L`eAˤ0D#K`M1¨7t omşWJTR(`w*[AV ~5/J`<qg!!cf GJQ-AJcTXg@GsMw.Щezp?}tZ.?MA8wo)T7T46խg%0z^66خX ~HYt{.0^Χvʘ1SZkӃk}b^ݝ# HN%ڬ]mI pjKvf(`?U6#&3ւX@$%F,խl+Gf>'F7ƅP )v\hN1fym)X-j!,C.ŵ/1<3xಧW[wi=;(+DiRz"x K/O~XE).yO*ra^N;|S޻$Ë-?,"̸;eٮPʋ~=ₚQnHGw"qx~L.<A)y{#9/TEg@PmU |S^Vv5DIIlt<1'GLdOiMW}= LF V8a;{\EDcxx&5%RLG}iXνrMxd1ԕ3DԜ]|`ڱ8Cp(\C?bOAFPs9+d.h< ueD`q-/c]XE.D@V F є,cVqV˧qyHaё)N|&B^}" r$͞~1ׄ;Eߞkko?6SNJ'$l88ժ}٪ŖX[ۭ7 QT}VXÍcn\h v:Iά݌|DP~Ì7U {dY=+Ўe O /X?ۯt=wa {}frVg)SPt#&ܬ -_v1 `ǀΦW KC0%}_+!+@IK}0רТT..?\V@uE*ӻ]h$?}JZ!r 1HnueK7K섳շA̝=שpm^&<7o{Uπa_+oin<%YHy["V`+g\Gke/N:(~W 5 P !4{ ˷! WA>[7ѠF4ޛ bj:]䩓c"Q{ ao˃{@sS^WMe& Xw.?S+S:˂ a-?Dd6 ,Ɣ7*_rML|mu[Ě d۔EŠTQg~-6uaqdK:O3`#$He;o\F*V/@TΜ`eu1$C̍p2[QE;R[e"*3tSqCQ ~a݊]ű2RCLpO7-VMX -yyμ}/ŗ(8<u_I:YHXb I)S_C`,,Q*E`"nqS}EzR:E޲ `m`DW JMp[QZS#ߡ6aHL#OZo2l?}JXvU(ŚvQOl0$g\ßPeFJz< awٙl<+_| 8q_$ "|+|G8ד|k})?O~ALia[QD: 4/}Z{wCd4|P7r֩ŁNjT%&lv^ <͔ OG#-ڐF'\2Nt w2#йVDë3JC6w7CuJ;Bm.'Nӫ S5]#v]H8sB);&\Xx-GW0&B" f]n{>JuJ-v$c^l[Qopf&P9Q8pBO4 9aO ^rVs;Pz\;hh&AI ^_O1D %96iWdx@l!2.| MR#ere |/_{/BMK$_E]^sUpE$x3; UVyܾ^r4)w{ьD5JrVx[aIʷk6vTЬOȖ1QUGhEɰ+[D$,c[~g5hqK!`饜tɗb cUƯ6i q mlKWZ1ZG¿9WfF,ID=#w.KꬼE`Xc&M_7@<KR5xAc.- %ĕVv#yG rK.5;꒾䤎҉pNpe qt׍X0?ӏ :/pr!ݠj74:pc=jm]Bہ?ܐnF^܉B;"a7Uz;)q "qT j3-o(;zI!ȼ`_ULzVc쿅*уk [(c9)-/Bġ%\n1FÌk*iex" |._?ՏS`Cz5.}PcYwOHj };@4>+@ X@uu'[^&!I)wAZB TBC 5&C#/7Xb`|F}+n37ž:d+/N@a';k)S&RΗjȜ]}U1P->!psP$^ή9aFM`S&pO1 pPf05w72 1{/׋v &3K_-m͏=?P_j=IP CaƂ5DCCԪ') 4Xy)\ֽt8Cn|Iv'6YP^la7C`!6o`V86wڌ/la{Tf3^_sצFQ |lhMl4ba^9sm2l%W ^Вak{(,M;_7+";zRuc66#@3Lv晤ͦ3>TfN3Ԁ"]75R?FQP}k\Cԁaֵ+a{i[d#se"Pa'6WzKZO{*uzy_B;[##ݙA^%<6N1BAdIip wHKLu]7vMI]-K uFM7#W1B^6Yr9 mV [.Mkf۟Iy"󧰖F:P1uXFͷ$9ypKXuA&M ǻi@rw@Sk5K#Zec8DUwF#XO{^KO/n⊵Eʪ} KgxOTܐrMN |%*lZw" >?Q='m` HD>NQ1_Xm릝dV**w[ 
#[ɸU%aaf(-߈=VgoGYXD*LjIA`hȸϡbOFqEt[U?9Y6ǕQ4hx\Q N=; A^~.[>`Ml8.x"8(=beB'=+kӋtV|C7b3- ="p ʾRfզ[1dt&|8H 3V#CO;hgNv}ԝcg[7Wc>:[aQcEdπ\1(RH`1"g&V4RXw~VĶr5$gxOy5I`W+'.#k+7̘d5Ki9 Jazۨco*ǥRԝ3S}ՌXk(RU=Q* b}s.1̜5˱}jk#A3p?!'Pe˙UX{ۙVIPA'AsDxbqwRmsrc8Zvtq}1F: x?V=vo3F _ vZ\&#-ޑxH6 UrhQI;/28yLClcvEĈf e$mbS'(Q3#").͵qCs)SՄ4X}ڢ }QSP endstream endobj 81 0 obj << /Length1 721 /Length2 4672 /Length3 0 /Length 5264 /Filter /FlateDecode >> stream xmrg4ju :ѣ D%.E13 3ѣN"D'щ5DF^7]Zz>쳟˥A!0HDT`n `P<V2`pb 2^ `@D!c ȹ*➋`+\7"=`tBTʹ @F`N6NH@ CqA- p'0h8oM8?Ю,Z-A t4x5â>_//u'!p$ A!dM m<?wt-w p f?wrCQ t1p 0YP_z9 $N醀#VB- ]O?ڏcN;z?<50 ⯽bP? \""X7Oa#i|žc4׻9$ #d |r o Y {igKX /(lok} (V{"B-XOΞuZjuӘ'OM{$ަ,}'OίmE3;1|KyzI!TB3`eda0$3;6/3?=KqrytnEGu2rHtn%MbԈpsڧ BJ ;`e`FX(8WD"Q/]*\ұaRƨoV@~CM…bԙe3'3'>]}TJT!{QyŦr؞{ } 2%.Evpz#J, Jc9u}-*;\pf4ѫ&wϯ,3o;!@ LGl** 7$WWpYQ5Ϛ5# o9-ͰEq?sHf =R=]q'b."_{88  8ixxs=e26R>-MԜy$l$Hr*ReK\w:(_``M:ǦBԲmhR@NP >ѝU%' 13atLjgt4O ")<u@VoYA38IG 4_?)o~[u.ᅬpLw$,ttQ[ \6Qb})Ŏ72K@w>T8~5,N乁c-Tlv#$I2<-fJLZ摳lru^Pd<=.m1MMf+km(=[3/71,(m}!\.·ڔe=D{ωM^ E2 !w/3+H6= M4A'Z,Dƞi*s\F. ONޜՍ 6 ۹,W!#%Xfo߷90 )!Us*@>i}ޟ|Gv-z C-d9Du1N,tA po%ǞMݩvIeʾ&Ĵ6flVk;;v^-YlM.#&l^D3 KYOhlu9ZM:IQtf\jwwŶLaG|-;+qm@٧ N4 8$ZTcg3-KVn*?CmY;S^cyס8'"R\R.E(/^,j&Ny[뙧}x0Q;>vdJKo7f>!ʏs5hr\TesnX͈S)lY,W%!%?b:I9;D>b60*/꘤p&8y\/+5D 8ǒܚsϩRXKIHdݢxN m& V}ih6{͎Q z|yń'<3reh;Xy3E ="A`.jbZ_+2f%vI^ف7Ҥz3q|Po_-g畈 eWGߚ&PJ/$/32pDqDwu&:`O#4) =lp7X\~\m+r-]hQ"eG>xTh "#Ud5i\*!' xAE@}oU4gnş5Y,tl:/IZo8io'"v){gdXߟ;ٺE+u7{</&Uiѝ*v|0l (kN1S#k>w?{Y9Ay|'?8*Yf dW(jP ]~:e!=0iټ౱]PEf-|ѝ6%~R)'ryhz`v,z5bphѵ1[$1ʪ{Jb~Կ s;_<9|9t*ʝX|Jy~>M۩^L(ݡ ֣KHڪzԴDjt³ޘy&m=t9+r[lS3΄QDgy+3f^x_hiޠdd357hm Oڻ;=F!}7;\+9n"jqK5T灁?"(l ,A]Dn,,fhaP)Feɻ3o52i@{;H8dg%lo VUÜ{#gZ#K 2f}{UZIݴzEW1M;7I^_w󱛍^1cŐ=!m endstream endobj 83 0 obj << /Length1 737 /Length2 966 /Length3 0 /Length 1535 /Filter /FlateDecode >> stream xmR PSWHD0´*"IJ(T6! ;T0$&O*". es%JŢl"D@& u!әΛyswz2#Iqb0]Aز0>d&ښK@!☷@k!XdLK"Pvy#  T *T,*KP2U! Ġ2A|y  +E2T Q1p ;@c:I! 
ARb\|eA|usCr#T6+r̒ ,ј5%Ä*&JPqX7_,l=(FC!Q2YP)IHO a$L&ĨNFKrTE2|I!% P2:00X&B$%)IB T)?)} #8&K+ȟV&lI}W=wW&5^\4Wf-KQmˮΩVe[+3w$J)=d@j)܀7?˦:lǚzIzs%e6%nRuFܻǖWE~Qq}ď9eh8št{$ꎿ}'\i~Vx]p(;Cx148nH -5U4pm1t:i%Ė.+w5nnm>m:]d_&5;R@Wt_lj^il~ >i[|V .Fْ\I0M;үȆns_l`WhA`,mk[ෙ V,<^uZ>8\秪^_r;9J"'|S/v6AJv{A/rlqq2MXFϓK6>CwyMB+597f]fiÆZޫ{V4jA-8w+Ny׃ߍo`z=J)%CF+M|_/+LgOU>&MXD㦯$eE3;qM3ٚQq>\Y?|dd>18_b} endstream endobj 87 0 obj << /Producer (pdfTeX-1.40.18) /Creator (TeX) /CreationDate (D:20171118182013+01'00') /ModDate (D:20171118182013+01'00') /Trapped /False /PTEX.Fullbanner (This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) kpathsea version 6.2.3) >> endobj 15 0 obj << /Type /ObjStm /N 63 /First 496 /Length 2991 /Filter /FlateDecode >> stream xZ[S~9~JmU aT&&뷻5ь9UA4Rw}Sk2 &YLa34S:ɔaN{41X)Ϣ=-v@CyLjoL 3$-eW˞>ׁcǿzsU2J)WZSFlP ]vdm9M+zQ!RP e-f"/PcF cj63Mg2<K 8" xڃ(k3ŒĦ4 %6e$XןZg4JH( c0^RrA`FR:IsN^ҷIp\Uv{ XiYf6U5:Б<|4H<꧶ Q p3z@DlBVKJ8=--ܜFÓQ<7=~\nW)^gC~GI!wvvՈLDb'u|;{|#|O7??.1|¯5w'6tWɫsKSYNԉu":U--YKz&ab}!;1]HhmcDGYXN4/k_s߂Ւ~ {2M^^ȓ/.O ^猟ah ztӏ<—lzS4Ϳoדy0Lf0I3+bZ|"Njx{{lՑ^I%4´MA& pJM959-Uj ~vëC;lVE~rʣTC%>t. 
sp R&8JXT+{EeQTV1Ccd6c|N(Fe?Mg~0h)\SO[b6]Ϫ=:*Sd2-m_8^sxtds->{V; bpӱ^ҁr#Pm(AS  +P Խާ;(yޅ[ʗ,"rk`_)پ_%zw2)X#~;G*xNŴ gt2 Dtg U;Z5D++rZfk|xe ZXmmZ][_Gq +e߶>(@qpnjh( >1oB qC dr + rYǜJ}YtuNCY Oe=fCNgsaafP $$֣bhxMN3WfVgtlqWmgCՙ-ަۆ7*fuޭ][l(JnPZ jVλuAhKuyy/-KަMo-VWo`3o|-K|]xہXk=μ]$5q}<|n_88Br:paQ,h uљ-=f5֜Bw{x hKTFKT4ڪNtt@vOZ;PbL{ik .s endstream endobj 88 0 obj << /Type /XRef /Index [0 89] /Size 89 /W [1 3 1] /Root 86 0 R /Info 87 0 R /ID [<35A39421108B7B8341F735602A2AB638> <35A39421108B7B8341F735602A2AB638>] /Length 241 /Filter /FlateDecode >> stream x̹2Qs~XB[BOIeҨ,`;MxFZc9;_Y PbElM*6DETŎX[M"ܩZD3xVp':D;Q*Sdg"uq 8>ŮXODbH Q1&QK$qSwz k5E|5MV\E5CV]ۥkr325#v-2\%ùkI# endstream endobj startxref 177144 %%EOF tm/inst/doc/extensions.Rnw0000644000175100001440000002727113177024075015340 0ustar hornikusers\documentclass[a4paper]{article} \usepackage[margin=2cm]{geometry} \usepackage[round]{natbib} \usepackage{url} \newcommand{\acronym}[1]{\textsc{#1}} \newcommand{\pkg}[1]{{\normalfont\fontseries{b}\selectfont #1}} \newcommand{\proglang}[1]{\textsf{#1}} \let\code\texttt %% \VignetteIndexEntry{Extensions} \begin{document} <>= library("tm") library("xml2") @ \title{Extensions\\How to Handle Custom File Formats} \author{Ingo Feinerer} \maketitle \section*{Introduction} The possibility to handle custom file formats is a substantial feature in any modern text mining infrastructure. \pkg{tm} has been designed aware of this aspect from the beginning on, and has modular components which allow for extensions. A general explanation of \pkg{tm}'s extension mechanism is described by~\citet[Sec.~3.3]{Feinerer_etal_2008}, with an updated description as follows. \section*{Sources} A source abstracts input locations and provides uniform methods for access. 
Each source must provide implementations for following interface functions: \begin{description} \item[close()] closes the source and returns it, \item[eoi()] returns \code{TRUE} if the end of input of the source is reached, \item[getElem()] fetches the element at the current position, \item[length()] gives the number of elements, \item[open()] opens the source and returns it, \item[reader()] returns a default reader for processing elements, \item[pGetElem()] (optional) retrieves all elements in parallel at once, and \item[stepNext()] increases the position in the source to the next element. \end{description} Retrieved elements must be encapsulated in a list with the named components \code{content} holding the document and \code{uri} pointing to the origin of the document (e.g., a file path or a \acronym{URL}; \code{NULL} if not applicable or unavailable). Custom sources are required to inherit from the virtual base class \code{Source} and typically do so by extending the functionality provided by the simple reference implementation \code{SimpleSource}. E.g., a simple source which accepts an \proglang{R} vector as input could be defined as <>= VecSource <- function(x) SimpleSource(length = length(x), content = as.character(x), class = "VecSource") @ which overrides a few defaults (see \code{?SimpleSource} for defaults) and stores the vector in the \code{content} component. The functions \code{close()}, \code{eoi()}, \code{open()}, and \code{stepNext()} have reasonable default methods already for the \code{SimpleSource} class: the identity function for \code{open()} and \code{close()}, incrementing a position counter for \code{stepNext()}, and comparing the current position with the number of available elements as claimed by \code{length()} for \code{eoi()}, respectively. 
So we only need custom methods for element access: <>= getElem.VecSource <- function(x) list(content = x$content[x$position], uri = NULL) pGetElem.VecSource <- function(x) lapply(x$content, function(y) list(content = y, uri = NULL)) @ \section*{Readers} Readers are functions for extracting textual content and metadata out of elements delivered by a source and for constructing a text document. Each reader must accept following arguments in its signature: \begin{description} \item[elem] a list with the named components \code{content} and \code{uri} (as delivered by a source via \code{getElem()} or \code{pGetElem()}), \item[language] a string giving the language, and \item[id] a character giving a unique identifier for the created text document. \end{description} The element \code{elem} is typically provided by a source whereas the language and the identifier are normally provided by a corpus constructor (for the case that \code{elem\$content} does not give information on these two essential items). In case a reader expects configuration arguments we can use a function generator. A function generator is indicated by inheriting from class \code{FunctionGenerator} and \code{function}. It allows us to process additional arguments, store them in an environment, return a reader function with the well-defined signature described above, and still be able to access the additional arguments via lexical scoping. All corpus constructors in package \pkg{tm} check the reader function for being a function generator and if so apply it to yield the reader with the expected signature. E.g., the reader function \code{readPlain()} is defined as <>= readPlain <- function(elem, language, id) PlainTextDocument(elem$content, id = id, language = language) @ For examples on readers using the function generator please have a look at \code{?readPDF} or \code{?readPDF}. However, for many cases, it is not necessary to define each detailed aspect of how to extend \pkg{tm}. 
Typical examples are \acronym{XML} files which are very common but can be rather easily handled via standard conforming \acronym{XML} parsers. The aim of the remainder in this document is to give an overview on how simpler, more user-friendly, forms of extension mechanisms can be applied in \pkg{tm}. \section*{Custom Data Formats} A general situation is that you have gathered together some information into a tabular data structure (like a data frame or a list matrix) that suffices to describe documents in a corpus. However, you do not have a distinct file format because you extracted the information out of various resources, e.g., as delivered by \code{readtext()} in package \pkg{readtext}. Now you want to use your information to build a corpus which is recognized by \pkg{tm}. We assume that your information is put together in a data frame. E.g., consider the following example: <>= df <- data.frame(doc_id = c("doc 1" , "doc 2" , "doc 3" ), text = c("content 1", "content 2", "content 3"), title = c("title 1" , "title 2" , "title 3" ), authors = c("author 1" , "author 2" , "author 3" ), topics = c("topic 1" , "topic 2" , "topic 3" ), stringsAsFactors = FALSE) @ We want to map the data frame rows to the relevant entries of a text document. An entry \code{text} in the mapping will be matched to fill the actual content of the text document, \code{doc\_id} will be used as document ID, all other fields will be used as metadata tags. So we can construct a corpus out of the data frame: <<>>= (corpus <- Corpus(DataframeSource(df))) corpus[[1]] meta(corpus[[1]]) @ \section*{Custom XML Sources} Many modern file formats already come in \acronym{XML} format which allows to extract information with any \acronym{XML} conforming parser, e.g., as implemented in \proglang{R} by the \pkg{xml2} package. Now assume we have some custom \acronym{XML} format which we want to access with \pkg{tm}. 
Then a viable way is to create a custom \acronym{XML} source which can be configured with only a few commands. E.g., have a look at the following example: <>= custom.xml <- system.file("texts", "custom.xml", package = "tm") print(readLines(custom.xml), quote = FALSE) @ As you see there is a top-level tag stating that there is a corpus, and several document tags below. In fact, this structure is very common in \acronym{XML} files found in text mining applications (e.g., both the Reuters-21578 and the Reuters Corpus Volume 1 data sets follow this general scheme). In \pkg{tm} we expect a source to deliver self-contained blocks of information to a reader function, each block containing all information necessary such that the reader can construct a (subclass of a) \code{TextDocument} from it. The \code{XMLSource()} function can now be used to construct a custom \acronym{XML} source. It has three arguments: \begin{description} \item[x] a character giving a uniform resource identifier, \item[parser] a function accepting an \acronym{XML} document (as delivered by \code{read\_xml()} in package \pkg{xml2}) as input and returning a \acronym{XML} elements/nodes (each element/node will then be delivered to the reader as a self-contained block), \item[reader] a reader function capable of turning \acronym{XML} elements/nodes as returned by the parser into a subclass of \code{TextDocument}. \end{description} E.g., a custom source which can cope with our custom \acronym{XML} format could be: <>= mySource <- function(x) XMLSource(x, parser = xml2::xml_children, reader = myXMLReader) @ As you notice in this example we also provide a custom reader function (\code{myXMLReader}). See the next section for details. \section*{Custom XML Readers} As we saw in the previous section we often need a custom reader function to extract information out of \acronym{XML} chunks (typically as delivered by some source). 
Fortunately, \pkg{tm} provides an easy way to define custom \acronym{XML} reader functions. All you need to do is to provide a so-called \emph{specification}. Let us start with an example which defines a reader function for the file format from the previous section: <>= myXMLReader <- readXML( spec = list(author = list("node", "writer"), content = list("node", "description"), datetimestamp = list("function", function(x) as.POSIXlt(Sys.time(), tz = "GMT")), description = list("node", "@short"), heading = list("node", "caption"), id = list("function", function(x) tempfile()), origin = list("unevaluated", "My private bibliography"), type = list("node", "type")), doc = PlainTextDocument()) @ Formally, \code{readXML()} is the relevant function which constructs an reader. The customization is done via the first argument \code{spec}, the second provides an empty instance of the document which should be returned (augmented with the extracted information out of the \acronym{XML} chunks). The specification must consist of a named list of lists each containing two character vectors. The constructed reader will map each list entry to the content or a metadatum of the text document as specified by the named list entry. Valid names include \code{content} to access the document's content, and character strings which are mapped to metadata entries. Each list entry must consist of two character vectors: the first describes the type of the second argument, and the second is the specification entry. Valid combinations are: \begin{description} \item[\code{type = "node", spec = "XPathExpression"}] the XPath (1.0) expression \code{spec} extracts information out of an \acronym{XML} node (as seen for \code{author}, \code{content}, \code{description}, \code{heading}, and \code{type} in our example specification). 
\item[\code{type = "function", spec = function(doc) \ldots}] The function \code{spec} is called, passing over the \acronym{XML} document (as delivered by \code{read\_xml()} from package \pkg{xml2}) as first argument (as seen for \code{datetimestamp} and \code{id}). As you notice in our example nobody forces us to actually use the passed over document, instead we can do anything we want (e.g., create a unique character vector via \code{tempfile()} to have a unique identification string). \item[\code{type = "unevaluated", spec = "String"}] the character vector \code{spec} is returned without modification (e.g., \code{origin} in our specification). \end{description} Now that we have all we need to cope with our custom file format, we can apply the source and reader function at any place in \pkg{tm} where a source or reader is expected, respectively. E.g., <<>>= corpus <- VCorpus(mySource(custom.xml)) @ constructs a corpus out of the information in our \acronym{XML} file: <<>>= corpus[[1]] meta(corpus[[1]]) @ \bibliographystyle{abbrvnat} \bibliography{references} \end{document} tm/inst/doc/extensions.R0000644000175100001440000000653113204065714014763 0ustar hornikusers### R code from vignette source 'extensions.Rnw' ################################################### ### code chunk number 1: Init ################################################### library("tm") library("xml2") ################################################### ### code chunk number 2: extensions.Rnw:55-58 ################################################### VecSource <- function(x) SimpleSource(length = length(x), content = as.character(x), class = "VecSource") ################################################### ### code chunk number 3: extensions.Rnw:68-72 ################################################### getElem.VecSource <- function(x) list(content = x$content[x$position], uri = NULL) pGetElem.VecSource <- function(x) lapply(x$content, function(y) list(content = y, uri = NULL)) 
################################################### ### code chunk number 4: extensions.Rnw:100-102 ################################################### readPlain <- function(elem, language, id) PlainTextDocument(elem$content, id = id, language = language) ################################################### ### code chunk number 5: extensions.Rnw:124-130 ################################################### df <- data.frame(doc_id = c("doc 1" , "doc 2" , "doc 3" ), text = c("content 1", "content 2", "content 3"), title = c("title 1" , "title 2" , "title 3" ), authors = c("author 1" , "author 2" , "author 3" ), topics = c("topic 1" , "topic 2" , "topic 3" ), stringsAsFactors = FALSE) ################################################### ### code chunk number 6: extensions.Rnw:138-141 ################################################### (corpus <- Corpus(DataframeSource(df))) corpus[[1]] meta(corpus[[1]]) ################################################### ### code chunk number 7: CustomXMLFile ################################################### custom.xml <- system.file("texts", "custom.xml", package = "tm") print(readLines(custom.xml), quote = FALSE) ################################################### ### code chunk number 8: mySource ################################################### mySource <- function(x) XMLSource(x, parser = xml2::xml_children, reader = myXMLReader) ################################################### ### code chunk number 9: myXMLReader ################################################### myXMLReader <- readXML( spec = list(author = list("node", "writer"), content = list("node", "description"), datetimestamp = list("function", function(x) as.POSIXlt(Sys.time(), tz = "GMT")), description = list("node", "@short"), heading = list("node", "caption"), id = list("function", function(x) tempfile()), origin = list("unevaluated", "My private bibliography"), type = list("node", "type")), doc = PlainTextDocument()) 
################################################### ### code chunk number 10: extensions.Rnw:244-245 ################################################### corpus <- VCorpus(mySource(custom.xml)) ################################################### ### code chunk number 11: extensions.Rnw:249-251 ################################################### corpus[[1]] meta(corpus[[1]]) tm/inst/doc/tm.Rnw0000644000175100001440000003350513155253051013550 0ustar hornikusers\documentclass[a4paper]{article} \usepackage[margin=2cm]{geometry} \usepackage[utf8]{inputenc} \usepackage[round]{natbib} \usepackage{url} \newcommand{\acronym}[1]{\textsc{#1}} \newcommand{\class}[1]{\mbox{\textsf{#1}}} \newcommand{\code}[1]{\mbox{\texttt{#1}}} \newcommand{\pkg}[1]{{\normalfont\fontseries{b}\selectfont #1}} \newcommand{\proglang}[1]{\textsf{#1}} %% \VignetteIndexEntry{Introduction to the tm Package} \begin{document} <>= library("tm") data("crude") @ \title{Introduction to the \pkg{tm} Package\\Text Mining in \proglang{R}} \author{Ingo Feinerer} \maketitle \section*{Introduction} This vignette gives a short introduction to text mining in \proglang{R} utilizing the text mining framework provided by the \pkg{tm} package. We present methods for data import, corpus handling, preprocessing, metadata management, and creation of term-document matrices. Our focus is on the main aspects of getting started with text mining in \proglang{R}---an in-depth description of the text mining infrastructure offered by \pkg{tm} was published in the \emph{Journal of Statistical Software}~\citep{Feinerer_etal_2008}. An introductory article on text mining in \proglang{R} was published in \emph{R News}~\citep{Rnews:Feinerer:2008}. \section*{Data Import} The main structure for managing documents in \pkg{tm} is a so-called \class{Corpus}, representing a collection of text documents. A corpus is an abstract concept, and there can exist several implementations in parallel. 
The default implementation is the so-called \class{VCorpus} (short for \emph{Volatile Corpus}) which realizes a semantics as known from most \proglang{R} objects: corpora are \proglang{R} objects held fully in memory. We denote this as volatile since once the \proglang{R} object is destroyed, the whole corpus is gone. Such a volatile corpus can be created via the constructor \code{VCorpus(x, readerControl)}. Another implementation is the \class{PCorpus} which implements a \emph{Permanent Corpus} semantics, i.e., the documents are physically stored outside of \proglang{R} (e.g., in a database), corresponding \proglang{R} objects are basically only pointers to external structures, and changes to the underlying corpus are reflected to all \proglang{R} objects associated with it. Compared to the volatile corpus the corpus encapsulated by a permanent corpus object is not destroyed if the corresponding \proglang{R} object is released. Within the corpus constructor, \code{x} must be a \class{Source} object which abstracts the input location. \pkg{tm} provides a set of predefined sources, e.g., \class{DirSource}, \class{VectorSource}, or \class{DataframeSource}, which handle a directory, a vector interpreting each component as document, or data frame like structures (like \acronym{CSV} files), respectively. Except \class{DirSource}, which is designed solely for directories on a file system, and \class{VectorSource}, which only accepts (character) vectors, most other implemented sources can take connections as input (a character string is interpreted as file path). \code{getSources()} lists available sources, and users can create their own sources. The second argument \code{readerControl} of the corpus constructor has to be a list with the named components \code{reader} and \code{language}. The first component \code{reader} constructs a text document from elements delivered by a source. 
The \pkg{tm} package ships with several readers (e.g., \code{readPlain()}, \code{readPDF()}, \code{readDOC()}, \ldots). See \code{getReaders()} for an up-to-date list of available readers. Each source has a default reader which can be overridden. E.g., for \code{DirSource} the default just reads in the input files and interprets their content as text. Finally, the second component \code{language} sets the texts' language (preferably using \acronym{ISO} 639-2 codes). In case of a permanent corpus, a third argument \code{dbControl} has to be a list with the named components \code{dbName} giving the filename holding the sourced out objects (i.e., the database), and \code{dbType} holding a valid database type as supported by package \pkg{filehash}. Activated database support reduces the memory demand, however, access gets slower since each operation is limited by the hard disk's read and write capabilities. So e.g., plain text files in the directory \code{txt} containing Latin (\code{lat}) texts by the Roman poet \emph{Ovid} can be read in with following code: <>= txt <- system.file("texts", "txt", package = "tm") (ovid <- VCorpus(DirSource(txt, encoding = "UTF-8"), readerControl = list(language = "lat"))) @ For simple examples \code{VectorSource} is quite useful, as it can create a corpus from character vectors, e.g.: <>= docs <- c("This is a text.", "This another one.") VCorpus(VectorSource(docs)) @ Finally we create a corpus for some Reuters documents as example for later use: <>= reut21578 <- system.file("texts", "crude", package = "tm") reuters <- VCorpus(DirSource(reut21578, mode = "binary"), readerControl = list(reader = readReut21578XMLasPlain)) @ \section*{Data Export} For the case you have created a corpus via manipulating other objects in \proglang{R}, thus do not have the texts already stored on a hard disk, and want to save the text documents to disk, you can simply use \code{writeCorpus()} <>= writeCorpus(ovid) @ which writes a character representation of 
the documents in a corpus to multiple files on disk. \section*{Inspecting Corpora} Custom \code{print()} methods are available which hide the raw amount of information (consider a corpus could consist of several thousand documents, like a database). \code{print()} gives a concise overview whereas more details are displayed with \code{inspect()}. <<>>= inspect(ovid[1:2]) @ Individual documents can be accessed via \code{[[}, either via the position in the corpus, or via their identifier. <>= meta(ovid[[2]], "id") identical(ovid[[2]], ovid[["ovid_2.txt"]]) @ A character representation of a document is available via \code{as.character()} which is also used when inspecting a document: <>= inspect(ovid[[2]]) lapply(ovid[1:2], as.character) @ \section*{Transformations} Once we have a corpus we typically want to modify the documents in it, e.g., stemming, stopword removal, et cetera. In \pkg{tm}, all this functionality is subsumed into the concept of a \emph{transformation}. Transformations are done via the \code{tm\_map()} function which applies (maps) a function to all elements of the corpus. Basically, all transformations work on single text documents and \code{tm\_map()} just applies them to all documents in a corpus. \subsection*{Eliminating Extra Whitespace} Extra whitespace is eliminated by: <<>>= reuters <- tm_map(reuters, stripWhitespace) @ \subsection*{Convert to Lower Case} Conversion to lower case by: <<>>= reuters <- tm_map(reuters, content_transformer(tolower)) @ We can use arbitrary character processing functions as transformations as long as the function returns a text document. In this case we use \code{content\_transformer()} which provides a convenience wrapper to access and set the content of a document. Consequently most text manipulation functions from base \proglang{R} can directly be used with this wrapper. 
This works for \code{tolower()} as used here but also e.g.\ for \code{gsub()} which comes quite handy for a broad range of text manipulation tasks. \subsection*{Remove Stopwords} Removal of stopwords by: <>= reuters <- tm_map(reuters, removeWords, stopwords("english")) @ \subsection*{Stemming} Stemming is done by: <>= tm_map(reuters, stemDocument) @ \section*{Filters} Often it is of special interest to filter out documents satisfying given properties. For this purpose the function \code{tm\_filter} is designed. It is possible to write custom filter functions which get applied to each document in the corpus. Alternatively, we can create indices based on selections and subset the corpus with them. E.g., the following statement filters out those documents having an \code{ID} equal to \code{"237"} and the string \code{"INDONESIA SEEN AT CROSSROADS OVER ECONOMIC CHANGE"} as their heading. <<>>= idx <- meta(reuters, "id") == '237' & meta(reuters, "heading") == 'INDONESIA SEEN AT CROSSROADS OVER ECONOMIC CHANGE' reuters[idx] @ \section*{Metadata Management} Metadata is used to annotate text documents or whole corpora with additional information. The easiest way to accomplish this with \pkg{tm} is to use the \code{meta()} function. A text document has a few predefined attributes like \code{author} but can be extended with an arbitrary number of additional user-defined metadata tags. These additional metadata tags are individually attached to a single text document. From a corpus perspective these metadata attachments are locally stored together with each individual text document. Alternatively to \code{meta()} the function \code{DublinCore()} provides a full mapping between Simple Dublin Core metadata and \pkg{tm} metadata structures and can be similarly used to get and set metadata information for text documents, e.g.: <>= DublinCore(crude[[1]], "Creator") <- "Ano Nymous" meta(crude[[1]]) @ For corpora the story is a bit more sophisticated. 
Corpora in \pkg{tm} have two types of metadata: one is the metadata on the corpus level (\code{corpus}), the other is the metadata related to the individual documents (\code{indexed}) in the form of a data frame. The latter is often done for performance reasons (hence the name \code{indexed}) or because the metadata has its own entity but still relates directly to individual text documents, e.g., a classification result; the classifications directly relate to the documents but the set of classification levels forms its own entity. Both cases can be handled with \code{meta()}: <<>>= meta(crude, tag = "test", type = "corpus") <- "test meta" meta(crude, type = "corpus") meta(crude, "foo") <- letters[1:20] meta(crude) @ \section*{Standard Operators and Functions} Many standard operators and functions (\code{[}, \code{[<-}, \code{[[}, \code{[[<-}, \code{c()}, \code{lapply()}) are available for corpora with semantics similar to standard \proglang{R} routines. E.g., \code{c()} concatenates two (or more) corpora. Applied to several text documents it returns a corpus. The metadata is automatically updated if corpora are concatenated (i.e., merged). \section*{Creating Term-Document Matrices} A common approach in text mining is to create a term-document matrix from a corpus. In the \pkg{tm} package the classes \class{TermDocumentMatrix} and \class{DocumentTermMatrix} (depending on whether you want terms as rows and documents as columns, or vice versa) employ sparse matrices for corpora. Inspecting a term-document matrix displays a sample, whereas \code{as.matrix()} yields the full matrix in dense format (which can be very memory consuming for large matrices). <<>>= dtm <- DocumentTermMatrix(reuters) inspect(dtm) @ \section*{Operations on Term-Document Matrices} Besides the fact that a huge number of \proglang{R} functions (like clustering, classifications, etc.) can be applied to this matrix, this package brings some shortcuts. 
Imagine we want to find those terms that occur at least five times, then we can use the \code{findFreqTerms()} function: <<>>= findFreqTerms(dtm, 5) @ Or we want to find associations (i.e., terms which correlate) with at least $0.8$ correlation for the term \code{opec}, then we use \code{findAssocs()}: <<>>= findAssocs(dtm, "opec", 0.8) @ Term-document matrices tend to get very big already for normal-sized data sets. Therefore we provide a method to remove \emph{sparse} terms, i.e., terms occurring only in very few documents. Normally, this reduces the matrix dramatically without losing significant relations inherent to the matrix: <<>>= inspect(removeSparseTerms(dtm, 0.4)) @ This function call removes those terms which have at least 40 percent sparse (i.e., terms occurring 0 times in a document) elements. \section*{Dictionary} A dictionary is a (multi-)set of strings. It is often used to denote relevant terms in text mining. We represent a dictionary with a character vector which may be passed to the \code{DocumentTermMatrix()} constructor as a control argument. Then the created matrix is tabulated against the dictionary, i.e., only terms from the dictionary appear in the matrix. This allows one to restrict the dimension of the matrix a priori and to focus on specific terms for distinct text mining contexts, e.g., <<>>= inspect(DocumentTermMatrix(reuters, list(dictionary = c("prices", "crude", "oil")))) @ \section*{Performance} Often you do not need all the generality, modularity and full range of features offered by \pkg{tm} as this sometimes comes at the price of performance. \class{SimpleCorpus} provides a corpus which is optimized for the most common usage scenario: importing plain texts from files in a directory or directly from a vector in \proglang{R}, preprocessing and transforming the texts, and finally exporting them to a term-document matrix. The aim is to boost performance and minimize memory pressure. 
It loads all documents into memory, and is designed for medium-sized to large data sets. However, it operates only under the following constraints: \begin{itemize} \item only \code{DirSource} and \code{VectorSource} are supported, \item no custom readers, i.e., each document is read in and stored as plain text (as a string, i.e., a character vector of length one), \item transformations applied via \code{tm\_map} must be able to process strings and return strings, \item no lazy transformations in \code{tm\_map}, \item no metadata for individual documents (i.e., no \code{"local"} in \code{meta()}). \end{itemize} \bibliographystyle{abbrvnat} \bibliography{references} \end{document} tm/inst/doc/tm.R0000644000175100001440000001040413204065715013177 0ustar hornikusers### R code from vignette source 'tm.Rnw' ### Encoding: UTF-8 ################################################### ### code chunk number 1: Init ################################################### library("tm") data("crude") ################################################### ### code chunk number 2: Ovid ################################################### txt <- system.file("texts", "txt", package = "tm") (ovid <- VCorpus(DirSource(txt, encoding = "UTF-8"), readerControl = list(language = "lat"))) ################################################### ### code chunk number 3: VectorSource ################################################### docs <- c("This is a text.", "This another one.") VCorpus(VectorSource(docs)) ################################################### ### code chunk number 4: Reuters ################################################### reut21578 <- system.file("texts", "crude", package = "tm") reuters <- VCorpus(DirSource(reut21578, mode = "binary"), readerControl = list(reader = readReut21578XMLasPlain)) ################################################### ### code chunk number 5: tm.Rnw:117-118 (eval = FALSE) ################################################### ## writeCorpus(ovid) 
################################################### ### code chunk number 6: tm.Rnw:128-129 ################################################### inspect(ovid[1:2]) ################################################### ### code chunk number 7: tm.Rnw:133-135 ################################################### meta(ovid[[2]], "id") identical(ovid[[2]], ovid[["ovid_2.txt"]]) ################################################### ### code chunk number 8: tm.Rnw:139-141 ################################################### inspect(ovid[[2]]) lapply(ovid[1:2], as.character) ################################################### ### code chunk number 9: tm.Rnw:155-156 ################################################### reuters <- tm_map(reuters, stripWhitespace) ################################################### ### code chunk number 10: tm.Rnw:161-162 ################################################### reuters <- tm_map(reuters, content_transformer(tolower)) ################################################### ### code chunk number 11: Stopwords ################################################### reuters <- tm_map(reuters, removeWords, stopwords("english")) ################################################### ### code chunk number 12: Stemming ################################################### tm_map(reuters, stemDocument) ################################################### ### code chunk number 13: tm.Rnw:193-196 ################################################### idx <- meta(reuters, "id") == '237' & meta(reuters, "heading") == 'INDONESIA SEEN AT CROSSROADS OVER ECONOMIC CHANGE' reuters[idx] ################################################### ### code chunk number 14: DublinCore ################################################### DublinCore(crude[[1]], "Creator") <- "Ano Nymous" meta(crude[[1]]) ################################################### ### code chunk number 15: tm.Rnw:227-231 ################################################### meta(crude, tag = "test", type = "corpus") 
<- "test meta" meta(crude, type = "corpus") meta(crude, "foo") <- letters[1:20] meta(crude) ################################################### ### code chunk number 16: tm.Rnw:250-252 ################################################### dtm <- DocumentTermMatrix(reuters) inspect(dtm) ################################################### ### code chunk number 17: tm.Rnw:261-262 ################################################### findFreqTerms(dtm, 5) ################################################### ### code chunk number 18: tm.Rnw:267-268 ################################################### findAssocs(dtm, "opec", 0.8) ################################################### ### code chunk number 19: tm.Rnw:276-277 ################################################### inspect(removeSparseTerms(dtm, 0.4)) ################################################### ### code chunk number 20: tm.Rnw:291-293 ################################################### inspect(DocumentTermMatrix(reuters, list(dictionary = c("prices", "crude", "oil")))) tm/inst/doc/extensions.pdf0000644000175100001440000045217413204065714015343 0ustar hornikusers%PDF-1.5 % 3 0 obj << /Length 2801 /Filter /FlateDecode >> stream xZKs6ϯPmjZ !}nʓVj3N.IM۪D(3~H{9 "~}צ\dYn ]ʖ4F:[-~Nn~ʒc|=wSb3Ug/et4i{ΒaM ುOr>"-|(^_u Ѷ_E;FkR[T8@w?FT#м̙PJޣ.VWKgdܿku%vYelA.Z+PVEFŀZpq C%w";S-jU@.)ٹJηbN$X? v%jnTCHGG ҞгD;iav- FoV8](Mt -›}üvhx{ Bï;SȜo|׸ݡ0+ܳ#γdl#7Y%QMHvw(d?N{^B>)ߘu{g@CkFV$dKj'QV.C~MBs}~+G*ώN,y"[ G#4c9#3`5 Z"M)L+_k\ ;~6tzVh¹tZeuuA˥oW'2ۢq>y0i109ÕKffƊqa8G&+%ͱJ46MޱL(6@]GJJ{i-AA߆irs`4P3 q jBnjtӡ&Ƌ-o%ONijGTCPZ"a%>zϕ 1DvkOXTsN0lӒUܠ[rȌ:kw#5ʶB,QI&!mF5yX*@P:d։hS@v;g7Y_Ze\eRrK3?'\:HS9L`?"/eHUMm{O^[ z s3Bi1=(tw>Z+f; U1Xix9iPEfc=%n(Z0P$|Ywtr 0)O0C8i.;2>&IYOICJ݃ijs ޣ(3Nb8d q=& "7|vsI3:aV)JJQ!R#/Ǭ C 6Ǡfrj%vt@;8h,u2pU* e}Qhy '3Cm^$&UX'! 
E'o|KLUa)ݵ5C7ʮ[O'KEյVjʪiu:.&.yS e@ 8\ ̽%gLIl=8MaxZZu A?A,D(Zr<[c2 ~s6f0PEhM򋁳Lu ԥ]ʝU1خ@'ZJ"s먣GigBAE7;kҰ)֟DFɃ~U9S wX@Fm<-9~ @!_',+{NoN0([eg E9~#lUӋ:!|^$ڵ+"Bm \16C[Y;N).v~8"o GU:Ù;zh!TTj:Ui]NRӃsVF#RnXf|slz r<P lqVڞƅ!+<_VetP޸DwMz² 5n)m8 VUa}Tt4ED\]С}\KȢ&!s2 K=jUZI^\Qt@Nlp~K.4,EUj\2&I:F |H=|*o'Fg N߱'"_Q=d{%2UdlsZ8ݴ9jkxFXq 7y[MC4q_䮶`nR'Z /uiF8N;8xoO#{6͖ۤ&+Uem3_Zqpa!>9x-t"9m>cS~vW><$my7Y:oLwSޗд!g?,|q 矃~CܚUތY#%ӕ2mר*7oݴ\k@+mU-)&!БΌr+X@4ZJRWRgϲWR\[OHǹVa<@4}0G/gjR@os8X. [^ܾ?0  endstream endobj 18 0 obj << /Length 3485 /Filter /FlateDecode >> stream xڭZm۸_(:z)${MqWZz(-ײr(Ji?Z"p8P}M^4J\_.<ʫ(H'm}oV*9 ZEe\ 3B=O|f =,OugGoc,=}y!ZE1'sO8۵~/i%'iOL.{vb7p:ษi,ȋӈ8<Ѣj'P2RAIbu,+Otb xzVsLrJ]4}D%k2blxR}̲E_ c+6hnvv4NߐYM P7ړ=.#ɬƴ3ANkEÒkGX_#ܟ޾Xٍ$$^ QU8&p4+7[xpQaɞ X!3%d'>G|"Y u$%q3qh:R]+f`̖QS *Rӈ=0TeڷQselXU4ЁP'TZ[x&gvZ̰3VxhٓY5.]M4T6b}]A8oV 󾑐F4=|!"3qT^y΂r+ -ajYA%QB!o a=+aa>RԜV{O׊cX(kR+8h+ӊ)m͍n}hۀ(YeHԿaFռ(*·ɘg(AzسUuד.A?{kJ{xN5^ **oa;BYEn x$A/5#qL7V )GNӴu8UPm'"đi_ʩ*`HjADC!;؈kha{`E'< 1;`+sc*;"!̻` mЌBH9䤭| ,vް9,k\CvH<0̵w7$\ܹiQihv8,;=ۋv52^-'`#eʽK ?_g9}:JHiPU}0:r+g@bZnQH4@Gع`w3Vn㪩t645:^ҕՁ SUu1*SdFefqb=Ɋ 2JRd~ T=(ɳڕ3*Ūx$-{"kIE-ڢ{,}8F AiZcj-qM@m-pBL.D@EqNƦK +dНF,Kr;tyA?\ }9v顰<qz8Eo'w3εlӍ@R9X0"<h 7dk1FC52lR)Kvrڙ/%?&]&T lBʑ= q[d<-, ݺCIdZ*nReͶ^oV&kEsoY ٔv%} q_Y0Ggz_ t"Xd[Rd+a7ZsjXrE sJ>J_"\e4}Sf0{6u\eRPgQ%Ti2v6T! 3} |֞_<"A`bJ vFiyZ:񨟳SƝM,9‚G~smŪbAxjE/6gߦQ;a5.(W?ڿ+g hnޫnsՃsqf;,|[AAG~D4ˆLLo\1Co JZQN5iL* Z~$A̢Mq9d[.XeP{j=@l\5RxuAC>#q4*bW"C)#pJ5 XWeY'v}6_]*ED;h 2zm*+I{+JP@^ /vpO$ ss!A*>ZSmC gjTjy /q SA_7w.(㦆Eݰ|Ouvu[?U;/+]!VMBBpa&Ou5Kſg[n[4ZݎxC>jtRFitТ^}}_h endstream endobj 21 0 obj << /Length 3243 /Filter /FlateDecode >> stream xڵZɎ)>If8p=  h,(yy[-\-/9uU_Qf}JUY5W|O |dY>{:_:-QE{)Kj#NcyU|P**ӔOZR 7N*G(вOV)Oc=uϐe_ 79ޒBi -;WrZ[P2O>mC,UG:FUW+.JY_R%p L0 I{=f@Q'v" Q5qe[ ,gE4CZyQ;6.1"1:ɢ0 B\3Iޖ-Zvc*IȵCg5GpFS5[48Mv⹡PME©2ʍ·. 
XkV~UݖX&ʼsyͪ[`uTe+Us;؉2QT~U8S0ҵ;klބ%t]hevΒ;17oDh> Dy0zQ.{fU wcz;#/np +IH>`ь5qe?R[vR"W(LTeйFמ$,q\I!W ?yRF.x:H_FaЀ[/lHp 5 O|i)&^#{I'+K/kVRI]znC39+(HSm"ڴ3OAJ|[w*gG`Zn+]Smd Xr탴_E͏{ʙg~CP;='Ay:f>z)7Dc迲u" y#wy+[z9}ԢRUϱ?Q+WPS†-nlJh\Kb+^Q]ŶgM9 9[mhy̖%y'0yT"y g#*xNHmꌨ-Kabz0=7" z~L4oXR? ـ;'+Nc<\ kHp?o# b{xR6]2>: =ۮ{6+J`m|rQ-[$r]dAy t|@euaz(#:s  ?*q#*9 S> <Ř8vYWs <14–gbwSp=s?gCĞILPkϸ|HTB KvjqF= UNW؞6Ҕ XMXoQ=d# YL=Ve'Q<ޓQ2tJOEJ0%&,?#8`fNMoH{% )p8췸tF[Z9𕫾I@3nP/S40y@&CCwXU GB~ٺ#d}62n,?h8-Jia&О$"jHQ'㙄pV{rm;$qjD%ʮiW]uFF9@7%L9CS!w Q#9 #id܄ixGO_.:x{4Qnz4PɏE$ /CȢ]R͑H]C5iϋtϣe D(9#Y0Uȣ\ b0Kh#i?9Y[b#[lw)W_ ن2L1&/T%wnabEqiB!6/qKJ%DGhΟGu҅I% o2֡~cJQ8q yL! ^4rȴ\t"]*+UVEV{2+=KȞ2{`k 2^م*V2DeNCnW\cEԵ9ꭐҹ߆[HaW.IWؓ菋8\Dk[֎ ]WYWgw^2N:(L[C葳 0)Z<8S!U>5.Ql}R/㨜3 01 \W1k\k@jVH9 Ptf -C+Xilil_ ,qCBe4FZFGI ].W/ݳHnZo|vV4,Nbtwe禇s撖“ro.?u PDѯ ymxrUbܗ&)&7_^r-(nZYz:zw*j:>J0dsy6k9>&ӴV_j[hecfVsյg7Uy+'W48`:(4kq&DvoT\.qGFLiu &M)4SA>q-}0Feޥ-*KToᗨTLU vH&qFbϽ7-ۓգd8ؿ][_()c֌$ r[7Le z @mð}ߊJX%+l:rhq70ga}e>Ls~W`]Yo٫yaZj̽u|g h 쫀> stream xڵZYs~ׯ`%@E^,Ǒ9Wj$" t|kwAAG;3׽g/QMgWgEƜUUrc.gg?O܌Vc=z'^75k&[&n'|P+3+/?egUSǓh420 )n% C7pmKO Lpg< oi-e2XEa&:_?7#a^oeEj`(avA1ZaOyrQE-NF>ˏ1Qh a-LG{tOߍrԮyߑ_SyZ{\a1EnpK:8?-[(2p3=E6(oH|!k2NZ2 0 Ibr[a*>\"nB 5J= 䙪q[ޝ gH?l.2~I@5_q3e\K3nxNl>@up,8yEE6)+q-! nJ9X YL`#Jڋ `]s0W|GꌬHoIhf T`Dƪ5]"oEa=F͋et-Ƭҥ}ۅ\ݳܱZ3vi*F8Y![gL) ,``I6q -k#5 wW'㡆GBZKiEVw}ϫ6:L͇i:Ɋ un)=tLĜ,H$951M 2MمdG(P 3=7:7;20dD߮J̇/#s"Hgяp?S&vC9P*4*lX9H)n"kgwe7eBW|`I^t.Qy 9fu?XE~vɜ[cPLB1}gd[*Iԁ>1fn^Ģz*r@:UidץzԒEpbkMā)c3h=a%Dp>Y7&*Pn^]9jU 7(V طȸıߘv-A}hN^S᥉0MPYt`wY%E䥪sZ㰨q>0e֥;~9r҃฾*7{ xBUY yƔN_R;} )\[Pj{Cp!LY]ݶT,LՕ> UAuˉ)0j 8)#zNЉݨYf])%MO5 ɼQ5CĦ8RܚbM{M{0$›Ҍn۝Kk&B34||ҺPehŕZ:l"8cc:*Nd}&uJ{ܗnS2ݮͿ$Dvwn eJ+ x[S RGLNhW2%_K#8wH%Ba\ p !Xk>yRi> ^l2 X x/}KeLbJ؂1u3Z ]žsC!puVQ~>OxtGv?Pmڵ aXf'"+H ~h`,T }ͷa6? 
XQ)0# }]a,Nv]MYqcI3ylݤP3J#~7H鳇Ѕt&i4s/+9At[NlDgtmtF«^ף W8?+isZmE@}QC+Umn!Wu*GUG?˅NF׼"/5mU]FcvC;3 .P\deH}w5.4vZ xG5Z+*ݐ>˱((Rk&u$TsO~DEE_?̃Z![?O&2||yίw.*pRT噜׉3{kav~%;~.4NE98f`/Q)6T҅B$4l;hclsƶKoW^s礽\$T(3w5|OX+ / ~ rBL1Ap- I[u?c'J|?"}[@PqǼR8쇱ôk0@1i9ag{NX%E.TvRCMb :U5Q@y!Q(rP2[[Xpsie5a|m5)m@i^MwrJ4}&}t!w'y#:ײΝB o o>B r*s@4X|+: endstream endobj 37 0 obj << /Length1 1482 /Length2 7344 /Length3 0 /Length 8337 /Filter /FlateDecode >> stream xڍT6,) !0t Hwt7  14(ҍt"! {ϽYkf?oym=nY[ D Gryr@>? > ma1CpȻA@{y<`($+&PyBm<5"pq; fEEd!nP0! ;A0 }"rvAKsH. & 9CbƃweC!@nCp[~s:@+X.g]߅? 0@;( RRAz# @qAa ?J:=鹃ݠ.Hww(7EeOYn+pv8SA:^pnk QU;ބ=  7؁wy}=? $jsyBH7H?p@-@pT7Ck^^8?W]MYT/!~Bn~A>~{EAnp;@n_{-ߵ4#rs>A>YRMې q0E끼 7!`SE@n!Bݕ[m(V1FC?*n  tp r?7"=`B x??= } xAMxm^?( u> x"qps?z'/@!`X<Ա>VƋ{sTre(oޭa*U3ԁuES[Ƈuڮou'6pƊwe>b?֗o֊ҩƒ!B]HrAٻMoo޼51*f)LɈ M8}z6E7vGȉ+Pg?r>{3)%-)?45YҘU\IR1>צt T$ag c' X~a,v5LV9/.ohQl9; :9y /d>!J6V(Y͵ZGaF@h)8Z5c}k.` V%Ylʵ ҢoYS4[-;x.,1;oy$zlYaزү-ߘ~u68|IBptj%ӕ^"r~wX:/ NJk!8tP7ڎiܠF$ktaz f}ucI"TcԠ[zQpڌsRX~0Md(~a&@6/w7Յ uL٘@VӾtJh,':S@"pir6z,3rO .EÖ:,>Yp%#r'8FU"C{;P}Ɨ&_)Iv##9+vr+6?ӜtV,E3lXy<Ddw!18:jfғ rL6>{X2VSqʰ\+VYgOn#P_vK5-LyD'~0vH/_b.~Jpp&ЭP܅u4Wʣ؁]fͬUع|GɨĦ_P %ǁ]>D{.YZK(bsy[Uc!̍,ҝ:~D q06CrW$f$>B1簻Jݶ˼qnbAٷ!ӑ9$fمLD2F8q.Ẑί?G, jt7 Iqp _y&9UdSmhʭO=NYm^g|H?cQ]J-SW{4H9ఏB?L5/JK̏nH{,e~d:}/CI $r&;\\s# 1髐r7^M,{gOǞ{QuzӬSR뒇کǞ@c==;zT]fi-m+4ű[ZivcoA^4KS̵;uSUv4A.ˆ&^P` SF%;p*&UNmyw0 x~WV|l >w+W2 CAA>L~#Lْ9L{t8+-%ӣQ0h)/ v'a\-ƿUI- Ú-B9OCźȮB<3ߋ۷Hbe #}+}mK[o$=a `N=_ŝ>xNg4,=QA+ڕ@Y)У8uKaCHT|.8ZU{lχ}nU.r+(xBA%%4=ח.+{ᚩMj T1M>a.G1SZ l*7c']Qm ;"FE>"9OФ􏓲 Jqb@fz` cD1Ӎn8_? ǕݓJŴ"q;ZuO+/NwG =t^k vS(*bq_/-,b3%0u4P^H_)W8 *nŒ_4oS! 
2u}SgCuC6/vQBeH,Xbygp%ty_~+-eBiJRE░- `)\#q5Lq|vrhw@K 7}hkcԛM_Cq4eQ:4sg )] \ZE٥Aڅ9_-7 g'&FG}Ͷ/¸ZD '_N`d4W tRMVygɰߦb3̇=+T704-ɽsnLVrVYԖTg ,W=T<3'Ώ2#0(9&gDWמƾL:F cy^;VP4EU>kEm%}t^›箹\oL[ܒ>qnVύRQ o0%99Eu!vSD+vi~(U"G8gG6=v SXm ׄXyl\9ʲ|ϞkSC0y806G' i@obK@5S,y<8aLB&J!53ya](#!+PCaS۶; OKD0#Xm~TB( `US:q&_0WC/zf}|ǔI?{ ^\Ȳ?K|8Uci$6s&|]j7hwmfkHjmEɈ4*6tvSp\X9.x%xZ[UQa^C`@]TYRŚtOQ"/Z- W==.KꆞqK+/Mea8K5kqD>f]Bʙ@F}=ɘt39WVDפVFCBſΒ%z<-tV{cW_S?h& xvgřThD]Ѓw{ߤX}k ,SoFpxNv2OHCM KUQ4HSS/}/6Y&rːdhT aܑ|׷-[3AeKTs珩Ux S<]I >5xvd\ocʃO%QLXIe*}WfκJ;xQ6'b4?ѩJ|78崅-Z('IŨ+Z`H44{X,MdX|CUmw)Yoh`uϰVT?uM7*&hn[{IcןCM.]sԏFiŏgxqvͩn&_OT~gR?Xݱ8h_afomAN e6}Zs࣮դ[cG Vop0)2/֞P6CCM}|cu>N̑:'aGHC1ʐ$}uXwd{Qاb*VT0\ܒbou>!v9`ndDrƥW@Wq-A_J3 g3YlbRaLaa7/ZDEǼ'$:EsA-u|w[765:ҫL.DIQi6e_Scا޾9upꝜx:Zf-͟ڸk!DSMG|g{z[%@UlS$*%+͔!Ryb(M3{Y B[_1?2g/=ҒxmjX"1,&}CCloC5Z' FbEj io?W^*^gG '֜`Q'f~\%qh[cPKLv:'8cr86^d& RաJUG)t;^e*Aޝ$7BW OY|ʻhI4?dgHl_tZ}7]wLO(~RaT+tZ1i~,CcZNeyjh}<ڕw+|(5X Ӣģw6P\2􏱔rC,+Y#& ̺to2/ʙ-=W;:FL<Զ|{A,f3g0_OhIhC3W endstream endobj 39 0 obj << /Length1 1695 /Length2 11702 /Length3 0 /Length 12779 /Filter /FlateDecode >> stream xڍP" 5H!Xpw $w~rꪯט>sRiH;d!,)I=v;;+;;'2-b Vv3yI!*EW[WO.CgA4 lPa(:؃\i=V<9|$@`3=@ٽf44 @Yv.Ζ w0 r9 PځnevK`q:[ xTPse3pr7 43ss{-`[2+ ڛauqx@W?Kd%qau#a^Y\dqA>i3=6]ٴN m^E,A;;;/?yY@kގ6@`  8|_0A K=?_ Wqd0s{[12-W))fp88x|]ǿ|-zO)o \󰛽~8?O,#+oE2[2*`MuA)@ aoikȂ=@j`_tKǢقAj.? ;ѽn?Uߔ2fl넁@O!"7:li@mMPo{y4ӘnC9?0Yx(QODʢ%S6ɕ]-ֽOΣ|qN5c KvǒyYsT2DF3W׳9/qLȾG\Ek漖+8]h `Ǧ% ~xG ?CDF\"܉Cӕnlc6 {KQB[*gPE=\jE {&ӌ׃6gMutC(;aG~-ёxVSKZf\3WFc%۠%:ԧbg"؇O6`ְpzTk5lX{4i!9|Jh;fV$D?RL]eõs!ՆѝR7;$C}8.[T?>>iDGF9Le~?tMZ(o`f6l]NAl߆( Q覇T+,'rݪBN\j4CcA']d_e3od҅H֦YUtfkLg6!ݔ) ]Ӕԝ> +Ḣ LVJi+w4ʐxUt3UkQ)QQ!NcK."% 1NotʢInshS>r26@/v1BsB{ Ƙ鲜Tsylq[fF=Z%k! 
>h-8-0 ɁXXԣv+mm4>/Z FSz%^Yq*}n9%,/LmdkQP>VXX }6b= 뱄ىb[99N!FaTB,n{[jCzdprK-=';sC_Eix >jDP4G;Gc~\W('4b\z[RL7[|ƙI DtnPzn&Ɇ{8sM*c5:õd}(\G8&e x8f]~~c 8ͻLyo~®pHR NCSgu }NBW׀Ղ _}xcLÓRCUQIv@&j"IsT=s5SbHǷQÜ,*+j؊UW_{ TA q2H~7nG "^iL{b_PV"nTlE&~4}'^( t-]{^X8g ǣ&дŵGtm9T%˫+q[ Ϳ M|5@̶ltKSJ9lrt)1H!̒є^wؑo6F8/\=m]]mm_3rFj|X} Hm7h2Rm\9pV 侒 JʻRcU=RNԍZVVKs6c҇_y} O$/Bqu ߢD*N8YBos 0a+zQw!|(\*˞^;bg_w5p$[SEGna:sV6MUpko(aޜ2w*pxsg :X55\Fx>צܾyB_gJdS!ᢗ)& Nw{5ҭilqAVrpc-3#wy`> ETSS¥0Œsz03".qu9!rSD(BwgRyZQ 8Y_P=0<)7mtF5BPnZ Wo?Wꚻh+܊Ϧa¥Ǚ+/@ mNIMo{g0AN>=8ul_3ny\уn*IP=J6eLLͧ|kdy-5a 5ok"GL:9s,oѶEZDYM+_%(% >0 R'oPm )ڱ¬LN4}AHSCaΥС07z I"~&Rnl LYuXYB%&j /*k[kSY_ XoXK`*,~P.dTx(M%jGrOUNCungC!]xĢ)(ɱ3_&2NN #9MG>)/% &X.09Affo.ȃQA7UD Wns!Nt޵1-WЩW[k_G{>Y~ u2L&{9rS؉J"i('Qz gXPG^wEbA8#}I_ضs rN ut{ĜDd_J[z5F31L}U/mY,u_c,JJ84XLx\[&GIP(`GLE˜$uV﷡1ɚ*iaR&M 71V7kOo0b"7~bK)=Wƻ4Sil`7Q%%?@l }dz1 5Ʃd0kB}<'O]ձ*l:ݸuf st['K{p>C?`ͽ4r@J2z @VerWNNOU1E=VЇK2r&+?J7Mgks>}b,%BB'Zw0AkMhVcDUj~0lyH;\~dK9\  ?6|zj.56't}H.QSP8ЯO-OWV: riŀP.G |P)N %-rMMx*]j~WY*}K{s )NF>fl1ĉIܝ0{%j֊䖯x zfYj7?¿r yfhOƹ3t8)W"6bj>6G\|z*[]9 wovr5Xlsf&A NxtG)*n xw}:B0ORh?2HC gU[pk\s~kiCCt&0"AsepZ_\_!rYmjfaI{3#!&ΧlǪ wqWRRcNIdxƬ~[8_p05kbJ~K?l*L1V6>zQF|f$RySόl5iռ϶v3N RZ; ā\ }ȹ(Ъ|{|osm+5x1ލb}pu!'&+MQY*Ͳ+c29z&lwؠE7aw]y% q,캶HՖ~v!+T\{K(͖څ#GɊnE N"!#XtghPIВjPX~e5nuFL<,B⏑VI9z5BxfH X5ξ%V ֈ\!vGvUe8e м&n0;E2~ J\z߰QooS!DVkCP\*8>uˠ2Z<c~%OFKӾ_|Ta0PYߓR=jH]KXs(fLAm=G`!# LSϣgTM§c-sl_+ 2,<6W b{py4Ѩr0 PT\0o|[1 v_S]u4qR9ne8X2kTyd3T؅m{PҘ""Fl!-fkRV4C3\yrFfu3goCAtMuqUW,K`EnKA?)~ a$%,?Qst/Fh {4UCeRid\ІwI1wAK8y`Q m 3xbA#.sn{mDzݡ;Ϩ.\ <晥 Qc.{HD܈ܭWOhb<5D/4 .OYԟ+TOtcocBX{,xwzqVʉDƭ~RFO[3TR&aOOcÜޗfXAQxHz-PIޝ_~`!eev+UEl6k j%Z.6ᩣOCf9R{RF๩ܩ2s: *(znst <ŋmQ72!{Boȳ-d '_?1C6Fhq{糾0RarUA3 % ΟrFb5$6Lhri\SMlV8nTKJo%P^2$dRr̫K:5kj/A5|2@/*RP]sL]K}B¯Xģ4e۠~3A]QqA|b<ƼcC rXS.oAonc?t'ZgW!.2iJ2sm1x1\(Ip$iF9SKߩct1We{[?E/o~_ 7’7`/C{'0GAK'bӾІl_8sFTiQ  Ū?GHA5jN@ժL172ۇFtzIeT9q-lU&Rλ5UܯRgC.} ._YOaiV@^LHII PER6JmiD~}mdry,* u2xù_?aHZKʄk,E rL1>}{ ݺ'䯳h$[U#[G0IkSқ)x m[<s9)g}m LUIh˘uD%D50#NYsUrh,1\ _nhS'{ 9\UKg\ 6`U 
+7$~H޾~oPΥ$Ք[`MV^/ducU4eel8@uCwVhO9}XQJס6ȺSoF\ܶXd%uة} ꒵9<[< '-55\ x*ٸ ,DYDW:X%k0q&?|[.nM~x:@j H3urPUEQz!"/8lI4p )J摞\3 Stt9~+0]i>1qhjQ]חyĂ,Cq圗Kg#3bvUV.Ro [M-2;-iGhE"Yz{ߨ$̸%D3*7gm5*@WgX{C^֔c &7&gOJPBy޿9sVyc)ژ!)f'\q YP`qv'Y3#%6Ჩ):-  36_$vԝՠ؉~P@oB,'|д}x'âYGڟtY!h 'MkY5l+ȸھ!)[#Zvs âcCI`VH8`<@H9{fDm2N15ycolVe󢉠"qAQtSuE,kJ|mY4C_ \>JÝL YlVtH#ҽDRbs<1([*k l]0,ߦ փ>wd+y%2é0Q N!ZB,J)xmO-mÚn+IqhMhb"Pk+Rdp71*Nƌmhņ 3#GP &q1WfJQǪg +uQoֹFt[݊dtԲ, -#sNTd`vEn \2YBl8roM[Dox+5SF廸P2[QD#JcEV.zULZq_hoMGi̐?}uѯ[91f9e7H>I=:HLP$ 9@Ga̓#fi%9R5=yQmR<]8T=p@/)nUU2&uu{}/6Eܻ/^TgjW֕GϺ^Ol[[63cGUD- gtoljHc߯ʕסa;s%.*._U%80J.:Uz(8G[5.Z욏m&̘FMt"FE=6j̄%-AURF!4Q-ڞdZX,/~f X_clhV إ3M>}B.~q6̯?}P2w77x0UTej a F G˭EtQ\ўCdRu?kVNUSr3ʩmt$:ўۨMIsԥ:NEAdX(+8sFyGsډu%0@]1l2yf [QIWZ>% P#@=ˬJcjv8ó<^\pM{i4"z+m]zUȎB4z{ӽw6K6@paqfr3æN&uTAo;^8O )4[f҇v4i2ũK T.oa̺aW/+a#4LDѳj dO"Oӷ_ yC[h$ Zj\#kQS~+GNGJ 1JEܣoiy6)VQMmOk}#DV,I,/7F+V8?~sK؁Ux&:#y@wLB̢0iY4u8%&miR_?`ˋJc[!b_^LV')Ml2)E*T`|k[sc &23 I(7F:6P%/܌ŏvV#DP>v[Gde)-Ϣ[U7A8`O]nu"P[kc)؋hy &g(k &5}1E v] [T bx<@P'z T%K/Amsioމ}hYUBb@$IgFe @izԷ_v9E/|ǽΔ:*5Ceٓ6OXOh(ئ5쐊][a8LnVD  Kwgw"j !k{I@ >r]Hh.g:Q:Cw]fi!ߑe!$v7ٔIK63Shɻ4zGa8]EyLƜ8~{F-_\ 6P;Ȼn@xV=FG8l4kvogH/cci>rC[޼X;} y#G2QThiY^dЍ8,#ƥ%f9icA ΄\rEm c0M+K T.oSػB#*n2[ϛ.+|w`#S1G)X,\3jk&;c'$78irNe]r,Q\ 9_=%lRʊKhj៴<}+c넮S֚' .DŽuex*%Rijg6Mh#FCk8fl`]~Jo=ץx5,Z.olF{1ItrI,В(Pbe uiKh(169l>Yq|Mc`n;lf?3_4w~W 9S4~Er9A:d_;L gw.1V6wFdm]onpݗI + kk߁⣗]Nss\:]TBnG,%OfT)0B sXZZX{h ͏'9<`y;tOqa_=Ml8*|NS 6r~r8lɑ ߶[964 >#7+_XwJWs+,zGE~@jU)/q+(ś]hʈ%~쵣3g!Тqٕ69S™kU?$j9@nX$0{q׻C"MXvOӐhdOdi+vI]E$ ڔ瘜ruT?hOd7)k֭V쒣/xW<{?FZ6v*24H) l<˯jXNMd$+I' ܿ]tcq2,rO!rI"蕆$; у鴄]{3OǘiPb'ݖ&'7:k,^aޑ.|l,͕;_W&FvN} ],"P'vcNEL|tj![)U+}<{mz!TLim\QwosXFR e,g |+9 U(3oUdDK v̵ endstream endobj 41 0 obj << /Length1 1674 /Length2 9705 /Length3 0 /Length 10778 /Filter /FlateDecode >> stream xڍT6LwI#54 ] !!=0 1ݍ t7]ҍt9soZ<;>N]S `spxD/U>.^,&&m?v,&]WK8h<8v (( '. A. 
vy VPDDt=* kcE @ q/ qkGQnnwww.3JuhB!p7{d*h\XLmk-Kwx4A- 0Wg 8řj{F4, t\~'C,ݓ˵9ü,01:0+DA毘G?6+ @GHPq@<,t6?| <~ay; +ߎFX@ pC0?áCG.H>FVXX)?: ;8C?-N p~.wIY @p8QJo:!spyL< tcQA!ߦ?0[o$An#Hz=VլC`e/p6w]CetQʨmV^愒c*A!K@ȼgOí.թE'Kg"iW&*Ur>H.E.RYjTm o !pr ՠAd` &+T_w?Mmmr,dѝ1:Qۣm{:+%yfgJSt]iY5edLĕ1vJ}{(F>Ϫպ4v.hfHfb)G}6ڤؗ{3gɄ _žΫBw`] Qa6Zqw$en)eOY҃ ģVf?{=AdH]&C$-RН G{h4mj`AmcA^8`dK{u*LjR'8cym(6Y 1em`Jem0w^JL=E0bO# Jx;9#oWXR+8aNˊbFXT#D"h83ÎSkAwo)| Unc/ 70fآ|RY* _T+'j~17}fL`=#8qMW(,;r"<ٴ~ ^EBuAMd[ 4(MyTj:_ѭ *O<7;3]Sļ ).[˹U,Ye1^(K6~5aO5@Y|O+M/L_^cq̴v52dm4" {q,jy f 1R%jTM{|ɝ@xΒazV}RZ&T~JB 0㯔$ln=E.JެKBdmO!_ 2BC8!/|*Q)ӪnxeY\>2)/*YUnoXTgu9!p3Wm[;Rg^}Y|ֵەخ|٫B9zI6"7x9$2ݐ! 4I3-CQEX0HF9BLkCPJc:R!έ`ᯌ?!S l[[&dbW㨻S6_N2,ݗ掜N:f_;H1 {9,jsּ fzW5#k'Iw T43S~SJR$?yPIX$D zM1Q;Y;)[#T ?2nϻW$_]%=uޠzqcO]Ai[%Wln;!AbBS3{ {dž LRDSZ5^poj(YI{%(QJted$Q'lՇzS$wv,4ö`t(l;g"Q₩qhP!&Ů >?ByJEdZ觕彂U:j Sghs C=xc]x{X !9|hY3n bǛ|Qқ-? ;CʋQ}wšC1$+,vúQٹsB9r;UQ3H,qSDd1D#F *mn&boSί#eoeAF6bMBHq%Yg8O4W/!xxw#w YYU~"qïBBLiܼ *ݑǤ|^oŮ^, +"z!lwHoƌ4et1 Ųϙŗ7c{uTQ{nr*[\DrZQO`yz97{.k7 KWqm>]w'ehj\]pO!A6b{[ed9GaGYPMp*eg?iFE ^鳛vj2b%Ü f t@]0LVZCyX=}TA5&| l詎>x`ZY ZexDСtX82E_6(4tGf%NH P '^o6ڶ*Zptwlehs~t*&$ljKRG! 
ثǾR-f>gUsrAnEcf{IKW훃T-d zCك/XFhV ib:޲[}N>Q|^ݩX_{Eb+~([xv#(D smR6;eJU:y젉P.υ6z.Op2ɡ=|X G궫'HỆciQPoe/HѡFgگuHwѵ5Q MqI p'ϗ)ђhIĸdGW =IBzzG(Sd7ہ.'/܌UNM';6Ȃ~zqZcVsաDW:"/j[)[>g6Oe;Qb\Ą-\fF:&G *ڑ4'>vТ['CvaM/Y勊Qe/DWʾ hĸX5tx1WOѪ^9k4i ~`Xo=]EhP5a&>uBFcHME7wYf=١i$&-Ƞ"1+\e])'^]&N37F<b461g՛Ӵ >t'3#B.!,2f 5g^mg~N4*p}J'FfHY8ȋjE_7hQFES%*v%(ogx<="::^ѭK<{nn:h3F)O |qjNJ}ruK]q( EvÚFu N0NPי!ѻ*.t73Z+,fDg'WۻAH r.,-6>%l4,| XPGGS}ry)x͈:_1b<|JLJ#扆A~zR.WW{;=D{0eY+הl>aNQֵQV -WCKi?;gY ?Cn#kKW%ʫαM!d yH8|{poKyϿr2s%7pͻ[k}Z' ;/YWV+j;%=okM*I[r[/0WNN5<nOIcWreZe"k qF!MWh#)r2 J|q D^'nl_MSH~T,&idX{6SИLXnJ3XBX;)]2ZG3ASQh nG*51 }S@{X8438EKul*BZW\f80 W>i3}# pZ^!Yu>DRTLx=.P<[m߳K8\yc#QS@eH}MS b1,爏[w?{r}8GIѭơzSgZ(-G˞ -:@YNnM~vG8rbrM"& εyfjMD ܣ,̬Vՙf 'u腍0,ƕ,R8Ę ԸzRgqk9PxĬ1w/R'ӤD1C.mNp Ty8_LF`pa$h|͠ZU* sk cfkUJ 4t%u y|`eHbdM=7UҸ?mȞZn]lOTpT+?(n4BûVL=2|$` ,2Ѣ7vyp?=Lw)`Xy>iD_HR5E`v =孉i^|Kn*0蛁BJ[?;QD!cZpygה>=klA 2hЉ10b/J R 6?Q~ )'/t)Ymm=¢}2~a4vv)JE_ w퓲Jܒ.5"nT-9p"L;^X ,z?#qh2R(+V"I򍽩`*󡡏O*8+'?K+ᕼOdt%9uF?cKֲ!>YplB-K↷#Ä`K| lNuF6>A:JCr͉̂I\@hinE, 7N< ER085!{Q_ 1y{Reǁݹt1x_XPf4RHwuMm]Y!9X`TZ6-eF|+O9WiVGR.)L}wk*~?PO1mq1XcԨvߔ|Ir@Cv$ }^h^? G(bY,oyZ V^DzY$P X܆{J9&3]oưңem8չVK \Gqc(lCN_\|NIBy`kns*D%vڒ_:i-("N)mHP; Z0y|1}F֏\ T.8MID<ػkArdOcZ4£zq W6%Xh_7AjBpp-?TzѤP7z9[)Olfe+Z!%D ݎPWؿM!J|ӱa.l0vVۅ!z&*_o]֚uB[S<_QWM"dkp8rDn 񿧕7/Ucok,\o芥}., endstream endobj 43 0 obj << /Length1 1451 /Length2 7129 /Length3 0 /Length 8104 /Filter /FlateDecode >> stream xڍvT? 
"!086`FH#%4Jt# RH/glϽsss3V&]9(Dxy$ Z  |>>A^>>"VVC8 G"$nt` R {8"||>>H7 " xH̝UG\rv90FÜonHWv){E;"9p=@sCà_Eΰj%b mQ`7F7>( ps=@MC ;~^+ ]o8` wt5yQ^(n;#oh0 ls;,ߔg7 ʝH07}VB@0ʝW~p7@z"|mB.@#'FE ` Coo#/M.HM!0-Pn04["@fGF C `w@~߯'A'࿧ T5V濭H//G@'* 9;.g᪆E~M' \i#o CvK>a>]L''NBn끺Y-B jc`P[PC9K2 Յ pѯ}s#`Hw'_%8<#7v8)a7;$ nn`oH7DR(7@^u)`t#5eQPI[@$1t nn7&Mɿ !H:T6Vy I ?&@u)$d(VW~Т-z0uZX-tèihc̉Dߓ̟H0rL$ ܺ0tZ ev]~:=( EÚ[+ _iR7YG5\>~cƬ!|FYA;GI^"W콯(_a<{2vFBvUMcp/^HXvp)?-ăx1}5$,p&QV޿al@q5S +\PQ["ɹ#a*ĎOU?˥ƖT峗Bm.8N:ľj{7w^|spFEF_/^J⧦EߺdO` ,c9PRнS`ґ"Ǫ/dʊ= 蚾RP_I~s0:Z*jOrWÉQjB˫ECtTY)؋lJ;~֤Ex?M{'9MI{t3Uhm߈rO^k9IA2IR3 S]=ҩޟ־ŴC5u,3r~ DA2}j.yLe_'8V!y]skoVD,-Ӈf[vƋCdPp83Uۀ$5=;fsd߿ػ\iډ|!~ɢ G( BrD֪ΎUU޷7 I;+Z6BP1sY'6|GB{I }<Ӧy&UztBmРh*lA8&1~2ԩ^%@Pa ĵḪҲɕ9aޤ'yx^Bk]w6 29%IP`[&B6K立ԣJ@^MCrt%O}c1~:ag,J1ҟt^ާJE3%^b`xҰ|O)%zngRPP0Qs -# rb}" yKga_h ULģFu7 5 a{Wiirpь: r Y`L&3baͻ\S8g\%ͧ ߊV:.ցVvXo="ѫ AǨ}ihF깑fHd؍OJ r;zː wyتg "fNh[]50Ȋᑾ2~ b?u^j B ;V ]=ne*G]I2 r60C tK;cn5- 3' fZvy6OӦ(IK]v"8EПLg8#LY*#^]jpY_!&׳;rr{Xgi*`_GO=!7Ӯv⠻Ǹm۶"ouH@j`oQEq~>` Qi:Qr Eē3d,&P!rGq-cO#4L`X8}Ϟt]"ߑYng ڔImJVd^'ǘ=9׫P#8BJ ;?[%zϠōɟC5,Ẓ`dZDZ;J0t``(.pmInAٶMEH4FKq =<GwI"[K_{u; UHU]j0ӳ]Zuﻏ>zס3t\}UlQJ 3KRM6c{HvvW+y'.lT EpsL!f 1L3+Kʔܑ*5wrEY@؃57ڪ "FrSIRUIJjt )42kMB ɿpDF# c5dP&7!AzSsx a=hup&{:V:v!̈n)xw0#no6W٫2)o7}*qAآ`<`]$c3˕na.6'!8༈K@`g}*%u#I[ޑU#>Ţ!qt$QT]3+FWNGOI|}QawVTx5Ѯ/]oVA} U aQ8jue^S֞Lݵ+N[I>9;Ÿ>wE$,<8'}ht !H0sE+AC2͸Y0GVfϪ/ss>V; Q}kg0L]I8/M?)qɗm`%Ա>2#[w9rܧJw] 35g ?:5wB7D `ubGu&m1IȸFߣ;_#½ }VeIQFl]^{@)) C!F*YQ;'#6T#o,T,]3s1[P%">yM|]x%~IeU4QxP"Ln̠d>c%z.P3|L{BD(*25)0`*m75ifgeI(^Hk8* TGxu Fk8xDj\2x6d>Pr{!tO[Ҙ)fе?( 2s >/V/.ÔgI~++ŏ-X2?ܸFK9U_lqm8k@4@Mc ]i(a".Ng^Ji嶯.:'U|^Ih9 4]DPϰeQ?(|E:#0GEp7L-'/ _Ed ½bwB}k*gޮ/$5~m;i*I3dc0۬4 ?dN~E2*Y ۧ\8WLg@&dG]WӴ]Gldfe̋~LD/9LȝQħ>XlV"{/L䛋~zwL^iQ\IIȯ FjM6o(KΓ?TL֣ż>yּG>Xvwiy 旎,yeFɖ;tu6}zCo1͞ZxI_|SҒۻ.ԓ$in{&8p@_tuiĺmR(8r}bs|7m>M[Sx"D#Ox<.>Q . 
lgFN] 05A`!yH3 /g[ԯw fNyc{(NOE ?}ܥNxl^=Pͫ2MG]aʴ"[›Tpzsc1$qLEԛjn:I> CzAS_].Fvh<7w\gώʲzn;Du:Cc}7:+缏6WYGDz1E^ .H/P835QI:@,A&n* 2*R}W.IDר;P<>;cȒK}YchvaIGOP$tR|#)B.se 5EsOZa,&ˬڹOo !9m J(: ҏo+'ݾ0=lpUNM\Bgs',l~=f}B(Wt0|o\Y#wyuMnq$hYa=^B% eñ!f5#ŹނSXEt1Q}q{ am;:Y9wp1^KDr6M?aIb>~7_aP@PV7ũ8(OEPˊAHs|\0/>9b}tw=~7)LOŰ6O:HSd6M ìEUmi2&MoH5BrjRF^*Y'ce MbPꏥюDspt ? QQR*;b&O sEF9Koƪ,~Iixڣ#B /^zSAeޫw!L7/w꺓Z]''i 9.YѬ#B* ,+uȷ|hC T&1lhyهR#iJyG 6;s[Mvѭ`,~aD;N7m7u(I`z[1l-~_Ď1XLp5xKle@6gaHf6̱w:.JgCڐ qbĭ iӇ<{Aog6}}CWa Hڱ045JO[47 nݵnzYbuP ͞\҉Tl8cV a'%.DU ڭbب=olaJ#/j8GAU(ׯr9ﬣvr2|_KtPc6D ܜ_ mtd#t'MSޟxޮ;0 ؉}A*0^ƹ 2|ӏ[ƌOϧN?1MNt;Χ~8ïY38ƍpzՔýgb{+@bk*) Gi^O= cWh8+&B؇nb]rXE?9N}eΦz/;s}xznJ+|ڵP̋tP#g3;ߏ> stream xڌT\.LHt P%( "%)]% g^}_Z0]]{TEQ (redeb+XXؙXX))5l\).6 ? ĝ`+NsXXXXl,,c0u(2@@DJqG/g+kW0|ИXyyvmMAESWk=`nt4֮|L.LVB WkU0@OeL k@X`gc=@@g.Pv1VǀooL _l@;;;l@VK; @YJӕ` onjcgj6;sS*\幘;80U"_a]Y;A.'a 4݋ɾ9x|6 ˿psd8e%5ˬNn^v 4f+#o%_bp~>Kp@?K ;S`ac 0ZـG`m<,cOpy6{J*ڲT'`kɸ;Ϳi+ t-M @w,%4܀yvߖ'$fgoGmjocxi\@| @TϡUZظo) F)O?\Sfg8u_:2:\+ >9M) 2w눱qrLMC#N+,Z=^b3tpFk\fѿD no`x̒A,f߈ ,e#0o`V|O?̠~#0o`|O7iF`> ^7~#pA`xd#KboY2[YYH1`N-C0.`r KG<7o^@`,`77A_z758Yj#]-_LGMG2 ;|uu> ' &qn͑oZ%j cRr]=ppg=^@pc' t.s7gpg]{?#.:քUz0 NQheYpns{@KL^sMF[ޒY$y9l oNPmy}2SiA?GQCdW+=tdeJ.Gҥ;\HO??iFMSe2ay%G;^ࣷ~{K'>5$~Oqrg@I C2#>[ZZ}du;; JM؏P]ҀeL]bZHn)zeinW4a`ccp?퇔满.?">wVtK|iD)0P@Y,4E$'0s|=ȑw'پΔק<0 C^C^:R%M?Iv}YVM20Ix(orΟPpJU E{1hkr橒nE1 Xoޔa8hݟ{D }خ颜*-Ux5Ӄ`F~!NjV[CakɃTV`AO@ wp^*PuC [M r:g%hr㶡1Bu\ru[?&(| 8zѩvqTs[_/i#_,#W5=DkUM׎Gވõ` KW ^g2m73DHb+\ jV :u!)$#4!L>q%eQ ~߶M$DT`~U@Km o%0C5X|.CWN2!* fF\n BεLJ6U~>}ul5;t *MxD;x$F?SEuRuNb- ?WjSWMq825S~7^xij/ĔkNͻiƷ͹U+wͣXֈ.tYĸp% B'fhԕgZ5Dŀ+=,c!'&نHLD)OMeJ+KV[#c~'4pt(n_~d*Ѭ;Vn?_[/[|jv4BnWގ5 D(ڼCiu\bi&I$&')—7/l<"~L[5$ Te_N5[~P.e Ww~2(JLM$.NTg`$ ]zRFOHÐmx.ږ 3vr2.SleeE^~JrůUXZm)4-YsudW7@wfPT;-RKrQR/-)zLs<3`qŗُ̖~ki? 
ȤK-]&It5nu{>!NQڎʴF)˼~&1۔ktA 2\&GX+}D-n-ę*Lr\$/@FTvڐ41 EK&s ]%v_;(w fjYjҝڧ)#\X/cFHAևM2NҶyܼ)/W.-̝z9IW g2:P5!]iwmd.ەzh*ADRZ/N8B{"6{C2d (c4;!Q&} Tlgī9\nǒab%ٚY y~ߨtvN8`τA=-2`Dǝ{=+ΨRڍX: n+s43uլG !?GX]R+^k >HCS{yM7e-oeNE-Etmܾvl\MuF; hA VHuEG>d,el6-j՟.r{W㕏EՐ/1աJW߄"tz#KjƩPT*i^kMI-9Ѣ^x~_:kE]pNͷ#گF\'̙^&g9(ch&?G$3=`L swm.LCq1lD"waѡV x_F% cD 582*LR =BʛLj)MsOV:\J<}Ǟ}V(V2 _jm_ t CP=yijts[Td@΃Jc VAWa6Xbis_&5_4/o 'h/Se@Pd>\]v \[敤OBo??c.H% 2 ![RF\%sjBDsAt_l7cC:Y)g˽F@C#^B+Xɞp8m}, 7u#ڞ7C7}o(qAfb|MWж6VlzzkI47,wG{I6*3E>݀*u`G|k4MDğK a"6<:C}Ќߓ3 %_e\pI:jP7aUliVWa]=4C;^΄?U15MIAjI߱ȼ)+N K7uYؠ6&)%PF0a g=FI qȫ[$]Oj0:3֟//&1Ri]B53}uCEj΢q"-= i?B ɲLcßk.zmK[B,]JF ~,H8Œ4ͷnaK^Ta 4T$(-EV:[лv UUnj, ,o-e?=SGbďn0$n"^[_i{//#r $aIZ_TCkW4t޽ ^\6\F8VjYSpSi&Ylow}֕m#>x%: @췻}Z脭0#,%R]T)&A~H.ovoG#9FV wTF+DKCzqzpObT3:(q3R@iwhx`* o|qǢc2~cQNxgc`F=mMN,*3qILoP j剀LMӀצ甛La/>LV*Ur0%UHI OA 9ICos"b*Zo~HV߸L"DWLfeɴ\qO>Fu [5֧N{OHuG56a" 5X \MOYzsўQC s^/TloE6Y>cehːnh+ 7RJNԽXs~h򞠏V7kH-o1;k4co-w$ ^7\+'fzY)fݾQYbtE4NN}z~0fet#f|=ʟ-չXƉ~5RIhoqO`{\N%Z6؃u̍;Ӣ zj/\]vok 42G cI"5Pb2 Mud_g}[Ou*3 ]ne9rSP;HiqSB_9G:ſXT$B>; {nxHХLR^Ma|ݐy2!u/d W!_Ry˞JԪV5 ފSЃ.9s=\!Vo>%.ES0}Q]h)2b[MB3شҭg&,Y惧ػm"LDZGΛ{bȕM:8yS ̠$R1߂vUW*~lfb-Q~{,FMl '(V\]xWUs̗*6Vx+[%4YR(Ȥ:&bHtqVU. TAra{/XuzSD]8ZQ܂,1; J<+5.U;{B0 2/qO3Qy,cb`ٰ͐-}E _>"@+>>֥.*8ɝ.y6ofկ ?`R> j eP\FdiP= z/t񐮐`a)x*,1ϴoS-T \d7Շx32©dJ F'*@mT~{r˂_xV^DhtTKYi-E}-ADd~8ƟN ;@ R AE:]krZ'J{~FxY9r!&WRűWްjPiDL }z1S#Π!{lnVk=1oOVSvo , %1s&VSDGb}8$RJ6@ _JilT!,Aᓵ쉋Oy1?־ѫ q'gY^yOc }iYoCW VoĢO%M>?zDJYˠ@zggR AYIAD68m&I_L靼9a_ "}^*^*ɖxy?>8!S1WY`Ў (H#Shz*1~+ҽQqAo2 u&8?2>6'• b YJ uc6<CmlĂ2YUԀpF 8pdHzs\ڒTok _v޲ jLD<LH5F0$6|M„j0n4޿o0aSӿ.g0y!ގntL_xn 9A|GU.]Y@2իWM3[t4!1ѳΜ{q'.$4G H4Xߕ#b7ND(527{'oϻ.W[l5ɋAqf-]3lIQɏ[܃hҵ3[gy[`pv\揞IȽnPPNrq6=2<,~ǤMZ0ik]Pb,cw-* EջMgaoT_Z]oREJ 'ʃ%0/ ]EoC@>/_grkZ!ưJZ! 
E?Tq^LPVrjzpUԮ "-bva=XVՅV%y %t <α8r̋#SW6y&q-`,c-7"KD>}nHvn8Ó<tZb WCq,IXMzz:cG 67ү&!W8])c=:(-v{N[YoŹ<=G::{/ ]r)d/ˉI*zt TPoșۺ8 P3kߩ2|w ?r< (Yj$x8~e|1 iFNIi zp˝R\az4Rliܧ0ՠ|Dф$ÀOlZh5*=j ~ 6 aGH]2{5Ӿa&`mm'7 v{Zh']ֈ2yP`,ZqoF>{tNw c6  !р9I5pz9] `qɻ7Ac G/s+-šhͬ$#1M&e##:V?{pg"SA# oMtȯXk-\+dce6 VsS㧀Tź$`}O*w[0V%{FZTNtrG+G%'5-en.jcRN"cYe>|r>Da-_)O-R%;bDt#7~7LFR[8ZY3'U=.)ފlp*ɊI=l!i4L GYv.u|]߫!^#8 &g}<կe"kYzN~V@#m 9?=x.ˣiazFmhfFX“dc2@Xg cyeiQgHhC0!i^fs$ԠcV-Kn/G8u7OiقKHGdGENBlY/6T0PkRsxR`bDZ^Q2Qu2T!scS2nUQMOԫ/M;tpR~:tfw(>=]PDY(گf5/"Ah 6@b1([ίh:>2dEnghHԯ]\&=qе8B|UN YOX|%+Kez4L&.;$~qV\yX5w Ʈܪў6+BuYV|gPjQGp*hZHw1.Ad_ňnjS`)n%Yc/gfՠ=tG7Ρi{l"=TM%+̿Clw^ߍJ ^S6}r@luRNDr`r0Op>ۂP5jFEO %\DsOVA*7ˬ9H-H)JX~JyqȢ:W0"0B@msS* !z|wzwuԐVwY~7]i/핰B8r#!|%Ք! PWt{`H'GOg6=hM\Q%LXoh1k|6S#U;cC#%AnCob/_ZhLw\{J j;5$gΥl HRrw`3ծC 6*kf v?@eW)ۢW}@?#YtUxڸ'}=|miZ_nxg Ri7#wPkKeF;#e1I3HwaD:/ye)[q] C׻wC1遲툹e9'$ W\p.ӗ_dEAzZP=;JMF^/_ֿ$(.ş=ƶ.y5$θv |J } *``Q)˕KnqUwQ+r:kA*"_e+jwzu_eX`'"QMۦ) $z+5MƌLڦ3e,x+9 ;oQ+Z !XR;V++'o:mA|Kc>7ǒAP1=;3-19;s|Bܘ{z_Ob~\u\wT$cصO{5@NDft j --w?&pv'Ipڝov|A?Ųf 섰u9[SƟ47%l].(^y -3Z\ty/&b-'J9t:v("ai@0wDΑ ʏbo1xq,h+ [뭢3Nj @F!װ@יPi毩? 
lj"%Le.dUE;p{ƹ׸蘭Ap=Юkwՠ].)ܡlvR14nLrmە bCUȔAB|}XOkuV./OɄּN**:{-H\1b}]@_֪ʧXrGXr(vK{AvK^ɴP+*:1() $'#oltr 04|+^'^kH(W9[4!tjHкoE۾>!VwqφU%^ E*8TFz.ۨYo(^Vs:Ŀhwwn#FUr>C&yQP#7ˠ`-4ڴ@ȎJ˜I{Gę1H|Q0(CŏEo4۽ OքY[Dq<'o+)n$kIXvz|!upi/bt af3|Ă9YfHXJr>F\%M(pۣ2_*G^8`0d ԚJȤ LShۿ5 jŋi Z,8{c/>D([GRsʤIZw+M-d̮H@ K&N>7mJG",>WZ+j=y mfl k{hM4T=ӥ/$nrm)z/-#v^7zFzܖpdQu|.Qj Uz.=|yr+)5Qk}QGpM-@N6O ME&pPjp8CH^%(cm|/T)Aw'Ѓ!SZE]=iC%$"GRIסk<}NL#`K2>ʆʟD(SXdEJ_Ud1gRSºv; ]p31{ph[ty>sBֽ^(8#,{Gߓ1 4Xi+&x9M*p-p/`y1 |fΓ0 ~څ~o0˘:/:S'@6hz7[]*j4G3KKg4T!eRi!<9Zkst@(:e_4[tu|DB@WqG$P+l)U#O4E:11a.[B88/IznO2mkP[|p%B-xnjiRoÐ452S!'^E xYϚH p`$OIv{ )AyiL^OJ`o @nLD)VfabHO4h3N@YѮk♟Dds^S\V?vtWkc` g׵2 bT=dyNlY:Be՛G#s8̾^R``}\_-LUANW;3KaTiyʰCٺ@Q3d`ZLpDI%g7YE9V} %>,GTt&XR0ۭœfm >7s@Fi#Pcf/![[JF]#0gFF!*MQnt`C;4wkΔs0W}SDrJNL01V`7`R @$\c0kt jAp&DCa 2xDoDqml`n)ftj [h@o˸ `:F"SV]r'ɍ_ӂ/&M4T9(йdxI7;vPLؘ[0\n`K"-/XGAmꫥ _KbPЌ˧17:q7i&ڭ]bУ9PF_,\ rR"W Pׄ,2ᙦ7hn0JG,2-wx4ѯ)!Q]/gјګk)Ɂ lni00?SjLs\}tekaXiVF9Y#O@mJevuBN!!(?73iB$+{.;"7`>L_{9VŢ>iM  &Dh¨[%nd~?9bt )X j<H KbnZQ>b+v]9h!KG :+FW^?gP?ʰ-;]L@$eվcA\%JS&9ќj1_c,+雍B2<Ҡ]*)oU1(ʭÉ"Rw9܎'~i ,2qP,ɻMNj+l^XA>!m`&ܶQƑ5O#TVs85MI ͒RjFtɻsk_x6㵿J8#~ؕoCÑ9%Ejy Mj.~x}z}4 LU!bv1ϥ3.wz@uɼ`逞I{s*`S:rrt*:j&G#}f7V!;- % YCw8HGV]^`hL;xddp̶%+kMрhUu_tb һE .8g(iM8rPaNc#Q]KHe lT lU]VX@ӯpf\I#֐CnQ,!Mi^WiKզ%|*OEe>X*c&55 ƻV5H\wnbf}b 2ZMV3Ȏy}mOo02{]'3\JBSx.ŒuF<+^C;&`to`dsW`>: ښô a@Fp50h&r}r?X{{F}/ݪohY@ZnAûC̔8l qm2  IY^7{Od\{qa T``=kEP),`T-E/3̪×q>S} oPƻY2,){7p8I'B]"FMdᔩ WC:H1t6l[AEGK[ ukFGө#N8%Z ZoIYKb[1ȥJzl:4 ),tD'uhUÁ܌o} dAT9DĒVl0S?hrf<!SF3c=_[6H5 tj3F|fv2 -=an:izp$ZϿڼ= PwǪ/b|Yد "" v7ҫIU@7 HC0tK)ҙc3Ln4l $J-ѤX[ A[+B% pUld^%309NځJ@U Vs^iwrXgc'tHE,$IȖL[pG?B"Z._]ԝD`$B=wg"ۿHNm# xo@ωkW'x6` ]JFMi8U@mU¶>(hDz8բhXc p9]'z/7^5ڄ+`W|U hDGM8QO@=lѓ|M>0H&?wѧpC*;+"k33pRRͲQƮ:g;n%xs;IF5!L73O:Yif܃ )w!KvD;96{ )[Ȟ3Cr2&-"Rm!vW=ǹo?JjUS6fϖ4)8Z칠м AD0Ib,H[?jNc'65 GdS+RV(H\4`:2w%d5;OnXP,U(6ܥ`O}%eL,qJM#cɠX MKq׋A<0kݲ8!}+%8lrẳ7O%(ވb&*D$Еt T s H.2jb֌3ڱ}f4HaDn+P 3”^1R\rDU妮@-"iv,L@a)mRaZ'ޫ/-y8\pHdۣ*|7f=QCQ [V 
EL\_R*ljkڟc}QRS\LL5P$]:kׄvUs7^?gugJi4L@b@4b薊tp>yF9$5]~HFb Ƞ,X;#Dw:b^HsszrgYhuևzqKnrՃ0U$"H[akĽ} g⑁E Qa չXr)R5L?ix!&s>Dhtg:(kLR.%f/y(K¤bN[\ћx?ܒ5I*܏WF2y=j|fVeZ{hy8RhJ4qMzWjPb&_`0o6ıJC) ~528sA~QEXnpÄ +#e}J8ĵL-WxAL1 V\TdL 4vfN:Kf2Ѩ735kl:`X#6o_"t{ڂPcg˾pV7n祀nBww@Ө}~@q<yX}">2pJi݆.+Y9brŌ:OLA$@D'*}W=?qyN@5ysc!w>Mn@VIY#ܕܣ -@Nuxoĩtox%V lNRe[?86zL&䊤!zbNa{|`.AQx0~rYV~\ݬϸEK߂)0~h)5FeoID'@!ʍhA;=h@嚏O(L> stream xڍTk6L#]9tw R҂ 0 )݂tJ44Ht ssZ߷fgkfFm]n9%D 4t@ȏ̬E8@c3@P' AB88c(ܡ`@ qfVprCmli `bpsV @8>d9t`E yy=<Q9,zh\Pg++w#o ;y!Çjx?mA1?0ؿCp?>ܨR?'CdW;"<Ѝ?7k?0l O¬D]]Hu')m7> [|ꬠU\/Jl |4`&=oѰ;p~}я~X2͜o=CMؓ`r8wA-o?Fx?f{BߵʘxtG~7UbΧx>6C1潭s ;5cagE%U}4ѱ1C0ڪH+V"^A^HGl6{Έ#\ǖ,+N[ςQm33b2ic]KGMVG޲!5{VN];c`G*3 ݚ֔\ENtnb-41OR2ZqM붿gV{`_G$-/"*z{ӄ>F2.B{VpfXlȴ(Sf+OLb~4+Dc&Kz'^P;(ާ7P6ĵJåBh=yQށ^hd; wXxXьYB.}_;+=_A _ܠ2h7/1q0zFj#4 5(u &FL%6 4$%^(EiBޅE3M覤+_k{,]XaQ"TlI aKMчlH>2v].jRq< \[D`]'%u.gd݌cp%IEesBQ>M V 3L#ȡ33ܞt4/da!D3fO+⤎t|J"&>*QfUB|`,<iθ6^6K%'3hSz$nl#B3݅Dfw2B mۢ^{Ղ]Ri$϶&X  kھǭstR, joB Gѽb}}t brFDT1)QF,aƽ#+"#{DM-G 0`ٖչ&g_XQ"Y"E: qS,-' ϘBT83[L{]m33Q.b,ڶyMQQ$+RUm2I{6j2IMi:O)ۄ:+0f渔i}8GTD#)5uq'LY*Ii}gQ:kBk^j>Z0P<;P hc4qqn~v-Fa53u VAjHN|te0?8N$qFQ+SSdHdÂͱ j!G_fja%U'SIH`p<ƜVYؒEĸEK9RT2װ5!;U?[ @f.&zjCșqVpn.LvƚJ}B={w9B:&=lJrjo_ Gqэʆ\4'&Nxxnq?DLfI? 
giI> {fPZcbȽ+X% 3'ϥqAIF@pB!ypPӦ̣1Cn]"CLq#yS-m`_%i?G$'W50sp]rp\lf7:E$ƌfJ9χMPAA^*}VKgK;NNj˵l3ۥJs8h0YUSU`'GiAS4; ϼ^П`LF<-< ݪ~E0|/u%Sͭ`X"UVT3RK Suj}ȡ!,;>LSOֈ^#m6&F&N/URFj/s<ݐ D"s%)7nUTÿڌUk3'Tuv{Mfu< 2[xεS "@NV2$:VW@K4}X9`2<)M$MLrh)a R-;wј#LS29+df |#ekֈŚi_a:Q9da`:̌ x!ϺÎK]M<%5Qr]D\pًjF𬴙Պkbk) {b q y:dz~(jKAHU/͚a edO֪[L!zYEy٢UsTs11FѨIs׽_"ߜL>ڲH0N9aQRB yoEH?{M6."Rm1V]ᾊo}(k_'*a#>'0xIVH -4pUbď)VQK;CBeq^iVf4HUKxek}%iV؁x|IB^ 4% %-Aلf~ Odpivb1~Jv}ewHٝ`x%닂ˁs㠨\ȈYb1̬~k`Mu&Q(&IAZ:n*}8Hv &,V4]9:`Ee2~9'|bZ2 1Q[xTJLt ~4?z`ډɽWT.'QR}LHnMdj:*i# _FW)/nzr?VHi+bep) = H\"zjutuok%Vգ/lDl<,@@B.>}J%iU 3[0[3Ǜ~tn ЗtWsLVSMw7W}[7+5E|I0fϨ1bBjÇ _͈ud+z8?.;Cd 4Zkj췽H_=}&?)Rp'XUDZMjXS'PJ x sS`~?Yf (]#o[jr15'#h` YrN%dF giΩC1+Vlœ|¦ ҁOǤ>6vL{f6ljlV[s ZhQűA0qsdP.U&oGN- &gA[P<WآܳsB;He3)FeahehWY9OG[xGܱffExFqf$b[v:rԌnQ\}*{3WN$/ƕBv{ IHlQjIHTĹ}Jsz"n&rWvӞ{[EPg'i@.4'̦/T74#RE3 OC2x\Ʋ3ؖ":[ o"0#w)ָyE,P8UCcdYvԈdAx&31z*PW^B\&$yuSx/,[宽NRss[ڔ9N ײI`.gO,=<^6|nݙ!)Rv'7j2gpS0 @g{YК* ZqNF4s4Vmk[î^ a8 0,Ş9`*3i?(\c;>Iq!?s;x6Q"Z4 ucá~_:$IsȍvYK/YhlFߠ篬أFmeZ{YC綉"fƭonͫӀ2ZS6{@t%}΀hsbiiA N % o;27w Y2mMS!JSZ'!nvR ¹oߤ0{P#JTM|y:}M]_#t nL5\r}ˋ:w嫝a&+gȊ~_JϰMżӮa{eǬ]<4,Mݨb萢8)x|m9KOKratP4mKm*lWp7n~]4w|&DEM|@L\f4*=b1Oe1g =Ȯܽκ= uy.Abb+ S)oZV/VY5'?-2Yhwt'"Rٯm䖓X=-՛fc zgqj9$jyuP>Hiљ,sj?pGRRH49!4eHC+-%Yt/B_=[% l-%[Aώ94yܞ|UAm(:%29d,݅\ DdsUQm9`cdf`pQȲtD%4ζ4\g,\)v^ amSJ1 Θ p3:hR[H ن4> MMw4&UQƫeDv /P 2 (HC)o83K3H0gK/:jְp_o߉rK ^Ej?t cc&o )-ї|^Ypu^^z(G u} d.6gHV(wTI&T =PؓhCŒ= /'Nr.+ oHZϦRNI?aO& 8g:wlq9WܬQ8 d4IP43p7R7tXZcN'U߱N.cb-`){w&ɴ?މ)ݦ3G<ΪGc=7h \/sF;}C}ҟ|~ +"m CW*ʣZoʅG]YOzR, }7-2GUi.$ *[OGD^\ɵaأ.)o^,( 7a7*mӪ!nKX (֡;B䊗/yEO8s ~H蟙n\ hZ toKycN{QB0V8#,S3ܽI·k ڒ :X_nJsj*$R;'{*jrWgnIlr=X %+BOqJy߁Fe^A//panʶxp65]^@eG ;گU2`DO k ~'KaMYgIdŘVQv[N& DS;3dE\/5vy ɰΥY<m%2;Åj%bAcv+Lv?1?^`IBM 'Tljm-ꮑqE_eQԚeP޺uhx ]03ELDWqa;H29x ggMJ<H(F :N0q,W j꺞2jo~r5uiLZoGذ>!]f A6|-/-QC8eMQV yyy6UL-^k8vyB'c3VAWjc\>3dļ0̠-EC aO][晩[26'ܿbBN+ZK_5}ٚ&jUe|=仸?IZi*f!o?eXkC]2l:>.NfO B9\œSrA~{H%sLWWs*:R A,]#mbilP;F̗ZrNM_׉!7 :VNd#3XۆףFݓ//4w20&Ppw/v̬)M1A\wT1d&6zh> URt KIc~7:>ӨT҈G#)2zHPMmv 
]7Ӟ|47pIԪvx0ɚ7Ulm4pU(';t)-cD<,F<wTCqYHy9o`-R0̀_W̏n~OY>]QbS^KD'UϦ47y_3]>QN\2`$NVo;Taea{j-f۬1y43%|.*Y#L >^-._ެjHxū̘XEkGƳ hoI4糔Md;htޓ0c}%s(85ooiNnH5;frD&ӹV@ ?aci0 n_^\Vќ$Y,EOu zU=@`aƣa+ĥ[71@o:E97)^%tJIIWZRݘ^NTci oO(D!t`;bm:#xf/:vQ ~tq@Qt*hZF~E.,YA9[ 0_Gn!a9k:_S{Q@kYL>ew6OϪ1_XU}-̖RmgkĬcy¾uEw#H4ƕ(Z匩Cud! ΔAhTR{Yf>gPxq_{s6[ ԾIwJN?H":M ~)#t?.y50YxQunY244\XXZ2;G /OֺVimCŧ <щMǗ* BRhU:<A\|DgY9uJO@2|,߯Wl0ёfzk>?pxe*Rh> stream xڍP\.CN M=-Kp \KBpwydfUթskojjrU &1Ksĕ `e`feeGQ.  $f/2I3;%@ `qYYc4sY 3%>t6>>?b@gdjhah8X^VՑÃޅZr]@Kf?;cFhڀ\k8Xz9/0qypX/rG Oc? wAp6pw4x +PVdvteA,]^@`3?*7H^= g Ea^,pB\]P~' rZ݋ϓ8x@V &,Y '7_&/"d@W+++zZذC[ҁ# / Bv1s\݀V7BacX,\@k/b՟AX?ePguo&;+ (f`Ubؗ)`Οݠw,epܐw i705n/WrxY%%rf/{ =F4h r,ʵ~/:~*&6Vѽl?TR  0sv6Ba}!;e-p qp}q rpF}<ߢ?Eo `XdF|/~fߕX X\п // /_/_/RۿK" K"?MN/=(s!!7bo9_% `-oI^.=z#~Tky0WiCY?0Q/VOL̤)G;7n QՖSPh|<5 ;O#{y0TEdr࿫tbdtH66A:ko#ĉTڼ.,Q٠ܞҗH8!׬Hs˔y}]"q#|zxvKgv3psf}3D#k`$܉]ϏrV"j{[u:cO5fLWjZ!æ#S+{b !҈xPϐmm6~DQ^& ΟxzD ߳J Db,e}p6U3kOX{뚪OGKR/|DZ! fEFOՁPTr5C9ZY`o~X$P=x2x%Ԉve|o8X1;Jfk-"0:h3bVј<z!dN<,ze:;rw٥qxtT A*.qZ+^=1&YG :n&Kw&r9_DW>P |Щ0 sˍ==A>(KBϧu&]7s0Uq2rCR͑8+k+F,j;Կ4 Ǜ7ŌzDdC >_Rіql_kaZu>B&W޽;&G\ =3%&f/P}l8q '"jg׭%ch$y1!EflYVä?n/!&FZE\ƨh5ydiO}md;:~VF3>p-ģXGOgu x+ȕgffqqSƋ߂&*-txr[DRBZ/͹V!G% sIݥSmWo!e:NL31K71ݼB3*G>~ `$ )&9ꑶ@V'[5;tEޣ ءĪ0"VZ"m|Ys68egw) A*CVkRJ]N{usPf(4qW:j,TWБҦmc˦ZE-ɨմhCœ)UOάjgҽWѼ,_5 _ty^|Վ_H껿?i unu*⪱Víi_Ž̀7GFac)i 7fa,:UsqW#X |8}T̔++N4F&)oP/3ZmlPp:üb31𻶱rPQY BLq(d-k P$Y oHEύ\Nǻ$C|u(m(/94bնž>Yut@7}t{_'5\Ɖ[CS~U[i1ڷDT,yil"0FRvv9'z:G Du*iʷBF/%2 Iѕө,[ 4Vo=?%'tft (v=["驢a_Z맩u׎_gU/L*1܃X'RiF, 8 bDAY탂[]6Lgfp]-.ԕ\XiP#dSAb٭/m9)Fqd !Q]4^x'o K;gjs53+gr]"5 :+lv?p*%UFf3WPajODN=v+~NҜ:8伢Yc`'ڐF6Ug5Xɘ%0l%{pLjlhmM $Sq NZ\6 EL}J)6aWyV< Sbّ0'W^l#07dk(dh"ԋ5r#=ZAoGSDТ+)⦎߻[tK!{@?2ݦ楛Pf -ldῐj&x5{LJ|8b6rjhFӾl; N$'PKgRBZ8A˯SnPwafgيQ`7Hu#pYQ,gD)|`%\*ӭn|/L7)z%^2ةTGδd\%,bzؔ""R .1-A(s?mP*AIj [,?ﲚmH kGλ6`Q"pmsf|ZzK~sh%jʳbyo%@ %790Å}lr5;׹ɞ$<|]}Y=zٖi09Q;įb|KrPY6)=]vR :8{ǠodDM1[4jbTzQYdopB`CS`nD#Ț^(U.$!~jWU/H7JjeoM8|_ذ\6cOX/7,sP AS~ThgRT_g)*Gg^i8Cu๶Yk9n{X֟q˦!OثH͸X9f,ԃ\fϧ2C}vȕ%#Andֳ;m*ز(< 
M^ZߦM,Eτ/ڟ f.TِZmLD$tpSEx.qLs%(n3qli\qєz2ӵ9!vcH֦.G6oU\PRC3 ݈T9 (6)G#쫜勗O72@5]/_9oDj $}rBBg̢30$$턌GE6?ڣD_%$\7HbϜNE(]c)F2#wN9w< Do ߤ'qNZu{:Vy[#h ?rNgzūu*~V׋={c_Z{{7acy!Zvw}AՔ~RV蝶-֎aS#%"Y}q'x4SfXK^/D>/S/ʉ\ձoj[)^U1`*d[6-)?ΐ4 U =vwI j:t5ÓVY 7Zj8IVo%dI 36ڹc% n%ɚ ^|Fk:UH 3L}8@4jK1j S}SN<rdךɂ \ kJ͸]M.-"S{B9;O(G k )q'\(~bQVݫDi$rj|@)6ݾ ٷ{Ғ Qɦ5dP%RaÐ/4pKE$=J}:jX:wAy2 [ub? Ffh503aQO]y 3C̑]7?cZּ:eHp]a0=դwӒ6r"\㓤,HJ-?9彫da|\ ns%>E Eόisq(]潡]з7^v7gofN50b8k M4_רr9UJY,tskGaOfVjpY19qTlY) q5Nt%L-W=2Q&A&^|nc3vgM;u93ivJQFDԽ/^X'ZRb *6X1-4*_v:_B#KN{LH.da m1&:H8ٱ|7,?vi`Б]k[(߾G s?=FHM]ӟi,$q5N}'!,qr)QarAO]pxZ+:d|y7l?vn=MC {E*^=9FrY#]t !"-ؽB\sQC9OӍધhs 1,ÁMM!#ZuVQlNv`q|ҽ.0w"xd!j>vFnss'GW(:2_fS g' +q;s V뢸B~}!%0ZD[eË_fEsM3s (ɛhGP#)V>D^h&WQ`*)+2'V7@*dlܮ,]M]-|CO\QLŇxG Ɗ-8Cl[:p oqN( Ϫ$0=;em_qu|%d$坎0*OP9$Εy|PL@EYę;YTpMV%ow'(yՕfx+wIwO.OIoi )pF'86?GV 9|Y>,`t+e~}꿿LpݙvVLFmJo6o54V-fi:5%D cڵJ2cIy`6<!)}j5ahFβP/OpT583ˆm%”;%2,'Ct4r ߙ~^k|ˡP/r:ҦsAwP2.zyV!Z@bdW<+֑Rf[41u>%/'qF& ˀri69#&՜TM3ؖ/G$%GT2,^—md)P"yT4g )lxsqn o6v:Qgk3]e?@3XUVlȢ[愑p3'{#9|VGҋ?{]9jR+g`l &ϔ-7u.[7+#_OCgc>i(  FPeW`mITZ Dy c˸_ҏ"L4V2 m N.G٭nX$.E)4^srJϷ̱;a3DdŅ5O^vWW.d15zYKldS_2";Sdvti`%j5K+<7Q|-v&:pt#t Jrօ'MdmLFm:6Zv0@ Sfj3K}Gx7վF\ P/N8>Z̮b-s|o44`E[B$%wV2mTyAްErRrɘu pÑ197,6#Ib{+T ~GD 7PfvPfI!u/=˂LiWtaXB#iqq}ˉ$: f`꥞C;\)ʙވ:n _fQ*:vHˌec=NGwws['b^ƆIyy%""-Jso!fSh(wB#wxf|rm$PtΆ&i 6QLڃ<.~qR -[D`{VՎGeX+LX#:o?ڈt Ea٤1*pP|NJ;zb2/' :!}uU@Z[!Nm ZC1{1EGUH*qYf;jMta?,+pg*{*L>'`rbF5.:w|v3|YrڶE A$LF!v5S"̬ +f=-椊.THi×}GQMI+0Cyp-+tsxc r1<6؂ '>e8:̋y V,(?l"5tZѱ/%6kS%"^s +4zl*4_ƚ`B_C9u6* a܄_-$[+1C}Gg'3`)#׭ 2eKW!8jmXId2C2. o endstream endobj 51 0 obj << /Length1 2367 /Length2 15018 /Length3 0 /Length 16409 /Filter /FlateDecode >> stream xڍeTӸCpwwڸ[N-[p 6}ZM]zi*2u&QsGS3+?@\Q]ACʎ@EafAZ;:#4q$L@TEG `geх aamPd9:]]-@@kF`a jt63q(YALf@7qA͉ӓޕRifP]<怿([3@_*uG 7O $6:́.P|@ /¿_wˑ&ffN& k; @YJˍ``doabmgb " @JT`X;2ZU%_n@t0w:" yola`nW)N,@Y@"?2Kt@g̊ NlAu:9:,@-&@;Ellsk37)wh/ k/>+h!h>h5=1=)_fpqב#_==:Rr 0@g XX@loay9I͠074nuPt-jŊ@skwu3%h9%vXY=&kvv@GW뿮+сvtoJUcx# ZNs `avpt@,]:Zn._! `,E<? 
"qXd n"(AE*;h P?*@ѵ P<">P.&y1qQM P.&f@a*@q9ځ?Nο$u,L\e_;.Y)-zt OVYy;YɬAMU'nP v= boХ': z<eG u^sloAgt ';$zYt gwG7?Z_ln(̟{ڲy:>?SjA]5],^ 0sw5;tR^@3EG30·L{ܚwv࿘!l+ 9 oy$TƬC*BG$Jxu1b`c@D27~PGd H|$]本NJH|C=;AY^p)䷲]*?HMHd&\O#b=fm2W|4[Å)n%DUH7x/ciL5cdJ *NG ?LBjlۭl<}"R gI|ʓ 9=ֳ{Pb?DRF/iLYɄ߽%Iq'4j‡}YHᶅ0!U}~dDvT37/&:c;^ dk7JE\o68awW;qJq!Rt)c0DR0Mpz rNXCm}ռ3vrv44'߮^%:N %,Axq|lIˀ|5M'4xrUru.>{yǏ&lTp vXÏkA&ҮP&Rf(=4>>~h X7捘8'˜lw"2Qצ2}†91OXQ,ekZ7*(_d z5oY 懙ԙ\(PfuwpTEݖE0amXbspw[r8 h 5\e~)ƒz"=ίGj(ȎR Y )ɺ0u  m( "3C_9jM/J:D\C~}tO.Zg@p8 Jxp0.$T׃Gkuy4GōN͔Zwť"ߤu0I`|^lebaQq?ӘՃ[l+߯;r[ުʢ\)\Y ,ǭbQ\':Ga^746iƇ1ҟʩ8 fAR0Xܩ]܁80_n$gM64)׼$\˲&YzA-"f iqXteHx/4DkKC3V`Ӿ!XT{Fql*$v>Z{|޴֑]KTI76{#JOQiL0QCK3aCCw*~KVr8^ŒTxbgJNI $S=OIg/=N{>E"BF`&|3^ 8xi7Y]9uEy+fwyHHҳ5saNŭ(VsGLvX'l_ pDc U#h9:Thn><*DOc1._E \8d%^z r$M;g_28֟Ss|^2c3ӧ/f]QmT*|QHR1':|Ộ>sfC+g75{7ޅDc/&QmSG #J~i%~Y9/*c鳧OЊ;L}izU{@cĂ5Dkug ZxqwD"jq"g*35\8b@ d?Wc 6xf;EeuI"}G GfEȫ@ORؚT؎qz3Of'] |.4`)>>1 CDuF%<1/IX}F4%=Ri7I#SIrd+.*٦j!'y}my ˅1 l,x6* ;OK:fnɖ<<Y{WsFbiszC0B]+ekʂ!iD|Wӟ\Ҥ!sGtL,z=JʧrvнoQf#own1H & IU8RǧkD)|6"*q bMs蠸7"frӵ!]H9B0mqm`DvpfƳ_ G!3e~HtTːDbb w;Uƃ^ɩIד+ld~߼r V֏+NuU5bZ7.} s{JJ-Jٲ_kopQJeB4|t50 vXTQ 9)FSD;H&~UazkP>Ooy4{dOTx!! <*ل9 |fFܒW^dxSʂqC=x:w독Gà enR1] AIIqG'B :BYOTJV >pzɤv\ '/}?5,}JUzi>תC}8Ӳ|Vl% c4R;_sTF"_)_iczA WԚZVHv`CxY؋Z2 _[wQ'.׭)ԭܭWZx8>mvU/pnKAvETmYN`smM*,^B=1T3o_{fmq"'2`{-_7K~°-,z 1l*%0kw^ (Oߢ̐PnUIVX L&o%d^9\_tuĥM ^)I׀^:RaY'A};"aI4!D#/>X{ccd72kw򨯁d8^G>!e*|𵨝{vfJ5I}s3j7`W0jm'|$=lnBJ-TM1źՏ:z 5ׇ-!X}5h^,MmrBCƷ Sw$+ݺ]qhx>u3~b;E pH w ٥uk -K-(qh7o~VZ .7 s-%>R[G5SD߳vb9|?.a@x% ֖D5O $Cjy"V'q wEFJ? 
%P!p}`9uY?!Gy /͘9%Dܨ{K˸NfEkvV2A:\UQ ѱbO,QJ@P:MŐjqƞ Bhn@UNvm^ky< };*CWc+c5!u&+ۡrz&+E(qŷ <w}3ֳJUץa= *8Y+n*?$tv(|ڒXI+q٭ n1n$)Z1R-ut)Z&s;vƼ_Ex"PUv{{H)7Byڄ-6c>k>S/;oOL8 L֏BWq*I6̧}q9R{?hO=>+Wdbپ f)btŐ?QD{eWCgUhaZ}7%✁Q5sҴ"Z}Q{5"//N_>Mh>ny❶o?-@޹T80y͆#P&b)͢^}j6-!cff\E_a*;d-n=udnEEkh]ĀL[Ԑ6EQ4Ҫ3]L햨ػ5-#ޅ1i 0kvR5-6ksN1?} ፫-^ms!?DGd%kaGC_V.Qr~PH>g`b]| 1hgh(h(XYݔ?t|2'pQS \ ^n+f8AlЖlLa~;aK}S'`N5 J+~Km'KV?lΝv։ʹ rM ^}!n5+9^\ 9 sK mޣ £a.5!T[%čʞG3w~`2 ’}\0g륕Ӫ/pw!?C8IpQNOME򠾉ż7ުGY@DH*]KrxrlU%meA&NW3 Ň?F+h_%Ƈ-DY)t/Y]9XnJءKV4Jga`ُ$~Xh([9~f*s;/(A?a*C&;;K0B6%?Κ;Ut-5R|P}6礶 ZsA@Tb5LޅWV{ZBpb&1 j|*>$oư^w ?ݢ_bQ"Z ZNi{UtqMf[gI87Ŷ1/qs\X:)6xZg|JzΨ9TǬ5oSDdVY$r&IB^Ra+17WYz""_S(~ԊDmt̶Pf }6񵴗YtS좩]E_/lV^2æy0ۅ76݌$LHP`ه$ 3q"jK SiXK@##r=pѸ\7rH[UUvMBYRwKo1JcaBx;aw1Of\˖ssۘ&"cЧ&OX]#ℨ [nkdrAEո-@ێxHžk:2$ocSl)_CH!ZF#Ư!.jyCsbD5+ZAoOQ\Fmv͠XQʞo%=hLF(G ']%b<ՑD%e /bX-)~gյNF{ևK/[~};=c{&ͻdR 38ՇF@OKiE>Xmf[9%jP^ eFU_݋2A_ai -ޕ– wOHi|'c 5dQ ~p3.YkYféajEC AbuȂ[{˦ Y\p;fLӬE:XV?xp, #cWu Q>ܘ6aRDb^>jnܧ HΆ3ackМC$Ѭ N!CI%fLGj&qz5xBKR*aIKտ|l{LY2ˀYWĶ qN~2tU*H V#]*,صl7Wk84/skNjMGZTp.nȼgV#UKn/* Вę?(0]sPdH{hT}'/}[V8dytlcH"wYaOW4lꠀ a%T˽bR/Adt0ܟP j0㩛S;YqXL\%EfmF}E{Њ }Ł_WmhWGuˠ`1",m*󌏷KxlИuE@Ԯ#"`®V%lE_/ 4U0шaFih\U!GnGʄIl0.?\#Jv No&=4D=hOZ$W԰(`^ihF61}x+O`_ #B#yh /ˏZ _b'؉cBnJ`,XQv(d7hR'ݭÙ ﭧW [([DDqގU;4E(rzxK8RY$zƷMG:?J]pFCAUaఆgKF%RӅgk18rqoN]au-iNOGEa4dll/FX( =kE'tԗ>ԥ."XTxWJ?_y6%%, [8?ak{GndwL|Jw`hT³O1T I`%H,Yȓr]ia˜vsmhay*㳭8Β`Y@_;{ίU͒JnHb3Y?:Ufw9֖»@ǧ+*Mz-,yƻp|ag15r)"W~J9 h; k?rrK !CqgQ7kbN,yaF1kNosg__rN[/Vy'L CR =ڒt:0x~O>>1w7}x{adm͛ϚWSuG4$&;Ƣ\3 Ԭco'jeZes3 ?cv 2KK$$8bbQb*.nsn'~;dwZʇ Qjfk4+="u`$0%?v0ϤxU쎣689E@1IY#I1'/ُM"3Je;GN7o+5gi kڈ@S#wX*\:.5 %C>|9s?tO'W$WCݮu z`x^ORAM5\@k/1&^`8GS8T]|%BWF}<\%U6yw?x$RL)h57>^*J^OM_kv y+`NQYF?<>wmr)cA:vTw֍l2a:Hzsͬm53O*ECR kSF r MڐڜHmb^&koW>ȣxF _TXmU%e|O#|yH ?⿮})j9 0Rpv"LҐ2FDظuix;!}*b.nJ:jKlI7 |{P|oV ɼVRfKiXW#Jh\#9åWHWtNOF\J1A~>J8^KK'2ˤ)qY`KUk &&(ڒ留$>x(&19}Ecvi:'pCQ|yZfU6P̿`i;u{5n.۞d.f#0Q\<<^:N4.7>k.s ,Ly81q E-6._v/G5t89kܶWՈ3p2tW&3obu [g!'`EN%Ttmedv r,^Cؕ m{#(mxZnl\QMjOR+qR$7&<*S 1~N>jcKfu.d?\ {=%'%ҳ_Os+#M[(oT&Sg 
gmB)n1TšrIejR:rߚPgT8IrPɳSr/ ,F&37υT{0/z)3+my(-#otfoC4atVa1o1Q`Ҙָ# >$Oыlm3i 4 ZZaMOvNf@YN#vw͗\Ɓ sm TlTШ}aAqd/@ǧKQʯԅsI,d$}"W y.z,Ŝ R/m$iMMMCaF7;tfqeTxpz- 5i d{Ϲ;%J­E?^BWGgk&8WwzSXz t6Lz'LOɏӧG]6^01 B'9OJVAa] Jv3*Gg"||-S{`:E(6-'P%B:"“d5EejP>hVr_5P&7B$Xj~`xS=o"  ِF6nQ[@Y&7gTz xY2o=.z^+ |s5Pm no=5{+%:5wXV_"qBKb] 7K'GfH> p:kfA'ֵ>mNA؝Dk6/4 тLZcRV]U õWv.xi\8ڍ1ft0h2| p椮62M|.-/=" DdW;ubyLZT !1sPl:9S\ct1Ŏ_Tuh9`4^CQM6hG+[ba0(H5mr_oU &R 3aW;"َjrW,#\oy| ؟GzSߔloH-N뿟۱GĎ \u?3✚w>/2]x&_Z53T6,X3+D!|1Pka8A/$:3Ўd?.--3:ՄO &jG6fߪVL;mqS>1 V>>0Щe?[n x.!~uA{$𼴡Z<=.k>óU[Yq- .lWn:JpC@'oV{@/Xj!cP',2~`!p(W5TSI\23J@!+%4gy#lh&)+bTx!oþXlR,i16ܔۘOAw,kH n[vv惺7eFVZPtgOA+N>!:U'4*!@"ߒ0q=]C p+ '9>XM=[̷\a%R.Gʹ6+*mvڴҾmBs{qo̓I&BE̠N`Ϩw#6\~-x,M];?b}1|eh;;wW`Z5Rq}EzNF64Lo='iO.e"D?%#-97`"ކEmH8NaSA?FawFAp^9Ӡ+2}ĭz5*,uJ r&`#.ml>Dc!i㕨vƟc>YʏV.6}$w@ۧZE|i 󮘂E%e2* |̋e[ER/p̨CAJa% r|,[ !N&Ovb"JG\G\A5JK3mx?=\dn0*.?R9#z.^`xEr=i98XG kI;mOn0H8"6YΩڊcۭ2%%/;QPo7Dx\s+W?`:f'TV` p,5}#جJCt/{ÓHI)q|Yg; ^ fbO7hHQLVԷm͂6ZhF9!xǦpɏ&mʝ0}];HKtWju󣥱\Ե4>(~.cنyKӕ>͢$ (Ѽӊ%'/(iOq8{B2ѣ4UxasmV.sg}oe W[Bh/^*׾_ԃ0|r:C GVO=jWheB3q-E$"9ZD_"]C?OT=yp 9ZӍ/xIb? ~(9/B`Fg(+ɤ7Bz1rwFzNfu v:Sשּׁ/'7&Lx11IE탲[hv"kR хLQǽjiS/.S-MKNJq`K&]4^} h0Au'hOAnbAUWc"g'd`݋:eV9.GU\jVUۚimse_)`}C?ɗ" nFbފ7i&+͗`)/a8DGHJi.x?6ڪ6Ǔ?a8_A1|<~k1K;~Ej <q"NrJyuz}6 q"ʢy$g@zӉ>"dey/ǗFlf%VER ?(|75fI-NO1\W;HH'ɛb"{|do;;49 llh7|uՒ!pFɵ1LDt endstream endobj 53 0 obj << /Length1 1370 /Length2 5960 /Length3 0 /Length 6892 /Filter /FlateDecode >> stream xڍwT6R HK7 tw03 ] !-4()%!ݍ}[us]ZD%m(aH.^n @ ~nItma҇!p d `Ma @$7&ȁ=67 A8Lpo7=ߏ5+**;vA0@ 8V;:pk(H'7 wd؀R AC$w񃝡NuGފ@ ~+C )W buwo|+iӿ E(@ 6P_|ˮKiNPDz\ nex@ܒ rYm~ɌOPq@\|yohMcGކp7_w*h2#ۭ~_mͿϿ xAq&ա-gUT\˟0[z"_s}U?q)'Hќ, b92 KVA,qvAhlvS&hQ[$L\ wV\"VE7g脀. 
+ݺmDǸhdJGfꮫ5w*Cqd۷ޞ|Jp" be(H2(2'c](1G[iuiexE}gmF_CE)"W`|d}hF/jN~0(.5IҪSPbE,f촗oC!vv5!}Yw_,a!o.oqهW؁G[U,JLقdOhBS+B>1| 3^iAK c݇'EB/=${&Q%:(wDq"F4g]L21~by*WH 4:t8|-0B ja)-9'Vuj:0 @{<=- mE ݖJ6rJeCޖ7FcsC;۫MAU-gi@1 ELCӳВe # '%EIP?I{pC2bo7j9>B ]MbeFtsWc ?mO9uJКoD^):4$Fչݣ 9x)&UTǾi1 טmJrHƑH)z!%_B 2~Xrz]Z^|.̣8*oX!YI:4DF:ɢ85鵣v]E+ %r$s۱s(e3C$vol6 Gkч AI9*4Gv;?+$GvoK-$Y-^ayr+!@Yg)ǡ%,gAt\ZM~™ԴzgvQI0l72ʎ_9 LQ`gYS7޴Fwt~n0#7W&DX%/KRTH#P71v,3V\hj$\ۺd`8 XdM:$w*@^EWk'銳#], jL|1܋3iwcݹ7^݈n/Hn>}0Xy'A `?->P*t.WtPD:xX-dL.Z{|J Dr^x@ݻ@Pg ]h9sēSIa/ Id?A9[IP >=~fMk0#(3uVHw BGfo`3ZHڼ)͝۝R*c9kG{?LFOokw-qaKP_з fVd=џoK#3df½̭ eԜC ۂ.pjRUpY˻LXkP~+h;+ӱð<wE&\ǫ8{X͍pNX]ꛃW .s Ke6@FqO 5YH aQCs;N)v x8aN˕SdCЭuop,a2jL@GR+=_v7e2t=3h18P .Q̛dݲ:#cAN([ߦVV=>EN]ZyZL.dk*ƭٗ d:ep9xBr;֋p3V? O&-& |ga0$_/cY##Loz#< a~ɠ?IUD|GֱrwE "Y[7@f|,Lz2͜ߪP dΞ^hBOhggs$t8@6\AubTWj<,Ue_޴ͻ#p_ɂjͥ־3N*C&F:9Տދ:D-XW`/q.R.+DWzJR̾i}.zv:~P/F !-rMN *,P~ ߞ jV_ Yçb4%7h|}Z^O/=+ʊ٫O9XӕnegM^Э2KYTruÛ`T;e U"o6o)cSh4&l&"7%"a wã:mL*yloIkew͚XU@fù))o,].` gmc;uM) _0v! KҜ%G Z\ݯ7GJL|pu+!y]>KR,IyCUrUMӐm3[˲cV-CRJ V>Ԋ Dy>mtU >CH:\wX}s-#5{(^c+)RE;}two$P$$Zڶ膔E0Zq? 2⦓L8uRI1mg21oL)˴R|îrC+`2?,KDIlK-9.hq,ܩ}fjs˨{sS<*{۟:#AZ؏DrZ+nt$% 0Pe+4M+?qbdJѦhi#IXԹ> &CP8vI!Cu3\CVݷ.У&%B]ϓ'>‚^ &sFt':z\͵srKO̺o(J|m=I!Jt.e6 n"V'Gq*OR{8O`̚AYrVD0EW1lL'KVT,IJDlεQNx3etr 8z ;I9kyW++mC\+iy63b6 = ]졯{xlPǽ l+Kz|,G^c ԟ2.j8$hF$\8! d)/de[ o r! 
mp Ű\2PfŸ4,*8F|Y_WmdL|;+fVll]Wcb$*F/jdZ%̄j,*eHFoTl֙.6ƃ<@;zB~tPV A>/zMY@i.[>wW/ҳ+QȾ: 3𨟿$r bj`Dz0Tq_~0=T$r ޳7 }?@Li eb % :{&22JG{j:&_Q:>/` 5uP]̰q>`}ì֊*Hm#PjV;?M2/&~N6fXHJctFCMʻ,n(ZRD^H3_hI(NY3sa^=nq0FphOLZIL&5Rpv]3S+7a/~Mg%S?Q]);"J^(SJȺT0V HH}<ϗ4Mg@Z/:.{,n5ܘU ?4\0Pb{2# G::6 >[dbAN;zv#&]zU>ص> '^ HDJ~F`7 Ҫ!gC?ʏ׺B7ǭFLZ Go`2*NZ[*&O4J_3֢pؖp]cF+ ajƼcuXameđMAl]5v]2I?T6WTa!+kY7lH "|~1-fv֫̀.b9(&#> stream xڍP-šww/.- @ $Hp-Z(Pעť(^{wϼ7Iv:g3a}!c + <\ܢ9 =n7777/ AE!l S PusxEyD" Ak@b1\ v}ZxDD8L8] V (@;>hr¬ ``ÝD@.+Vt`w5M#oj\XL=;_] < V`SxxW'p@LYY@P/`q^*s=@+)8,lPwr8GcVZP+C\VOr0Ͽ jm k7'>V;Ʉ-`O+;y9ta~s<AlO?X> w0bXm!PT2mO~ϿWO A4448o,OWzZw->Djղ`{@]K\0?B7z)*Uۑß~ֿ?~#'@4 55`kzUࠧi>)?7_v"l[̛ ւBxa4dVO4tfUZ6^AtOH4`? rAaG? 돋U0@#!$Z<m@xBa?I@?Se?S-]* 'nO.es#(X¨.(M++JaAT/FVi<_:-^zh2 Q83aK֟1%gcx)0Iz?Ar:a D*5Ͱ=~;tW nr5d &pS![figcX/5M6Em;G<)@5C|y^\^~nUIfi ?w8aYQ)zpu9DZ::|9 ~V~'IqyoS+LvP7~qc=יظ}cŹEG#HCvڶ@p͕~nj}%-iDk^^&ـ"y >^ ҵe<΀,*Ι䯟sDKyq@/f0 dP 'W :1A:uh?PqpK\StP M'nCB#`g1"q;[S&U)#/a&0S0MirIxMqV zZn"(L Xmmm &8z3$x3C`ـ+_ rO7hD>j_KyrZ^2>khj|09(hKiy\1WO˿,ҜhD S\6h* tD c(3c7$.O{?\}ဗL|C1Wzf1dڶ7ŦKʹƣ9>j~yΌ:)m僧T_L<{ Xk[n(Li;/ߟOPX=K-ztHMkJ.'c؋*%")'#, ë,鏈|ُ#.%1kyi)Uh."ouI9DQ$37 :iy%A D_& c a2_/\n;,a g[D)X\44,mPp6g{g=s| hk8gO /e/ (~,<,o,\!O_b /[Ix6--ɓ. oFVҀ,p G7vdt ;͵Z  1fn$7urFiSCd2ɿX4ĥuǃBb[m(K$JO~l!VMp%0&ODWj t*Jd>+NJD3,Fj$>_.J/DMKX0їkj-.mN>XT~j"sDxT}hU}7;Weqݥ>T}jU0e/\Jc^Qe~k|#YT>bRŰH0 0'a]E!06Aī׍\Mj'pO&l%ŏWjx{ڑۦ Hf1+$|G. 
Ľ<7³Y=]]owW_,YOe̶ _~ JjR\_OyG0_y@?hkk 7Mgڮ~0nG[~2RZtNr`ӁmG _Tܩ@yk;/WNF+sd9T^Roާ,]#=7R!3"orn U琥iQM7GPw@5}=8q9 j{@CPfUYA槦#Ep;[FEt +PoDbry0KWYJRe}[hZo J^43dî=u;jF 7Xws۳kxͽ?ˆ8BOLm4QKhh\Cό|vqΖp,}(`44U' KsiC$Rd;qZH(z!=M["FQdIE B_@rVxA`#jS~q!{^X.b'9If3aI~/i_S c ?mT_!}T)k%-*XCYܳ[i0;bu>C'hMJoDMG'aKKh> 8ԙ$+5n~ήAEkR:mJ4E fݞe瘍>XCBT;O#&vP2rrͅ ̤0(8&B)Qbte%!}=y;)|ĒЋ帒ȞjoIsbk ˜LU\EY>yb߹-\@.~F̝XG)zzhblƫzt9|Ul,C|Fp r VeqN# 0k忚jHXݢVnzzVX\Hs)ʼnC*]efgt= mkxyO]E=TJFF"h?U~nQ9T4bO7L=j<'1?p,.k.m +]߇ ?^ 7}VE]_(3+;+=sTNo.ˡtLxeEQEla##POyrWͪ<$ݎkzH}"E;ъWD2?ҰNrTTr2ʘʺX2kp{F3cT%gK=Q&<ؔxP[JZ<|>躥tY/c*g{,E)vZ(@|feFJuVGFr#Jc>_:(g Yx6x&l>uz6=KoYP2C3F#"1Ef'^Qb WG@>i&&YYPRzΐȾe=#ieOY;Y_ Cca6~蕄/ÑOՑ. 9F"]O)'Rzf ZE=f8"~M;#@G2ܔ\QjϒޕS K63cU`{1kel*FJ3C[u6.+x>OR|S3Z hUz}[X>\Б5@/h22RY`aM4N񾜠b;~|7\KO5. rl2sSdV\k++U_w6ȉk)W25񫡅FATWrrq[)%!%i0jRYq or'W>r1 |KR:B.:b$^DFUYHT@:ŵӧ 2*J>D Mdbo=LV=ltX^]hЖ,LSӘ?VM$`@VYdcsT9lj~sp͢F CByZEtS* ?BE ܶ>Ov\8FD(x^dBw& g|QU=p\j &BYP9I'u}H9uu߳4{yO` .f6Ɩ[lɯ3J[!?vi3_nV~?tC 5m`,ο;zo)k'4Qeot0pޡX{t-3=R tC㰨ђ^  fySΙh0i1&n,C$RerPI-%-1~ ?%0gPF6#5y@,-f^ z/䎄abY4H6 Ȅ@GGθ&9Y?Gtk{ivA{!u MqxҖnwx 7pϋy ìEM]Ϛr d)Ud;k|{Wc:JۣA$o$ $$OG5rõƎJY]t.G9$ W[(6+TN( ji$POxcREWf©qɑ%󹬗}c\mYV<{٧-P7CDݰZ+uYb?PE%;"cb"k%s¯]4KbJ4Zj/`m%m^,iD*#\j+Y#gAk'b"Hs80~Ԃev,YyFC4'2h 凖>RByv_?P}[d yWb5W08E[!:d2BOm;aPᰋ!#9*ݘN}r!:Z;*-/_U90{saGR>otTzُ|ÐE; N1-mR$1)g9,`"=n)ͯY^_|LzCH-HEj ,N͛L)ﳑ:ߐs;ul+$bڑVF~JNJ.Jg̍3n 1A8.Mj~)%{!B/_c3RCOlLy/fF=wGSlCL2{h:E)"=1 zpx]-GƟ=9~J"XWJqˌgof* gfƲb2QCx0Vj֋~+պOZ Wf1N@ T?㳽.8/_Ya@3~ӱrg"{BOUPcgGi eƼa:Lךj4Iĝs:rlBYxN{yaȮ=L1qk2m\KO=7u&X?D/c'.'.lNWWt(q%u\{1_at.f9/璣jǼHO|5eMuU&f:_5H5E/{^${xs2i$vC֊8܂kEy ,Ft9UWdseg-:]䧦ъk?V,őEW+JWk1>;Ij>r )ת!{'yQJBFGt~ Q͓MK+}MM`jU9fU1-w+OkQg>Xq>IAb/2M=a{@?fU6Z+(g!<<4FIIJ1KXsrқ4 S!6 elM^NmfrV0@=եO_B!/^`_Y@1Es}*k6[V]?YWJS=Na?> 5`v~K?mJtK_)|o!`"͕u WoBU"IFwGj&Mf>U-#Z9ԭVˠYh!G6%ј&`ѓi&xc|c4(eQ-ir3X;cvW<iXQ1a7Q 5DZ糐7%27_B)o\҈9;_p{Ro *?HS|4`rGo?P7QP.^zP((\?] 
u'Eh9lӬYK,Xo]ס,HwS&gzLqQ{.$W D6UޛDm!11Y:'˓p̭+49,0QVrC$zd5EGwlnO endstream endobj 57 0 obj << /Length1 2430 /Length2 16966 /Length3 0 /Length 18392 /Filter /FlateDecode >> stream xڌT% lm۶vvkl۶11idIsϜߵgV(HDA@I3 #3/@LA] GAnl /Bd #&n&ȺX,,\Vff y&F,G!pr~ߏj3 _@Gk3;G3[?&yM>:1-inVUh2@#(VNa,L7MxP(#,z0co߆R613}7󰶳XXJ;߂&N7}Wk[7B7HL2;?'3Gk{g'F'k926Vf ;s1Ǐ@;g'[;wsm@nv^Ev0wgҰvpʈ-FC:8X@̊u{_L|A4>?p^N&@ ߌEp,,sk3g)72?=c0ۄl=b&5EyS)* rx1Xl\NNZQ6;iY< J {=hkK6@1g`6{yR7㿭H/>&m=x[Pmgq./WmD,m);\YZANֿ/ ޖz8_,G ;3%c8:x1M+ḿ 1x`rRNoI0A<&3Ib0IAl&?$yP<(l*A&?ͦ`RrljxAof.ot}xL=@; nid>},ŮV* גctMMfHH2q[#G'8ՙXCS"  {ޚ6mݲ.HnR +! *{5rO4"J(LqHah.ܑnngr^e|N؊t7X{U:BޠPz$b/zG ,f30{uˠa3:.Ht40h#f#-YIJ+  u3WpV\zws|#w|Z?9g "&CzǻCFyWY=wΡ 3$8¤4df.:tv}S0X^eIF˽ӼqTnGO77&ĩbSfqHi \9 #rud²+3' zQ,Zf}6kmI"WaZ׭*<(</1Q76I%~0wceocWI%42zU#'d&W`6˯6SY$pS7NПdPK:;۫c}&s-JqC Jy>RO>|ֻgLn>0=OF.I7EŌ*ҺlөN{uo$]*p|_Rz*Wbg6Ngp;AtONbpR[ `mτ3M;iNKŶ$:KL~kYFf/4TJfJʺ+v 6$yetgcg*ieү\1\~w*i'`usj ֍!~4GsfxGk=oB^QDec E}#%5WY|[ŋڞ0[rlujgχN_t4wn.7L8 x*)* \kF25d3Ck8~c-P0]hA|gc6Cu$eW8Jޣ8;UՌr"w5Ēhd!p4DwbM3d'+U&B-GԡdЖ,IOŋP FLgcIQ"Lu'_ m|;hFg+ŗP"Zf\3:`E̯lC1\D"H.!Ob\;1ϼئ#<ۍ颓D z 괧$Q3:`T MԐU05 "dCqw)/%]_ JYB8!\ YpSc2X&'`36]8(F@j3{vsc<h|ص&b66w]']T@mkW>Mcjč 5,?h"~@Mu+`u4]aw$=$@' ed%nLM?-r{HMeX;kFڄ2c ]q'֞e!DmKEr&#hK Wy'>F׭hRO ?0<| 8Q tt)6 vo4Z3v\At'@D99BYn3D @dQ;R3 C~ɘffg}8Yw*LEKlsFj^ޭLiB{kA,JACOwe/ 3nj-NPuKHSMJgUE@ߴV/r P#EOs%9@~Ї&mCS>VYsGzc l 5pμ <|ɓ{wwDKttU/5Bk槝5S"hSn@^h^ep7>Cʳ` +4 f"wYL86v[FqM ^ AsK;m>ެb1]mGhY%>E,P&M' 6RYo͘b5|`8h9N%>py8sרUMvlT4kfl ;9x[f$Q=5B|()@d+R"!] { :m:tQ q-Y=?<4ĦCRt Bs ?dLr>(J06ηk7B5ⳫbO7]l"!!BdSz)1ۼtMKÎ}2W/ vЊ1­Yu¦v%%U"J8\(>u#j~ A?!׎<f/}7pyU5 6\',Q?*ܟ&C}[_y߄IU'?d`&[i5QݔNo4,v@柢>! 
F154'65OP'"5ruTZJ , ZdJsȫU9`;P ё 5J5{'z%޷&cYp,kBٞ2?n|sy$FI.p穉0V`p<Ψx;\&vdt{1ޓ Y//࿑ށ=ot yyJnjAW#M,>}Š,1FUQuU@4[k۟oZ, "QJ<@׵k$3F45K*zG7C;NˁP y\a$?b`n9gdt!%J Y!N5.|4uo3oJ!ڸ#]0iA~On4m/8=N]Tg9X @~5e}M,هr@ V8&^61X>%QWR~?!&8^ cͷ>{v.Y~8E+9ztFLJɝo+bY}F=_)EGɎיq6PYf/'+Ԧ]_qIܖܶl/J$o(L$3$M<`7uBcwJC1v $@=\\=c/%HϲJug?TV>!h QD oK|47vd0&wQy9Ǎ52T̓[pJ4}~`K]\ɉh/mXɺv@BC]"Գf`R3Usځ@R.~`{vWL ?AMՓP} XR:nCM/KY1뽨u@m| ',w} 'Ud؝9_L/a^H?r)c7X?Z+>kǷ xW8=AW\m a m#>/πqg[PL6 g\\=]p<"F'љ :QZW_ @W6_ܬH2|4 |]TbknU=_κYj|mW;':\\Oyh]Ӈ['~6񍦩@ =yrY< el(vLI!X_$;c!_E33p9:v%9ʅՆ|ʹR9mk!hhn=E"6V2q ^%Nоb*:0B%C/BALةAGCǵseDIEF4d+ţ-OvNvufa)3./mw<0 p'^~Y6MB>:gN8 ո6y@0ͥ4a>c&pPT9ǽF6" &aȰ&96GėRDm :!z֙Eg૛^% ?>' !z?ޚ>1g:=[wZYTz @{  ?=8HnIZEn\q 5.Ph.ZGhO4@fmkט+z?+&<ɍ}-h;~ )ê[g*$(3:"J+״vc<'$`%Y)$ȣh_RJClj:8!IKs*[/ˌSid+;JUDcW*v;=lVyP5Kce%4)Aŕ'TؔKRro3Ln5 QUa?L |J_FAp/MS)o-R~N92 ɡ@~ *S^81B`×6.fbS$t*jbb듳8Z[yӔdH%:G] (tI*l' G1`3h_C,LPۜbJ68d9!@3~ߙ&|"gSj̵푡u"'^s"foB.>ʼn7ϛ?lDjЗ'`CK_D3 MFJgt;tW-2ޱd7yz!{VKڇ2ȫ WjMD" BZDT1*"+Kl+kgwxNx2X'IWaQ*;u0;|og~V&S}l0/z^ ]?#Alf-3v5GHukVjJ!s)qB^I8 H?H#e*}PBHHyst%9뭼2o h5]wr'dȋ*dcGX'HgP $c2g>KM`B&>qt},P2ϊhlJo u><BV}`;STOo _oRށ+~hs3UbyH#o;9=ZnD(P9pX~yz*WvSm=)8N[1>HyZe>U{ۙY;wҷC|ģd#S v%hX QS1uM= GfXf wV5 ]虙3!KkDGx.ְ.'EWJ@tQ֣,ƹUר@|!w'5 ( yba/j ڽH/[ٙ[W_Uw|4/ @=şQn5ONȌ5> Wc()kr\M)e _7mgY}kc|)Z`-M((pl_&B~Zr<*zt9<UAWN *V^]p=x yCQ5G~?el}ܮQf8i3i" C٥'_GJb)pepjfPr{)|㮻iG襁R78P H!nKơ"*µńf z93ȍieД #6%/8_Û,~7PQ&G4C43e 4EZcTD#v [Q{ڒ_Y0$ُ`=4E l@,qg#iQáVk +UnZ􅃀Q6--"BjՅM@;=6.`DC)e?^ ͍rX>My' ۗ!X+En=ao˶Wc+XS &^sڢ">q p}t&QƧGX3E҈K2o,2SOю 0,DfggΩrW-w&Uhgg0*dh4ؐ*^ h6Au/owc\|D1Hy'Ϲb,ݗ [mgD-(Z0޻xR; J|G| #]"Nbl.ۛd[򸩞yzu{^()O!4ۊ﫷CI\,\V& D-[;Ѷ>Srm:(q;$`|4Z䎡l7\~v;+}o"n`<_LBpݑAOURGl"4  )Ccބڬ %\-جC=!j1J)e8ґ3:&/1PH4/d "{JTTOӌ*ߦHylEIABҽRoe$k%x'"Cr(fѼ୷x |FGKkfIm^4fIrɎ S7l}훰NAuS-bC᣿lMzb 5dbĔ Ԩٕ㶶fUx\K;eJ539D>!XNd־OY{}2|^U 栩bQ`@Ak@0zZ:i7h c)()H5::7Љu~, (rIŗo+*8㷯k r@C+.#' )BД>RڤT|}'{$1 ojŃ_-|ok=~=)MɵXEf?o8D>vJtbg.ܫT;R C\|-^79t5N8^$͉-}/Sl:Ob^FpRkP`i L I&bhh,<:F$ԹoqcOk xjň7cGd^8CuԹ?%~I6W Zmo3Y$#_ɏ B^ggd v?HtwRj> ܬ(gzG~7ͼ~Lо:z:ik3b6FI}?ǜdWh=euOPmIΝ t 
ri> $CrEm0&a7B{IS;/lQ0 cbw)E0¨68ŕ| UyUpvHƾbE`HrpGG&89 \2¸{sܩH]-$W5,zWYXH:ؼH^δDPV\SGJ^#nXp#fPl4/vϜ~H ub㍡Xl?5z<{4,վrUMA]HCKStGRn#./0O@_(CыsQmm_ Ծ@\#WwJ6pg!n3>YIܳeeQCq,;mr»Zڴ]xSPBaf+,Wȧ3MIF'-O-񋘪:_z2$S~x3]#Z!Ӈ\ϐ6|B /zٲLP#JhZsVF՛Z$ηG05=)253v<3#0Emk4!,T }SLN*G;r[\:J(iu R rE~FYnfK4Y^8 X4[y->W:"KyDfZQf$aF !kΕ/F-7ӼVH} +χ^-]ݧ1W*v~̨mGlaJ53mlNEE\[ | /˺櫪(}A%hyi_Bs+|E|uY J/-nۦ J{6A(SѼ>e﬑%X9'އizeQ.iw?pDԗo@ygU Ob^6t_+r=QPіO[udYMö3ycA%&_D g}gbSlGGSWzؽO0+}#{+ZCMNa2ۛN9iϛAL*3ϭLgGDg+ W= !,)8qo4vh!kVmDʓdr(gwČm,Zg9vq/hwk]jԕQz&B&,!/IFmwӥƽv 8>U v VSW)3m N y›dR"iNx]SahU{~ٝc;O,5CD^hnXnAFBCaίwUJ[I{S@r[Bj&fZ8Qc`R~A=i÷4>9l/ϊGCVwvӶ[ݢ}d,Ty<ZSCM V'+֙(h*[ᛲO\;9 [KW:k bBE5g)>4 eJ}"HEWLe"q}fM; b)ŏٯUD{ IjCyb*nxgX"D>B*%읨*cR.t,\;7tpt>4AN"9CEt2AaQs^WֈqIb f-Ik i jgUFlTl\NWq ?֔] Op[u go?be_Y0pN9# ڭ~r)@*կF-3_+<]b)p#hXG$'6YLTp f:`zϽBR]SדX聰& NL|sg_\jTdsݫpF[~sx?״]]e!ݏ0,(V7irBBNd !# 9Q'+Nt{,':x>*JKivM2F",LO3;;Q4v|v.]Rӭ)7Aڵ1K6Ʈ:&fAۥ<^s~xZSb'V9ca?'$ +T)=R\=o@REFKaSEz\`,8vJF_b I^l">yTK. O;79$ϓtK0fR:Rg\SS^Tl8O 5:zޱ_}ż˂.UI8ǂ=:IGeYnWɁɴkM^&u4АВjgIN05X"UA0^INtK2I=v0*5"*wO{..qۄ'E׬tM P?!YoMr57o)K0S Um-|ūi=>qn9迄波NQw/K+BϤwfVs?4vzifO lWbX ApQlJr^W~k;:`YfUyܜAq}! 
!Z5 C 6dMc@*Y0̅[؆6{!Z~|qjs 1X?n/?C |POOO&xMfkB ^;]ŀxeŗ̻E`U~ @kcUcPݏaq&IAM[J9wj|pO֯h+9^}J* hQlצc ,>PjrvDخfGZ<0*#+kűrV})ZWA\`eQ6Ȏ 2[[$=V+[~ѐC߮Y60bpKnG:NѴ|!Qł[ Gm lQYxVLosW-O.S⢫4e9Q4#wNn/8Np?}-cEdSe>f1/&;Yyf 9waVʮ1Ol'ŃhQׂS BRD B L o󕾸5b ͋a/fխ.iSA,IMk#08o0gwt"b[ 8=:^Tx}W ,bej|!^i5g z[PCp:P?5ٻƎKq&Yf(e>1ȴRa|z FLJ7zSxD}U,A}2o/1ʶFf酎:$]ͱAg[wN܇|wQWzGž8&|Bnsw pT__ 9>8W8"+f )>e43ƅ@F=7u rBk.{8εi[%ɸqX*k 9W 0/U#>m܄{I٨$Nuϛ*ePe: :v-RW)0汃bZJ8v !LۧQ'@ !LJzGAn7;t .bP6@cR42hg2)7죢T~_H/m9:$Ke1"ElTR0kk/JXN]xFBF?DF*ei':{mjHI"":`W <)Β*b';shp!ѡmmܩ'^` 6) N\@TwiuEpk0UJc-(j;"9^P맔eJjV -4L̺r|o0<}TߪY:7AdmH"̻<>*:DޮOz~#ҲfWD,3]}yyGe-R㶿J49n('3WHd{:I!u齍;Y-D<#^huvDxSjɍ'`+>![.;:z^4ȶ:f/?њ)϶̱a %ji I&&E*S;YUEqgɟ/$ܐ-9a5 (i1@;X(i;;1]~ņHC pN"FOU3f;)r3 TS |)vq?`P} t3_>{ow -!Gh>U[38سG"5f A1X}sf`O:ncȎSNqDdkF!vc6K)u1=x;VO|o'{o3C, T.eMyMӣTsB`霔Rvz+0g7K x|S"|Y8QqmĀe.m^uKsuɡɐG fϼpОQ,_Y9!Dܽ☥{|;:er/E^_R92&NR: ͏^&䡝*r^ "Y|A "985@gЏ=_@{Ҝw'IV3g nGpm){]"}t*c*PBclƲb̳m|%k~@`ZgsZ $` &kq7Ŷkd ьj$*xXoE壳~#[<,Z ez~HM*҉XQXl.Nή 43$u=F-3:RlǖÍ#'4zU G-\2ko׮KﺧI͞dnd" 6v Q1] Y3 , endstream endobj 60 0 obj << /Producer (pdfTeX-1.40.18) /Creator (TeX) /CreationDate (D:20171118182012+01'00') /ModDate (D:20171118182012+01'00') /Trapped /False /PTEX.Fullbanner (This is pdfTeX, Version 3.14159265-2.6-1.40.18 (TeX Live 2017/Debian) kpathsea version 6.2.3) >> endobj 14 0 obj << /Type /ObjStm /N 43 /First 324 /Length 2595 /Filter /FlateDecode >> stream xZYSI~ׯذ"c36, & mЎ@XG6>M)eWeUg,#"G0Z%\,|''",+-p$`%jaJilpaO 4rǢ$`PHx2$(]9U mQ茐 ̀4ȗQ;BW_}ڝu><qk7xژ[c`k~7om(b610m h@30A!vC=*m-j=ԽczZ2d (0X(-iCFB|q* s_=M33[#jLtpGeA+#-Md6@!It*/L۰1g<=4Ijٵm6&AVOޫeϱJBX[tz1VV~&[c#:vpl@9hWZ+?sKp b`əL}ng%FI!S9NT&;4 ˏ2,vfa J+@>rTn@ȑ! ֚Ô1*,yh]T?ŨPf5@Cqg;Wᨘ;O#ʶ"sDeD,WKϦ>ٶiNI$ynp4ZN>6htpߢ3y\{轊'Ӹ3VT c Y+e)yk-~G}1cL)|/DJD!(B)|6p{opSךrm_lNJMKLp;n~>,UK쐻c:& J L |g1BPcGU3`t)Nhg2ph 7{&y psBBQe̸@NOO-Bg8Lp4<`Xh5P.5~bSa#D-Ȯ<'T,lfK\ UhhH|,g r{Zz9^'=vӛ~~p{{|]16UhZnf58v8ovw8}TFȅ^. 
9x%ܗcz"ߐW?_Ϧ5WWtGKpǏcI>e巛UzLf$?-Iyu5f37 r9**/KY~_7M|7W䣞4,8..Gk_ 9+A3| ~Rb{{-=-hF ^WMz9u÷^el&Z`^+ƘuOFT}l\- >FgV ʠBst|:f7 酼U#HnzN9z3YD=0)i'Ϧ˶f]` vKXf, K|GN󽽽xMƸ.OKAmlP1ʱ5ң֟_۟J/jrҊ`Iv#hcȳ{ue5_AZ}ޕ{saA8 V|6Wݷ5wƳ-/zT7[p]p#t6~ܑchIOi%ͅwѡ+Ӳmewbٷ t n7ݹ oG[\IGxkб@.@wmGpKmّٝxӹw;VGxutu[8~c?X(j|>gga',:b~_0}NC endstream endobj 61 0 obj << /Type /XRef /Index [0 62] /Size 62 /W [1 3 1] /Root 59 0 R /Info 60 0 R /ID [ ] /Length 185 /Filter /FlateDecode >> stream x%9naѪ݃ fLf!HC%s. 8 3@9!FՓ<}R7;T]EW4EKEGG7@$#ۥDmDFpbȉቼx((2u Z=3wz!+|kU_1 o'7I7w;+C; endstream endobj startxref 152264 %%EOF tm/inst/texts/0000755000175100001440000000000012213264557013043 5ustar hornikuserstm/inst/texts/crude/0000755000175100001440000000000012213264556014144 5ustar hornikuserstm/inst/texts/crude/reut-00004.xml0000644000175100001440000000151512074065306016305 0ustar hornikusers 26-FEB-1987 18:18:00.84 crude canada Y E f0308 reute u f BC-TEXACO-CANADA-<TXC>-L 02-26 0064 TEXACO CANADA <TXC> LOWERS CRUDE POSTINGS NEW YORK, Feb 26 - Texaco Canada said it lowered the contract price it will pay for crude oil 64 Canadian cts a barrel, effective today. The decrease brings the company's posted price for the benchmark grade, Edmonton/Swann Hills Light Sweet, to 22.26 Canadian dlrs a bbl. Texaco Canada last changed its crude oil postings on Feb 19. Reuter tm/inst/texts/crude/reut-00009.xml0000644000175100001440000000270012074065306016307 0ustar hornikusers 1-MAR-1987 05:27:27.17 crude bahrain saudi-arabia opec RM f0401 reute u f BC-SAUDI-RIYAL-DEPOSIT-R 03-01 0108 SAUDI RIYAL DEPOSIT RATES REMAIN FIRM BAHRAIN, March 1 - Saudi riyal interbank deposits were steady at yesterday's higher levels in a quiet market. Traders said they were reluctant to take out new positions amidst uncertainty over whether OPEC will succeed in halting the current decline in oil prices. 
Oil industry sources said yesterday several Gulf Arab producers had had difficulty selling oil at official OPEC prices but Kuwait has said there are no plans for an emergency meeting of the 13-member organisation. A traditional Sunday lull in trading due to the European weekend also contributed to the lack of market activity. Spot-next and one-week rates were put at 6-1/4, 5-3/4 pct after quotes ranging between seven, six yesterday. One, three, and six-month deposits were quoted unchanged at 6-5/8, 3/8, 7-1/8, 6-7/8 and 7-3/8, 1/8 pct respectively. The spot riyal was quietly firmer at 3.7495/98 to the dollar after quotes of 3.7500/03 yesterday. REUTER tm/inst/texts/crude/reut-00008.xml0000644000175100001440000000654412074065306016320 0ustar hornikusers 1-MAR-1987 03:39:14.63 crude indonesia usa worldbank RM f0379 reute u f BC-INDONESIA-SEEN-AT-CRO 03-01 0107 INDONESIA SEEN AT CROSSROADS OVER ECONOMIC CHANGE By Jeremy Clift, Reuters JAKARTA, March 1 - Indonesia appears to be nearing a political crossroads over measures to deregulate its protected economy, the U.S. Embassy says in a new report. To counter falling oil revenues, the government has launched a series of measures over the past nine months to boost exports outside the oil sector and attract new investment. Indonesia, the only Asian member of OPEC and a leading primary commodity producer, has been severely hit by last year"s fall in world oil prices, which forced it to devalue its currency by 31 pct in September. But the U.S. Embassy report says President Suharto"s government appears to be divided over what direction to lead the economy. "(It) appears to be nearing a crossroads with regard to deregulation, both as it pertains to investments and imports," the report says. It primarily assesses Indonesia"s agricultural sector, but also reviews the country"s general economic performance. 
It says that while many government officials and advisers are recommending further relaxation, "there are equally strong pressures being exerted to halt all such moves." "This group strongly favours an import substitution economy," the report says. Indonesia"s economic changes have been welcomed by the World Bank and international bankers as steps in the right direction, though they say crucial areas of the economy like plastics and steel remain highly protected, and virtual monopolies. Three sets of measures have been announced since last May, which broadened areas for foreign investment, reduced trade restrictions and liberalised imports. The report says Indonesia"s economic growth in calendar 1986 was probably about zero, and the economy may even have contracted a bit. "This is the lowest rate of growth since the mid-1960s," the report notes. Indonesia, the largest country in South-East Asia with a population of 168 million, is facing general elections in April. But the report hold out little hope for swift improvement in the economic outlook. "For 1987 early indications point to a slightly positive growth rate not exceeding one pct. Economic activity continues to suffer due to the sharp fall in export earnings from the petroleum industry." "Growth in the non-oil sector is low because of weak domestic demand coupled with excessive plant capacity, real declines in construction and trade, and a reduced level of growth in agriculture," the report states. Bankers say continuation of present economic reforms is crucial for the government to get the international lending its needs. A new World Bank loan of 300 mln dlrs last month in balance of payments support was given partly to help the government maintain the momentum of reform, the Bank said. 
REUTER tm/inst/texts/crude/reut-00014.xml0000644000175100001440000000227212074065306016307 0ustar hornikusers 2-MAR-1987 07:43:22.81 crude saudi-arabia bahrain hisham-nazer opec F f0161 reute r f AM-OIL-SAUDI 03-02 0114 SAUDI ARABIA REITERATES COMMITMENT TO OPEC ACCORD BAHRAIN, March 2 - Saudi Arabian Oil Minister Hisham Nazer reiterated the kingdom's commitment to last December's OPEC accord to boost world oil prices and stabilize the market, the official Saudi Press Agency SPA said. Asked by the agency about the recent fall in free market oil prices, Nazer said Saudi Arabia "is fully adhering by the ... accord and it will never sell its oil at prices below the pronounced prices under any circumstance." Saudi Arabia was a main architect of December pact under which OPEC agreed to cut its total oil output ceiling by 7.25 pct and return to fixed prices of around 18 dollars a barrel. Reuter tm/inst/texts/crude/reut-00001.xml0000644000175100001440000000200512074065306016275 0ustar hornikusers 26-FEB-1987 17:00:56.04 crude usa Y f0119 reute u f BC-DIAMOND-SHAMROCK-(DIA 02-26 0097 DIAMOND SHAMROCK (DIA) CUTS CRUDE PRICES NEW YORK, FEB 26 - Diamond Shamrock Corp said that effective today it had cut its contract prices for crude oil by 1.50 dlrs a barrel. The reduction brings its posted price for West Texas Intermediate to 16.00 dlrs a barrel, the copany said. "The price reduction today was made in the light of falling oil product prices and a weak crude oil market," a company spokeswoman said. Diamond is the latest in a line of U.S. oil companies that have cut its contract, or posted, prices over the last two days citing weak oil markets. 
Reuter tm/inst/texts/crude/reut-00022.xml0000644000175100001440000000453612074065306016313 0ustar hornikusers 2-MAR-1987 14:38:34.72 crude usa nymex Y f0753 reute r f BC-NYMEX-WILL-EXPAND-OFF 03-02 0103 NYMEX WILL EXPAND OFF-HOUR TRADING APRIL ONE By BERNICE NAPACH, Reuters NEW YORK, March 2 - The New York Mercantile Exchange set April one for the debut of a new procedure in the energy complex that will increase the use of energy futures worldwide. On April one, NYMEX will allow oil traders that do not hold a futures position to initiate, after the exchange closes, a transaction that can subsequently be hedged in the futures market, according to an exchange spokeswoman. "This will change the way oil is transacted in the real world," said said Thomas McKiernan, McKiernan and Co chairman. Foreign traders will be able to hedge trades against NYMEX prices before the exchange opens and negotiate prices at a differential to NYMEX prices, McKiernan explained. The expanded program "will serve the industry because the oil market does not close when NYMEX does," said Frank Capozza, secretary of Century Resources Inc. The rule change, which has already taken effect for platinum futures on NYMEX, is expected to increase the open interest and liquidity in U.S. energy futures, according to traders and analysts. Currently, at least one trader in this transaction, called an exchange for physical or EFP, must hold a futures position before entering into the transaction. Under the new arrangement, neither party has to hold a futures position before entering into an EFP and one or both parties can offset their cash transaction with a futures contract the next day, according to exchange officials. When NYMEX announced its proposed rule change in December, NYMEX President Rosemary McFadden, said, "Expansion of the EFP provision will add to globalization of the energy markets by providing for, in effect, 24-hour trading." 
The Commodity Futures Trading Commission approved the rule change in February, according to a CFTC spokeswoman. Reuter tm/inst/texts/crude/reut-00007.xml0000644000175100001440000000647212074065306016317 0ustar hornikusers 1-MAR-1987 03:25:46.85 crude kuwait ecuador opec RM f0374 reute b f BC-KUWAIT-SAYS-NO-PLANS 03-01 0091 KUWAIT SAYS NO PLANS FOR EMERGENCY OPEC TALKS KUWAIT, March 1 - Kuwait"s Oil Minister, in remarks published today, said there were no plans for an emergency OPEC meeting to review oil policies after recent weakness in world oil prices. Sheikh Ali al-Khalifa al-Sabah was quoted by the local daily al-Qabas as saying: "None of the OPEC members has asked for such a meeting." He denied Kuwait was pumping above its quota of 948,000 barrels of crude daily (bpd) set under self-imposed production limits of the 13-nation organisation. Traders and analysts in international oil markets estimate OPEC is producing up to one mln bpd above a ceiling of 15.8 mln bpd agreed in Geneva last December. They named Kuwait and the United Arab Emirates, along with the much smaller producer Ecuador, among those producing above quota. Kuwait, they said, was pumping 1.2 mln bpd. "This rumour is baseless. It is based on reports which said Kuwait has the ability to exceed its share. They suppose that because Kuwait has the ability, it will do so," the minister said. Sheikh Ali has said before that Kuwait had the ability to produce up to 4.0 mln bpd. "If we can sell more than our quota at official prices, while some countries are suffering difficulties marketing their share, it means we in Kuwait are unusually clever," he said. He was referring apparently to the Gulf state of qatar, which industry sources said was selling less than 180,000 bpd of its 285,000 bpd quota, because buyers were resisting official prices restored by OPEC last month pegged to a marker of 18 dlrs per barrel. 
Prices in New York last week dropped to their lowest levels this year and almost three dollars below a three-month high of 19 dollars a barrel. Sheikh Ali also delivered "a challenge to any international oil company that declared Kuwait sold below official prices." Because it was charging its official price, of 16.67 dlrs a barrel, it had lost custom, he said but did not elaborate. However, Kuwait had guaranteed markets for its oil because of its local and international refining facilities and its own distribution network abroad, he added. He reaffirmed that the planned meeting March 7 of OPEC"s differentials committee has been postponed until the start of April at the request of certain of the body"s members. Ecuador"s deputy energy minister Fernando Santos Alvite said last Wednesday his debt-burdened country wanted OPEC to assign a lower official price for its crude, and was to seek this at talks this month of opec"s pricing committee. Referring to pressure by oil companies on OPEC members, in apparent reference to difficulties faced by Qatar, he said: "We expected such pressure. It will continue through March and April." But he expected the situation would later improve. REUTER tm/inst/texts/crude/reut-00002.xml0000644000175100001440000000634512074065306016311 0ustar hornikusers 26-FEB-1987 17:34:11.89 crude usa opec Y f0189 reute r f BC-/OPEC-MAY-HAVE-TO-MEE 02-26 0105 OPEC MAY HAVE TO MEET TO FIRM PRICES - ANALYSTS BY TED D'AFFLISIO, Reuters NEW YORK, Feb 26 - OPEC may be forced to meet before a scheduled June session to readdress its production cutting agreement if the organization wants to halt the current slide in oil prices, oil industry analysts said. "The movement to higher oil prices was never to be as easy as OPEC thought. They may need an emergency meeting to sort out the problems," said Daniel Yergin, director of Cambridge Energy Research Associates, CERA. 
Analysts and oil industry sources said the problem OPEC faces is excess oil supply in world oil markets. "OPEC's problem is not a price problem but a production issue and must be addressed in that way," said Paul Mlotok, oil analyst with Salomon Brothers Inc. He said the market's earlier optimism about OPEC and its ability to keep production under control have given way to a pessimistic outlook that the organization must address soon if it wishes to regain the initiative in oil prices. But some other analysts were uncertain that even an emergency meeting would address the problem of OPEC production above the 15.8 mln bpd quota set last December. "OPEC has to learn that in a buyers market you cannot have deemed quotas, fixed prices and set differentials," said the regional manager for one of the major oil companies who spoke on condition that he not be named. "The market is now trying to teach them that lesson again," he added. David T. Mizrahi, editor of Mideast reports, expects OPEC to meet before June, although not immediately. However, he is not optimistic that OPEC can address its principal problems. "They will not meet now as they try to take advantage of the winter demand to sell their oil, but in late March and April when demand slackens," Mizrahi said. But Mizrahi said that OPEC is unlikely to do anything more than reiterate its agreement to keep output at 15.8 mln bpd." Analysts said that the next two months will be critical for OPEC's ability to hold together prices and output. "OPEC must hold to its pact for the next six to eight weeks since buyers will come back into the market then," said Dillard Spriggs of Petroleum Analysis Ltd in New York. But Bijan Moussavar-Rahmani of Harvard University's Energy and Environment Policy Center said that the demand for OPEC oil has been rising through the first quarter and this may have prompted excesses in its production. 
"Demand for their (OPEC) oil is clearly above 15.8 mln bpd and is probably closer to 17 mln bpd or higher now so what we are seeing characterized as cheating is OPEC meeting this demand through current production," he told Reuters in a telephone interview. Reuter tm/inst/texts/crude/reut-00023.xml0000644000175100001440000000156512074065306016313 0ustar hornikusers 2-MAR-1987 14:49:06.33 crude nat-gas argentina Y f0783 reute u f BC-ARGENTINE-OIL-PRODUCT 03-02 0071 ARGENTINE OIL PRODUCTION DOWN IN JANUARY 1987 BUENOS AIRES, March 2 - Argentine crude oil production was down 10.8 pct in January 1987 to 12.32 mln barrels, from 13.81 mln barrels in January 1986, Yacimientos Petroliferos Fiscales said. January 1987 natural gas output totalled 1.15 billion cubic metrers, 3.6 pct higher than 1.11 billion cubic metres produced in January 1986, Yacimientos Petroliferos Fiscales added. Reuter tm/inst/texts/crude/reut-00016.xml0000644000175100001440000000216612074065306016313 0ustar hornikusers 2-MAR-1987 08:25:42.14 crude ship usa Y F f0300 reute u f BC-PHILADELPHIA-PORT-CLO 03-02 0115 PHILADELPHIA PORT CLOSED BY TANKER CRASH PHILADELPHIA, March 2 - The port of Philadelphia was closed when a Cypriot oil tanker, Seapride II, ran aground after hitting a 200-foot tower supporting power lines across the river, a Coast Guard spokesman said. He said there was no oil spill but the ship is lodged on rocks opposite the Hope Creek nuclear power plant in New Jersey. He said the port would be closed until today when they hoped to refloat the ship on the high tide. After delivering oil to a refinery in Paulsboro, New Jersey, the ship apparently lost its steering and hit the power transmission line carrying power from the nuclear plant to the state of Delaware. 
Reuter tm/inst/texts/crude/reut-00005.xml0000644000175100001440000000157412074065306016313 0ustar hornikusers 26-FEB-1987 18:21:01.50 crude usa Y f0313 reute u f BC-MARATHON-PETROLEUM-RE 02-26 0075 MARATHON PETROLEUM REDUCES CRUDE POSTINGS NEW YORK, Feb 26 - Marathon Petroleum Co said it reduced the contract price it will pay for all grades of crude oil one dlr a barrel, effective today. The decrease brings Marathon's posted price for both West Texas Intermediate and West Texas Sour to 16.50 dlrs a bbl. The South Louisiana Sweet grade of crude was reduced to 16.85 dlrs a bbl. The company last changed its crude postings on Jan 12. Reuter tm/inst/texts/crude/reut-00011.xml0000644000175100001440000000521412074065306016303 0ustar hornikusers 1-MAR-1987 18:31:44.74 crude bahrain saudi-arabia hisham-nazer opec RM f0427 reute b f BC-SAUDI-ARABIA-REITERAT 03-01 0084 SAUDI ARABIA REITERATES COMMITMENT TO OPEC PACT BAHRAIN, March 1 - Saudi Arabian Oil Minister Hisham Nazer reiterated the kingdom's commitment to last December's OPEC accord to boost world oil prices and stabilise the market, the official Saudi Press Agency SPA said. Asked by the agency about the recent fall in free market oil prices, Nazer said Saudi Arabia "is fully adhering by the ... Accord and it will never sell its oil at prices below the pronounced prices under any circumstance." Nazer, quoted by SPA, said recent pressure on free market prices "may be because of the end of the (northern hemisphere) winter season and the glut in the market." Saudi Arabia was a main architect of the December accord, under which OPEC agreed to lower its total output ceiling by 7.25 pct to 15.8 mln barrels per day (bpd) and return to fixed prices of around 18 dlrs a barrel. The agreement followed a year of turmoil on oil markets, which saw prices slump briefly to under 10 dlrs a barrel in mid-1986 from about 30 dlrs in late 1985. Free market prices are currently just over 16 dlrs. 
Nazer was quoted by the SPA as saying Saudi Arabia's adherence to the accord was shown clearly in the oil market. He said contacts among members of OPEC showed they all wanted to stick to the accord. In Jamaica, OPEC President Rilwanu Lukman, who is also Nigerian Oil Minister, said the group planned to stick with the pricing agreement. "We are aware of the negative forces trying to manipulate the operations of the market, but we are satisfied that the fundamentals exist for stable market conditions," he said. Kuwait's Oil Minister, Sheikh Ali al-Khalifa al-Sabah, said in remarks published in the emirate's daily Al-Qabas there were no plans for an emergency OPEC meeting to review prices. Traders and analysts in international oil markets estimate OPEC is producing up to one mln bpd above the 15.8 mln ceiling. They named Kuwait and the United Arab Emirates, along with the much smaller producer Ecuador, among those producing above quota. Sheikh Ali denied that Kuwait was over-producing. REUTER tm/inst/texts/crude/reut-00015.xml0000644000175100001440000000213712074065306016310 0ustar hornikusers 2-MAR-1987 07:43:41.57 crude kuwait opec V f0163 reute r f BC-OIL-KUWAIT 03-02 0109 KUWAIT MINISTER SAYS NO EMERGENCY OPEC TALKS SET KUWAIT, March 2 - Kuwait's oil minister said in a newspaper interview that there were no plans for an emergency OPEC meeting after the recent weakness in world oil prices. Sheikh Ali al-Khalifa al-Sabah was quoted by the local daily al-Qabas as saying that "none of the OPEC members has asked for such a meeting." He also denied that Kuwait was pumping above its OPEC quota of 948,000 barrels of crude daily (bpd). Crude oil prices fell sharply last week as international oil traders and analysts estimated the 13-nation OPEC was pumping up to one million bpd over its self-imposed limits. 
Reuter tm/inst/texts/crude/reut-00010.xml0000644000175100001440000000511712074065306016304 0ustar hornikusers 1-MAR-1987 08:22:30.94 crude qatar RM f0413 reute u f BC-QATAR-UNVEILS-BUDGET 03-01 0111 QATAR UNVEILS BUDGET FOR FISCAL 1987/88 DOHA, March 1 - The Gulf oil state of Qatar, recovering slightly from last year's decline in world oil prices, announced its first budget since early 1985 and projected a deficit of 5.472 billion riyals. The deficit compared with a shortfall of 7.3 billion riyals in the last published budget for 1985/86. In a statement outlining the budget for the fiscal year 1987/88 beginning today, Finance and Petroleum Minister Sheikh Abdul-Aziz bin Khalifa al-Thani said the government expected to spend 12.217 billion riyals in the period. Projected expenditure in the 1985/86 budget had been 15.6 billion riyals. Sheikh Abdul-Aziz said government revenue would be about 6.745 billion riyals, down by about 30 pct on the 1985/86 projected revenue of 9.7 billion. The government failed to publish a 1986/87 budget due to uncertainty surrounding oil revenues. Sheikh Abdul-Aziz said that during that year the government decided to limit recurrent expenditure each month to one-twelfth of the previous fiscal year's allocations minus 15 pct. He urged heads of government departments and public institutions to help the government rationalise expenditure. He did not say how the 1987/88 budget shortfall would be covered. Sheikh Abdul-Aziz said plans to limit expenditure in 1986/87 had been taken in order to relieve the burden placed on the country's foreign reserves. He added in 1987/88 some 2.766 billion riyals had been allocated for major projects including housing and public buildings, social services, health, education, transport and communications, electricity and water, industry and agriculture. No figure was revealed for expenditure on defence and security. There was also no projection for oil revenue. 
Qatar, an OPEC member, has an output ceiling of 285,000 barrels per day. Sheikh Abdul-Aziz said: "Our expectations of positive signs regarding (oil) price trends, foremost among them OPEC's determination to shoulder its responsibilites and protect its wealth, have helped us make reasonable estimates for the coming year's revenue on the basis of our assigned quota." REUTER tm/inst/texts/crude/reut-00012.xml0000644000175100001440000000543412074065306016310 0ustar hornikusers 2-MAR-1987 01:05:49.72 crude saudi-arabia uae opec RM f0600 reute b f BC-SAUDI-FEBRUARY-CRUDE 03-02 0095 SAUDI FEBRUARY CRUDE OUTPUT PUT AT 3.5 MLN BPD ABU DHABI, March 2 - Saudi crude oil output last month fell to an average of 3.5 mln barrels per day (bpd) from 3.8 mln bpd in January, Gulf oil sources said. They said exports from the Ras Tanurah and Ju'aymah terminals in the Gulf fell to an average 1.9 mln bpd last month from 2.2 mln in January because of lower liftings by some customers. But the drop was much smaller than expected after Gulf exports rallied in the fourth week of February to 2.5 mln bpd from 1.2 mln in the third week, the sources said. The production figures include neutral zone output but not sales from floating storage, which are generally considered part of a country's output for Opec purposes. Saudi Arabia has an Opec quota of 4.133 mln bpd under a production restraint scheme approved by the 13-nation group last December to back new official oil prices averaging 18 dlrs a barrel. The sources said the two-fold jump in exports last week appeared to be the result of buyers rushing to lift February entitlements before the month-end. Last week's high export levels appeared to show continued support for official Opec prices from Saudi Arabia's main crude customers, the four ex-partners of Aramco, the sources said. 
The four -- Exxon Corp <XON>, Mobil Corp <MOB>, Texaco Inc <TX> and Chevron Corp <CHV> -- signed a long-term agreement last month to buy Saudi crude for 17.52 dlrs a barrel. However the sources said the real test of Saudi Arabia's ability to sell crude at official prices in a weak market will come this month, when demand for petroleum products traditionally tapers off. Spot prices have fallen in recent weeks to more than one dlr below Opec levels. Saudi Arabian oil minister Hisham Nazer yesterday reiterated the kingdom's commitment to the December OPEC accord and said it would never sell below official prices. The sources said total Saudi refinery throughput fell slightly in February to an average 1.1 mln bpd from 1.2 mln in January because of cuts at the Yanbu and Jubail export refineries. They put crude oil exports through Yanbu at 100,000 bpd last month, compared to zero in January, while throughput at Bahrain's refinery and neutral zone production remained steady at around 200,000 bpd each. REUTER tm/inst/texts/crude/reut-00006.xml0000644000175100001440000000204712074065306016310 0ustar hornikusers 26-FEB-1987 19:00:57.33 crude usa F Y f0379 reute d f BC-HOUSTON-OIL-<HO>-RESE 02-26 0101 HOUSTON OIL <HO> RESERVES STUDY COMPLETED HOUSTON, Feb 26 - Houston Oil Trust said that independent petroleum engineers completed an annual study that estimates the trust's future net revenues from total proved reserves at 88 mln dlrs and its discounted present value of the reserves at 64 mln dlrs. Based on the estimate, the trust said there may be no money available for cash distributions to unitholders for the remainder of the year. It said the estimates reflect a decrease of about 44 pct in net reserve revenues and 39 pct in discounted present value compared with the study made in 1985. 
Reuter tm/inst/texts/crude/reut-00013.xml0000644000175100001440000000225012074065306016302 0ustar hornikusers 2-MAR-1987 07:39:23.30 crude uae bahrain saudi-arabia kuwait qatar opec V f0149 reute r f BC-GULF-ARAB-DEPUTY-OIL 03-02 0110 GULF ARAB DEPUTY OIL MINISTERS TO MEET IN BAHRAIN ABU DHABI, March 2 - Deputy oil ministers from six Gulf Arab states will meet in Bahrain today to discuss coordination of crude oil marketing, the official Emirates news agency WAM reported. WAM said the officials would be discussing implementation of last Sunday's agreement in Doha by Gulf Cooperation Council (GCC) oil ministers to help each other market their crude oil. Four of the GCC states - Saudi Arabia, the United Arab Emirates (UAE), Kuwait and Qatar - are members of the Organiaation of Petroleum Exporting Countries (OPEC) and some face stiff buyer resistance to official OPEC prices. Reuter tm/inst/texts/crude/reut-00019.xml0000644000175100001440000000320212074065306016306 0ustar hornikusers 2-MAR-1987 11:28:26.03 crude usa Y f0976 reute d f BC-STUDY-GROUP-URGES-INC 03-02 0099 STUDY GROUP URGES INCREASED U.S. OIL RESERVES WASHINGTON, March 2 - A study group said the United States should increase its strategic petroleum reserve to one mln barrels as one way to deal with the present and future impact of low oil prices on the domestic oil industry. U.S. policy now is to raise the strategic reserve to 750 mln barrels, from its present 500 mln, to help protect the economy from an overseas embargo or a sharp price rise. The Aspen Institute for Humanistic Studies, a private group, also called for new research for oil exploration and development techniques. It predicted prices would remain at about 15-18 dlrs a barrel for several years and then rise to the mid 20s, with imports at about 30 pct of U.S. consumption. The study cited two basic policy paths for the nation: to protect the U.S. 
industry through an import fee or other such device or to accept the full economic benefits of cheap oil. But the group did not strongly back either option, saying there were benefits and drawbacks to both. It said instead that such moves as increasing oil reserves and more exploration and development research would help to guard against or mitigate the risks of increased imports. Reuter tm/inst/texts/crude/reut-00021.xml0000644000175100001440000000173512074065306016310 0ustar hornikusers 2-MAR-1987 12:13:46.82 crude usa Y F f0206 reute r f BC-UNOCAL-<UCL>-UNIT-CUT 03-02 0088 UNOCAL <UCL> UNIT CUTS CRUDE OIL POSTED PRICES LOS ANGELES, March 2 - Unocal Corp's Union Oil Co said it lowered its posted prices for crude oil one to 1.50 dlrs a barrel in the eastern region of the U.S., effective Feb 26. Union said a 1.50 dlrs cut brings its posted price for the U.S. benchmark grade, West Texas Intermediate, to 16 dlrs. Louisiana Sweet also was lowered 1.50 dlrs to 16.35 dlrs, the company said. No changes were made in Union's posted prices for West Coast grades of crude oil, the company said. Reuter tm/inst/texts/crude/reut-00018.xml0000644000175100001440000000254012074065306016311 0ustar hornikusers 2-MAR-1987 11:20:05.52 crude usa C f0937 reute d f BC-STUDY-GROUP-URGES-INC 03-02 0156 STUDY GROUP URGES INCREASED U.S. OIL RESERVES WASHINGTON, March 2 - A study group said the United States should increase its strategic petroleum reserve to one mln barrels as one way to deal with the present and future impact of low oil prices on the domestic oil industry. U.S. policy now is to raise the strategic reserve to 750 mln barrels, from its present 500 mln, to help protect the economy from an overseas embargo or a sharp price rise. The Aspen Institute for Humanistic Studies, a private group, also called for new research for oil exploration and development techniques. 
It predicted prices would remain at about 15-18 dlrs a barrel for several years and then rise to the mid 20s, with imports at about 30 pct of U.S. consumption. It said instead that such moves as increasing oil reserves and more exploration and development research would help to guard against or mitigate the risks of increased imports. Reuter tm/inst/texts/acq/0000755000175100001440000000000012213264556013606 5ustar hornikuserstm/inst/texts/acq/reut-00042.xml0000644000175100001440000000217712074065306015756 0ustar hornikusers 2-MAR-1987 09:49:48.14 acq usa F f0554 reute u f BC-DIAGNOSTIC-<DRS>-MAKE 03-02 0115 DIAGNOSTIC <DRS> MAKES A BID FOR ROSPATCH <RPCH> OAKLAND , N.J., March 2 - Diagnostic Retrieval Systems Inc said it has made an offer to acquire, through a wholly owned unit, all outstanding shares of Rospatch Corp's common stock for 22 dlrs a share cash, or about 53 mln dlrs. DRS, a warfare systems producer, said it would make the transaction through a cash tender offer for all, but not less than 51 pct, of Rospatch's outstanding common stock followed by a merger with Rospatch, a labels, high technology and wood producer, at the same purchase price per share. DRS said the deal is subject to approval by the Rospatch board, and the tender offer expires on March 6, 1986. Reuter tm/inst/texts/acq/reut-00004.xml0000644000175100001440000000562412074065306015754 0ustar hornikusers 26-FEB-1987 15:51:17.84 acq usa F f0881 reute u f BC-CHEMLAWN-<CHEM>-RISES 02-26 0106 CHEMLAWN <CHEM> RISES ON HOPES FOR HIGHER BIDS By Cal Mankowski, Reuters NEW YORK, Feb 26 - ChemLawn Corp <CHEM> could attract a higher bid than the 27 dlrs per share offered by Waste Management Inc <WNX>, Wall Street arbitrageurs said. Shares of ChemLawn shot up 11-5/8 to 29-3/8 in over-the-counter- trading with 3.8 mln of the company's 10.1 mln shares changing hands by late afternoon. 
"This company could go for 10 times cash flow or 30 dlrs, maybe 32 dollars depending on whether there is a competing bidder," an arbitrageur said. Waste Management's tender offer, announced before the opening today, expires March 25. "This is totally by surprise," said Debra Strohmaier, a ChemLawn spokeswoman. The company's board held a regularly scheduled meeting today and was discussing the Waste Management announcement. She said a statement was expected but it was not certain when it would be ready. She was unable to say if there had been any prior contact between Waste Management and ChemLawn officials. "I think they will resist it," said Elliott Schlang, analyst at Prescott, Ball and Turben Inc. "Any company that doesn't like a surprise attack would." Arbitrageurs pointed out it is difficult to resist tender offers for any and all shares for cash. Schlang said ChemLawn could try to find a white knight if does not want to be acquired by Waste Management. Analyst Rosemarie Morbelli of Ingalls and Snyder said ServiceMaster Companies L.P. <SVM> or Rollins Inc <ROL> were examples of companies that could be interested. ChemLawn, with about two mln customers, is the largest U.S. company involved in application of fertilizers, pesticides and herbicides on lawns. Waste Management is involved in removal of wastes. Schlang said ChemLawn's customer base could be valuable to another company that wants to capitalize on a strong residential and commercial distribution system. Both Schlang and Morbelli noted that high growth rates had catapulted ChemLawn's share price into the mid-30's in 1983 but the stock languished as the rate of growth slowed. Schlang said the company's profits are concentrated in the fourth quarter. In 1986 ChemLawn earned 1.19 dlrs per share for the full year, and 2.58 dlrs in the fourth quarter. Morbelli noted ChemLawn competes with thousands of individual entrepreuers who offer lawn and garden care sevice. 
Reuter tm/inst/texts/acq/reut-00035.xml0000644000175100001440000000222212074065306015747 0ustar hornikusers 2-MAR-1987 09:03:18.94 acq uk usa F f0414 reute d f BC-SENIOR-ENGINEERING-MA 03-02 0117 SENIOR ENGINEERING MAKES 12.5 MLN DLR US PURCHASE LONDON, March 2 - <Senior Engineering Group Plc> said it reached agreement with <Cronus Industries Inc> to acquire the whole share capital of <South Western Engineering Co> for 12.5 mln dlrs cash. This sum is being financed by a term loan. South Western is one of the U.S.'s leading manufacturers of heat transfer equipment, with a turnover of 54.86 mln dlrs and pre-tax profits of 1.72 mln in 1986. Completion of the deal is conditional on approval under U.S. Hart-Scott-Rodino regulations which is expected within 30 days. Some 350,000 dlrs is payable immediately, 12 mln dlrs payable on completion with the balance due by June 30, 1987. Reuter tm/inst/texts/acq/reut-00024.xml0000644000175100001440000000212512074065306015747 0ustar hornikusers 2-MAR-1987 06:58:00.68 acq usa uk F f0032 reute u f BC-COLOROLL-AGREES-TO-BU 03-02 0109 COLOROLL AGREES TO BUY U.S. WALLCOVERINGS COMPANY LONDON, March 2 - <Coloroll Group Plc> said it has entered into a conditional agreement to acquire the business and assets of <Wallco Inc> and related companies for 14.5 mln dlrs. Miami-based Wallco manufactures and distributes wallcoverings and showed a pretax profit of 1.5 mln dlrs on turnover of 37 mln in the year ending June 1986. The total U.S. Market was estimated to be worth 840 mln dlrs in 1986, having grown by 47 pct in the previous five years, Coloroll said. The combined sales and profit of the enlarged Coloroll U.S. Business would be 67 mln and four mln dlrs respectively. 
REUTER tm/inst/texts/acq/reut-00009.xml0000644000175100001440000000136512074065306015757 0ustar hornikusers 26-FEB-1987 17:01:28.10 acq usa F f0121 reute u f BC-LIEBERT-CORP-<LIEB>-A 02-26 0051 LIEBERT CORP <LIEB> APPROVES MERGER COLUMBUS, Ohio, Feb 26 - Liebert Corp said its shareholders approved the merger of a wholly-owned subsidiary of Emerson Electric Co <EMR>. Under the terms of the merger, each Liebert shareholder will receive .3322 shares of Emerson stock for each Liebert share. Reuter tm/inst/texts/acq/reut-00031.xml0000644000175100001440000000133712074065306015751 0ustar hornikusers 2-MAR-1987 08:41:41.32 acq usa F f0358 reute r f BC-FINANCIAL-SANTA-BARBA 03-02 0048 FINANCIAL SANTA BARBARA <FSB> TO MAKE PURCHASE SANTA BARBARA, Calif., March 2 - Financial Corp of Santa Barbara said it has signed a definitive agreement to purchase Stanwell Financial, the lending operations unit of mortgage banking company <Stanwell Mortgage>, for undisclosed terms. Reuter tm/inst/texts/acq/reut-00056.xml0000644000175100001440000000313212074065306015753 0ustar hornikusers 2-MAR-1987 11:29:26.84 acq usa F f0981 reute r f BC-CARBIDE-<UK>-LOOKS-TO 03-02 0095 CARBIDE <UK> LOOKS TO ACQUISITIONS FOR GROWTH NEW YORK, March 2 - Union Carbide Corp is looking to acquisitions and joint ventures to aid its chemicals and plastics growth, according the H.W. Lichtenberger, president of Chemicals and Plastics. Describing this as a major departure in the company's approach to commercial development, he told the annual new business forum of the Commercial Development Association "We are looking to acquisitions and joint ventures when they look like the fastest and most promising routes to the growth markets we've identified." Not very long ago Union Carbide had the attitude "that if we couldn't do it ourselves, it wasn't worth doing. Or, if it was worth doing, we had to go it alone," Lichtenberger explained. 
He said "there are times when exploiting a profitable market is done best with a partner. Nor do we see any need to plow resources into a technology we may not have if we can link up profitably with someone who is already there." He said Carbide has extended its catalyst business that way and is now extending its specialty chemicals business in the same way. Reuter tm/inst/texts/acq/reut-00051.xml0000644000175100001440000000203412074065306015746 0ustar hornikusers 2-MAR-1987 10:59:28.36 acq usa sweden F f0833 reute r f BC-ESSELTE-BUSINESS-<ESB 03-02 0097 ESSELTE BUSINESS <ESB> UNIT BUYS ANTONSON UNIT GARDEN CITY, N.Y., March 2 - Esselte Business Systems Inc's Esselte Meto division said it has acquired the Antonson America Co, a subsidiary of <Antonson Machines AB>, of Sweden. Esselte said the Antonson unit, based in LaPorte, Indiana, manufactures scales and label printers. The company said the purchase is part of a plan to increase the range of retail electronic scales being offered by Esselte in the U.S. It said the acquisition will enble Esselte to increase its distribution base in its effort to grow in the U.S. Reuter tm/inst/texts/acq/reut-00008.xml0000644000175100001440000000207412074065306015754 0ustar hornikusers 26-FEB-1987 16:59:25.38 acq usa F f0116 reute d f BC-WRATHER 02-26 0109 HONG KONG FIRM UPS WRATHER<WCO> STAKE TO 11 PCT WASHINGTON, Feb 26 - Industrial Equity (Pacific) Ltd, a Hong Kong investment firm, said it raised its stake in Wrather Corp to 816,000 shares, or 11.3 pct of the total outstanding common stock, from 453,300 shares, or 6.3 pct. In a filing with the Securities and Exchange Commission, Industrial Equity, which is principally owned by Brierley Investments Ltd, a publicly held New Zealand company, said it bought 362,700 Wrather common shares between Feb 13 and 24 for 6.6 mln dlrs. When it first disclosed its stake in Wrather earlier this month, it said it bought the stock for investment purposes. 
Reuter tm/inst/texts/acq/reut-00014.xml0000644000175100001440000000136312074065306015751 0ustar hornikusers 26-FEB-1987 17:43:59.12 acq usa F f0235 reute h f BC-SUFFIELD-FINANCIAL-<S 02-26 0050 SUFFIELD FINANCIAL <SSBK> GETS FED APPROVAL SUFFIELD, Conn., Feb 26 - Suffield Financial Corp said the Federal Reserve Board approved its application to acquire Coastal Bancorp <CSBK>, Portland, Me. Suffield said it still needs the approval of the superintendent of Maine's banking department. Reuter tm/inst/texts/acq/reut-00001.xml0000644000175100001440000000343012074065306015742 0ustar hornikusers 26-FEB-1987 15:18:06.67 acq usa F f0767 reute d f BC-COMPUTER-TERMINAL-SYS 02-26 0107 COMPUTER TERMINAL SYSTEMS <CPML> COMPLETES SALE COMMACK, N.Y., Feb 26 - Computer Terminal Systems Inc said it has completed the sale of 200,000 shares of its common stock, and warrants to acquire an additional one mln shares, to <Sedio N.V.> of Lugano, Switzerland for 50,000 dlrs. The company said the warrants are exercisable for five years at a purchase price of .125 dlrs per share. Computer Terminal said Sedio also has the right to buy additional shares and increase its total holdings up to 40 pct of the Computer Terminal's outstanding common stock under certain circumstances involving change of control at the company. The company said if the conditions occur the warrants would be exercisable at a price equal to 75 pct of its common stock's market price at the time, not to exceed 1.50 dlrs per share. Computer Terminal also said it sold the technolgy rights to its Dot Matrix impact technology, including any future improvements, to <Woodco Inc> of Houston, Tex. for 200,000 dlrs. But, it said it would continue to be the exclusive worldwide licensee of the technology for Woodco. The company said the moves were part of its reorganization plan and would help pay current operation costs and ensure product delivery. 
Computer Terminal makes computer generated labels, forms, tags and ticket printers and terminals. Reuter tm/inst/texts/acq/reut-00022.xml0000644000175100001440000000166512074065306015755 0ustar hornikusers 2-MAR-1987 05:48:46.98 acq usa uk F f0923 reute u f BC-SALE-TILNEY-BUYS-STAK 03-02 0083 SALE TILNEY BUYS STAKE IN U.S. INSURANCE BROKER LONDON, March 2 - <Sale Tilney Plc> said it has purchased 80 pct of the ordinary share capital of <B and R International Inc.>, a U.S. Insurance broker, for 5.6 mln dlrs. Sale is paying 3.6 mln dlrs in cash on completion, with the balance plus interest to be paid in equal instalments over the next six years. B and R posted pretax profit of 855,000 dlrs in the year to Dec 31, 1986 when it had net tangible assets of 563,000 dlrs. REUTER tm/inst/texts/acq/reut-00026.xml0000644000175100001440000000777612074065306015772 0ustar hornikusers 2-MAR-1987 08:17:56.66 acq usa F f0274 reute u f PM-SHEARSON 03-02 0105 AMERICAN EXPRESS <AXP> VIEWING SHEARSON OPTIONS By Patti Domm, Reuters NEW YORK, March 2 - American Express Co, rumored to be considering a spinoff of part of Shearson Lehman Brothers Inc, said it is studying a range of options for its brokerage unit that could improve Shearon's access to capital and help it meet broadening international competition. In a joint statement, American Express and Shearson said the actions under consideration are an integral part of American Express' worldwide financial services strategy and that the two companies have been having both internal and external discussions on the matters. American Express said no decision has been reached on the strategic options and that it and Shearson could ultimately decide to follow growth plans already in place. Last week, rumors circulated on Wall Street that the financial services giant was considering a spinoff of part of Shearson and there was speculation it may be considering selling a stake to a Japanese firm. 
Analysts said the speculation also focused on American Express selling 20 pct of the profitable brokerage firm to the public. There was some speculation that American Express had also considered a total spinoff of Shearson, but the plan was considered highly unlikely, analysts said. American Express said in the statement on Sunday that it will not comment on rumors and speculation and a spokesman would not go beyond the statement. The company also remained silent last Thursday and Friday, as rumors drove American Express stock up a total of 5-1/2 dlrs in two days to bring it to a Friday close at 74. It said it issued the statement on Sunday because a similar statement was being circulated to employees. Analysts have been divided on whether it makes sense for American Express to give up a stake in the wholly-owned brokerage, which improved its after-tax earnings by about 50 pct in the last year. Some analysts said American Express may consider spinning off part of Shearson because it is concerned that its stock price does not fully reflect the value of the brokerage firm. Shearson contributed 316 mln dlrs of American Express' 1.25 billion dlr net in 1986. American Express' ambitious plans for international growth may be also enhanced by the added cash that spinning out part of Shearson would bring. Analysts speculated that all of Shearson would have a market value of about 3.5 billion dlrs. To some however, the need for added capital is puzzling. "(American) Express is in a position where they can raise capital if they need to," said Larry Eckenfelder of Prudential-Bache Securities. Analysts said rumors were fed by the reorganization of Shearson management Wednesday. Chief operating officer Jeffrey Lane got the added, previously vacant, post of president. The reorganization also created four new positions for chairmen of Shearson's operating divisions, a move analysts speculated would allow Shearson to be a stand alone company. 
Analysts, contacted on Sunday said the statement does little to clarify last week's market speculation. It does confirm, however, that the financial services firm, which unsuccessfully attempted to expand Shearson with a major acquisition last year, is looking beyond its own walls for growth and positioning in the global market competition. Late last year, Shearson's takeover offer to the E.F. Hutton Group Inc was rejected by Hutton, and analysts said there had been speculation that Shearson also was rebuffed when it approached another major Wall Street brokerage. Reuter tm/inst/texts/acq/reut-00007.xml0000644000175100001440000001026112074065306015750 0ustar hornikusers 26-FEB-1987 16:43:13.65 acq usa F f0061 reute u f BC-AMERICAN-EXPRESS-<AXP 02-26 0108 AMERICAN EXPRESS <AXP> SEEN IN POSSIBLE SPINNOFF By Patti Domm, Reuter New York, Feb 26 - American Express Co remained silent on market rumors it would spinoff all or part of its Shearson Lehman Brothers Inc, but some analysts said the company may be considering such a move because it is unhappy with the market value of its stock. American Express stock got a lift from the rumor, as the market calculated a partially public Shearson may command a good market value, thereby boosting the total value of American Express. The rumor also was accompanied by talk the financial services firm would split its stock and boost its dividend. American Express closed on the New York Stock Exchange at 72-5/8, up 4-1/8 on heavy volume. American Express would not comment on the rumors or its stock activity. Analysts said comments by the company at an analysts' meeting Tuesday helped fuel the rumors as did an announcement yesterday of management changes. At the meeting, company officials said American Express stock is undervalued and does not fully reflect the performance of Shearson, according to analysts. 
Yesterday, Shearson said it was elevating its chief operating officer, Jeffery Lane, to the added position of president, which had been vacant. It also created four new positions for chairmen of its operating divisions. Analysts speculated a partial spinoff would make most sense, contrary to one variation on market rumors of a total spinoff. Some analysts, however, disagreed that any spinoff of Shearson would be good since it is a strong profit center for American Express, contributing about 20 pct of earnings last year. "I think it is highly unlikely that American Express is going to sell shearson," said Perrin Long of Lipper Analytical. He questioned what would be a better investment than "a very profitable securities firm." Several analysts said American Express is not in need of cash, which might be the only reason to sell a part of a strong asset. But others believe the company could very well of considered the option of spinning out part of Shearson, and one rumor suggests selling about 20 pct of it in the market. Larry Eckenfelder of Prudential-Bache Securities said he believes American Express could have considered a partial spinoff in the past. "Shearson being as profitable as it is would have fetched a big premium in the market place. Shearson's book value is in the 1.4 mln dlr range. Shearson in the market place would probably be worth three to 3.5 bilion dlrs in terms of market capitalization," said Eckenfelder. Some analysts said American Express could use capital since it plans to expand globally. "They have enormous internal growth plans that takes capital. You want your stock to reflect realistic valuations to enhance your ability to make all kinds of endeavors down the road," said E.F. Hutton Group analyst Michael Lewis. "They've outlined the fact that they're investing heavily in the future, which goes heavily into the international arena," said Lewis. "...That does not preclude acquisitions and divestitures along the way," he said. 
Lewis said if American Express reduced its exposure to the brokerage business by selling part of shearson, its stock might better reflect other assets, such as the travel related services business. "It could find its true water mark with a lesser exposure to brokerage. The value of the other components could command a higher multiple because they constitute a higher percentage of the total operating earnings of the company," he said. Lewis said Shearson contributed 316 mln in after-tax operating earnings, up from about 200 mln dlrs in 1985. Reuter tm/inst/texts/acq/reut-00045.xml0000644000175100001440000000245612074065306015761 0ustar hornikusers 2-MAR-1987 10:20:41.80 acq usa F A RM f0657 reute u f BC-BANK-OF-NEW-YORK-<BK> 03-02 0054 BANK OF NEW YORK <BK> TO HAVE GAIN ON UNIT SALE NEW YORK, March 2 - Bank of New York Co said it and the management of RMJ Securities Corp have agreed to sell 80 pct of their interests in RMJ Holding Corp to <British and Commonwealth Holdings PLC> and Bank of New York expects to realize a substantial gain on the transaction. RMJ Holding is the holding company for RMJ Securities, a large broker of U.S. government securities and agency obligations Bank of New York owns a majority interest in RMJ Holding and management of RMJ Securities the remainder. Bank of New York said the sale is expected to be completed during the second quarter. It said it and RMJ Securities management will continue to own 20 pct of RMJ Holding for now, but the agreement provides for the sale of that remaining interest to British and Commonwealth over the next six years. 
Reuter tm/inst/texts/acq/reut-00002.xml0000644000175100001440000000243712074065306015751 0ustar hornikusers 26-FEB-1987 15:19:15.45 earn acq usa F f0773 reute u f BC-OHIO-MATTRESS-<OMT>-M 02-26 0095 OHIO MATTRESS <OMT> MAY HAVE LOWER 1ST QTR NET CLEVELAND, Feb 26 - Ohio Mattress Co said its first quarter, ending February 28, profits may be below the 2.4 mln dlrs, or 15 cts a share, earned in the first quarter of fiscal 1986. The company said any decline would be due to expenses related to the acquisitions in the middle of the current quarter of seven licensees of Sealy Inc, as well as 82 pct of the outstanding capital stock of Sealy. Because of these acquisitions, it said, first quarter sales will be substantially higher than last year's 67.1 mln dlrs. Noting that it typically reports first quarter results in late march, said the report is likely to be issued in early April this year. It said the delay is due to administrative considerations, including conducting appraisals, in connection with the acquisitions. Reuter tm/inst/texts/acq/reut-00029.xml0000644000175100001440000000227012074065306015755 0ustar hornikusers 2-MAR-1987 08:26:35.85 acq usa F f0305 reute d f BC-<DALE-BURDETT-INC>-FA 03-02 0126 <DALE BURDETT INC> FACES DAMAGE CLAIM WESTMINSTER, Calif., March 2 - Dale Burdett Inc said it faces damages claims totalling about 420,000 dlrs from the former owners of Burdett Publications Inc. The company said on February 20, 1986, its predecessor Nolex Development Inc acquired Burdett Publications Inc in an exchange of 17 mln common shares for all Burdett Publications shares, but the transaction was not qualified with the California Department of Corporations. As a result, it said, the former Burdett Publications owners have a claim for damages against Dale Burdett as successor to Nolex for one yuear starting January 21, 1987, with the damages measured by the difference in values of shares exchanged plus interest from February 20, 1986. 
Reuter tm/inst/texts/acq/reut-00030.xml0000644000175100001440000000714112074065306015747 0ustar hornikusers 2-MAR-1987 08:29:05.15 acq usa F f0315 reute u f PM-PUROLATOR 03-02 0102 PUROLATOR <PCC> IN BUYOUT WITH HUTTON <EFH> By Patti Domm NEW YORK, March 2 - New Jersey-based overnight messenger Purolator Courier Corp said it has agreed to be acquired for about 265 mln dlrs by a company formed by E.F. Hutton LBO Inc and certain managers of Purolator's U.S. courier business. Analysts have said that Purolator has been for sale for some time. Purolator announced earlier it was mulling a takeover bid, but analysts wrongly predicted the offer was from another courier company. Hutton LBO, a wholly owned subsidiary of E.F. Hutton Group Inc, will be majority owner of the company. Hutton said the acquiring company, PC Acquisition Inc, is paying 35 dlrs cash per share for 83 pct of Purolator's stock in a tender offer to begin Thursday. The rest of the shares will be purchased for securities and warrants to buy stock in a subsidiary of PC Acquisition, containing Purolator's U.S. courier operations. If all the shares of Purolator are tendered, shareholders would receive for each share 29 dlrs cash, six dlrs in debentures, and a warrant to buy shares in a subsidiary of PC Acquisition containing the U.S. courier operations. Hutton said in the merger shareholders would get 46 mln dlrs aggregate amount of guaranteed debentures due 2002 of PC Acquisition and warrants to buy 15 pct of the common stock of the PC courier subsidiary. Hutton said the company has valued the warrants at two to three dlrs per share. Purolator's stock price closed at 35.125 dlrs on Friday. While some analysts estimated the company was worth in the mid 30s, at least one said it would be worth 38 to 42 dlrs. This follows sales of two other Purolator units. It agreed recently to sell its Canadian Courier unit to Onex Capital for 170 mln dlrs, and previously sold its auto filters business. 
Purolator retains its Stant division, which makes closure caps for radiators and gas tanks. A Hutton spokesman said the firm is reviewing its options on Stant. Purolator's courier business has been lagging that of its U.S. rivals because of the high price it paid in the past several years to add air delivery to its ground fleet. E.F. Hutton will provide 279 mln dlrs of its funds to complete the transaction. This so-called "bridge" financing will be replaced later with long-term debt most likely in the form of bank loans, Hutton said. Hutton LBO is committed to keeping the courier business, its president Warren Idsal said. "Purolator lost 120 mln dlrs over the last two years largely due to U.S. courier operations, which we believe the management is turning around. We belive it will be a very serious competitor in the future," said Idsal. William Taggart, chief executive officer of U.S. Courier division, will be chief executive officer of the new company. The tender offer will be conditioned on a minimum of two thirds of the common stock being tendered and not withdrawn to the expiration of the offer as well as certain other conditions. The offer will begin Thursday, subject to clearances from the staff of the Interstate Commerce Commission and will expire 20 business days after commencement unless extended. Reuter tm/inst/texts/acq/reut-00027.xml0000644000175100001440000000255312074065306015757 0ustar hornikusers 2-MAR-1987 08:22:40.30 acq usa F f0290 reute r f BC-ROPAK-<ROPK>-HAS-34-P 03-02 0109 ROPAK <ROPK> HAS 34 PCT OF BUCKHORN <BKN> FULLERTON, Calif., March 2 - Ropak Corp said it received and accepted about 456,968 common shares and 527,035 Series A convertible preferred shares of Buckhorn Inc at four dlrs and 5.75 dlrs each respectively in response to its tender offer that expired Friday, and it now owns 34.4 pct of Buckhorn voting power. The company had owned 63,000 common and 25,100 preferred shares before starting the hostile tender. 
Ropak said it is borrowing the funds needed to buy the Buckhorn shares from its bank lender and will not need to use any funds that another bank had committed to provide under a margin loan. Ropak said it waived minimum acceptance requirements to buy the shares and intends to evaluate a number of possible ways of completing an acquisition of Buckhorn. It said it hopes that Buckhorn's board will reevaluate its position and enter into meaningful negotiations. Reuter tm/inst/texts/acq/reut-00023.xml0000644000175100001440000000457012074065306015754 0ustar hornikusers 2-MAR-1987 06:54:19.43 acq uk usa RM F f0026 reute u f BC-EXCO-BUYS-U.S.-GOVERN 03-02 0114 EXCO BUYS U.S. GOVERNMENT SECURITIES BROKER LONDON, Mar 2 - <Exco International Plc>, a subsidiary of British and Commonwealth Shipping Co Plc <BCOM.L>, said it had agreed in principle to buy an 80 pct stake in <RMJ Holdings Corp> for about 79 mln dlrs. Exco Chairman Richard Lacy told Reuters the acquisition was being made from Bank of New York Co Inc <BK.N>, which currently holds a 50.1 pct, and from RMJ partners who hold the remainder. Bank of New York and the partners will retain about 10 pct each and these stakes will be bought over the next six years. RMJ is the holding company of RMJ Securities, one of the largest U.S. Government securities brokers. It is also involved in broking notes, obligations and other instruments sponsored by U.S. Federal agencies. Lacy said Exco had been considering buying a U.S. Government securities broker for the past four years and had made an offer for RMJ when it was sold by Security Pacific Corp <SPC.N> in 1985. RMJ was then valued at about 50 mln dlrs. B and C managing director Peter Goldie said RMJ would be bought at about the same multiple as Exco, suggesting net income of around 16 mln dlrs. The company's earnings had not been hit by the halving of brokerage fees some 14 months ago as volumes had since doubled. 
Lacy said that RMJ employed some 300 people, with 200 in the brokerage business and about 70 in its <SMS> unit, which provided computer software for the financial services community. RMJ Securities had offices in New York, where total market turnover of U.S. Government securities was 110 billion dlrs a day, and in London where it has 15 billion. It was also given permission last week to open an office in Tokyo where total market turnover had lifted rapidly to about five billion dlrs a day. The acquisition would contribute between five and 10 pct of B and C's share earnings in 1987 on a proforma basis. REUTER tm/inst/texts/acq/reut-00048.xml0000644000175100001440000000417412074065306015763 0ustar hornikusers 2-MAR-1987 10:36:13.53 gold acq platinum canada brazil E F f0710 reute r f BC-cons-tvx-to-buy 03-02 0090 CONSOLIDATED TVX TO BUY BRAZIL GOLD MINE STAKES TORONTO, March 2 - <Consolidated TVX Mining Corp> said it agreed to issue 7.8 mln treasury shares to acquire interests in three gold mining companies in Brazil and an option to increase the company's interest in a platinum property. The company said the transactions will bring immediate production and earnings to Consolidated TVX, enhance its precious metal potential and is expected to improve cash flow and earnings on a per share basis. The company did not give specific figures. Consolidated TVX said it will acquire 29 pct of CMP, a public gold mining company in which TVX already holds a 15 pct interest, making TVX the largest single shareholder. The company also agreed to acquire a 19 pct stake in Novo Astro, a private company, and a 16 pct interest in Teles Pires Mining, increasing the TVX's ownership to 51 pct. In addition, Consolidated TVX said it will acquire the right to add a 10 pct interest to a platinum property in which it already owns a 29.4 pct stake. CMP earned 11 mln Canadian dlrs in 1986 and expects to produce 42,000 ounces of gold in 1987 at a cost of 160 U.S. 
dlrs an ounce, Consolidated TVX said. Novo Astro operates Brazil's richest gold mine located in Amapa State, with an average grade of 0.8 ounces of gold a ton in a hardrock quartz vein, Consolidated TVX said. Mining of eluvial surface material produced 25,000 ounces in 1986 and is expected to produce 60,000 ounces in 1987. It also said Teles Pires Mining controls rights to a 350 kilometer section of the Teles Pires River, where one dredge is expected to produce 10,000 ounces of gold in 1987. Reuter tm/inst/texts/acq/reut-00016.xml0000644000175100001440000000122012074065306015743 0ustar hornikusers 26-FEB-1987 18:12:51.94 acq canada E f0301 reute r f BC-VIDEOTRON-BUYS-INTO-E 02-26 0036 VIDEOTRON BUYS INTO EXHIBIT COMPANY MONTREAL, Feb 26 - (Groupe Videotron Ltd) said it agreed to buy 50 pct of (Groupe Promexpo Inc), a company which specializes in product exhibits, for three mln dlrs. Reuter tm/inst/texts/acq/reut-00017.xml0000644000175100001440000000200712074065306015750 0ustar hornikusers 26-FEB-1987 18:27:56.14 acq usa F f0324 reute d f BC-CIRCUIT-SYSTEMS-<CSYI 02-26 0098 CIRCUIT SYSTEMS <CSYI> BUYS BOARD MAKER ADDISON, Ill., Feb 26 - Circuit Systems Inc said it has bought all of the stock of (Ionic Industries Inc) in exchange for 3,677,272 shares of its common. Following the exchange there will be 4,969,643 shares of Circuit Systems stock outstanding. Ionic holders will own about 74 pct of the outstanding stock of Circuit Systems, it said. Ionic, a maker of circuit boards, had revenues of 8.4 mln dlrs and pretax profits of 232,000 dlrs in 1986, up from revenues of 5.9 mln and pretax profits of 204,000 dlrs in 1985, Circuit Systems said. 
Reuter tm/inst/texts/acq/reut-00047.xml0000644000175100001440000000155012074065306015755 0ustar hornikusers 2-MAR-1987 10:36:04.57 acq usa F f0709 reute r f BC-BALLY-<BLY>-COMPLETES 03-02 0071 BALLY <BLY> COMPLETES PURCHASE OF GOLDEN NUGGET CHICAGO, March 2 - Bally Manufacturing Corp said it completed the acquisition of the Golden Nugget Casino Hotel in Atlantic City, New Jersey from Golden Nugget Inc. Bally also acquired from Golden Nugget various parcels of real estate in Atlantic City, it noted. The transaction included 140 mln dlrs in cash and stock and the assumption of a 299 mln dlrs mortgage. Reuter tm/inst/texts/acq/reut-00028.xml0000644000175100001440000000221712074065306015755 0ustar hornikusers 2-MAR-1987 08:25:56.49 acq usa F f0301 reute r f BC-PENRIL-<PNL>-SEEKS-TO 03-02 0101 PENRIL <PNL> SEEKS TO SELL TWO UNITS ROCKVILLE, Md., March 2 - Penril Corp said it is seeking to sell its Triplett Electrical Instrument Corp subsidiary in Bluffton, Ohio, and Triplett's Alltest division in Hoffman Estates, Ill., as part of a plan to concentrate on its three profitable division and reduce its debt load. The company also said it is evaluating a plan to satisfy its obligations under its 10-7/8 pct subordinated notes but gave no details. Interest on the notes is due today. Penril further said director Clifford L. Alexander Jr. has resigned from the board. It gave no reason. Penril said shareholders at the annual meeting approved the limitation of directors' liability. Reuter tm/inst/texts/acq/reut-00043.xml0000644000175100001440000000170612074065306015754 0ustar hornikusers 2-MAR-1987 10:06:32.63 acq usa F f0625 reute u f BC-THE-JAPAN-FUND-<JPN> 03-02 0085 THE JAPAN FUND <JPN> GETS BUYOUT OFFER NEW YORK, March 2 - The Japan Fund Inc said it has received an unsolicited offer from <Sterling Grace Capital Management LP>, acting together with certain other persons and entities, to purchase all the assets of the fund at five pct below its aggregate net asset value. 
The Japan Find said tne deal is subject to obtaining satisfactory financing and a due diligence review. It added that the proposal has been referred to its Board of Directors for consideration. Reuter tm/inst/texts/acq/reut-00005.xml0000644000175100001440000000152112074065306015745 0ustar hornikusers 26-FEB-1987 16:08:33.15 acq usa F f0949 reute r f BC-<COFAB-INC>-BUYS-GULF 02-26 0066 <COFAB INC> BUYS GULFEX FOR UNDISCLOSED AMOUNT HOUSTON, Feb 26 - CoFAB Inc said it acquired <Gulfex Inc>, a Houston-based fabricator of custom high-pressure process vessels for the energy and petrochemical industries. CoFAB said its group of companies manufacture specialized cooling and lubricating systems for the oil and gas, petrochemical, utility, pulp and paper and marine industries. Reuter tm/inst/texts/acq/reut-00049.xml0000644000175100001440000000146112074065306015760 0ustar hornikusers 2-MAR-1987 10:50:34.12 acq usa F f0802 reute w f BC-AMERICAN-NURSERY-<ANS 03-02 0060 AMERICAN NURSERY <ANSY> BUYS FLORIDA NURSERY TAHLEQUAH, OKLA., March 2 - American Nursery Products Inc said it purchased Miami-based Heinl's Nursery Inc, for undisclosed terms. Heinl's Nursery has sales of about 4.5 mln dlrs and owns 100 acres, of which 75 are in shade houses and about 58,300 square feet cover greenhouses, shipping and office facilities. Reuter tm/inst/texts/acq/reut-00052.xml0000644000175100001440000000205512074065306015752 0ustar hornikusers 2-MAR-1987 11:09:06.82 acq canada E F f0882 reute r f BC-FOUR-SEASONS-BUYING-M 03-02 0100 FOUR SEASONS BUYING MARRIOTT <MHS> HOTEL TORONTO, March 2 - <Four Seasons Hotels Inc> and VMS Realty Partners said they agreed to acquire the Santa Barbara Biltmore Hotel in California from Marriott Corp, for undisclosed terms. Closing was expected by March 31, they added. The companies said they would jointly own the hotel and rename it the Four Seasons Biltmore at Santa Barbara. They said they would spend more than 13 mln U.S. 
dlrs "to enhance the Biltmore's position as one of the finest resort hotels in North America." Chicago-based VMS Realty is a real estate and development firm. Reuter tm/inst/texts/acq/reut-00011.xml0000644000175100001440000000213112074065306015740 0ustar hornikusers 26-FEB-1987 17:09:47.78 acq usa F f0146 reute r f BC-ROBESON 02-26 0113 INVESTMENT GROUP RAISES ROBESON <RBSN> STAKE WASHINGTON, Feb 26 - A group of affiliated Miami-based investment firms led by Fundamental Management Corp said it raised its stake in Robeson Industries Corp to 238,000 shares, or 14.6 pct of the total, from 205,000 or 12.8 pct. In a filing with the Securities and Exchange Commission, the group said it bought 32,800 Robeson common shares between Jan 26 and Feb 9 for 175,691 dlrs. The group said it may buy more shares and plans to study Robeson's operations. Afterwards it may recommend that management make changes in its operations. Fundamental Management Chairman Carl Singer was recently elected to the Robeson board. Reuter tm/inst/texts/acq/reut-00015.xml0000644000175100001440000000134412074065306015751 0ustar hornikusers 26-FEB-1987 18:12:35.70 acq canada E F f0300 reute r f BC-VERSATILE-TO-SELL-UNI 02-26 0049 VERSATILE TO SELL UNIT TO VICON VANCOUVER, British Columbia, Feb 26 - <Versatile Corp> said it agreed in principle to sell its Alberta-based Versatile Noble Cultivators Co division to Vicon Inc, of Ontario, for undisclosed terms. The division manufactures tillage and spraying equipment. Reuter tm/inst/texts/acq/reut-00050.xml0000644000175100001440000000275612074065306015760 0ustar hornikusers 2-MAR-1987 10:59:16.80 earn acq E F f0832 reute r f BC-multi-step-to-sell 03-02 0108 MULTI-STEP TO SELL LADDER UNIT, CANCEL SHARES TORONTO, March 2 - <Multi-Step Products Inc>, earlier reporting an initial six month loss, said it agreed to sell wholly owned Multi-Step Manufacturing Inc for 100,000 dlrs cash, subject to shareholder and regulatory approval. 
Multi-Step also said it will pay 900,000 dlrs to cancel 711,192 of its own shares, which will be acquired from Michael Penhale and his benficiaries. Penhale will control and manage Multi-Step Manufacturing, following the transactions. Multi-Step had a 739,146 dlr loss for the six months ended December 31. The company received its initial public listing in December. The company said its ladder-making unit has been losing 300,000 dlrs quarterly. The sale, expected to close in April, also calls for retirement of the unit's 400,000 dlr bank debt, Multi-Step said. The unit also has agreed to pay a debt of 400,000 dlrs to Tarxien Company Ltd, which is 40 pct owned by Multi-Step. Multi-Step previously said it agreed to acquire the remaining 60 pct of Tarxien it does not already own. Reuter tm/inst/texts/acq/reut-00053.xml0000644000175100001440000000667512074065306015767 0ustar hornikusers 2-MAR-1987 11:23:31.27 acq usa F f0955 reute u f BC-VIACOM 03-02 0104 REDSTONE DETAILS SWEETENED VIACOM <VIA> OFFER WASHINGTON, March 2 - Investor Sumner Redstone, who leads one of the two groups vying for control of Viacom International Inc, offered to sweeten his bid for the company by 1.50 dlrs a share cash and 1.50 dlrs in securities. In a filing with the Securities and Exchange Commission, Redstone, who controls Dedham, Mass.,-based National Amusements Inc, a theater chain operator, offered to raise the cash portion of its Viacom offer to 42 dlrs a share from 40.50 dlrs. Redstone also raised the face value of the preferred stock he is offering to 7.50 dlrs from six dlrs. The Redstone offer, which is being made through Arsenal Holdings Inc, a National Amusements subsidiary set up for that purpose, which also give Viacom shareholders one-fifth of a share of Arsenal common stock after the takeover. 
Viacom said earlier today it received revised takeover bids from Redstone and MCV Holdings Inc, a group led by Viacom management which is competing with Redstone for control of the company and already has a formal merger agreement with Viacom. The company did not disclose the details of the revised offers, but said a special committee of its board would review them later today. The Redstone group, which has a 19.5 pct stake in Viacom, and the management group, which has a 5.4 pct stake, have both agreed not to buy more shares of the company until a merger is completed, unless the purchases are part of a tender offer for at least half of the outstanding stock. The two rivals also signed confidentiality agreements, which give them access to Viacom's financial records provided they keep the information secret. In his SEC filing, Redstone, who estimated his cost of completing the takeover at 2.95 billion dlrs, said Bank of America is confident it can raise 2.275 billion dlrs. Besides the financing it would raise through a bank syndicate, Bank of America has also agreed to provide a separate 25 mln dlr for the limited purpose of partial financing and has committed to provide another 592 mln dlrs, Redstone said. Merrill Lynch, Pierce Fenner and Smith Inc has increased its underwriting commitment to 175 mln dlrs of subordinated financing debt for the Viacom takeover, from the 150 mln dlrs it agreed to underwrite earlier, Redstone said. Redstone said his group would contribute more than 475 mln dlrs in equity toward the takeover. The Redstone equity contribution to the takeover would consist of all of his group's 6,881,800 Viacom common shares and at least 118 mln dlrs cash, he said. The new offer, the second sweetened deal Redstone has proposed in his month-long bidding war with management, also contains newly drawn up proposed merger documents, he said. 
Last week, the management group submitted what it called its last offer for the company, valued at 3.1 mln dlrs and consisting of 38.50 dlrs a share cash, preferred stock valued at eight dlrs a share and equity in the new company. Redstone's previous offer had been valued at 3.2 billion dlrs. Reuter tm/inst/texts/acq/reut-00010.xml0000644000175100001440000000133512074065306015744 0ustar hornikusers 26-FEB-1987 17:08:27.52 acq usa F f0143 reute d f BC-GULF-APPLIED-TECHNOLO 02-26 0049 GULF APPLIED TECHNOLOGIES <GATS> SELLS UNITS HOUSTON, Feb 26 - Gulf Applied Technologies Inc said it sold its subsidiaries engaged in pipeline and terminal operations for 12.2 mln dlrs. The company said the sale is subject to certain post closing adjustments, which it did not explain. Reuter tm/inst/texts/acq/reut-00046.xml0000644000175100001440000000166712074065306015765 0ustar hornikusers 2-MAR-1987 10:29:07.31 acq usa F f0682 reute b f BC-CORNING-<GLW>,-HAZLET 03-02 0083 CORNING <GLW>, HAZLETON <HLC> SET EXCAHNGE RATIO CORNING, N.Y., March 2 - Corning Glass Works said the exchange ratio for its previously announced acquisition of Hazleton Laboratories Corp has been established at 0.5165 Corning common share for each Hazleton common share. Corning said the prospectus regarding the merger is expected to be mailed tomorrow to all Hazleton holders of record February 18. Hazleton shareholders will vote on the proposed merger at a special meeting on March 31. Reuter tm/inst/texts/acq/reut-00034.xml0000644000175100001440000000166612074065306015761 0ustar hornikusers 2-MAR-1987 09:02:51.89 acq usa F f0411 reute u f BC-LAROCHE-STARTS-BID-FO 03-02 0058 LAROCHE STARTS BID FOR NECO <NPT> SHARES NEW YORK, March 2 - Investor David F. La Roche of North Kingstown, R.I., said he is offering to purchase 170,000 common shares of NECO Enterprises Inc at 26 dlrs each. He said the successful completion of the offer, plus shares he already owns, would give him 50.5 pct of NECO's 962,016 common shares. 
La Roche said he may buy more, and possible all NECO shares. He said the offer and withdrawal rights will expire at 1630 EST/2130 gmt, March 30, 1987. Reuter tm/inst/texts/acq/reut-00020.xml0000644000175100001440000000573112074065306015751 0ustar hornikusers 2-MAR-1987 04:45:57.78 acq sweden F f0812 reute b f BC-WALLENBERGS-FIGHT-BID 03-02 0115 WALLENBERGS FIGHT BID FOR SWEDISH MATCH STAKE STOCKHOLM, March 2 - Sweden's Wallenberg group fought back a bid by the London-based Swedish financier Erik Penser to secure a large stake in Swedish Match <SMBS ST>, one of the companies at the core of their business empire. A statement issued by the Wallenberg holding companies AB Investor and Forvaltnings AB Providentia said they had taken over an option held by Nobel Industrier Sweden AB to acquire 33 pct of the voting rights in Swedish Match. Thre Wallenbergs paid Nobel Industrier <NOBL ST>, in which Penser group has a 72 pct stake, about 20 pct over the market price for the Swedish Match option, the statement said. Swedish Match's B shares open to foreign buyers closed at 424 crowns on Friday. The A shares -- with increased voting rights -- closed at 450 crowns for the restricted and 455 for the free shares. The statement said the deal increased Investor's stake to 49.4 pct of the voting rights and 14.8 pct of the share capital while Providentia is left holding 34.1 pct of the voting rights and 14.5 pct of the share capital in Swedish Match. The Wallenbergs' stake in Swedish Match had previously amounted to 52 pct of the voting rights in the company. The Swedish Match deal will cost the Wallenbergs about 400 mln crowns, share analysts said, making it one of the most expensise moves the group has undertaken in the last four years to defend its far-flung interests from outside predators. 
The Wallenbergs originally sold Nobel Industrier, an arms and chemicals group, to Penser in 1984 to pay for buying Volvo <VOLV ST> out of two other key group companies, Atlas Copco <ASTS ST> and Stora Koppabergs <SKPS ST>. Since then, the Wallenbergs were ousted as the largest shareholders in SKF (SKFR ST> by Skanska AB <SKBS ST> and Frederik Lundberg wrested control of Incentive AB from them. Lundberg, a Zurich-based Swedish property tycoon, also managed to acquire a 25 pct stake in another Wallenberg company, the diary equipment firm Alfa -Laval AB <ALFS ST>. During 1986, the Wallenbergs have been concentrating on building up their stake in Investor and Providentia to prevent any raid on the heart of their business empire. But analysts say the Wallenbergs' position in the electrical engineering firm ASEA AB <ASEA ST> is also too small at 12.6 pct of the voting rights and there has been growing speculation that the group will be forced to sell off fringe interests to protect its core activities. REUTER tm/inst/texts/acq/reut-00012.xml0000644000175100001440000000211212074065306015740 0ustar hornikusers 26-FEB-1987 17:36:22.14 acq usa F f0204 reute r f BC-EPSILON-DATA 02-26 0110 DREXEL OFFICIAL HAS STAKE IN EPSILON DATA <EPSI> WASHINGTON, Feb 26 - A senior official of Drexel Burnham Lambert Inc and his father told the Securities and Exchange Commission they have acquired 258,591 shares of Epsilon Data Management Inc, or 9.4 pct of the total outstanding. Kenneth Thomas, senior vice president-investments at Drexel's Los Angeles office, and his father, retired university professor C.A. Thomas, said they bought the stake for 2.1 mln dlrs primarily for investment purposes. They said they may buy more stock or sell some or all of their stake, depending on market conditions, but have no plans to seek control of the company. 
Reuter tm/inst/texts/acq/reut-00025.xml0000644000175100001440000000212312074065306015746 0ustar hornikusers 2-MAR-1987 08:16:59.80 acq usa F f0267 reute r f BC-SCIENTIFIC-MICRO-SYST 03-02 0111 SCIENTIFIC MICRO SYSTEMS <SMSI> ACUIRES SUPERMAC NEW YORK, March 2 - Scientific Micro Systems Inc said it has acquired Supermac Technology, a rapidly growing supplier of enhancement products and disc drive subsystems for the Apple personal computer market. Scientific Micro said it acquired all the common stock of Supermac in exchange for 1.05 mln shares of its own common stock. The stock closed at 5.50 dlrs bid on Friday. Supermac, a privately held firm based in Mountain View, California, as is Scientific Micro, reported a net profit of 300,000 dlrs on revenue of 9.5 mln dlrs in fiscal 1986. It expects its revenue to approximately double in 1987. Reuter tm/inst/texts/acq/reut-00006.xml0000644000175100001440000000154012074065306015747 0ustar hornikusers 26-FEB-1987 16:32:37.30 acq usa F f0024 reute u f BC-CYCLOPS 02-26 0073 INVESTMENT FIRMS CUT CYCLOPS <CYL> STAKE WASHINGTON, Feb 26 - A group of affiliated New York investment firms said they lowered their stake in Cyclops Corp to 260,500 shares, or 6.4 pct of the total outstanding common stock, from 370,500 shares, or 9.2 pct. In a filing with the Securities and Exchange Commission, the group, led by Mutual Shares Corp, said it sold 110,000 Cyclops common shares on Feb 17 and 19 for 10.0 mln dlrs. Reuter tm/inst/texts/acq/reut-00032.xml0000644000175100001440000000150612074065306015750 0ustar hornikusers 2-MAR-1987 08:43:25.91 acq usa F f0362 reute d f BC-MARRIOTT-<MHS>-TO-SEL 03-02 0063 MARRIOTT <MHS> TO SELL HOTEL TORONTO, March 2 - <Four Seasons Hotels> said it and <VMS Realty Partners> of Chicago have agreed to purchase the Santa Barbara Biltmore Hotel from Marriott Corp for an undisclosed amount. 
It said the venture will rename the hotel the Four Seasons Biltmore at Santa Barbara and invest over 13 mln dlrs in improvements on the 228-room property. Reuter tm/inst/texts/acq/reut-00003.xml0000644000175100001440000000203612074065306015745 0ustar hornikusers 26-FEB-1987 15:49:56.01 acq ship usa F f0874 reute r f BC-MCLEAN'S-<MII>-U.S.-L 02-26 0094 MCLEAN'S <MII> U.S. LINES SETS ASSET TRANSFER CRANFORD, N.J., Feb 26 - McLean Industries Inc's United States Lines Inc subsidiary said it has agreed in principle to transfer its South American service by arranging for the transfer of certain charters and assets to <Crowley Mariotime Corp>'s American Transport Lines Inc subsidiary. U.S. Lines said negotiations on the contract are expected to be completed within the next week. Terms and conditions of the contract would be subject to approval of various regulatory bodies, including the U.S. Bankruptcy Court. Reuter tm/inst/texts/acq/reut-00036.xml0000644000175100001440000000373212074065306015757 0ustar hornikusers 2-MAR-1987 09:16:08.70 acq usa F f0448 reute b f BC-/VIACOM-<VIA>-RECEIVE 03-02 0045 VIACOM <VIA> RECEIVES TWO REVISED OFFERS NEW YORK, March 2 - Viacom International Inc said it received revised merger offers from <National Amusements Inc> and <MCV Holdings Inc>. The company said the special committee plans to meet later today to review both offers. Viacom said National Amusements' Arsenal Holdings Inc raised the value of its offer for the Viacom shares not held by National Amusements in three areas. National Amusements holds 19.6 pct of Viacom's stock. The cash value of the offer was raised to 42.00 dlrs from the 40.50 dlrs a Viacom share offered February 23 while the value of the fraction of a share of exchangeable preferred being offered was increased to 7.50 dlrs a share from six dlrs. The interest rate to be used to increase the cash value of the merger, if delayed beyond April 30, was raised to nine pct from eight pct and 12 pct after May 31. 
A Viacom spokesman said the Arsenal Holdings's offer continues to include a 20 pct interest in Arsenal for present Viacom shareholders. Viacom said MCV Holdings, a group which includes the company's senior management and the Equitable Life Assurance Society of the United States, raised the value of its offer by increasing the value of the preferred being offered to 8.50 dlrs from 8.00 dlrs a share and raising the ownership in the new company to be held by present Viacom shareholders to 45 pct from 25 pct. MCV called its previous offer, made February 26, the "final" proposed revision of its agreement with Viacom. Reuter tm/inst/texts/acq/reut-00013.xml0000644000175100001440000000257212074065306015753 0ustar hornikusers 26-FEB-1987 17:38:47.04 acq canada F E f0214 reute d f BC-<NOVA>-WINS-GOVERNMEN 02-26 0106 <NOVA> WINS GOVERNMENT OKAY FOR HUSKY <HYO> DEAL CALGARY, Alberta, Feb 26 - Nova, the Canadian company that owns 56 pct of Husky Oil Ltd, said it received government approval for a transaction under which <Union Faith Canada Holding Ltd> would buy a 43 pct stake in Husky. Nova said the Minister of Regional and Industrial Expansion, Michel Cote, ruled that Union Faith's purchase of the Husky stake would not result in Husky ceding control to a non-Canadian company. It said this ruling was a key condition in completing the deal. Union Faith is equally owned by <Hutchison Whampoa Ltd> and <Hong Kong Electric Holdings Ltd>. Under the agreement with Union Faith, Husky will become a private company with Union Faith and Nova each holding 43 pct of its stock. Nine pct of Husky would be owned by relatives of Li Ka-Shing, chairman of Hutchison, and five pct by the Canadian Imperial Bank of Commerice. 
Reuter tm/inst/texts/acq/reut-00055.xml0000644000175100001440000000152312074065306015754 0ustar hornikusers 2-MAR-1987 11:24:06.09 acq usa F f0958 reute r f BC-UTILICORP-<UCU>-COMPL 03-02 0066 UTILICORP <UCU> COMPLETES ACQUISITION KANSAS CITY, March 2 - UtiliCorp United Inc said it completed the acquisition of West Virginia Power from Dominion Resources for about 21 mln dlrs. The sale was approved by the West Virginia Public Service Commission in January and became effective March one. West Virginia's management will continue to be responsible for operating the utility, it said. Reuter tm/inst/texts/acq/reut-00040.xml0000644000175100001440000000303612074065306015747 0ustar hornikusers 2-MAR-1987 09:33:32.93 acq usa F f0501 reute u f BC-PITTSTON-<PCO>-AGREES 03-02 0111 PITTSTON <PCO> AGREES TO ACQUIRE WTC <WAF> STAMFORD, Conn., March 2 - Pittston Co said it has tentatively agreed to acquire WTC International N.V. in a tax-free exchange of stock. Pittston said it agreed to exchange 0.523 common share for each of the about 8,612,000 WTC common shares outstanding. Pittston said WTC's three principal shareholders, who own 62 pct of its stock, are parties to this agreement. They have granted Pittston the right of first refusal to their shares. WTC has granted Pittston an option to buy WTC shares equal to 18.5 poct of its outstanding stock. The agreement is subject to approval of both boards and WTC shareholders. Pittston said described WTC as a fast growing air freight forwarding company with operations throughout the world. Its revenues totaled nearly 200 mln dlrs in the year ended November 30 and for the quarter ended on that date it earned 1.3 mln dlrs on revenues of 55.8 mln dlrs. Pittston said its Burlington Air Express subsidiary generates about two-thirds of its 450 mln dlrs in annual revenes with its domestic air freight services. 
Reuter tm/inst/texts/acq/reut-00039.xml0000644000175100001440000000241612074065306015760 0ustar hornikusers 2-MAR-1987 09:28:21.66 acq usa F f0482 reute u f BC-MILLER-TABAK-HAS-91.8 03-02 0057 MILLER TABAK HAS 91.8 PCT OF PENN TRAFFIC <PNF> NEW YORK, March 2 - <Miller Tabak Hirsch and Co> said it has received an accepted 3,424,729 common shares of Penn Traffic Co in response to its 31.60 dlr per share tender offer that expired Friday, and together with the 380,728 shares it already owned, it now has about 91.8 pct of Penn Traffic. The company said Penn Traffic is expected to hold a special shareholders' meeting later this month to approve a merger into Miller Tabak at the tender price. It said two Miller Tabak representatives will be named to the Penn Traffic board on March Four to serve as the only directors with Penn Traffic president and chief executive officer Guido Malacarne. The company said it received financing for the transaction from First National Bank of Minneapolis and Salomon Inc <SB>. Reuter tm/inst/texts/acq/reut-00054.xml0000644000175100001440000000201012074065306015743 0ustar hornikusers 2-MAR-1987 11:23:45.24 acq italy spain F f0956 reute d f BC-MONTEDISON-CONCLUDES 03-02 0093 MONTEDISON CONCLUDES TALKS WITH ANTIBIOTICOS MILAN, March 2 - Montedison Spa <MONI.MI> said it has concluded its negotiations with Spanish pharmaceuticals company <Antibioticos SA>. A company spokesman told Reuters "We have concluded the talks and we are now awaiting authorization from Spanish authorities." He declined to comment further. Earlier today the Italian company postponed a scheduled press conference on its talks with Antibioticos. An Italian press report today said Montedison has agreed to acquire Antibioticos for 500 billion lire. 
REUTER tm/inst/texts/acq/reut-00021.xml0000644000175100001440000000275412074065306015754 0ustar hornikusers 2-MAR-1987 04:52:58.27 acq uk F f0825 reute b f BC-SHV-SAYS-IT-MAKING-TE 03-02 0061 SHV SAYS IT MAKING TENDER OFFER FOR IC GAS LONDON, March 2 - <SHV (United Kingdom) Holding Co Ltd> said it was making a tender offer for up to 33 mln ordinary shares in Imperial Continental Gas Association.<ICGS.L>. It said in a statement the offer was on the basis of 700p for each IC Gas ordinary and 252p for every one stg nominal of IC Gas loan stock. SHV already holds 6.8 mln IC Gas ordinary stock units representing around 4.9 pct of the current issued share capital. Successful completion of the offer would increase SHV's stake in IC Gas to 39.8 mln shares, representing around 27.9 pct of issued share capital, it said. The offer capitalises IC Gas at around one billion stg. It said it was tendering for both ordinary stock and loan stock, which when fully converted, gave a total of 33 mln IC Gas ordinary. It is making the tender offer through N.M. Rothschilds. IC Gas said in a statement it noted the SHV tender offer and the terms were being considered. It said a further statement would be made as soon as possible. REUTER... tm/inst/texts/acq/reut-00018.xml0000644000175100001440000000256612074065306015763 0ustar hornikusers 1-MAR-1987 22:20:43.45 acq japan M C f0515 reute u f BC-NIPPON-KOKAN-STEEL-AF 03-01 0113 NIPPON KOKAN STEEL AFFILIATES CONSIDERING MERGER TOKYO, March 2 - Toshin Steel Co Ltd <TOSS.T> and <Azuma Steel Co Ltd>, affiliates of Nippon Kokan KK <NKKT.T>, are considering a merger, company spokesmen said. Toshin Steel, owned 41.9 pct by Nippon Kokan, and Azuma Steel, owned 41.3 pct by Nippon Kokan, are expected to decide by the end of March, they said. Both firms have been struggling with losses caused by the recession in the steel industry and the yen's appreciation. 
Azuma Steel's current losses are estimated at 3.1 billion yen in the year ending March 31 against a 6.99 billion loss a year earlier, a spokesman said. The firm employs 1,100 workers Toshin Steel, with 1,700 workers, has given no forecast for the year ending March 31. But industry sources said they expected the company to show current losses of about five billion yen or more in 1986/87 compared with a 2.98 billion loss in 1985/86. REUTER tm/inst/texts/rcv1_2330.xml0000644000175100001440000000265512074065307015114 0ustar hornikusers USA: Tylan stock jumps; weighs sale of company. Tylan stock jumps; weighs sale of company. SAN DIEGO

The stock of Tylan General Inc. jumped Tuesday after the maker of process-management equipment said it is exploring the sale of the company and added that it has already received some inquiries from potential buyers.

Tylan was up $2.50 to $12.75 in early trading on the Nasdaq market.

The company said it has set up a committee of directors to oversee the sale and that Goldman, Sachs & Co. has been retained as its financial adviser.

(c) Reuters Limited 1996 tm/inst/texts/reuters-21578.xml0000644000175100001440000004050412074065307015742 0ustar hornikusers 26-FEB-1987 15:01:01.79 cocoa el-salvadorusauruguay C T f0704reute u f BC-BAHIA-COCOA-REVIEW 02-26 0105 BAHIA COCOA REVIEW SALVADOR, Feb 26 - Showers continued throughout the week in the Bahia cocoa zone, alleviating the drought since early January and improving prospects for the coming temporao, although normal humidity levels have not been restored, Comissaria Smith said in its weekly review. The dry period means the temporao will be late this year. Arrivals for the week ended February 22 were 155,221 bags of 60 kilos making a cumulative total for the season of 5.93 mln against 5.81 at the same stage last year. Again it seems that cocoa delivered earlier on consignment was included in the arrivals figures. Comissaria Smith said there is still some doubt as to how much old crop cocoa is still available as harvesting has practically come to an end. With total Bahia crop estimates around 6.4 mln bags and sales standing at almost 6.2 mln there are a few hundred thousand bags still in the hands of farmers, middlemen, exporters and processors. There are doubts as to how much of this cocoa would be fit for export as shippers are now experiencing dificulties in obtaining +Bahia superior+ certificates. In view of the lower quality over recent weeks farmers have sold a good part of their cocoa held on consignment. Comissaria Smith said spot bean prices rose to 340 to 350 cruzados per arroba of 15 kilos. Bean shippers were reluctant to offer nearby shipment and only limited sales were booked for March shipment at 1,750 to 1,780 dlrs per tonne to ports to be named. New crop sales were also light and all to open ports with June/July going at 1,850 and 1,880 dlrs and at 35 and 45 dlrs under New York july, Aug/Sept at 1,870, 1,875 and 1,880 dlrs per tonne FOB. Routine sales of butter were made. March/April sold at 4,340, 4,345 and 4,350 dlrs. 
April/May butter went at 2.27 times New York May, June/July at 4,400 and 4,415 dlrs, Aug/Sept at 4,351 to 4,450 dlrs and at 2.27 and 2.28 times New York Sept and Oct/Dec at 4,480 dlrs and 2.27 times New York Dec, Comissaria Smith said. Destinations were the U.S., Covertible currency areas, Uruguay and open ports. Cake sales were registered at 785 to 995 dlrs for March/April, 785 dlrs for May, 753 dlrs for Aug and 0.39 times New York Dec for Oct/Dec. Buyers were the U.S., Argentina, Uruguay and convertible currency areas. Liquor sales were limited with March/April selling at 2,325 and 2,380 dlrs, June/July at 2,375 dlrs and at 1.25 times New York July, Aug/Sept at 2,400 dlrs and at 1.25 times New York Sept and Oct/Dec at 1.25 times New York Dec, Comissaria Smith said. Total Bahia sales are currently estimated at 6.13 mln bags against the 1986/87 crop and 1.06 mln bags against the 1987/88 crop. Final figures for the period to February 28 are expected to be published by the Brazilian Cocoa Trade Commission after carnival which ends midday on February 27. Reuter 26-FEB-1987 15:02:20.00 usa F Y f0708reute d f BC-STANDARD-OIL-<SRD>-TO 02-26 0082 STANDARD OIL <SRD> TO FORM FINANCIAL UNIT CLEVELAND, Feb 26 - Standard Oil Co and BP North America Inc said they plan to form a venture to manage the money market borrowing and investment activities of both companies. BP North America is a subsidiary of British Petroleum Co Plc <BP>, which also owns a 55 pct interest in Standard Oil. The venture will be called BP/Standard Financial Trading and will be operated by Standard Oil under the oversight of a joint management committee. Reuter 26-FEB-1987 15:03:27.51 usa F A f0714reute d f BC-TEXAS-COMMERCE-BANCSH 02-26 0064 TEXAS COMMERCE BANCSHARES <TCB> FILES PLAN HOUSTON, Feb 26 - Texas Commerce Bancshares Inc's Texas Commerce Bank-Houston said it filed an application with the Comptroller of the Currency in an effort to create the largest banking network in Harris County. 
The bank said the network would link 31 banks having 13.5 billion dlrs in assets and 7.5 billion dlrs in deposits. Reuter 26-FEB-1987 15:07:13.72 usabrazil F f0725 reute u f BC-TALKING-POINT/BANKAME 02-26 0105 TALKING POINT/BANKAMERICA <BAC> EQUITY OFFER by Janie Gabbett, Reuters LOS ANGELES, Feb 26 - BankAmerica Corp is not under pressure to act quickly on its proposed equity offering and would do well to delay it because of the stock's recent poor performance, banking analysts said. Some analysts said they have recommended BankAmerica delay its up to one-billion-dlr equity offering, which has yet to be approved by the Securities and Exchange Commission. BankAmerica stock fell this week, along with other banking issues, on the news that Brazil has suspended interest payments on a large portion of its foreign debt. The stock traded around 12, down 1/8, this afternoon, after falling to 11-1/2 earlier this week on the news. Banking analysts said that with the immediate threat of the First Interstate Bancorp <I> takeover bid gone, BankAmerica is under no pressure to sell the securities into a market that will be nervous on bank stocks in the near term. BankAmerica filed the offer on January 26. It was seen as one of the major factors leading the First Interstate withdrawing its takeover bid on February 9. A BankAmerica spokesman said SEC approval is taking longer than expected and market conditions must now be re-evaluated. "The circumstances at the time will determine what we do," said Arthur Miller, BankAmerica's Vice President for Financial Communications, when asked if BankAmerica would proceed with the offer immediately after it receives SEC approval. "I'd put it off as long as they conceivably could," said Lawrence Cohn, analyst with Merrill Lynch, Pierce, Fenner and Smith. Cohn said the longer BankAmerica waits, the longer they have to show the market an improved financial outlook. 
Although BankAmerica has yet to specify the types of equities it would offer, most analysts believed a convertible preferred stock would encompass at least part of it. Such an offering at a depressed stock price would mean a lower conversion price and more dilution to BankAmerica stock holders, noted Daniel Williams, analyst with Sutro Group. Several analysts said that while they believe the Brazilian debt problem will continue to hang over the banking industry through the quarter, the initial shock reaction is likely to ease over the coming weeks. Nevertheless, BankAmerica, which holds about 2.70 billion dlrs in Brazilian loans, stands to lose 15-20 mln dlrs if the interest rate is reduced on the debt, and as much as 200 mln dlrs if Brazil pays no interest for a year, said Joseph Arsenio, analyst with Birr, Wilson and Co. He noted, however, that any potential losses would not show up in the current quarter. With other major banks standing to lose even more than BankAmerica if Brazil fails to service its debt, the analysts said they expect the debt will be restructured, similar to way Mexico's debt was, minimizing losses to the creditor banks. Reuter 26-FEB-1987 15:10:44.60 grainwheatcornbarleyoatsorghum usa C G f0738 reute u f BC-average-prices 02-26 0095 NATIONAL AVERAGE PRICES FOR FARMER-OWNED RESERVE WASHINGTON, Feb 26 - The U.S. Agriculture Department reported the farmer-owned reserve national five-day average price through February 25 as follows (Dlrs/Bu-Sorghum Cwt) - Natl Loan Release Call Avge Rate-X Level Price Price Wheat 2.55 2.40 IV 4.65 -- V 4.65 -- VI 4.45 -- Corn 1.35 1.92 IV 3.15 3.15 V 3.25 -- X - 1986 Rates. Natl Loan Release Call Avge Rate-X Level Price Price Oats 1.24 0.99 V 1.65 -- Barley n.a. 1.56 IV 2.55 2.55 V 2.65 -- Sorghum 2.34 3.25-Y IV 5.36 5.36 V 5.54 -- Reserves I, II and III have matured. Level IV reflects grain entered after Oct 6, 1981 for feedgrain and after July 23, 1981 for wheat. 
Level V wheat/barley after 5/14/82, corn/sorghum after 7/1/82. Level VI covers wheat entered after January 19, 1984. X-1986 rates. Y-dlrs per CWT (100 lbs). n.a.-not available. Reuter 26-FEB-1987 15:14:36.41 veg-oillinseedlin-oilsoy-oilsun-oilsoybeanoilseedcornsunseedgrainsorghumwheat argentina G f0754 reute r f BC-ARGENTINE-1986/87-GRA 02-26 0066 ARGENTINE 1986/87 GRAIN/OILSEED REGISTRATIONS BUENOS AIRES, Feb 26 - Argentine grain board figures show crop registrations of grains, oilseeds and their products to February 11, in thousands of tonnes, showing those for futurE shipments month, 1986/87 total and 1985/86 total to February 12, 1986, in brackets: Bread wheat prev 1,655.8, Feb 872.0, March 164.6, total 2,692.4 (4,161.0). Maize Mar 48.0, total 48.0 (nil). Sorghum nil (nil) Oilseed export registrations were: Sunflowerseed total 15.0 (7.9) Soybean May 20.0, total 20.0 (nil) The board also detailed export registrations for subproducts, as follows, SUBPRODUCTS Wheat prev 39.9, Feb 48.7, March 13.2, Apr 10.0, total 111.8 (82.7) . Linseed prev 34.8, Feb 32.9, Mar 6.8, Apr 6.3, total 80.8 (87.4). Soybean prev 100.9, Feb 45.1, MAr nil, Apr nil, May 20.0, total 166.1 (218.5). Sunflowerseed prev 48.6, Feb 61.5, Mar 25.1, Apr 14.5, total 149.8 (145.3). Vegetable oil registrations were : Sunoil prev 37.4, Feb 107.3, Mar 24.5, Apr 3.2, May nil, Jun 10.0, total 182.4 (117.6). Linoil prev 15.9, Feb 23.6, Mar 20.4, Apr 2.0, total 61.8, (76.1). Soybean oil prev 3.7, Feb 21.1, Mar nil, Apr 2.0, May 9.0, Jun 13.0, Jul 7.0, total 55.8 (33.7). REUTER 26-FEB-1987 15:14:42.83 usa F f0755 reute d f BC-RED-LION-INNS-FILES-P 02-26 0082 RED LION INNS FILES PLANS OFFERING PORTLAND, Ore., Feb 26 - Red Lion Inns Limited Partnership said it filed a registration statement with the Securities and Exchange Commission covering a proposed offering of 4,790,000 units of limited partnership interests. The company said it expects the offering to be priced at 20 dlrs per unit. 
It said proceeds from the offering, along with a 102.5 mln dlr mortgage loan, will be used to finance its planned acquisition of 10 Red Lion hotels. Reuter 26-FEB-1987 15:15:40.12 usa F A RM f0758 reute u f BC-USX-<X>-DEBT-DOWGRADE 02-26 0103 USX <X> DEBT DOWGRADED BY MOODY'S NEW YORK, Feb 26 - Moody's Investors Service Inc said it lowered the debt and preferred stock ratings of USX Corp and its units. About seven billion dlrs of securities is affected. Moody's said Marathon Oil Co's recent establishment of up to one billion dlrs in production payment facilities on its prolific Yates Field has significant negative implications for USX's unsecured creditors. The company appears to have positioned its steel segment for a return to profit by late 1987, Moody's added. Ratings lowered include those on USX's senior debt to BA-1 from BAA-3. Reuter 26-FEB-1987 15:17:11.20 earn usa F f0762 reute r f BC-CHAMPION-PRODUCTS-<CH 02-26 0067 CHAMPION PRODUCTS <CH> APPROVES STOCK SPLIT ROCHESTER, N.Y., Feb 26 - Champion Products Inc said its board of directors approved a two-for-one stock split of its common shares for shareholders of record as of April 1, 1987. The company also said its board voted to recommend to shareholders at the annual meeting April 23 an increase in the authorized capital stock from five mln to 25 mln shares. Reuter 26-FEB-1987 15:18:06.67 acq usa F f0767 reute d f BC-COMPUTER-TERMINAL-SYS 02-26 0107 COMPUTER TERMINAL SYSTEMS <CPML> COMPLETES SALE COMMACK, N.Y., Feb 26 - Computer Terminal Systems Inc said it has completed the sale of 200,000 shares of its common stock, and warrants to acquire an additional one mln shares, to <Sedio N.V.> of Lugano, Switzerland for 50,000 dlrs. The company said the warrants are exercisable for five years at a purchase price of .125 dlrs per share. 
Computer Terminal said Sedio also has the right to buy additional shares and increase its total holdings up to 40 pct of the Computer Terminal's outstanding common stock under certain circumstances involving change of control at the company. The company said if the conditions occur the warrants would be exercisable at a price equal to 75 pct of its common stock's market price at the time, not to exceed 1.50 dlrs per share. Computer Terminal also said it sold the technolgy rights to its Dot Matrix impact technology, including any future improvements, to <Woodco Inc> of Houston, Tex. for 200,000 dlrs. But, it said it would continue to be the exclusive worldwide licensee of the technology for Woodco. The company said the moves were part of its reorganization plan and would help pay current operation costs and ensure product delivery. Computer Terminal makes computer generated labels, forms, tags and ticket printers and terminals. Reuter tm/inst/texts/custom.xml0000644000175100001440000000070512074065307015076 0ustar hornikusers Ano Nymous The Invisible Man A story about an invisible man. Science fiction Sokrates Scio Nescio I know that I know nothing. Classics tm/inst/texts/loremipsum.txt0000644000175100001440000000622212074065307015777 0ustar hornikusersLorem ipsum dolor sit amet, consectetur adipiscing elit. Sed at ante. Mauris eleifend, quam a vulputate dictum, massa quam dapibus leo, eget vulputate orci purus ut lorem. In fringilla mi in ligula. Pellentesque aliquam quam vel dolor. Nunc adipiscing. Sed quam odio, tempus ac, aliquam molestie, varius ac, tellus. Vestibulum ut nulla aliquam risus rutrum interdum. Pellentesque lorem. Curabitur sit amet erat quis risus feugiat viverra. Pellentesque augue justo, sagittis et, lacinia at, venenatis non, arcu. Nunc nec libero. In cursus dictum risus. Etiam tristique nisl a nulla. Ut a orci. Curabitur dolor nunc, egestas at, accumsan at, malesuada nec, magna. Nulla facilisi. Nunc volutpat. 
Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Ut sit amet orci vel mauris blandit vehicula. Nullam quis enim. Integer dignissim viverra velit. Curabitur in odio. In hac habitasse platea dictumst. Ut consequat, tellus eu volutpat varius, justo orci elementum dolor, sed imperdiet nulla tellus ut diam. Vestibulum ipsum ante, malesuada quis, tempus ac, placerat sit amet, elit. Sed eget turpis a pede tempor malesuada. Vivamus quis mi at leo pulvinar hendrerit. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Pellentesque aliquet lacus vitae pede. Nullam mollis dolor ac nisi. Phasellus sit amet urna. Praesent pellentesque sapien sed lacus. Donec lacinia odio in odio. In sit amet elit. Maecenas gravida interdum urna. Integer pretium, arcu vitae imperdiet facilisis, elit tellus tempor nisi, vel feugiat ante velit sit amet mauris. Vivamus arcu. Integer pharetra magna ac lacus. Aliquam vitae sapien in nibh vehicula auctor. Suspendisse leo mauris, pulvinar sed, tempor et, consequat ac, lacus. Proin velit. Nulla semper lobortis mauris. Duis urna erat, ornare et, imperdiet eu, suscipit sit amet, massa. Nulla nulla nisi, pellentesque at, egestas quis, fringilla eu, diam. Donec semper, sem nec tristique tempus, justo neque commodo nisl, ut gravida sem tellus suscipit nunc. Aliquam erat volutpat. Ut tincidunt pretium elit. Aliquam pulvinar. Nulla cursus. Suspendisse potenti. Etiam condimentum hendrerit felis. Duis iaculis aliquam enim. Donec dignissim augue vitae orci. Curabitur luctus felis a metus. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. In varius neque at enim. Suspendisse massa nulla, viverra in, bibendum vitae, tempor quis, lorem. Donec dapibus orci sit amet elit. Maecenas rutrum ultrices lectus. Aliquam suscipit, lacus a iaculis adipiscing, eros orci pellentesque nisl, non pharetra dolor urna nec dolor. Integer cursus dolor vel magna. 
Integer ultrices feugiat sem. Proin nec nibh. Duis eu dui quis nunc sagittis lobortis. Fusce pharetra, enim ut sodales luctus, lectus arcu rhoncus purus, in fringilla augue elit vel lacus. In hac habitasse platea dictumst. Aliquam erat volutpat. Fusce iaculis elit id tellus. Ut accumsan malesuada turpis. Suspendisse potenti. Vestibulum lacus augue, lobortis mattis, laoreet in, varius at, nisi. Nunc gravida. Phasellus faucibus. In hac habitasse platea dictumst. Integer tempor lacus eget lectus. Praesent fringilla augue fringilla dui. tm/inst/texts/txt/0000755000175100001440000000000012213264556013661 5ustar hornikuserstm/inst/texts/txt/ovid_4.txt0000644000175100001440000000137412074065306015610 0ustar hornikusers scit bene venator, cervis ubi retia tendat, scit bene, qua frendens valle moretur aper; aucupibus noti frutices; qui sustinet hamos, novit quae multo pisce natentur aquae: tu quoque, materiam longo qui quaeris amori, ante frequens quo sit disce puella loco. non ego quaerentem vento dare vela iubebo, nec tibi, ut invenias, longa terenda via est. Andromedan Perseus nigris portarit ab Indis, raptaque sit Phrygio Graia puella viro, tot tibi tamque dabit formosas Roma puellas, 'Haec habet' ut dicas 'quicquid in orbe fuit.' Gargara quot segetes, quot habet Methymna racemos, aequore quot pisces, fronde teguntur aves, quot caelum stellas, tot habet tua Roma puellas: tm/inst/texts/txt/ovid_2.txt0000644000175100001440000000131612074065306015602 0ustar hornikusers quas Hector sensurus erat, poscente magistro verberibus iussas praebuit ille manus. Aeacidae Chiron, ego sum praeceptor Amoris: saevus uterque puer, natus uterque dea. sed tamen et tauri cervix oneratur aratro, frenaque magnanimi dente teruntur equi; et mihi cedet Amor, quamvis mea vulneret arcu pectora, iactatas excutiatque faces. 
quo me fixit Amor, quo me violentius ussit, hoc melior facti vulneris ultor ero: non ego, Phoebe, datas a te mihi mentiar artes, nec nos aëriae voce monemur avis, nec mihi sunt visae Clio Cliusque sorores servanti pecudes vallibus, Ascra, tuis: usus opus movet hoc: vati parete perito; tm/inst/texts/txt/ovid_3.txt0000644000175100001440000000134412074065306015604 0ustar hornikusers vera canam: coeptis, mater Amoris, ades! este procul, vittae tenues, insigne pudoris, quaeque tegis medios, instita longa, pedes. nos venerem tutam concessaque furta canemus, inque meo nullum carmine crimen erit. principio, quod amare velis, reperire labora, qui nova nunc primum miles in arma venis. proximus huic labor est placitam exorare puellam: tertius, ut longo tempore duret amor. hic modus, haec nostro signabitur area curru: haec erit admissa meta terenda rota. dum licet, et loris passim potes ire solutis, elige cui dicas 'tu mihi sola places.' haec tibi non tenues veniet delapsa per auras: quaerenda est oculis apta puella tuis. tm/inst/texts/txt/ovid_1.txt0000644000175100001440000000126412074065306015603 0ustar hornikusers Si quis in hoc artem populo non novit amandi, hoc legat et lecto carmine doctus amet. arte citae veloque rates remoque moventur, arte leves currus: arte regendus amor. curribus Automedon lentisque erat aptus habenis, Tiphys in Haemonia puppe magister erat: me Venus artificem tenero praefecit Amori; Tiphys et Automedon dicar Amoris ego. ille quidem ferus est et qui mihi saepe repugnet: sed puer est, aetas mollis et apta regi. Phillyrides puerum cithara perfecit Achillem, atque animos placida contudit arte feros. qui totiens socios, totiens exterruit hostes, creditur annosum pertimuisse senem. tm/inst/texts/txt/ovid_5.txt0000644000175100001440000000131612074065306015605 0ustar hornikusers mater in Aeneae constitit urbe sui. seu caperis primis et adhuc crescentibus annis, ante oculos veniet vera puella tuos: sive cupis iuvenem, iuvenes tibi mille placebunt. 
cogeris voti nescius esse tui: seu te forte iuvat sera et sapientior aetas, hoc quoque, crede mihi, plenius agmen erit. tu modo Pompeia lentus spatiare sub umbra, cum sol Herculei terga leonis adit: aut ubi muneribus nati sua munera mater addidit, externo marmore dives opus. nec tibi vitetur quae, priscis sparsa tabellis, porticus auctoris Livia nomen habet: quaque parare necem miseris patruelibus ausae Belides et stricto stat ferus ense pater. tm/inst/stopwords/0000755000175100001440000000000012327630227013735 5ustar hornikuserstm/inst/stopwords/portuguese.dat0000644000175100001440000000236312156574727016651 0ustar hornikusersde a o que e do da em um para com não uma os no se na por mais as dos como mas ao ele das à seu sua ou quando muito nos já eu também só pelo pela até isso ela entre depois sem mesmo aos seus quem nas me esse eles você essa num nem suas meu às minha numa pelos elas qual nós lhe deles essas esses pelas este dele tu te vocês vos lhes meus minhas teu tua teus tuas nosso nossa nossos nossas dela delas esta estes estas aquele aquela aqueles aquelas isto aquilo estou está estamos estão estive esteve estivemos estiveram estava estávamos estavam estivera estivéramos esteja estejamos estejam estivesse estivéssemos estivessem estiver estivermos estiverem hei há havemos hão houve houvemos houveram houvera houvéramos haja hajamos hajam houvesse houvéssemos houvessem houver houvermos houverem houverei houverá houveremos houverão houveria houveríamos houveriam sou somos são era éramos eram fui foi fomos foram fora fôramos seja sejamos sejam fosse fôssemos fossem for formos forem serei será seremos serão seria seríamos seriam tenho tem temos tém tinha tínhamos tinham tive teve tivemos tiveram tivera tivéramos tenha tenhamos tenham tivesse tivéssemos tivessem tiver tivermos tiverem terei terá teremos terão teria teríamos teriam tm/inst/stopwords/french.dat0000644000175100001440000000150512156574723015705 0ustar hornikusersau aux avec ce ces dans de des du 
elle en et eux il je la le leur lui ma mais me même mes moi mon ne nos notre nous on ou par pas pour qu que qui sa se ses son sur ta te tes toi ton tu un une vos votre vous c d j l à m n s t y été étée étées étés étant suis es est sommes êtes sont serai seras sera serons serez seront serais serait serions seriez seraient étais était étions étiez étaient fus fut fûmes fûtes furent sois soit soyons soyez soient fusse fusses fût fussions fussiez fussent ayant eu eue eues eus ai as avons avez ont aurai auras aura aurons aurez auront aurais aurait aurions auriez auraient avais avait avions aviez avaient eut eûmes eûtes eurent aie aies ait ayons ayez aient eusse eusses eût eussions eussiez eussent ceci cela celà cet cette ici ils les leurs quel quels quelle quelles sans soi tm/inst/stopwords/hungarian.dat0000644000175100001440000000231312156574725016414 0ustar hornikusersa ahogy ahol aki akik akkor alatt által általában amely amelyek amelyekben amelyeket amelyet amelynek ami amit amolyan amíg amikor át abban ahhoz annak arra arról az azok azon azt azzal azért aztán azután azonban bár be belül benne cikk cikkek cikkeket csak de e eddig egész egy egyes egyetlen egyéb egyik egyre ekkor el elég ellen elő először előtt első én éppen ebben ehhez emilyen ennek erre ez ezt ezek ezen ezzel ezért és fel felé hanem hiszen hogy hogyan igen így illetve ill. 
ill ilyen ilyenkor ison ismét itt jó jól jobban kell kellett keresztül keressünk ki kívül között közül legalább lehet lehetett legyen lenne lenni lesz lett maga magát majd majd már más másik meg még mellett mert mely melyek mi mit míg miért milyen mikor minden mindent mindenki mindig mint mintha mivel most nagy nagyobb nagyon ne néha nekem neki nem néhány nélkül nincs olyan ott össze ő ők őket pedig persze rá s saját sem semmi sok sokat sokkal számára szemben szerint szinte talán tehát teljes tovább továbbá több úgy ugyanis új újabb újra után utána utolsó vagy vagyis valaki valami valamint való vagyok van vannak volt voltam voltak voltunk vissza vele viszont volna tm/inst/stopwords/swedish.dat0000644000175100001440000000105712156574731016107 0ustar hornikusersoch det att i en jag hon som han på den med var sig för så till är men ett om hade de av icke mig du henne då sin nu har inte hans honom skulle hennes där min man ej vid kunde något från ut när efter upp vi dem vara vad över än dig kan sina här ha mot alla under någon eller allt mycket sedan ju denna själv detta åt utan varit hur ingen mitt ni bli blev oss din dessa några deras blir mina samma vilken er sådan vår blivit dess inom mellan sådant varför varje vilka ditt vem vilket sitta sådana vart dina vars vårt våra ert era vilkas tm/inst/stopwords/norwegian.dat0000644000175100001440000000152312156574726016434 0ustar hornikusersog i jeg det at en et den til er som på de med han av ikke ikkje der så var meg seg men ett har om vi min mitt ha hadde hun nå over da ved fra du ut sin dem oss opp man kan hans hvor eller hva skal selv sjøl her alle vil bli ble blei blitt kunne inn når være kom noen noe ville dere som deres kun ja etter ned skulle denne for deg si sine sitt mot å meget hvorfor dette disse uten hvordan ingen din ditt blir samme hvilken hvilke sånn inni mellom vår hver hvem vors hvis både bare enn fordi før mange også slik vært være båe begge siden dykk dykkar dei deira deires deim di då eg ein eit eitt 
elles honom hjå ho hoe henne hennar hennes hoss hossen ikkje ingi inkje korleis korso kva kvar kvarhelst kven kvi kvifor me medan mi mine mykje no nokon noka nokor noko nokre si sia sidan so somt somme um upp vere vore verte vort varte vart tm/inst/stopwords/russian.dat0000644000175100001440000000250512156574727016131 0ustar hornikusersи в во не что он на я с со как а то все она так его но да ты к у же вы за бы по только ее мне было вот от меня еще нет о из ему теперь когда даже ну вдруг ли если уже или ни быть был него до вас нибудь опять уж вам сказал ведь там потом себя ничего ей может они тут где есть надо ней для мы тебя их чем была сам чтоб без будто человек чего раз тоже себе под жизнь будет ж тогда кто этот говорил того потому этого какой совсем ним здесь этом один почти мой тем чтобы нее кажется сейчас были куда зачем сказать всех никогда сегодня можно при наконец два об другой хоть после над больше тот через эти нас про всего них какая много разве сказала три эту моя впрочем хорошо свою этой перед иногда лучше чуть том нельзя такой им более всегда конечно всю между tm/inst/stopwords/italian.dat0000644000175100001440000000316612156574725016070 0ustar hornikusersad al allo ai agli all agl alla alle con col coi da dal dallo dai dagli dall dagl dalla dalle di del dello dei degli dell degl della delle in nel nello nei negli nell negl nella nelle su sul sullo sui sugli sull sugl sulla sulle per tra contro io tu lui lei noi voi loro mio mia miei mie tuo tua tuoi tue suo sua suoi sue nostro nostra nostri nostre vostro vostra vostri vostre mi ti ci vi lo la li le gli ne il un uno una ma ed se perché anche come dov dove che chi cui non più quale quanto quanti quanta quante quello quelli quella quelle questo questi questa queste si tutto tutti a c e i l o ho hai ha abbiamo avete hanno abbia abbiate abbiano avrò avrai avrà avremo avrete avranno avrei avresti avrebbe avremmo avreste avrebbero avevo avevi aveva avevamo avevate avevano ebbi avesti ebbe avemmo aveste 
ebbero avessi avesse avessimo avessero avendo avuto avuta avuti avute sono sei è siamo siete sia siate siano sarò sarai sarà saremo sarete saranno sarei saresti sarebbe saremmo sareste sarebbero ero eri era eravamo eravate erano fui fosti fu fummo foste furono fossi fosse fossimo fossero essendo faccio fai facciamo fanno faccia facciate facciano farò farai farà faremo farete faranno farei faresti farebbe faremmo fareste farebbero facevo facevi faceva facevamo facevate facevano feci facesti fece facemmo faceste fecero facessi facesse facessimo facessero facendo sto stai sta stiamo stanno stia stiate stiano starò starai starà staremo starete staranno starei staresti starebbe staremmo stareste starebbero stavo stavi stava stavamo stavate stavano stetti stesti stette stemmo steste stettero stessi stesse stessimo stessero stando tm/inst/stopwords/english.dat0000644000175100001440000000167212156574722016075 0ustar hornikusersi me my myself we our ours ourselves you your yours yourself yourselves he him his himself she her hers herself it its itself they them their theirs themselves what which who whom this that these those am is are was were be been being have has had having do does did doing would should could ought i'm you're he's she's it's we're they're i've you've we've they've i'd you'd he'd she'd we'd they'd i'll you'll he'll she'll we'll they'll isn't aren't wasn't weren't hasn't haven't hadn't doesn't don't didn't won't wouldn't shan't shouldn't can't cannot couldn't mustn't let's that's who's what's here's there's when's where's why's how's a an the and but if or because as until while of at by for with about against between into through during before after above below to from up down in out on off over under again further then once here there when where why how all any both each few more most other some such no nor not only own same so than too very tm/inst/stopwords/dutch.dat0000644000175100001440000000070512156574722015547 0ustar hornikusersde en van ik te 
dat die in een hij het niet zijn is was op aan met als voor had er maar om hem dan zou of wat mijn men dit zo door over ze zich bij ook tot je mij uit der daar haar naar heb hoe heeft hebben deze u want nog zal me zij nu ge geen omdat iets worden toch al waren veel meer doen toen moet ben zonder kan hun dus alles onder ja eens hier wie werd altijd doch wordt wezen kunnen ons zelf tegen na reeds wil kon niets uw iemand geweest andere tm/inst/stopwords/finnish.dat0000644000175100001440000000056012156574723016076 0ustar hornikusersolla olen olet on olemme olette ovat ole oli olisi olisit olisin olisimme olisitte olisivat olit olin olimme olitte olivat ollut olleet en et ei emme ette eivät minä sinä hän me te he tämä tuo se nämä nuo ne kuka ketkä mikä mitkä joka jotka että ja jos koska kuin mutta niin sekä sillä tai vaan vai vaikka kanssa mukaan noin poikki yli kun niin nyt itse tm/inst/stopwords/german.dat0000644000175100001440000000250512156574724015713 0ustar hornikusersaber alle allem allen aller alles als also am an ander andere anderem anderen anderer anderes anderm andern anderr anders auch auf aus bei bin bis bist da damit dann der den des dem die das daß derselbe derselben denselben desselben demselben dieselbe dieselben dasselbe dazu dein deine deinem deinen deiner deines denn derer dessen dich dir du dies diese diesem diesen dieser dieses doch dort durch ein eine einem einen einer eines einig einige einigem einigen einiger einiges einmal er ihn ihm es etwas euer eure eurem euren eurer eures für gegen gewesen hab habe haben hat hatte hatten hier hin hinter ich mich mir ihr ihre ihrem ihren ihrer ihres euch im in indem ins ist jede jedem jeden jeder jedes jene jenem jenen jener jenes jetzt kann kein keine keinem keinen keiner keines können könnte machen man manche manchem manchen mancher manches mein meine meinem meinen meiner meines mit muss musste nach nicht nichts noch nun nur ob oder ohne sehr sein seine seinem seinen seiner seines selbst sich sie ihnen 
sind so solche solchem solchen solcher solches soll sollte sondern sonst über um und uns unse unsem unsen unser unses unter viel vom von vor während war waren warst was weg weil weiter welche welchem welchen welcher welches wenn werde werden wie wieder will wir wird wirst wo wollen wollte würde würden zu zum zur zwar zwischen tm/inst/stopwords/catalan.dat0000644000175100001440000001066412074065306016040 0ustar hornikusersa abans abans-d'ahir abintestat ací adesiara adés adéu adàgio ah ahir ai aitambé aitampoc aitan aitant aitantost aixà això així aleshores algun alguna algunes alguns algú alhora allà allèn allò allí almenys alto altra altre altres altresí altri alça al·legro amargament amb ambdues ambdós amunt amén anc andante andantino anit ans antany apa aprés aqueix aqueixa aqueixes aqueixos aqueixs aquell aquella aquelles aquells aquest aquesta aquestes aquests aquèn aquí ara arran arrera arrere arreu arri arruix atxim au avall avant aviat avui açò bah baix baldament ballmanetes banzim-banzam bastant bastants ben bis bitllo-bitllo bo bé ca cada cal cap car caram catorze cent centes cents cerca cert certa certes certs cinc cinquanta cinquena cinquenes cinquens cinquè com comsevulla contra cordons corrents cric-crac d daixonses daixò dallonses dallò dalt daltabaix damunt darrera darrere davall davant de debades dedins defora dejorn dejús dellà dementre dempeus demés demà des desena desenes desens després dessobre dessota dessús desè deu devers devora deçà diferents dinou dins dintre disset divers diversa diverses diversos divuit doncs dos dotze dues durant ecs eh el ela elis ell ella elles ells els em emperò en enans enant encara encontinent endalt endarrera endarrere endavant endebades endemig endemés endemà endins endintre enfora engir enguany enguanyasses enjús enlaire enlloc enllà enrera enrere ens ensems ensota ensús entorn entre entremig entretant entrò envers envides environs enviró ençà ep ep era eren eres ergo es escar essent esser est esta estada 
estades estan estant estar estaran estarem estareu estaria estarien estaries estaré estarà estaràs estaríem estaríeu estat estats estava estaven estaves estem estes esteu estic estiguem estigueren estigueres estigues estiguessis estigueu estigui estiguin estiguis estigué estiguérem estiguéreu estigués estiguí estos està estàs estàvem estàveu et etc etcètera ets excepte fins fora foren fores força fos fossin fossis fou fra fui fóra fórem fóreu fóreu fóssim fóssiu gaire gairebé gaires gens girientorn gratis ha hagi hagin hagis haguda hagudes hagueren hagueres haguessin haguessis hagut haguts hagué haguérem haguéreu hagués haguéssim haguéssiu haguí hala han has hauran haurem haureu hauria haurien hauries hauré haurà hauràs hauríem hauríeu havem havent haver haveu havia havien havies havíem havíeu he hem heu hi ho hom hui hàgim hàgiu i igual iguals inclusive ja jamai jo l la leri-leri les li lla llavors llevat lluny llur llurs lo los ls m ma mai mal malament malgrat manco mant manta mantes mantinent mants massa mateix mateixa mateixes mateixos me mentre mentrestant menys mes meu meua meues meus meva meves mi mig mil mitges mitja mitjançant mitjos moixoni molt molta moltes molts mon mos més n na ne ni ningú no nogensmenys només noranta nos nosaltres nostra nostre nostres nou novena novenes novens novè ns nòs nós o oh oi oidà on onsevulga onsevulla onze pas pengim-penjam per perquè pertot però piano pla poc poca pocs poques potser prest primer primera primeres primers pro prompte prop prou puix pus pàssim qual quals qualsevol qualsevulla qualssevol qualssevulla quan quant quanta quantes quants quaranta quart quarta quartes quarts quasi quatre que quelcom qui quin quina quines quins quinze quisvulla què ran re rebé renoi rera rere res retruc s sa salvament salvant salvat se segon segona segones segons seguida seixanta sempre sengles sens sense ser seran serem sereu seria serien series seré serà seràs seríem seríeu ses set setanta setena setenes setens setze setè seu seua 
seues seus seva seves si sia siau sic siguem sigues sigueu sigui siguin siguis sinó sis sisena sisenes sisens sisè sobre sobretot sol sola solament soles sols som son sos sota sots sou sovint suara sí sóc són t ta tal tals també tampoc tan tanmateix tant tanta tantes tantost tants te tercer tercera terceres tercers tes teu teua teues teus teva teves ton tos tost tostemps tot tota total totes tothom tothora tots trenta tres tret tretze tu tururut u uf ui uix ultra un una unes uns up upa us va vagi vagin vagis vaig vair vam van vares vas vau vem verbigràcia vers vet veu vint vora vos vosaltres vostra vostre vostres vostè vostès vuit vuitanta vuitena vuitenes vuitens vuitè vés vàreig vàrem vàreu vós xano-xano xau-xau xec érem éreu és ésser àdhuc àlies ça ço òlim ídem últim última últimes últims únic única únics úniques tm/inst/stopwords/romanian.dat0000644000175100001440000000341712327630227016240 0ustar hornikusersa abia acea aceasta această această aceea aceia acel acela acelaşi acelaşi acele acelea aceluiaşi acest acesta aceste acestea acestei aceşti aceştia acestor acestora acestui acolo acum adică ai aia aici al ăla alături ale alt alta altă altceva alte altele altfel alţi alţii altul am anume apoi ar are aş aşa asemenea asta astăzi astfel asupra atare atât atâta atâtea atâţi atâţia aţi atît atîti atîţia atunci au avea avem avut azi ba bine ca că cam când care căreia cărora căruia cât câtă câte câţi către ce cea ceea cei ceilalţi cel cele celelalte celor ceva chiar ci cînd cine cineva cît cîte cîteva cîţi cîţiva cu cui cum cumva da daca dacă dar de deasupra decât deci decît deja deşi despre din dintr dintre doar după ea ei el ele era este eu fără fecăreia fel fi fie fiecare fiecărui fiecăruia fiind foarte fost i-au iar ieri îi îl îmi împotriva în în înainte înapoi înca încît însă însă însuşi într între între îşi îţi l-am la le li lor lui mă mai mare mereu mod mult multă multe mulţi ne nici niciodata nimeni nimic nişte noi noştri noştri nostru nouă nu numai o 
oarecare oarece oarecine oarecui or orice oricum până pe pentru peste pînă plus poată prea prin printr-o puţini s-ar sa să să-i să-mi să-şi să-ţi săi sale sau său se şi sînt sîntem sînteţi spre sub sunt suntem sunteţi te ţi toată toate tocmai tot toţi totul totuşi tu tuturor un una unde unei unele uneori unii unor unui unul va vă voi vom vor vreo vreun tm/inst/stopwords/danish.dat0000644000175100001440000000065012156574721015704 0ustar hornikusersog i jeg det at en den til er som på de med han af for ikke der var mig sig men et har om vi min havde ham hun nu over da fra du ud sin dem os op man hans hvor eller hvad skal selv her alle vil blev kunne ind når være dog noget ville jo deres efter ned skulle denne end dette mit også under have dig anden hende mine alt meget sit sine vor mod disse hvis din nogle hos blive mange ad bliver hendes været thi jer sådan tm/inst/stopwords/SMART.dat0000644000175100001440000000700512074065306015316 0ustar hornikusersa a's able about above according accordingly across actually after afterwards again against ain't all allow allows almost alone along already also although always am among amongst an and another any anybody anyhow anyone anything anyway anyways anywhere apart appear appreciate appropriate are aren't around as aside ask asking associated at available away awfully b be became because become becomes becoming been before beforehand behind being believe below beside besides best better between beyond both brief but by c c'mon c's came can can't cannot cant cause causes certain certainly changes clearly co com come comes concerning consequently consider considering contain containing contains corresponding could couldn't course currently d definitely described despite did didn't different do does doesn't doing don't done down downwards during e each edu eg eight either else elsewhere enough entirely especially et etc even ever every everybody everyone everything everywhere ex exactly example except f far few fifth first five 
followed following follows for former formerly forth four from further furthermore g get gets getting given gives go goes going gone got gotten greetings h had hadn't happens hardly has hasn't have haven't having he he's hello help hence her here here's hereafter hereby herein hereupon hers herself hi him himself his hither hopefully how howbeit however i i'd i'll i'm i've ie if ignored immediate in inasmuch inc indeed indicate indicated indicates inner insofar instead into inward is isn't it it'd it'll it's its itself j just k keep keeps kept know knows known l last lately later latter latterly least less lest let let's like liked likely little look looking looks ltd m mainly many may maybe me mean meanwhile merely might more moreover most mostly much must my myself n name namely nd near nearly necessary need needs neither never nevertheless new next nine no nobody non none noone nor normally not nothing novel now nowhere o obviously of off often oh ok okay old on once one ones only onto or other others otherwise ought our ours ourselves out outside over overall own p particular particularly per perhaps placed please plus possible presumably probably provides q que quite qv r rather rd re really reasonably regarding regardless regards relatively respectively right s said same saw say saying says second secondly see seeing seem seemed seeming seems seen self selves sensible sent serious seriously seven several shall she should shouldn't since six so some somebody somehow someone something sometime sometimes somewhat somewhere soon sorry specified specify specifying still sub such sup sure t t's take taken tell tends th than thank thanks thanx that that's thats the their theirs them themselves then thence there there's thereafter thereby therefore therein theres thereupon these they they'd they'll they're they've think third this thorough thoroughly those though three through throughout thru thus to together too took toward towards tried tries truly try trying twice 
two u un under unfortunately unless unlikely until unto up upon us use used useful uses using usually uucp v value various very via viz vs w want wants was wasn't way we we'd we'll we're we've welcome well went were weren't what what's whatever when whence whenever where where's whereafter whereas whereby wherein whereupon wherever whether which while whither who who's whoever whole whom whose why will willing wish with within without won't wonder would would wouldn't x y yes yet you you'd you'll you're you've your yours yourself yourselves z zero tm/inst/stopwords/spanish.dat0000644000175100001440000000412212156574730016101 0ustar hornikusersde la que el en y a los del se las por un para con no una su al lo como más pero sus le ya o este sí porque esta entre cuando muy sin sobre también me hasta hay donde quien desde todo nos durante todos uno les ni contra otros ese eso ante ellos e esto mí antes algunos qué unos yo otro otras otra él tanto esa estos mucho quienes nada muchos cual poco ella estar estas algunas algo nosotros mi mis tú te ti tu tus ellas nosotras vosotros vosotras os mío mía míos mías tuyo tuya tuyos tuyas suyo suya suyos suyas nuestro nuestra nuestros nuestras vuestro vuestra vuestros vuestras esos esas estoy estás está estamos estáis están esté estés estemos estéis estén estaré estarás estará estaremos estaréis estarán estaría estarías estaríamos estaríais estarían estaba estabas estábamos estabais estaban estuve estuviste estuvo estuvimos estuvisteis estuvieron estuviera estuvieras estuviéramos estuvierais estuvieran estuviese estuvieses estuviésemos estuvieseis estuviesen estando estado estada estados estadas estad he has ha hemos habéis han haya hayas hayamos hayáis hayan habré habrás habrá habremos habréis habrán habría habrías habríamos habríais habrían había habías habíamos habíais habían hube hubiste hubo hubimos hubisteis hubieron hubiera hubieras hubiéramos hubierais hubieran hubiese hubieses hubiésemos hubieseis hubiesen habiendo habido 
habida habidos habidas soy eres es somos sois son sea seas seamos seáis sean seré serás será seremos seréis serán sería serías seríamos seríais serían era eras éramos erais eran fui fuiste fue fuimos fuisteis fueron fuera fueras fuéramos fuerais fueran fuese fueses fuésemos fueseis fuesen siendo sido tengo tienes tiene tenemos tenéis tienen tenga tengas tengamos tengáis tengan tendré tendrás tendrá tendremos tendréis tendrán tendría tendrías tendríamos tendríais tendrían tenía tenías teníamos teníais tenían tuve tuviste tuvo tuvimos tuvisteis tuvieron tuviera tuvieras tuviéramos tuvierais tuvieran tuviese tuvieses tuviésemos tuvieseis tuviesen teniendo tenido tenida tenidos tenidas tened tm/tests/0000755000175100001440000000000013065660374012064 5ustar hornikuserstm/tests/testthat.R0000644000175100001440000000006013065660374014043 0ustar hornikuserslibrary(testthat) library(tm) test_check("tm") tm/tests/testthat/0000755000175100001440000000000013204066220013705 5ustar hornikuserstm/tests/testthat/test-TermDocumentMatrix.R0000644000175100001440000000243213065660374020620 0ustar hornikuserscontext("Term-document matrices") test_that("construction works", { vs <- VectorSource(c("one two two three three three", "This is a short text with a few words")) scorpus <- Corpus(vs) vcorpus <- VCorpus(vs) ms <- TermDocumentMatrix(scorpus) mv <- TermDocumentMatrix(vcorpus) terms <- c("few", "one", "short", "text", "this", "three", "two", "with", "words") docs <- c("1", "2") expect_equal(sort(Terms(ms)), terms) expect_equal(sort(Terms(mv)), terms) expect_equal(Docs(ms), docs) expect_equal(Docs(mv), docs) m <- matrix(c(0, 1, 0, 0, 0, 3, 2, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1), ncol = 2, dimnames = list("Terms" = terms, "Docs" = docs)) expect_equal(as.matrix(ms[order(Terms(ms)), ]), m) expect_equal(as.matrix(mv), m) }) test_that("construction with control arguments works", { vs <- VectorSource("one two two three three three") scorpus <- Corpus(vs) vcorpus <- VCorpus(vs) docs <- "1" 
ctrl <- list(dictionary = c("three", "two", "zero")) ms <- TermDocumentMatrix(scorpus, ctrl) mv <- TermDocumentMatrix(vcorpus, ctrl) m <- matrix(c(3, 2, 0), dimnames = list("Terms" = ctrl$dictionary, "Docs" = docs)) expect_equal(as.matrix(ms[order(Terms(ms)), ]), m) expect_equal(as.matrix(mv), m) }) tm/tests/testthat/test-Source.R0000644000175100001440000000117213110235234016245 0ustar hornikuserscontext("Sources") test_that("DataframeSource works", { txt <- c("First document.", "Second document.") dm1 <- 1:2 dm2 <- letters[1:2] df <- data.frame(doc_id = c("doc_1", "doc_2"), text = txt, dmeta1 = dm1, dmeta2 = dm2, stringsAsFactors = FALSE) ds <- DataframeSource(df) scorpus <- Corpus(ds) vcorpus <- VCorpus(ds) expect_equal(as.character(scorpus[[2]]), as.character(vcorpus[[2]])) expect_equal(as.character(scorpus[[2]]), txt[2]) expect_equal(meta(scorpus), meta(vcorpus)) expect_equal(meta(scorpus), data.frame(dmeta1 = dm1, dmeta2 = dm2, stringsAsFactors = FALSE)) }) tm/src/0000755000175100001440000000000013204065716011503 5ustar hornikuserstm/src/tokenizer.cpp0000644000175100001440000000104713204065716014223 0ustar hornikusers// [[Rcpp::depends(BH)]] #include #include using namespace Rcpp; // [[Rcpp::export]] StringVector Boost_Tokenizer(const StringVector strings) { std::vector tokens; for (unsigned int index = 0; index < strings.size(); index++) { std::string s = std::string(strings(index)); boost::tokenizer<> tok(s); for (boost::tokenizer<>::iterator it = tok.begin(); it != tok.end(); ++it) tokens.push_back(*it); } return wrap(tokens); } tm/src/copy.c0000644000175100001440000000012713204065716012621 0ustar hornikusers#include void _tm_copyCorpus(SEXP x, SEXP y) { copyVector(x, y); } tm/src/scan.c0000644000175100001440000000312713204065716012576 0ustar hornikusers#include #include #include int is_space_or_punct(int c) { return(isspace(c) || ispunct(c)); } SEXP _tm_scan(SEXP x, SEXP which) { SEXP y, this; Rboolean skip; int size = 256, i, j, nb = 0, ne = 0, u, v, w; 
int *beg, *end; const char *s; char c, *t, *p; cetype_t e; int (*test) () = isspace; if(LENGTH(which) > 0) { PROTECT(this = AS_INTEGER(which)); w = INTEGER(this)[0]; if(w == 1) test = is_space_or_punct; UNPROTECT(1); } if(LENGTH(x) < 1) error("invalid '%s' argument", "x"); PROTECT(x = AS_CHARACTER(x)); this = STRING_ELT(x, 0); if(this == NA_STRING) { UNPROTECT(1); return NA_STRING; } beg = Calloc(size, int); end = Calloc(size, int); e = getCharCE(this); s = CHAR(this); i = 0; skip = TRUE; while((c = *s++) != '\0') { if(skip && !test(c)) { skip = FALSE; if(nb >= size) { if(size > INT_MAX / 2) error("too many items"); size *= 2; beg = Realloc(beg, size, int); end = Realloc(end, size, int); } beg[nb] = i; nb++; } else if(!skip && test(c)) { skip = TRUE; end[ne] = i - 1; ne++; } i++; } if(ne < nb) end[ne] = i - 1; PROTECT(y = NEW_CHARACTER(nb)); s = CHAR(this); v = -1; for(i = 0; i < nb; i++) { u = beg[i]; s += (u - v - 1); v = end[i]; w = v - u + 1; p = t = (char *) R_alloc(w + 1, sizeof(char)); for(j = 0; j < w; j++) { *t++ = *s++; } *t = '\0'; SET_STRING_ELT(y, i, mkCharCE(p, e)); } Free(beg); Free(end); UNPROTECT(2); return y; } tm/src/tdm.cpp0000644000175100001440000000460413204065716012777 0ustar hornikusers// [[Rcpp::depends(BH)]] // [[Rcpp::plugins(cpp11)]] #include #include using namespace Rcpp; // [[Rcpp::export]] List tdm(const StringVector strings, const bool remove_digits, const std::vector stopwords, const std::vector dictionary, const unsigned int min_term_freq, const unsigned int max_term_freq, const unsigned int min_word_length, const unsigned int max_word_length) { unsigned int column = 1; std::map line, terms_pos; std::set dict(dictionary.begin(), dictionary.end()), sw(stopwords.begin(), stopwords.end()); std::vector i, j, v; std::vector terms; for (unsigned int index = 0; index < strings.size(); index++) { std::string s = std::string(strings(index)); boost::tokenizer<> tok(s); line.clear(); for (boost::tokenizer<>::iterator it = tok.begin(); it != 
tok.end(); ++it) { std::string token = *it; if (remove_digits) token.erase( std::remove_if(token.begin(), token.end(), &isdigit), token.end()); if ((dict.empty() || dict.count(token)) && min_word_length <= token.length() && token.length() <= max_word_length && !sw.count(token)) line[token]++; } for (std::map::iterator it = line.begin(); it != line.end(); ++it) { std::string term = it->first; unsigned int freq = it->second; if (min_term_freq <= freq && freq <= max_term_freq) { if (!terms_pos.count(term)) { terms_pos[term] = column++; terms.push_back(term); } i.push_back(terms_pos[term]); j.push_back(index + 1); v.push_back(freq); } } } for (const std::string &term : dictionary) if (std::find(terms.begin(), terms.end(), term) == terms.end()) terms.push_back(term); return List::create(Named("i") = i, Named("j") = j, Named("v") = v, Named("terms") = terms); } tm/src/remove.c0000644000175100001440000000151113204065716013142 0ustar hornikusers#include #include #include SEXP _tm_remove_chars(SEXP x, SEXP which) { SEXP y, this; int n, i, w; const char *s; char c, *t, *p; cetype_t e; int (*test) () = ispunct; if(LENGTH(which) > 0) { PROTECT(this = AS_INTEGER(which)); w = INTEGER(this)[0]; if(w == 1) test = isdigit; UNPROTECT(1); } PROTECT(x = AS_CHARACTER(x)); n = LENGTH(x); PROTECT(y = NEW_CHARACTER(n)); for(i = 0; i < n; i++) { this = STRING_ELT(x, i); if(this == NA_STRING) { SET_STRING_ELT(y, i, NA_STRING); continue; } e = getCharCE(this); s = CHAR(this); t = p = (char *) R_alloc(strlen(s) + 1, sizeof(char)); while((c = *s++) != '\0') { if(!test(c)) *t++ = c; } *t = '\0'; SET_STRING_ELT(y, i, mkCharCE(p, e)); } UNPROTECT(2); return y; } tm/src/init.c0000644000175100001440000000156413204065716012620 0ustar hornikusers#include #include #include void _tm_copyCorpus(SEXP x, SEXP y); SEXP _tm_remove_chars(SEXP x, SEXP which); SEXP _tm_scan(SEXP x, SEXP which); SEXP _tm_tdm(SEXP stringsSEXP, SEXP remove_digitsSEXP, SEXP stopwordsSEXP, SEXP dictionarySEXP, SEXP 
min_term_freqSEXP, SEXP max_term_freqSEXP, SEXP min_word_lengthSEXP, SEXP max_word_lengthSEXP); SEXP _tm_Boost_Tokenizer(SEXP stringsSEXP); static const R_CallMethodDef CallEntries[] = { {"_tm_copyCorpus", (DL_FUNC) &_tm_copyCorpus, 2}, {"_tm_remove_chars", (DL_FUNC) &_tm_remove_chars, 2}, {"_tm_scan", (DL_FUNC) &_tm_scan, 2}, {"_tm_tdm", (DL_FUNC) &_tm_tdm, 8}, {"_tm_Boost_Tokenizer", (DL_FUNC) &_tm_Boost_Tokenizer, 1}, {NULL, NULL, 0} }; void R_init_tm(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } tm/src/RcppExports.cpp0000644000175100001440000000410313204065716014476 0ustar hornikusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include using namespace Rcpp; // tdm List tdm(const StringVector strings, const bool remove_digits, const std::vector stopwords, const std::vector dictionary, const unsigned int min_term_freq, const unsigned int max_term_freq, const unsigned int min_word_length, const unsigned int max_word_length); RcppExport SEXP _tm_tdm(SEXP stringsSEXP, SEXP remove_digitsSEXP, SEXP stopwordsSEXP, SEXP dictionarySEXP, SEXP min_term_freqSEXP, SEXP max_term_freqSEXP, SEXP min_word_lengthSEXP, SEXP max_word_lengthSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const StringVector >::type strings(stringsSEXP); Rcpp::traits::input_parameter< const bool >::type remove_digits(remove_digitsSEXP); Rcpp::traits::input_parameter< const std::vector >::type stopwords(stopwordsSEXP); Rcpp::traits::input_parameter< const std::vector >::type dictionary(dictionarySEXP); Rcpp::traits::input_parameter< const unsigned int >::type min_term_freq(min_term_freqSEXP); Rcpp::traits::input_parameter< const unsigned int >::type max_term_freq(max_term_freqSEXP); Rcpp::traits::input_parameter< const unsigned int >::type min_word_length(min_word_lengthSEXP); 
Rcpp::traits::input_parameter< const unsigned int >::type max_word_length(max_word_lengthSEXP); rcpp_result_gen = Rcpp::wrap(tdm(strings, remove_digits, stopwords, dictionary, min_term_freq, max_term_freq, min_word_length, max_word_length)); return rcpp_result_gen; END_RCPP } // Boost_Tokenizer StringVector Boost_Tokenizer(const StringVector strings); RcppExport SEXP _tm_Boost_Tokenizer(SEXP stringsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const StringVector >::type strings(stringsSEXP); rcpp_result_gen = Rcpp::wrap(Boost_Tokenizer(strings)); return rcpp_result_gen; END_RCPP } tm/NAMESPACE0000644000175100001440000002012113176776374012150 0ustar hornikusersuseDynLib("tm", .registration = TRUE) importFrom("NLP", "content", "content<-", "meta", "meta<-", "words", "as.Token_Tokenizer", "is.Span_Tokenizer", "Token_Tokenizer", "TaggedTextDocument") importFrom("Rcpp", "evalCpp") importFrom("graphics", "abline", "plot") importFrom("parallel", "parLapply") importFrom("stats", "coef", "cor", "lm", "setNames") importFrom("utils", "download.file", "getS3method", "unzip") importFrom("slam", "as.simple_triplet_matrix", "col_sums", "crossapply_simple_triplet_matrix", "read_stm_MC", "rollup", "row_sums", "simple_triplet_matrix") importFrom("xml2", "read_xml", "xml_contents", "xml_find_all", "xml_missing", "xml_text") export("as.DocumentTermMatrix", "as.TermDocumentMatrix", "as.VCorpus", "Boost_tokenizer", "content_transformer", "Corpus", "DataframeSource", "DirSource", "Docs", "DocumentTermMatrix", "DublinCore", "DublinCore<-", "eoi", "FunctionGenerator", "getElem", "getMeta", "Heaps_plot", "findAssocs", "findFreqTerms", "findMostFreqTerms", "getReaders", "getSources", "getTokenizers", "getTransformations", "inspect", "MC_tokenizer", "nDocs", "nTerms", "PCorpus", "pGetElem", "PlainTextDocument", "read_dtm_Blei_et_al", "read_dtm_MC", "readDataframe", "readDOC", "reader", "readPlain", "readReut21578XML", 
"readReut21578XMLasPlain", "readRCV1", "readRCV1asPlain", "readPDF", "readTagged", "readXML", "removeNumbers", "removePunctuation", "removeSparseTerms", "removeWords", "scan_tokenizer", "SimpleCorpus", "SimpleSource", "stemCompletion", "stemDocument", "stepNext", "stopwords", "stripWhitespace", "TermDocumentMatrix", "termFreq", "Terms", "tm_filter", "tm_index", "tm_map", "tm_parLapply", "tm_parLapply_engine", "tm_reduce", "tm_term_score", "URISource", "VCorpus", "VectorSource", "WeightFunction", "weightTf", "weightTfIdf", "weightBin", "weightSMART", "writeCorpus", "XMLSource", "XMLTextDocument", "Zipf_plot", "ZipSource") S3method("removeNumbers", "character") S3method("removePunctuation", "character") S3method("removeWords", "character") S3method("stemDocument", "character") S3method("stripWhitespace", "character") S3method("words", "character") S3method("[", "DataframeSource") S3method("[[", "DataframeSource") S3method("getElem", "DataframeSource") S3method("getMeta", "DataframeSource") S3method("pGetElem", "DataframeSource") S3method("[", "DirSource") S3method("[[", "DirSource") S3method("getElem", "DirSource") S3method("pGetElem", "DirSource") S3method("[", "DocumentTermMatrix") S3method("c", "DocumentTermMatrix") S3method("dimnames<-", "DocumentTermMatrix") S3method("findAssocs", "DocumentTermMatrix") S3method("findMostFreqTerms", "DocumentTermMatrix") S3method("inspect", "DocumentTermMatrix") S3method("plot", "DocumentTermMatrix") S3method("print", "DocumentTermMatrix") S3method("t", "DocumentTermMatrix") S3method("tm_term_score", "DocumentTermMatrix") S3method("as.VCorpus", "list") S3method("tm_term_score", "term_frequency") S3method("[", "PCorpus") S3method("[[", "PCorpus") S3method("[[<-", "PCorpus") S3method("as.list", "PCorpus") S3method("content", "PCorpus") S3method("format", "PCorpus") S3method("inspect", "PCorpus") S3method("length", "PCorpus") S3method("meta", "PCorpus") S3method("meta<-", "PCorpus") S3method("names", "PCorpus") S3method("names<-", 
"PCorpus") S3method("print", "PCorpus", .print_via_format) S3method("TermDocumentMatrix", "PCorpus") S3method("tm_filter", "PCorpus") S3method("tm_index", "PCorpus") S3method("tm_map", "PCorpus") S3method("as.character", "PlainTextDocument") S3method("content", "PlainTextDocument") S3method("content<-", "PlainTextDocument") S3method("format", "PlainTextDocument") S3method("meta", "PlainTextDocument") S3method("meta<-", "PlainTextDocument") S3method("print", "PlainTextDocument", .print_via_format) S3method("removeNumbers", "PlainTextDocument") S3method("removePunctuation", "PlainTextDocument") S3method("removeWords", "PlainTextDocument") S3method("stemDocument", "PlainTextDocument") S3method("stripWhitespace", "PlainTextDocument") S3method("tm_term_score", "PlainTextDocument") S3method("words", "PlainTextDocument") S3method("[", "SimpleCorpus") S3method("[[", "SimpleCorpus") S3method("[[<-", "SimpleCorpus") S3method("as.list", "SimpleCorpus") S3method("content", "SimpleCorpus") S3method("format", "SimpleCorpus") S3method("inspect", "SimpleCorpus") S3method("length", "SimpleCorpus") S3method("meta", "SimpleCorpus") S3method("meta<-", "SimpleCorpus") S3method("names", "SimpleCorpus") S3method("print", "SimpleCorpus", .print_via_format) S3method("TermDocumentMatrix", "SimpleCorpus") S3method("tm_filter", "SimpleCorpus") S3method("tm_index", "SimpleCorpus") S3method("tm_map", "SimpleCorpus") S3method("close", "SimpleSource") S3method("eoi", "SimpleSource") S3method("length", "SimpleSource") S3method("open", "SimpleSource") S3method("reader", "SimpleSource") S3method("stepNext", "SimpleSource") S3method("c", "TermDocumentMatrix") S3method("[", "TermDocumentMatrix") S3method("dimnames<-", "TermDocumentMatrix") S3method("findAssocs", "TermDocumentMatrix") S3method("findMostFreqTerms", "TermDocumentMatrix") S3method("inspect", "TermDocumentMatrix") S3method("plot", "TermDocumentMatrix") S3method("print", "TermDocumentMatrix") S3method("t", "TermDocumentMatrix") 
S3method("tm_term_score", "TermDocumentMatrix") S3method("c", "term_frequency") S3method("findMostFreqTerms", "term_frequency") S3method("c", "TextDocument") S3method("inspect", "TextDocument") S3method("print", "TextDocumentMeta") S3method("[", "URISource") S3method("[[", "URISource") S3method("getElem", "URISource") S3method("pGetElem", "URISource") S3method("[", "VCorpus") S3method("[[", "VCorpus") S3method("[[<-", "VCorpus") S3method("as.list", "VCorpus") S3method("as.VCorpus", "VCorpus") S3method("c", "VCorpus") S3method("content", "VCorpus") S3method("format", "VCorpus") S3method("inspect", "VCorpus") S3method("length", "VCorpus") S3method("meta", "VCorpus") S3method("meta<-", "VCorpus") S3method("names", "VCorpus") S3method("names<-", "VCorpus") S3method("print", "VCorpus", .print_via_format) S3method("TermDocumentMatrix", "VCorpus") S3method("tm_filter", "VCorpus") S3method("tm_index", "VCorpus") S3method("tm_map", "VCorpus") S3method("[", "VectorSource") S3method("[[", "VectorSource") S3method("getElem", "VectorSource") S3method("pGetElem", "VectorSource") S3method("getElem", "XMLSource") S3method("as.character", "XMLTextDocument") S3method("content", "XMLTextDocument") S3method("content<-", "XMLTextDocument") S3method("format", "XMLTextDocument") S3method("meta", "XMLTextDocument") S3method("meta<-", "XMLTextDocument") S3method("print", "XMLTextDocument", .print_via_format) S3method("close", "ZipSource") S3method("getElem", "ZipSource") S3method("open", "ZipSource") S3method("pGetElem", "ZipSource") S3method("as.DocumentTermMatrix", "DocumentTermMatrix") S3method("as.DocumentTermMatrix", "TermDocumentMatrix") S3method("as.DocumentTermMatrix", "default") S3method("as.DocumentTermMatrix", "term_frequency") S3method("as.DocumentTermMatrix", "textcnt") S3method("as.TermDocumentMatrix", "TermDocumentMatrix") S3method("as.TermDocumentMatrix", "DocumentTermMatrix") S3method("as.TermDocumentMatrix", "default") S3method("as.TermDocumentMatrix", "term_frequency") 
S3method("as.TermDocumentMatrix", "textcnt") S3method("Docs", "DocumentTermMatrix") S3method("Docs", "TermDocumentMatrix") S3method("Terms", "DocumentTermMatrix") S3method("Terms", "TermDocumentMatrix") S3method("nDocs", "DocumentTermMatrix") S3method("nDocs", "TermDocumentMatrix") S3method("nTerms", "DocumentTermMatrix") S3method("nTerms", "TermDocumentMatrix") tm/data/0000755000175100001440000000000012315572766011637 5ustar hornikuserstm/data/crude.rda0000644000175100001440000002701213204065716013421 0ustar hornikusers}r#G-iF;vǎpFG!lB6HCje*A԰P諹¯+?~wNfUD8c{=KFH @Vɓ;'/ƣOǏ5>n|F~E`K__>&q=MZut2*3ƨ< B@EN/3'qj?W4MI* #5ZxΦ 4SZt)NJMPIFi_e<,rx<+jh;sNMQMitey2fTY\gjiaߍ«I(iyX,M-LMjnumS^'SLfɵTǵ8o>Ό^Nؾx{=EIʏ,.?2>zIY~h547rO[&Y>|/O?Xi@ : |Á^\q7X#+lc?=Y?^tVy|rcݽqLW\/H|y@&/r=7*03o?bzG0~W UG,3ye(tS*IП"M$=$r̼'4 6-QI_LM\ ,dDXaGu0]b=~:}?uIz,ԈO"Ԙޡ7^OHFo k2$u|6(O_`*#|Sshr tHІ(l0,O$u+aJaI`% L-<)A|YhA-.uاA+ &/%)"rVK(2ӌ4HqSaJ$< wa2eDu,iEGVqZ"CFR /{?KjVfjjQ>@F|iVD_]ܞ{t 3ܘD@:'/yu(ɓkZD tW/҄VEqLXSlbj0*=m=Ō=== /oZ}p)VWd+CڛiءD|B m61v&qJVIVQxM:"`DMDԄ4KL4tguґh=YՓz1_ILHE6 Q6ӳr&&d|пW{ǍŌ 7Y4͍μvٸ3ɓ!_FD^ld2.u}T79]G|n ,M14 Z,\Nj{^'B&En\ katJ2&!fD"ՊXy/ $9O[0[-Sr FHb4NR>58L?:Yb0cϒtVT'hb8,9&i DY#%&RҰ֎aP)l1 Ҥ[X+P2Yְ)Z%3]֯`aN2/2 D,r:7/^a@<M7-9Oߜw6m?VGzjW_ZOSmY`x|q_UwY~?J,C'9LaŹ7+e7Odr>aYHey,ly9':."Eri2%KlLMF;BK/)WAIƹQQV'}{4ķ,%IM<'3 O0ңHT 撧rq/䩙cvQ2htRG7%i왚Ok "m?y'<pa89B=kxP`H|:Mbл4`xyZu'a`u1wVփh|sO2aLbi D2oV"1uAADXRʚ3I5 m@! }daŜibo579z:QHƷc=գَ`J|y >;XPuQzrVK٘5rguvMn HMوCzБ?+3,X"PlƤg<ܴ#|4 d37),liN1t{{#$3]e p?-(͑o~KeL< *fD% dZ& 3r'g^ui\qQld'#՛t LM)%ȩ+̕^B 12rH? k+y{--Ԧ$ͥmlMYb w^Cda[PwDlfU'!H#{vcspX/X&0w e(җi>=^)De>Xgk9Yks2c:œIIS%#ŐjR 3c"'U+hΦ&-<5'*9ϜƏ3XQq. \PeɖBX'N;A  &$hl{KhTn]b̼~o~K")+5h![ǶQjeZ2UBWi2XpD<dyL^"li#ҸHF˻2A$^yeR4':xybYyú/dvGl[ȐkiPJv'lG,hmYLس >0|cTd\E~-cH X3«ӒaZr hV֔"Ouf|vhH;>a +֋RXp3$&I," 0`~,t~4T'h";N٬#-v<Pqds[OٍEq2 "ޔC pl{ŏ:/9qc3ޱiƷ:C5:? 
aBN{/{g^ɷ[ۻoo9?ۑkxFDGc}|;&Or<԰ gm@N42?[Cv&,sUDG(gyZM2]HKrzSr2x 6OP"*R) bY€2 4RXy,Τ<3BH(QBȳ0.c9\.I;7ıxu:B쒘`V_{ə GT%tVp[2@QN|  2A.a.$d@y/0vTfRh['[%wqt5(,Rmg*o賁Co+@vg-Xwqg'9 |w +9:Iqjma<ⱪ]TpO<ʅΖ P&l)Y6e]>} -Yr{4;ڲ&[D_dN|Β!HKe-"ep+~c DX,Yݛ0k&ķ">\3ceUY33; 871;K`ވܸ\k'"n}dl68z>$sO[Ƽ^q셎#zCBfŔC%W6=aSY5¶ P@;ޛ£M$E^^d7a#D(uA!{ɗ%Y`@Q@%#Y*Ua{\&  k|߬Ry𣉍%Er)zi% q\ȃ\4 6w73'Ӭ2[V~Wy48l@lw;P3+l'_TX^Py#c)LNXK΋! i+lsgc=˿%m 0a)B$ |-%`q.3!xK<%;c]Lړ)'4 (P^ΡW*(!F5SNeP0IBI)YUreNO⍚˸Q2P$-&zM{9\rPX/uh=BL!TBZMV-1 Q;Ew^q~2q$L݊Db!#T,e7M='\AM-ѯ`EEV @B 1<oonVZ\6T-3ETbERE^"NL4[ᘻi .eI `Z֙b!F1y7p?W}x>>;5Tg9w=rqWu:g/{ٺgVHY»3) 7>?"[H%QCI>ׅAI@ĦS.mp錈Yu|¢Ix,JNIZI*ةh"4@{Y붺l]keJ %pH,qEI$D]{9ns6jiZN]/^fӫܯo~;CED '* WddCp'K\@Qd\ d5$:@. '8y+;ow7_>k/ }>8;rEM%/,}VWɒ+Xq7T$26V"CO]%Ac/{CyX :ֳ^-KaeqP`x$D5L5W6"HOӥ`Omg1 OV :L3'C*[udpީ;p)q-SPX#,H^X u'l `"uyjTG(Zƌ?2,ޡuuX6iٿI)`~i OY+y! ,/ҎB2LEN^~' IMp@zeU$ժM\q|(a'˗Xڐ{yt:-;/[`- KH 3#J4A(qQWI!1.VSaP/q`L^gQ1mKPV^R Wı#_zR9ێ=a9 =(z{VQ+ ,ZY~480<9 \ 1Y6gbOs;\Qa,c t=KM\릌Q.ˆ(Iq=q5bzgqج qrsC,땄6(Cײ#pO!, *̤ZH,eh$qX{cd0 ( E@9[Ijd,V (OhU% nHuP\qzU2CṵV ݓقz'FM)yz\t^whi8ퟞO{gIOWjs/3:۫$D9͍hućRwj}HAd{\4wkZOӐ :]4kyZ:&Yu;{ !8!R-퀿)TpNgSxiR#T [gK-FXX\sIbʽ#ue@˘Vҹh))QB@G{",Ͼ]͞LBtMsѹ]vNPk#`Ie:rcQ"dQJv~7O1df\ ] ̵s^")*]t(#jooj18 #pc/ o\賭-PKa.D/[xdc PӬ*@L֑>qVDL@[xU`ΒI.[4wڔy&m6t]R\MvGp?Nƃ ki*Hu-i{T Y:EU겇 _ꗠDl lWWŒ5[ oՏ?nH,3)MFk8U=zһP6&Ǎ.Eq(zj˙Cd8E7B_f2[j,ݒﮒ`eJVI7&Zz4jUܤ/@N$|> #Plbn$5끵.HcH2n_TMMԯfG6I if;Ð@,獯Φ]c]Ώ[AB&`ep&-+JD`!;Bǥn  ڴӓnA#ΝYܾ"]^L<ȗcLH7=v  jz39Q|cݙ@=np]hr=%UͳY]͍YmFFRBAY -)I?.M+q$%"]& "D:5ϺVucAPuOށuoICu/:w jhgsu;X>\8;lH'Bmb O{D^!>w58B(@P a"%Yc̆EsOiu3ߊ岹\Rk#"{t\rt[SM6ݽMkH2i,Er"uMzG#^%\6|R̙t*E@ 9- oմmrRLZңSAQb3a OIfE5$(+ιݪ B?mJm7M8(KR6_h-qŚ}e]4r2-]QX;rB~3m̪%|̪UUS36;+4QuKz]飯6ǎk^ikau޶ Tknw /_K{@2_!A[}ƳswV:܏˃}йep(s\miw!$v[{o3lᐙU16A@nغLFBnj;21Ҳ'5:pA[#Hg LH4ܫTH_4׿p ,xJVU*ټ?)Ձ*\|cEf0F%Ux\*fኴJ]U]j\QVZ# HE6:TfƇ+ ,FS0zvAi , c=M|x͕,"F]"+>eK5hffI5ՏU9:{}^BBqEdCZ_&\rT׻].,+XA-&, zr~nNxt4aFi U ڑs6tTIpȈs`:`/Pl&@g*azwk׬)ZM^WYvCHr8)z"%x01*[֦ \=mIG t,yV7=2Hvq! 
0*8kSL "sw}T*o{bZ2lbgW.!PYeXj/f|-3dAT S*ۍ!LXKv%9l'o+׽󦚢;Ng`U\(EQ "#O b붣z!'-,S;ϕqVl$3lh'5YvorMDDuz\ݓt5,ɷj|q^-SLnZ>47a&ˁ dmY{T Oh!_[nj9ms=X NtnXC GͫW+w5o^,ԋesڢN8jxw²ONTفn//d'vƢgb-ޞk^7=7?tpUs;fk l{((=n̓#*@QUw={Rpמ޻=yn;pc(TRUw69sX (EtnIPm j!PB݁/m6j @HE*H5PV(X&4THhh4iDLTޓHz'SBL<ҟiChj4O5M=h#CF44h 450#Fd1F!hD!LJ=OSM驈zA=M zD&de<5=) 242JЃ_u<y|.7F} \x^??'+ϭ3'ldhc(<*%'E}P !@ 7h.*y"(X UR R*E (P`p A!$@b *"E B{~sR^GJ?orf_|f'{u>/g]S>~QPPD "!S?4>xnAeFT@ʁ ǁž$HʴДnK$fQWrbݪTQGc;"pyATV@GXY#19͹Yoe @Fu} ~B.|NTBD4bHHB1dcA 6Bj "(b =HT ^djP(F=*\TAAU!  Q5 8W2 *fʊ* H ""H 5b#3qHn? Ҁ!y;= :7pXAb p-D8ƢHBBbLlA @l ;QSXP`D AN$BB B"(`@AA,b1!",DA$@wŷI~:=q3*>\5H&5ja}[C=)Ⱥ@|Y|yz=gl G`sDzOZ_%z?qwi>;/_get]u{;b']WcS׸m%α!);{d]p=vvr,_nʪ#Fͻ5a@ΓoMEboo,~&qѳț]U{4cf{!Qpxfzμ{v_,]{yu0x̖c,6 (!h۫Kܸl䭤 2<zDltHxn>jA"K9wjXyX#O3Pvt&Եbzӹ|v.uO] ;}5qFnގ0wrGvc`{LֹUf/ ^xRn`ONJrw^c0q~ӻ:gQTҚkQ˓Qc/ek 4D^1#Co~3:?"Z.~mϏ+/;{z;,NIJiV+esx(%l< ,|W^Yl,n*iz9hCb<Ц]m8zRx+7ٱ@f؛qQdD oc(6Y伧#Z/5:ns@mvTx.q]C_>'< 71q/DFkErMͧR]ѻGaȇLpI@Qm؋WA@S7燗 /GB>gej8l G$ gX#j<.a3|N6/dH6ȵ$Q̊2#hB4)E('緬sWM]u3ѭԤ$O0|*E~PA}U9bXyLllK0nTa_ >|˾8=媇ͯJ\ VC"ep/AQTYSoɗm0"ݻ鎞U|ү_OՆU(ܮ*[mpuNgf4f!x8Ɗ9AGk:Tx|`3~6'uvA{=ʪCkPvώ ;bp%TN}{vb`go}8b g>%5Hm_h1e4\llw-s6_2 mW79I@HȄ&<0VÂ)LY;p(I6t)CۮMogqeRP<\u25NY* noer!Y+bKc=Uԙf0hI'eҤP^d܊t#Gp:[tR[j-`Α")on`eQ£4 B cm+9z>>{VQ D^!E<33t܋q4{>ΫKB ^]}ԏsϼ\$;kXN<|t8i3ݫqJƖƇ g$Cm4XjXj,N4&0דqoW? DwWauss6[p* zCidwh6^Z&xם7DO9B{4OL+JtEؔ㟉Ԋ^kt2H0eHcyÕUIC'í7E%zHT:1s9N"]7}5b٩(1c?7rk&ZPH^E]Y9+3Pp.Bs饺N[ L͙c1Ae&6{nVqD]Њ- MRyȬom=MSmAΫ:j9gzdVi~:|9PrXb;geó*y>F a!n*Hbʨʊ ׿Q\dY\3!7:D_κS`6=-SMŚ%eqYp6C:$}6D!db]sa%md/NjdH+Xn͌ˆ~ylz]N^tCb2SdiH%&6ON{]5ƪ:@E\0eE9ԉ;dp~\7UAU(1x慗ֹ=$1\0ԢƹH?(52`KkҠ|ghp97E5C BO\IvN*&HIEò|9v&w$FkژnD8#` Fy죿gq\{`Tqͽ;|Mզă 2}* 9r&4&,*AEg;"" o 8-Yrawv|7qZIo͖ ,hl{dÄf+\~kaoYn4QC9# AYWEBN6ȨPD>ٱL:,ȺdN3Ƨ\ $2R @Y\DEd(lK0#6nW0ǥ0YPnZ^BԾ@hDAG*}^+' ڗyu~S V%a(IxpQ|>~*^~u:n]6,Qcֻp"nrTIC*3̟v^B lσMtoMU2G}f.߳7Ϫ*Y7ߍDbU&껶. 
,jg&uX\ܶ*aطb7 r\Q!loO.O0|vg D1+c(;VU6ַi-ѲpZvR6;\=Xۛ>АPc L7xu>#@%# vkv]-T}=w7s~Mw=}|pZpPn, XGu|Z 5 ';s~d$">ϯq c/v{x#{qM,O;֩Kd r }'.u( +ReG2B1s㮐"[9^rhH]Yk!a+ql5M`} yXg*^}oyr;N?Je|++r9>>bE!du.qP fnˢpw@s>؈>͞x9sc1Vn/z]!rh^/mEBAeT| @VHc @F`3deca/`3g`4E4bgkƖ\p P9j5V-e%V4U料%),GPxsM;u9~|~ D3"՛E8 CV!UZ4@PRG5p5qNTZι;SPN tc>b`k{Skʛ&Y}U:nW ӦT f{whqPTVb- 8m\vBq0p6Tm-Oڌ7Kg[We>4Qb5gHH:q=C6:U;AyS` qLR0W7x=.\ACaNfSÌUH\r/>=NcnkQn)Pbmr#(Lnyȳf! H {ZOu)1LQtZ3?LWa+Tu>_?̿<NCe_r|`NuVtCx5v}@E~XjcV3Lsf0ra~~Ann$G@8 ^zFOQD8j$&Ƈc< FT۰WM :WJva|-E~W/bT?Wi+l-i>o<;D&I{.x6I+'ԪVTJw)%Wk~|SDRB[$۰z1꛴S˸ ȋf=Aي Ҋ2'lT>FC[?Jg:%VFm 7a!Z9Fnk ԨUW3nwjD}`/'h\|WdгgWn-1{K2\0|9F)ՅYN8g!CasJR1K߭Q3u>!ENvNN'YQ *khgL<:UiPk.D hp3xzӵߜ8g0$=˫"DТ`Dux'S;8:i\ո njŚczzN޾7"}ӽ`p*?U*Tgj~& b@ (ⴜ>zuEHL c2Sj{.sZ7R)ߨ>5DUngm{!}F^Ą!Amu\SHz5'f+ȏ@ "X'KL BCQh7tP;6:Iɇw>R~f3ٞŹjŵ~37Xf|o*}m*?NUnT0y54& #x%9N7/H{`Ε'_?ԠxPU'UTܲBfp#Z{*}vȗΨ} #;ȶ4S 8Api`?eer}Ox@7o#NW7:i~8ŬN$;N&"`I 12 삓 @_Cz:~ Ru0z@Hxx>tqL[)"\!U"cRCb%}mPAHa?iZ 4C=G|\YR KUT>?^h$Q!AGr=}L6e)7%rr6Desb$(v6 ʶCJ^WcRp lx2_n0'd:6 rr/s@?Mz"HN>@tf/ֳh֓h>Q{8\~?k^;Cd_~A\AͻC~-@A$$V*[h*KAeIOW 1`xKfwk8>&#PT?z jIW?*vDd>뭎yɴrS!.8-v0 (Arpa+ ˆw qF. HB UE"7W);:k!i9Cm$${N~XmOKH&d?S~˗ xw=;wJr I*- NgpoڙmYS#w9~hB9fx[D/'eP{H.7ĆT9P]A I&T4?WifW_.ɛ-C0sv-$Й#Izڌ|r`9hbIh0B/l!ᔂȼqEALsOju(KQB;cSU~C7 fxr(c&>/OܒdCG.osM'ц,!LΟ+bd4,S6MhfelzH,yҎɴ墆 zFwbp¢'t+%I&0ŷ\wyIu8Qʪ 퇛䜄ekǔ$rUxW}0Ԥ< B$H1hp#ݠk 9@'8 uC=-kJH 6-(!;9ՈzWˠ=sZx,!Q>6_nǙ_p0~P1BSEtԝIR7;umdMY.RԬV4fD̈067N)yگ4AYT4Wzv;zoiA_7Nptwj"$TPFFgg'<}7_*P0C"V&Qgnj؟bm||^?b &I!D-rh1燓#$\% z?)KPP٭Ӷ}A>/ؖVW'‒//|b FLͅAM}](va[$roJ4d52^Cfc0~^#8&rDRPXĒ'`|hn}U *E=M9N'vDUk$m@5x7 3Zb YDẼLhg7t NQw&s#V3ffئ$)њž'$m2#;\ɨ9.FG0SsFa+ɜA~S7&gG9 E篝xyQ[uVT$쀯",,X, BTEV0R@V@O-p:9PP:CISH:rٻLUI@*, vjCx)Xb0XSA@ scr<7vD^2UZX5t;q mXWVTV(\J[mZ 7]4Mci]W3Z0!l.6,Wv7…0pD0Sh3.Uf)*J 3&5TG30@*ֳ2-7Lփk4ⒺE*Wmeɫ\\jVn 6JZ7TDLr3B6Pd:QV&Q[KB[Q)Pm8Cw\Cv<.c"~ a. 
SŚNA2}AL*t&e rKNmA|vQark>mz b0z@֩&+Rb8c#;>fȽyoH0@Ye/9A$ %$2]s+EBidc_ٸk -1>0';0\Z8']#xO0Dv  K#`{q밚[QrRN;pBl dS 䱮ZC`iqp9 s D(sӶ1dᘌ !F?S{w8p*28NN=, '*QdO-'-6.TF 9 hrڐpcbGGr*=  N#}l6L 픆 XTmB?ǘRr) ߯iq1 H<5l$I2ĂIðC*nݤ0d 4- i.o ؅PKUӬbΆׄ| &`59]|d8 0$#ٻ`ʼD2Cc㯰\vDʥ! /KijIoE,6N_g%M(9X=kA>c{ǂ˭⎴4۪/`b3!ؗ;S\fc "O٢<[ߞVc!'r(3ѣ8bZj`Z\8K[12Do+*pK'zb5 8*rKAD4#[q}iGHA0Z vnYANe Prl(f[aqYbI1Us|l|iM ;u\{@gJ)H-۲[YW90EIOxu 0 d8b(xGų8"b`z m]zE=JwL@tr@`Λ('N5@s74lom3v @?}*D CX/ʝ$ 5P ɤAN adAi/; ]!5O9倪}{fBf5t){X0AtNX`4 h@IK4@_[żBALk8Yo3U5G >BU$AZk.bzkzSo="Յ~ITrնHp{ DVFnmFQ]Su:{ye{YbܒKvF2BcrgKeتd %J%h҈JbLQU\J&[0L’eQEnYrAxW n +1G$Qu;"@I( B-$Q@X)AF2AIB-XLCIXEfBB([dPC4#Nmed5x@61I LVFMf9xŸh&<kG9==I6d)8N Ҷ7, nD,;PvgpȗBibR'[>zyI0[q5ˉ~Y-uO3ep$GYҡm)BNMyc~#=֜" p?A/8@T)INܿnvJ7LV+X3,rw>LXQ/>QTA|ai%Vo;P硌aU,Cp& öv9m`&T D>yRy o' zGg'p+LKh leACL&313~B$6E&F{ 4 A2ˢbW #Auqd -V2 oL4:.bH!yZ" D0/-`7z @{QonETvzp`C.Rz '-`@ W(\t`!B7%ށׇ]䝕J$ uם{sΠfRRI컮]ꗈv:Ji!;wo(WMmٶi?;e\XaslLn.Anh7na>̀J+5/b l=Z۲`ȐMII"t'A"tSJ״` ^N4" 5,\̊Ք\Mʧ,h1AQ GVlmU]5VjDE ivj"}<&|^r- ÿԟA%íUoӎvl)*Ӧ 収iٓEĕl20=I B@㼙V0mP!HUbS!^fi9B\,Jq.pgp `ʠbD +%V}{gw.9N{\mR[  EPˆR 5P2𿡛 7iIA\hORe&TS6 i@WmVeMX|t k9FP{X %`lQ x!BfQ9 ;nwx1;n&R@=jm)di2O6Dwe{CrTHC|6jRߙgd1.="k2D6Pۍ Ddy0i|t@Y=oh@c5#SxaDl-P lw) /i2 <{ 2x/xQVr2>4kQ@5M 28{%bC㽂'٫$C99(Pߴk3X Ѓ *z-y8'qcL'xܡysұf6])35`ZQE;<nqC s/8fn+XaR Mm6DSmLMWIm̲l6Nn̛JRH`+@Q8eZi,+Vբ;bsWZq$4]~9Sl,嘁Ra]&%ߪ%Wwp&" AGg?7 $'ZEj-{Uˮ~Ci8c m0$ B$5`iBTt"6B~v`P4 ɨr 5h)+=0@FEay>=PfR֬1jA ݻT>G`"#ͨ)2$IRkt,ݬ_g !d ]?;4xyي3F:LW|T!6ȡ`ͲMK9&KQuކ<<'|3Nol7h%Co]^%W x}R'|P}z[`C(wp99x* 2H*H ߾ZT$A5I#tG j77CT`_r_-`M54f1ٲpmaȠ(ShY~S^X=!$@} xQ>>xxIRH=^2c+ rC8Kˉx)tO{^ѓb֫bߜAק2~d='va#1yr)J٬Anbe&7`}%dekɥUWn"9(rMzBI3'^/8//}l<>ɂŊ*yC{P~ U Lx(%n/scH -x B𽶏&$LuT Bޥdz)&#tzAMSq <g{3hv(i;˪`ŶBFnv2)2SB(*PŝZ*P,*lVPX{," ]8cQIɋZ#27naHiS-!f˧JPV$F\J‹.%ch S>>to՟Ħݰ:FS>F{j ZM! 
&WM')E%p3BCD*m.?m5AbH2kvh6TۅKpupaޙ߳〕uKz^lEx2ݪoj-6ֵ9 UgI8n%ƖћW-3H T(sP>[)ivKA;3@@QBJ V'& K,J|w.ZbD'{X1 &"AdzOoVUCԞC^/&S=A8ck>=nшoL\X r1r*:ZÅz>/]4w= f; >=#釵)B%82Mp^O)f(}Z\701a)ZTY%hLWYZcbB50CC}0fdXKJk `!(ȴg8$>u@, .U"\A {gw߬֩t9J̽DddSb! mٖLńS#*#[`lPX"A#0 B6@qЌrg<^۝c|`p{ôG+bg9 %"yAѺ9<[.8a̅)Itd7Y(2.HjZQaXٙ,A xژ2 &aoց$l#[c)mO@(ŒH ˚v u]hrgA.LiUPQN櫕@q'x)3d>I9L'.I kdS) sa[:K/}bhm9 u&|$3aD3JQXr`ݽF%V9gWm)lu-*XV1LRYi2HTj!)7ɢώaiQ hC{T:7'j_x;53ArNm6a1FHx=!U6 =d$BD<7Q1~"nHEVwnaMt75x@û';M(sB.8-(<߮a ˚Cs]y엱HsܧLT 0AH$Y@dBne,eҎ }燏F1PJ3ª(,X7c:!3FJ3{0edTM!|83 R, `.jGbL%x1%#{a ]DD:RPPW):_⍰R<wc8p-yrir(<UY3p:wam^\(QVw.#z 8 Mij leUMCڵɣ5sfKm srZVub@.±UuDP hMuJi:kpvfmGvjTRJt1ݭcfH`@H, lubsV |P;Mghyd-@Ra"I  ߉'f@4Ȥ ! `XM'jQI =;1QZ݌B͇pgBJqN:u`sT˫E gؙ ) D7K^Ty/!, ,av1Gn;y*6 r.Uh̃~ ,xG`!%/ & -[h 0ŏT'^΀q1 Aӣ{/e}y~Wyw8 `s\7`i2~{-rZHFj}Y\"8=eU$dCʿB||0ce-{m[`3ᘄFۜŻk<-٢M| >?"eZZFysq@R1CT:&c~sZ0xuu)E (4 ".p tm/R/0000755000175100001440000000000013202006317011103 5ustar hornikuserstm/R/utils.R0000644000175100001440000000323513177022675012411 0ustar hornikusers## Helper functions .print_via_format <- function(x, ...) { writeLines(format(x, ...)) invisible(x) } ## Efficient alternative to table() proposed by Kurt Hornik .table <- function(x) { u <- sort(unique(x)) v <- tabulate(match(x, u)) names(v) <- u v } .xml_content <- function(doc, spec) { switch(spec[[1]], node = xml_text(xml_find_all(doc, spec[[2]])), "function" = spec[[2]](doc), unevaluated = spec[[2]]) } IETF_Snowball_map <- list("danish" = c("da", "dan"), "dutch" = c("nl", "nld", "dut"), "english" = c("en", "eng"), "finnish" = c("fi", "fin"), "french" = c("fr", "fra", "fre"), "german" = c("de", "deu", "ger"), "hungarian" = c("hu", "hun"), "italian" = c("it", "ita"), "norwegian" = c("no", "nor"), "portuguese" = c("pt", "por"), "romanian" = c("ro", "ron", "rum"), "russian" = c("ru", "rus"), "spanish" = c("es", "esl", "spa"), "swedish" = c("sv", "swe"), ## Have stopwords but no SnowballC stemmer ... 
"catalan" = c("ca", "cat"), ## Have SnowballC stemmer but no stopwords ... "turkish" = c("tr", "tur") ) # Map IETF language tags to languages used by the Snowball stemmer project # http://en.wikipedia.org/wiki/IETF_language_tag map_IETF_Snowball <- local({ codes <- unlist(IETF_Snowball_map, use.names = FALSE) names <- rep.int(names(IETF_Snowball_map), lengths(IETF_Snowball_map)) function(code) { code <- as.character(code) if (identical(code, "") || identical(code, character(0)) || is.na(code)) return("porter") names[charmatch(gsub("-.*", "", code), codes)] } }) tm/R/stopwords.R0000644000175100001440000000103213034740255013277 0ustar hornikusersstopwords <- { function(kind = "en") { kind <- as.character(kind) resolved <- map_IETF_Snowball(kind) base <- if (is.na(resolved)) kind else if (identical(resolved, "porter")) "english" else resolved s <- system.file("stopwords", paste0(base, ".dat"), package = "tm") if (identical(s, "")) stop(paste("no stopwords available for '", base, "'", sep = "")) readLines(s, encoding = "UTF-8") } } tm/R/plot.R0000644000175100001440000000531213023472034012211 0ustar hornikusersplot.TermDocumentMatrix <- plot.DocumentTermMatrix <- function(x, terms = sample(Terms(x), 20), corThreshold = 0.7, weighting = FALSE, attrs = list(graph = list(rankdir = "BT"), node = list(shape = "rectangle", fixedsize = FALSE)), ...) { if (system.file(package = "Rgraphviz") == "") stop("Plotting requires package 'Rgraphviz'.") m <- if (inherits(x, "TermDocumentMatrix")) t(x) else x m <- as.matrix(m[, terms]) c <- cor(m) c[c < corThreshold] <- 0 c[is.na(c)] <- 0 diag(c) <- 0 p <- Rgraphviz::plot(methods::as(c, "graphNEL"), attrs = attrs, ...) 
if (weighting) { i <- 1 lw <- round(c[lower.tri(c) & c >= corThreshold] * 10) for (ae in Rgraphviz::AgEdge(p)) { Rgraphviz::lines(ae, lwd = lw[i], len = 1) i <- i + 1 } } invisible(p) } ## Plotting functions for Zipf's and Heaps'law contributed by Kurt Hornik ## See http://en.wikipedia.org/wiki/Zipf%27s_law Zipf_plot <- function(x, type = "l", ...) { if (inherits(x, "TermDocumentMatrix")) x <- t(x) y <- log(sort(col_sums(x), decreasing = TRUE)) x <- log(seq_along(y)) m <- lm(y ~ x) dots <- list(...) if (is.null(dots$xlab)) dots$xlab <- "log(rank)" if (is.null(dots$ylab)) dots$ylab <- "log(frequency)" do.call(plot, c(list(x, y, type = type), dots)) abline(m) ## ## Perhaps this should (invisibly) return the fitted linear model ## instead of just the coefficients? coef(m) ## } ## http://en.wikipedia.org/wiki/Heaps%27_law ## http://en.wikipedia.org/wiki/Text_corpus ## cum_vocabulary_size <- ## function(m) ## { ## ## Should work in general, but it very slow for large simple triplet ## ## matrices ... ## s <- double(nrow(m)) ## v <- double(ncol(m)) ## for(i in seq_along(s)) { ## v <- pmax(v, c(m[i, ])) ## s[i] <- sum(v > 0) ## } ## s ## } cum_vocabulary_size <- function(m) { ## Only works for simple triplet matrices. i <- sapply(split(m$i, m$j), min) tab <- table(i) v <- double(nrow(m)) v[as.numeric(names(tab))] <- tab cumsum(v) } Heaps_plot <- function(x, type = "l", ...) { if (inherits(x, "TermDocumentMatrix")) x <- t(x) y <- log(cum_vocabulary_size(x)) x <- log(cumsum(row_sums(x))) m <- lm(y ~ x) dots <- list(...) if (is.null(dots$xlab)) dots$xlab <- "log(T)" if (is.null(dots$ylab)) dots$ylab <- "log(V)" do.call(plot, c(list(x, y, type = type), dots)) abline(m) ## ## Perhaps this should (invisibly) return the fitted linear model ## instead of just the coefficients? coef(m) ## } tm/R/foreign.R0000644000175100001440000000313513023471774012677 0ustar hornikusers## Readers and writers (eventually?) for foreign document-term matrix ## format files. 
## CLUTO: as we do not know the weighting, there is no high-level DTM ## reader. If the weighting is weightTf, one can do ## as.DocumentTermMatrix(read_stm_CLUTO(file), weightTf) ## as CLUTO always has rows as documents and cols as terms. ## MC: a simple reader for now, could certainly use more effort to name ## the weightings more properly. read_dtm_MC <- function(file, scalingtype = NULL) { m <- read_stm_MC(file, scalingtype) s <- attr(m, "scalingtype") as.DocumentTermMatrix(m, rep.int(s, 2L)) } ## ## To write a decent writer we would need to be able to turn weighting ## information into MC scaling information, which may not even be ## possible. Alternatively, we could always use 'txx', or use this in ## case we cannot map ... ## ## Data files for the Blei et al LDA and CTM codes are in a List of List ## format, with lines ## n j1: x1 j2: x2 ... jn: xn ## (see http://www.cs.princeton.edu/~blei/lda-c/). ## As they are used for topic models, they *always* contain raw term ## frequencies. 
read_dtm_Blei_et_al <- function(file, vocab = NULL) { x <- scan(file, character(), quiet = TRUE) ind <- grepl(":", x, fixed = TRUE) counts <- x[!ind] i <- rep.int(seq_along(counts), counts) x <- strsplit(x[ind], ":", fixed = TRUE) j <- as.integer(unlist(lapply(x, `[`, 1L))) + 1L x <- as.numeric(unlist(lapply(x, `[`, 2L))) m <- simple_triplet_matrix(i, j, x) if (!is.null(vocab)) colnames(m) <- readLines(vocab) as.DocumentTermMatrix(m, weightTf) } tm/R/score.R0000644000175100001440000000143013023472115012343 0ustar hornikuserstm_term_score <- function(x, terms, FUN) UseMethod("tm_term_score", x) tm_term_score.term_frequency <- function(x, terms, FUN = function(x) sum(x, na.rm = TRUE)) FUN(x[match(terms, names(x), nomatch = 0L)]) tm_term_score.PlainTextDocument <- function(x, terms, FUN = function(x) sum(x, na.rm = TRUE)) tm_term_score(termFreq(x, control = list(tolower = FALSE, removePunctuation = TRUE, wordLengths = c(1, Inf))), terms, FUN) tm_term_score.TermDocumentMatrix <- function(x, terms, FUN = col_sums) FUN(x[match(terms, Terms(x), nomatch = 0L), ]) tm_term_score.DocumentTermMatrix <- function(x, terms, FUN = row_sums) FUN(x[, match(terms, Terms(x), nomatch = 0L)]) tm/R/filter.R0000644000175100001440000000064113036675611012532 0ustar hornikusers# Author: Ingo Feinerer # Filters tm_filter <- function(x, FUN, ...) UseMethod("tm_filter", x) tm_filter.PCorpus <- tm_filter.SimpleCorpus <- tm_filter.VCorpus <- function(x, FUN, ...) x[tm_index(x, FUN, ...)] tm_index <- function(x, FUN, ...) UseMethod("tm_index", x) tm_index.PCorpus <- tm_index.SimpleCorpus <- tm_index.VCorpus <- function(x, FUN, ...) 
unlist(tm_parLapply(content(x), FUN, ...)) tm/R/meta.R0000644000175100001440000001067113110235234012161 0ustar hornikusers# Author: Ingo Feinerer TextDocumentMeta <- function(author, datetimestamp, description, heading, id, language, origin, ..., meta = NULL) { if (is.null(meta)) meta <- list(author = author, datetimestamp = datetimestamp, description = description, heading = heading, id = id, language = language, origin = origin, ...) stopifnot(is.list(meta)) if (!is.null(meta$author) && !inherits(meta$author, "person")) meta$author <- as.character(meta$author) if (!is.null(meta$datetimestamp) && !inherits(meta$datetimestamp, "POSIXt")) meta$datetimestamp <- as.character(meta$datetimestamp) if (!is.null(meta$description)) meta$description <- as.character(meta$description) if (!is.null(meta$heading)) meta$heading <- as.character(meta$heading) if (!is.null(meta$id)) meta$id <- as.character(meta$id) if (!is.null(meta$language)) meta$language <- as.character(meta$language) if (!is.null(meta$origin)) meta$origin <- as.character(meta$origin) class(meta) <- "TextDocumentMeta" meta } print.TextDocumentMeta <- function(x, ...) { cat(sprintf(" %s: %s", format(names(x), justify = "left"), sapply(x, as.character)), sep = "\n") invisible(x) } CorpusMeta <- function(..., meta = NULL) { if (is.null(meta)) meta <- list(...) stopifnot(is.list(meta)) class(meta) <- "CorpusMeta" meta } meta.SimpleCorpus <- function(x, tag = NULL, type = c("indexed", "corpus"), ...) { if (identical(tag, "id")) { n <- names(content(x)) return(if (is.null(n)) as.character(seq_along(x)) else n) } if (!is.null(tag) && missing(type)) type <- if (tag %in% names(x$meta)) "corpus" else "indexed" type <- match.arg(type) if (identical(type, "indexed")) if (is.null(tag)) x$dmeta else x$dmeta[tag] else if (identical(type, "corpus")) if (is.null(tag)) x$meta else x$meta[[tag]] else stop("invalid type") } meta.VCorpus <- meta.PCorpus <- function(x, tag = NULL, type = c("indexed", "corpus", "local"), ...) 
{ if (!is.null(tag) && missing(type)) { type <- if (tag %in% names(x$dmeta)) "indexed" else if (tag %in% names(x$meta)) "corpus" else "local" } type <- match.arg(type) if (identical(type, "indexed")) if (is.null(tag)) x$dmeta else x$dmeta[tag] else if (identical(type, "corpus")) if (is.null(tag)) x$meta else x$meta[[tag]] else if (identical(type, "local")) lapply(x, meta, tag) else stop("invalid type") } `meta<-.SimpleCorpus` <- function(x, tag, type = c("indexed", "corpus"), ..., value) { type <- match.arg(type) if (identical(type, "indexed")) x$dmeta[, tag] <- value else if (type == "corpus") x$meta[[tag]] <- value else stop("invalid type") x } `meta<-.VCorpus` <- `meta<-.PCorpus` <- function(x, tag, type = c("indexed", "corpus", "local"), ..., value) { type <- match.arg(type) if (identical(type, "indexed")) x$dmeta[, tag] <- value else if (type == "corpus") x$meta[[tag]] <- value else if (identical(type, "local")) { for (i in seq_along(x)) meta(x[[i]], tag) <- value[i] } else stop("invalid type") x } # Simple Dublin Core to tm metadata mapping # http://en.wikipedia.org/wiki/Dublin_core#Simple_Dublin_Core Dublin_Core_tm_map <- list("contributor" = "contributor", "coverage" = "coverage", "creator" = "author", "date" = "datetimestamp", "description" = "description", "format" = "format", "identifier" = "id", "language" = "language", "publisher" = "publisher", "relation" = "relation", "rights" = "rights", "source" = "source", # or better "origin"? 
"subject" = "subject", "title" = "heading", "type" = "type" ) DublinCore <- function(x, tag = NULL) { tmm <- unlist(Dublin_Core_tm_map, use.names = FALSE) dcm <- names(Dublin_Core_tm_map) if (is.null(tag)) { m <- lapply(tmm, function(t) meta(x, t)) names(m) <- dcm class(m) <- "TextDocumentMeta" m } else meta(x, tmm[charmatch(tolower(tag), dcm)]) } `DublinCore<-` <- function(x, tag, value) { tmm <- unlist(Dublin_Core_tm_map, use.names = FALSE) dcm <- names(Dublin_Core_tm_map) meta(x, tmm[charmatch(tolower(tag), dcm)]) <- value x } tm/R/corpus.R0000644000175100001440000002140613202005444012544 0ustar hornikusers# Author: Ingo Feinerer Corpus <- function(x, readerControl = list(reader = reader(x), language = "en")) { stopifnot(inherits(x, "Source")) readerControl <- prepareReader(readerControl, reader(x)) if ( (inherits(x, "DataframeSource") || inherits(x, "DirSource") || inherits(x, "VectorSource") ) && identical(readerControl$reader, reader(x))) SimpleCorpus(x, readerControl) else VCorpus(x, readerControl) } PCorpus <- function(x, readerControl = list(reader = reader(x), language = "en"), dbControl = list(dbName = "", dbType = "DB1")) { stopifnot(inherits(x, "Source")) readerControl <- prepareReader(readerControl, reader(x)) if (!filehash::dbCreate(dbControl$dbName, dbControl$dbType)) stop("error in creating database") db <- filehash::dbInit(dbControl$dbName, dbControl$dbType) x <- open(x) tdl <- vector("list", length(x)) counter <- 1 while (!eoi(x)) { x <- stepNext(x) elem <- getElem(x) doc <- readerControl$reader(elem, readerControl$language, as.character(counter)) filehash::dbInsert(db, meta(doc, "id"), doc) tdl[[counter]] <- meta(doc, "id") counter <- counter + 1 } x <- close(x) cmeta <- CorpusMeta() dmeta <- data.frame(row.names = seq_along(tdl)) # Check if metadata retrieval is supported if (is.function(getS3method("getMeta", class(x), TRUE))) { m <- getMeta(x) if (!is.null(m$cmeta)) cmeta <- m$cmeta if (!is.null(m$dmeta)) dmeta <- m$dmeta } p <- list(content 
= tdl, meta = cmeta, dmeta = dmeta, dbcontrol = dbControl) class(p) <- c("PCorpus", "Corpus") p } SimpleCorpus <- function(x, control = list(language = "en")) { stopifnot(inherits(x, "Source")) if (!is.null(control$reader) && !identical(control$reader, reader(x))) warning("custom reader is ignored") content <- if (inherits(x, "VectorSource")) { if (is.character(x$content)) x$content else as.character(x$content) } else if (inherits(x, "DirSource")) { setNames(as.character( lapply(x$filelist, function(f) paste(readContent(f, x$encoding, "text"), collapse = "\n")) ), basename(x$filelist)) } else if (inherits(x, "DataframeSource")) { setNames(as.character(x$content[, "text"]), x$content[, "doc_id"]) } else stop("unsupported source type") dmeta <- if (inherits(x, "DataframeSource")) x$content[, is.na(match(names(x$content), c("doc_id", "text"))), drop = FALSE] else data.frame(row.names = seq_along(x)) s <- list(content = content, meta = CorpusMeta(language = control$language), dmeta = dmeta) class(s) <- c("SimpleCorpus", "Corpus") s } VCorpus <- function(x, readerControl = list(reader = reader(x), language = "en")) { stopifnot(inherits(x, "Source")) readerControl <- prepareReader(readerControl, reader(x)) x <- open(x) tdl <- vector("list", length(x)) # Check for parallel element access if (is.function(getS3method("pGetElem", class(x), TRUE))) tdl <- mapply(function(elem, id) readerControl$reader(elem, readerControl$language, id), pGetElem(x), id = as.character(seq_along(x)), SIMPLIFY = FALSE) else { counter <- 1 while (!eoi(x)) { x <- stepNext(x) elem <- getElem(x) doc <- readerControl$reader(elem, readerControl$language, as.character(counter)) tdl[[counter]] <- doc counter <- counter + 1 } } x <- close(x) cmeta <- CorpusMeta() dmeta <- data.frame(row.names = seq_along(tdl)) # Check if metadata retrieval is supported if (is.function(getS3method("getMeta", class(x), TRUE))) { m <- getMeta(x) if (!is.null(m$cmeta)) cmeta <- m$cmeta if (!is.null(m$dmeta)) dmeta <- m$dmeta 
} v <- as.VCorpus(tdl) v$meta <- cmeta v$dmeta <- dmeta v } `[.PCorpus` <- `[.SimpleCorpus` <- function(x, i) { if (!missing(i)) { x$content <- x$content[i] x$dmeta <- x$dmeta[i, , drop = FALSE] } x } `[.VCorpus` <- function(x, i) { if (!missing(i)) { x$content <- x$content[i] x$dmeta <- x$dmeta[i, , drop = FALSE] if (!is.null(x$lazy)) x$lazy$index <- x$lazy$index[i] } x } .map_name_index <- function(x, i) { if (is.character(i)) match(i, meta(x, "id", "local")) else i } `[[.PCorpus` <- function(x, i) { i <- .map_name_index(x, i) db <- filehash::dbInit(x$dbcontrol[["dbName"]], x$dbcontrol[["dbType"]]) filehash::dbFetch(db, x$content[[i]]) } `[[.SimpleCorpus` <- function(x, i) { i <- .map_name_index(x, i) n <- names(x$content) PlainTextDocument(x$content[[i]], id = if (is.null(n)) i else n[i], language = meta(x, "language")) } `[[.VCorpus` <- function(x, i) { i <- .map_name_index(x, i) if (!is.null(x$lazy)) .Call(`_tm_copyCorpus`, x, materialize(x, i)) x$content[[i]] } `[[<-.SimpleCorpus` <- function(x, i, value) { x$content[i] <- paste0(as.character(value), collapse = "\n") x } `[[<-.PCorpus` <- function(x, i, value) { i <- .map_name_index(x, i) db <- filehash::dbInit(x$dbcontrol[["dbName"]], x$dbcontrol[["dbType"]]) db[[x$content[[i]]]] <- value x } `[[<-.VCorpus` <- function(x, i, value) { i <- .map_name_index(x, i) # Mark new objects as inactive for lazy mapping if (!is.null(x$lazy)) x$lazy$index[i] <- FALSE x$content[[i]] <- value x } as.list.PCorpus <- as.list.VCorpus <- function(x, ...) setNames(content(x), as.character(lapply(content(x), meta, "id"))) as.list.SimpleCorpus <- function(x, ...) as.list(content(x)) as.VCorpus <- function(x) UseMethod("as.VCorpus") as.VCorpus.VCorpus <- identity as.VCorpus.list <- function(x) { v <- list(content = x, meta = CorpusMeta(), dmeta = data.frame(row.names = seq_along(x))) class(v) <- c("VCorpus", "Corpus") v } outer_union <- function(x, y, ...) 
{ if (nrow(x) > 0L) x[, setdiff(names(y), names(x))] <- NA if (nrow(y) > 0L) y[, setdiff(names(x), names(y))] <- NA res <- rbind(x, y) if (ncol(res) == 0L) res <- data.frame(row.names = seq_len(nrow(x) + nrow(y))) res } c.VCorpus <- function(..., recursive = FALSE) { args <- list(...) x <- args[[1L]] if (length(args) == 1L) return(x) if (!all(unlist(lapply(args, inherits, class(x))))) stop("not all arguments are of the same corpus type") v <- list(content = do.call("c", lapply(args, content)), meta = CorpusMeta(meta = do.call("c", lapply(args, function(a) meta(a, type = "corpus")))), dmeta = Reduce(outer_union, lapply(args, meta))) class(v) <- c("VCorpus", "Corpus") v } content.VCorpus <- function(x) { if (!is.null(x$lazy)) .Call(`_tm_copyCorpus`, x, materialize(x)) x$content } content.SimpleCorpus <- function(x) x$content content.PCorpus <- function(x) { db <- filehash::dbInit(x$dbcontrol[["dbName"]], x$dbcontrol[["dbType"]]) filehash::dbMultiFetch(db, unlist(x$content)) } inspect <- function(x) UseMethod("inspect", x) inspect.PCorpus <- inspect.SimpleCorpus <- inspect.VCorpus <- function(x) { print(x) cat("\n") print(noquote(content(x))) invisible(x) } length.PCorpus <- length.SimpleCorpus <- length.VCorpus <- function(x) length(x$content) names.PCorpus <- names.SimpleCorpus <- names.VCorpus <- function(x) as.character(meta(x, "id", "local")) `names<-.PCorpus` <- `names<-.VCorpus` <- function(x, value) { meta(x, "id", "local") <- as.character(value) x } format.PCorpus <- format.SimpleCorpus <- format.VCorpus <- function(x, ...) 
{ c(sprintf("<<%s>>", class(x)[1L]), sprintf("Metadata: corpus specific: %d, document level (indexed): %d", length(meta(x, type = "corpus")), ncol(meta(x, type = "indexed"))), sprintf("Content: documents: %d", length(x))) } writeCorpus <- function(x, path = ".", filenames = NULL) { filenames <- file.path(path, if (is.null(filenames)) sprintf("%s.txt", as.character(meta(x, "id", "local"))) else filenames) stopifnot(length(x) == length(filenames)) mapply(function(doc, f) writeLines(as.character(doc), f), x, filenames) invisible(x) } tm/R/RcppExports.R0000644000175100001440000000073313150552514013531 0ustar hornikusers# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 tdm <- function(strings, remove_digits, stopwords, dictionary, min_term_freq, max_term_freq, min_word_length, max_word_length) { .Call(`_tm_tdm`, strings, remove_digits, stopwords, dictionary, min_term_freq, max_term_freq, min_word_length, max_word_length) } Boost_Tokenizer <- function(strings) { .Call(`_tm_Boost_Tokenizer`, strings) } tm/R/weight.R0000644000175100001440000001211512776627444012546 0ustar hornikusers# Author: Ingo Feinerer WeightFunction <- function(x, name, acronym) { class(x) <- c("WeightFunction", "function") attr(x, "name") <- name attr(x, "acronym") <- acronym x } # Actual TermDocumentMatrix weighting functions weightTf <- WeightFunction(function(m) { attr(m, "weighting") <- c("term frequency", "tf") m }, "term frequency", "tf") weightTfIdf <- WeightFunction(function(m, normalize = TRUE) { isDTM <- inherits(m, "DocumentTermMatrix") if (isDTM) m <- t(m) if (normalize) { cs <- col_sums(m) if (any(cs == 0)) warning("empty document(s): ", paste(Docs(m)[cs == 0], collapse = " ")) names(cs) <- seq_len(nDocs(m)) m$v <- m$v / cs[m$j] } rs <- row_sums(m > 0) if (any(rs == 0)) warning("unreferenced term(s): ", paste(Terms(m)[rs == 0], collapse = " ")) lnrs <- log2(nDocs(m) / rs) lnrs[!is.finite(lnrs)] <- 0 m <- m * lnrs 
attr(m, "weighting") <- c(sprintf("%s%s", "term frequency - inverse document frequency", if (normalize) " (normalized)" else ""), "tf-idf") if (isDTM) t(m) else m }, "term frequency - inverse document frequency", "tf-idf") weightSMART <- WeightFunction(function(m, spec = "nnn", control = list()) { stopifnot(inherits(m, c("DocumentTermMatrix", "TermDocumentMatrix")), is.character(spec), nchar(spec) == 3L, is.list(control)) term_frequency <- match.arg(substr(spec, 1L, 1L), c("n", "l", "a", "b", "L")) document_frequency <- match.arg(substr(spec, 2L, 2L), c("n", "t", "p")) normalization <- match.arg(substr(spec, 3L, 3L), c("n", "c", "u", "b")) isDTM <- inherits(m, "DocumentTermMatrix") if (isDTM) m <- t(m) if (normalization == "b") { ## Need to compute the character lengths of the documents ## before starting the weighting. charlengths <- tapply(nchar(Terms(m))[m$i] * m$v, m$j, sum) } ## Term frequency m$v <- switch(term_frequency, ## natural n = m$v, ## logarithm l = 1 + log2(m$v), ## augmented a = { s <- tapply(m$v, m$j, max) 0.5 + (0.5 * m$v) / s[as.character(m$j)] }, ## boolean b = as.numeric(m$v > 0), ## log ave L = { s <- tapply(m$v, m$j, mean) ((1 + log2(m$v)) / (1 + log2(s[as.character(m$j)]))) }) ## Document frequency rs <- row_sums(m > 0) if (any(rs == 0)) warning("unreferenced term(s): ", paste(Terms(m)[rs == 0], collapse = " ")) df <- switch(document_frequency, ## natural n = 1, ## idf t = log2(nDocs(m) / rs), ## prob idf p = max(0, log2((nDocs(m) - rs) / rs))) df[!is.finite(df)] <- 0 ## Normalization cs <- col_sums(m) if (any(cs == 0)) warning("empty document(s): ", paste(Docs(m)[cs == 0], collapse = " ")) norm <- switch(normalization, ## none n = rep.int(1, nDocs(m)), ## cosine c = sqrt(col_sums(m ^ 2)), ## pivoted unique u = { if (is.null(pivot <- control$pivot)) stop("invalid control argument pivot") if (is.null(slope <- control$slope)) stop("invalid control argument slope") (slope * sqrt(col_sums(m ^ 2)) + (1 - slope) * pivot) }, ## byte size b = { if 
(is.null(alpha <- control$alpha)) stop("invalid control argument alpha") norm <- double(nDocs(m)) norm[match(names(charlengths), seq_along(norm))] <- charlengths ^ alpha norm }) m <- m * df m$v <- m$v / norm[m$j] attr(m, "weighting") <- c(paste("SMART", spec), "SMART") if (isDTM) t(m) else m }, "SMART", "SMART") weightBin <- WeightFunction(function(m) { m$v <- rep_len(1L, length(m$v)) attr(m, "weighting") <- c("binary", "bin") m }, "binary", "bin") tm/R/hpc.R0000644000175100001440000000075413037140514012012 0ustar hornikuserstm_parLapply_engine <- local({ val <- NULL ## Could do some checking on new if given: should inherit from ## "cluster" or have formals (X, FUN, ...). function(new) { if (missing(new)) val else val <<- new } }) tm_parLapply <- function(X, FUN, ...) { engine <- tm_parLapply_engine() if (inherits(engine, "cluster")) parLapply(engine, X, FUN, ...) else if (is.function(engine)) engine(X, FUN, ...) else lapply(X, FUN, ...) } tm/R/doc.R0000644000175100001440000000577713177022574012031 0ustar hornikusersc.TextDocument <- function(..., recursive = FALSE) { args <- list(...) x <- args[[1L]] if (length(args) == 1L) return(x) if (!all(unlist(lapply(args, inherits, class(x))))) stop("not all arguments are text documents") v <- list(content = args, meta = CorpusMeta(), dmeta = data.frame(row.names = seq_along(args))) class(v) <- c("VCorpus", "Corpus") v } .format_TextDocument <- function(x, ...) 
c(sprintf("<<%s>>", class(x)[1L]), sprintf("Metadata: %d", length(meta(x)))) inspect.TextDocument <- function(x) { print(x) cat("\n") writeLines(as.character(x)) invisible(x) } PlainTextDocument <- function(x = character(0), author = character(0), datetimestamp = as.POSIXlt(Sys.time(), tz = "GMT"), description = character(0), heading = character(0), id = character(0), language = character(0), origin = character(0), ..., meta = NULL, class = NULL) { p <- list(content = as.character(x), meta = TextDocumentMeta(author, datetimestamp, description, heading, id, language, origin, ..., meta = meta)) class(p) <- unique(c(class, "PlainTextDocument", "TextDocument")) p } as.character.PlainTextDocument <- function(x, ...) content(x) content.PlainTextDocument <- function(x) x$content `content<-.PlainTextDocument` <- function(x, value) { x$content <- as.character(value) x } format.PlainTextDocument <- function(x, ...) c(.format_TextDocument(x), sprintf("Content: chars: %d", sum(nchar(x$content)))) meta.PlainTextDocument <- function(x, tag = NULL, ...) if (is.null(tag)) x$meta else x$meta[[tag]] `meta<-.PlainTextDocument` <- function(x, tag = NULL, ..., value) { if (is.null(tag)) x$meta <- value else x$meta[[tag]] <- value x } words.character <- words.PlainTextDocument <- function(x, ...) scan_tokenizer(x) XMLTextDocument <- function(x = xml_missing(), author = character(0), datetimestamp = as.POSIXlt(Sys.time(), tz = "GMT"), description = character(0), heading = character(0), id = character(0), language = character(0), origin = character(0), ..., meta = NULL) { d <- list(content = x, meta = TextDocumentMeta(author, datetimestamp, description, heading, id, language, origin, ..., meta = meta)) class(d) <- c("XMLTextDocument", "TextDocument") d } as.character.XMLTextDocument <- function(x, ...) 
xml_text(content(x)) content.XMLTextDocument <- function(x) x$content `content<-.XMLTextDocument` <- function(x, value) { x$content <- value x } format.XMLTextDocument <- .format_TextDocument meta.XMLTextDocument <- meta.PlainTextDocument `meta<-.XMLTextDocument` <- `meta<-.PlainTextDocument` tm/R/source.R0000644000175100001440000001775213202005533012541 0ustar hornikusers## Author: Ingo Feinerer ## Sources getSources <- function() c("DataframeSource", "DirSource", "URISource", "VectorSource", "XMLSource", "ZipSource") SimpleSource <- function(encoding = "", length = 0, position = 0, reader = readPlain, ..., class) { if (!is.character(encoding)) stop("invalid encoding") if (!is.numeric(length) || (length < 0)) stop("invalid length entry denoting the number of elements") if (!is.numeric(position)) stop("invalid position") if (!is.function(reader)) stop("invalid default reader") s <- list(encoding = encoding, length = length, position = position, reader = reader, ...) class(s) <- unique(c(class, "SimpleSource", "Source")) s } # A data frame where each row is interpreted as document DataframeSource <- function(x) { stopifnot(all(!is.na(match(c("doc_id", "text"), names(x))))) SimpleSource(length = nrow(x), reader = readDataframe, content = x, class = "DataframeSource") } # A directory with files interpreted as documents DirSource <- function(directory = ".", encoding = "", pattern = NULL, recursive = FALSE, ignore.case = FALSE, mode = "text") { if (!identical(mode, "text") && !identical(mode, "binary") && !identical(mode, "")) stop(sprintf("invalid mode '%s'", mode)) d <- dir(directory, full.names = TRUE, pattern = pattern, recursive = recursive, ignore.case = ignore.case) if (!length(d)) stop("empty directory") isfile <- !file.info(d)[["isdir"]] if (any(is.na(isfile))) stop("non-existent or non-readable file(s): ", paste(d[is.na(isfile)], collapse = " ")) SimpleSource(encoding = encoding, length = sum(isfile), mode = mode, filelist = d[isfile], class = "DirSource") } 
# Documents identified by a Uniform Resource Identifier URISource <- function(x, encoding = "", mode = "text") { if (!identical(mode, "text") && !identical(mode, "binary") && !identical(mode, "")) stop(sprintf("invalid mode '%s'", mode)) SimpleSource(encoding = encoding, length = length(x), mode = mode, uri = x, class = "URISource") } # A vector where each component is interpreted as document VectorSource <- function(x) SimpleSource(length = length(x), content = x, class = "VectorSource") XMLSource <- function(x, parser = xml_contents, reader) { xmldoc <- read_xml(x) content <- parser(xmldoc) SimpleSource(length = length(content), reader = reader, content = content, uri = x, class = "XMLSource") } # A ZIP file with its compressed files interpreted as documents ZipSource <- function(zipfile, pattern = NULL, recursive = FALSE, ignore.case = FALSE, mode = "text") { if (!identical(mode, "text") && !identical(mode, "binary") && !identical(mode, "")) stop(sprintf("invalid mode '%s'", mode)) SimpleSource(exdir = NULL, files = NULL, mode = mode, pattern = pattern, recursive = recursive, ignore.case = ignore.case, zipfile = zipfile, class = "ZipSource") } # tau:::read_all_bytes read_all_bytes <- function(con, chunksize = 2 ^ 16) { if (is.character(con)) { return(readBin(con, raw(), file.info(con)$size)) } if (!isOpen(con)) { open(con, "rb") on.exit(close(con)) } bytes <- list() repeat { chunk <- readBin(con, raw(), chunksize) bytes <- c(bytes, list(chunk)) if (length(chunk) < chunksize) break } unlist(bytes) } readContent <- function(x, encoding, mode) { if (identical(mode, "text")) iconv(readLines(x, warn = FALSE), encoding, "UTF-8", "byte") else if (identical(mode, "binary")) read_all_bytes(x) else if (identical(mode, "")) NULL else stop("invalid mode") } open.SimpleSource <- close.SimpleSource <- function(con, ...) con open.ZipSource <- function(con, ...) 
{ x <- con exdir <- tempfile("ZipSource") dir.create(exdir, mode = "0700") destfile <- x$zipfile if (!file.exists(destfile)) { destfile <- tempfile() download.file(x$zipfile, destfile) on.exit(file.remove(destfile)) } files <- unzip(destfile, list = TRUE) ## Directories have length 0 files <- files[files$Length > 0, "Name"] ## Idea: Subdirectories contain file separators if (!x$recursive) files <- files[!grepl(.Platform$file.sep, files, fixed = TRUE)] ## Idea: pattern and ignore.case refer to the file name (like basename) ## Cf. also ?dir if (!is.null(x$pattern)) files <- files[grepl(x$pattern, files, ignore.case = x$ignore.case)] unzip(destfile, files, exdir = exdir) x$exdir <- exdir x$files <- files x$length <- length(files) x } close.ZipSource <- function(con, ...) { x <- con if (!is.null(x$exdir)) { unlink(x$exdir, recursive = TRUE) x$exdir <- NULL x$files <- NULL x$length <- 0 } x } eoi <- function(x) UseMethod("eoi", x) eoi.SimpleSource <- function(x) x$length <= x$position getElem <- function(x) UseMethod("getElem", x) getElem.DataframeSource <- function(x) list(content = x$content[x$position, ], uri = NULL) getElem.DirSource <- function(x) { filename <- x$filelist[x$position] list(content = readContent(filename, x$encoding, x$mode), uri = paste0("file://", filename)) } getElem.URISource <- function(x) list(content = readContent(x$uri[x$position], x$encoding, x$mode), uri = x$uri[x$position]) getElem.VectorSource <- function(x) list(content = x$content[x$position], uri = NULL) getElem.XMLSource <- function(x) list(content = x$content[[x$position]], uri = x$uri) getElem.ZipSource <- function(x) { path <- file.path(x$exdir, x$files[x$position]) list(content = readContent(path, x$encoding, x$mode), uri = paste0("file://", path)) } getMeta <- function(x) UseMethod("getMeta", x) getMeta.DataframeSource <- function(x) list(cmeta = NULL, dmeta = x$content[, is.na(match(names(x$content), c("doc_id", "text"))), drop = FALSE]) length.SimpleSource <- function(x) 
x$length pGetElem <- function(x) UseMethod("pGetElem", x) pGetElem.DataframeSource <- function(x) lapply(seq_len(x$length), function(y) list(content = x$content[y, ], uri = NULL)) `[.DataframeSource` <- function(x, i, j, ...) x$content[i, j, ...] `[[.DataframeSource` <- function(x, ...) x$content[[...]] pGetElem.DirSource <- function(x) lapply(x$filelist, function(f) list(content = readContent(f, x$encoding, x$mode), uri = paste0("file://", f))) `[.DirSource` <- function(x, i, ...) x$filelist[i, ...] `[[.DirSource` <- function(x, i, ...) x$filelist[[i, ...]] pGetElem.URISource <- function(x) lapply(x$uri, function(uri) list(content = readContent(uri, x$encoding, x$mode), uri = uri)) `[.URISource` <- function(x, i, ...) x$uri[i, ...] `[[.URISource` <- function(x, i, ...) x$uri[[i, ...]] pGetElem.VectorSource <- function(x) lapply(x$content, function(y) list(content = y, uri = NULL)) `[.VectorSource` <- function(x, i, ...) x$content[i, ...] `[[.VectorSource` <- function(x, i, ...) x$content[[i, ...]] pGetElem.ZipSource <- function(x) lapply(file.path(x$exdir, x$files), function(f) list(content = readContent(f, x$encoding, x$mode), uri = paste0("file://", f))) reader <- function(x) UseMethod("reader", x) reader.SimpleSource <- function(x) x$reader stepNext <- function(x) UseMethod("stepNext", x) stepNext.SimpleSource <- function(x) { x$position <- x$position + 1 x } tm/R/transform.R0000644000175100001440000001043713164112376013260 0ustar hornikusers# Author: Ingo Feinerer # Transformations tm_map <- function(x, FUN, ...) UseMethod("tm_map", x) tm_map.VCorpus <- function(x, FUN, ..., lazy = FALSE) { # Lazy mapping if (lazy) { fun <- function(x) FUN(x, ...) if (is.null(x$lazy)) x$lazy <- list(index = rep_len(TRUE, length(x)), maps = list(fun)) else x$lazy$maps <- c(x$lazy$maps, list(fun)) } else x$content <- tm_parLapply(content(x), FUN, ...) x } tm_map.SimpleCorpus <- function(x, FUN, ...) 
{ if (inherits(FUN, "content_transformer")) FUN <- get("FUN", envir = environment(FUN)) x$content <- FUN(content(x), ...) x } tm_map.PCorpus <- function(x, FUN, ...) { db <- filehash::dbInit(x$dbcontrol[["dbName"]], x$dbcontrol[["dbType"]]) for (i in seq_along(x)) db[[x$content[[i]]]] <- FUN(x[[i]], ...) filehash::dbReorganize(db) x } # Materialize lazy mappings materialize <- function(x, range = seq_along(x)) { if (!is.null(x$lazy)) { i <- (seq_along(x) %in% range) & x$lazy$index if (any(i)) { x$content[i] <- tm_parLapply(x$content[i], function(d) tm_reduce(d, x$lazy$maps)) x$lazy$index[i] <- FALSE } # Clean up if everything is materialized if (!any(x$lazy$index)) x["lazy"] <- list(NULL) } x } tm_reduce <- function(x, tmFuns, ...) Reduce(function(f, ...) f(...), tmFuns, x, right = TRUE) getTransformations <- function() c("removeNumbers", "removePunctuation", "removeWords", "stemDocument", "stripWhitespace") content_transformer <- function(FUN) { f <- function(x, ...) { content(x) <- FUN(content(x), ...) x } class(f) <- c("content_transformer", "function") f } removeNumbers <- function(x, ...) UseMethod("removeNumbers") removeNumbers.character <- function(x, ucp = FALSE, ...) { if (ucp) gsub("\\p{Nd}+", "", x, perl = TRUE) else .Call(`_tm_remove_chars`, x, 1L) } removeNumbers.PlainTextDocument <- content_transformer(removeNumbers.character) removePunctuation <- function(x, ...) UseMethod("removePunctuation") removePunctuation.character <- function(x, preserve_intra_word_contractions = FALSE, preserve_intra_word_dashes = FALSE, ucp = FALSE, ...) { # Assume there are no ASCII 0x01 (SOH) or ASCII 0x02 (STX) characters. 
if (preserve_intra_word_contractions) x <- gsub("(\\w)'(\\w)", "\\1\1\\2", x, perl = TRUE) if (preserve_intra_word_dashes) x <- gsub("(\\w)-(\\w)", "\\1\2\\2", x, perl = TRUE) if (ucp) x <- gsub("\\p{P}+", "", x, perl = TRUE) else x <- .Call(`_tm_remove_chars`, x, 0L) if (preserve_intra_word_contractions) x <- gsub("\1", "'", x, fixed = TRUE) if (preserve_intra_word_dashes) x <- gsub("\2", "-", x, fixed = TRUE) x } removePunctuation.PlainTextDocument <- content_transformer(removePunctuation.character) removeWords <- function(x, words) UseMethod("removeWords", x) # Improvements by Kurt Hornik removeWords.character <- function(x, words) gsub(sprintf("(*UCP)\\b(%s)\\b", paste(sort(words, decreasing = TRUE), collapse = "|")), "", x, perl = TRUE) removeWords.PlainTextDocument <- content_transformer(removeWords.character) stemDocument <- function(x, language = "english") UseMethod("stemDocument", x) stemDocument.character <- function(x, language = "english") { s <- unlist(lapply(x, function(line) paste(SnowballC::wordStem(words(line), as.character(language)), collapse = " "))) if (is.character(s)) s else "" } stemDocument.PlainTextDocument <- function(x, language = meta(x, "language")) { language <- as.character(language) if (identical(language, "") || identical(language, character(0)) || is.na(language)) language <- "english" content_transformer(stemDocument.character)(x) } stripWhitespace <- function(x) UseMethod("stripWhitespace", x) stripWhitespace.character <- function(x) gsub("[[:space:]]+", " ", x) stripWhitespace.PlainTextDocument <- content_transformer(stripWhitespace.character) tm/R/pdftools.R0000644000175100001440000001046312776627444013115 0ustar hornikuserspdf_info_via_xpdf <- function(file, options = NULL) { outfile <- tempfile("pdfinfo") on.exit(unlink(outfile)) status <- system2("pdfinfo", c(options, shQuote(normalizePath(file))), stdout = outfile) ## Could check the status ... ## This does not work ... 
## info <- as.list(read.dcf(outfile)[1L, ]) tags <- c("Title", "Subject", "Keywords", "Author", "Creator", "Producer", "CreationDate", "ModDate", "Tagged", "Form", "Pages", "Encrypted", "Page size", "File size", "Optimized", "PDF version") re <- sprintf("^(%s)", paste(sprintf("%-16s", sprintf("%s:", tags)), collapse = "|")) lines <- readLines(outfile, warn = FALSE) ind <- grepl(re, lines) tags <- sub(": *", "", substring(lines[ind], 1L, 16L)) info <- split(sub(re, "", lines), cumsum(ind)) names(info) <- tags fmt <- "%a %b %d %X %Y" if (!is.null(d <- info$CreationDate)) info$CreationDate <- strptime(d, fmt) if (!is.null(d <- info$ModDate)) info$ModDate <- strptime(d, fmt) if (!is.null(p <- info$Pages)) info$Pages <- as.integer(p) info } pdf_info_via_gs <- function(file) { file <- normalizePath(file) gs_cmd <- tools::find_gs_cmd() out <- system2(gs_cmd, c("-dNODISPLAY -q", sprintf("-sFile=%s", shQuote(file)), system.file("ghostscript", "pdf_info.ps", package = "tm")), stdout = TRUE) out <- out[cumsum(out == "") == 2L][-1L] val <- sub("^[^:]+:[[:space:]]*", "", out) names(val) <- sub(":.*", "", out) val <- as.list(val) if (!is.null(d <- val$CreationDate)) val$CreationDate <- PDF_Date_to_POSIXt(d) if (!is.null(d <- val$ModDate)) val$ModDate <- PDF_Date_to_POSIXt(d) val } PDF_Date_to_POSIXt <- function(s) { ## Strip optional 'D:' prefix. s <- sub("^D:", "", s) ## Strip apostrophes in offset spec. s <- gsub("'", "", s) if (nchar(s) <= 14L) { s <- sprintf("%s%s", s, substring(" 0101000000", nchar(s) + 1L, 14L)) strptime(s, "%Y%m%d%H%M%S") } else if (substring(s, 15L, 15L) == "Z") { strptime(substring(s, 1L, 14L), "%Y%m%d%H%M%S") } else { strptime(s, "%Y%m%d%H%M%S%z") } } pdf_text_via_gs <- function(file) { file <- normalizePath(file) gs_cmd <- tools::find_gs_cmd() tf <- tempfile("pdf") on.exit(unlink(tf)) ## The current mechanism is first converting PDF to Postscript using ## the ps2write device, and then extract text using the ps2ascii.ps ## program. 
This fails for some files (e.g., ## /data/rsync/PKGS/AlleleRetain/inst/doc/AlleleRetain_User_Guide.pdf ## which Ghostscript also fails to render. Note that rendering via ## gv works "fine": but this uses the pswrite device which produces ## bitmap (from which no text can be extracted, of course). ## Using the txtwrite device is simply too unstable: e.g., ## gs -dBATCH -dNOPAUSE -sDEVICE=txtwrite -dQUIET -sOutputFile=- \ ## /data/rsync/PKGS/AlleleRetain/inst/doc/AlleleRetain_User_Guide.pdf ## keeps segfaulting. ## An additional nuisance is that there seems no simple way to ## detect a ps2ascii.ps failure. ## Finally, note that we currently use -DSIMPLE: without this, more ## information would be made available, but require post-processing. ## Step 1. Convert PDF to Postscript. res <- system2(gs_cmd, c("-q -dNOPAUSE -dBATCH -P- -dSAFER -sDEVICE=ps2write", sprintf("-sOutputFile=%s", tf), "-c save pop -f", shQuote(file))) ## Step 2. Extract text. txt <- system2(gs_cmd, c("-q -dNODISPLAY -P- -dSAFER -dDELAYBIND -dWRITESYSTEMDICT -dSIMPLE", "-c save -f ps2ascii.ps", tf, "-c quit"), stdout = TRUE) ## Argh. How can we catch errors? ## The return values are always 0 ... 
if (any(grepl("Error handled by opdfread.ps", txt))) { stop(paste(c("Ghostscript failed, with output:", txt), collapse = "\n")) } strsplit(paste(txt, collapse = "\n"), "\f")[[1L]] } tm/R/complete.R0000644000175100001440000000373113034740316013051 0ustar hornikusers# Author: Ingo Feinerer stemCompletion <- function(x, dictionary, type = c("prevalent", "first", "longest", "none", "random", "shortest")) { if (inherits(dictionary, "Corpus")) dictionary <- unique(unlist(lapply(dictionary, words))) type <- match.arg(type) possibleCompletions <- lapply(x, function(w) grep(sprintf("^%s", w), dictionary, value = TRUE)) switch(type, first = { setNames(sapply(possibleCompletions, "[", 1), x) }, longest = { ordering <- lapply(possibleCompletions, function(x) order(nchar(x), decreasing = TRUE)) possibleCompletions <- mapply(function(x, id) x[id], possibleCompletions, ordering, SIMPLIFY = FALSE) setNames(sapply(possibleCompletions, "[", 1), x) }, none = { setNames(x, x) }, prevalent = { possibleCompletions <- lapply(possibleCompletions, function(x) sort(table(x), decreasing = TRUE)) n <- names(sapply(possibleCompletions, "[", 1)) setNames(if (length(n)) n else rep_len(NA, length(x)), x) }, random = { setNames(sapply(possibleCompletions, function(x) { if (length(x)) sample(x, 1) else NA }), x) }, shortest = { ordering <- lapply(possibleCompletions, function(x) order(nchar(x))) possibleCompletions <- mapply(function(x, id) x[id], possibleCompletions, ordering, SIMPLIFY = FALSE) setNames(sapply(possibleCompletions, "[", 1), x) } ) } tm/R/matrix.R0000644000175100001440000004341213202006321012531 0ustar hornikusers## Authors: Ingo Feinerer, Kurt Hornik TermDocumentMatrix_classes <- c("TermDocumentMatrix", "simple_triplet_matrix") DocumentTermMatrix_classes <- c("DocumentTermMatrix", "simple_triplet_matrix") .TermDocumentMatrix <- function(x, weighting) { x <- as.simple_triplet_matrix(x) if (!is.null(dimnames(x))) names(dimnames(x)) <- c("Terms", "Docs") class(x) <- 
TermDocumentMatrix_classes if (is.null(weighting)) weighting <- weightTf ## ## Note that if weighting is a weight function, it already needs to ## know whether we have a term-document or document-term matrix. ## ## Ideally we would require weighting to be a WeightFunction object ## or a character string of length 2. But then ## dtm <- DocumentTermMatrix(crude, ## control = list(weighting = ## function(x) ## weightTfIdf(x, normalize = ## FALSE), ## stopwords = TRUE)) ## in example("DocumentTermMatrix") fails [because weightTfIdf() is ## a weight function and not a weight function generator ...] ## Hence, for now, instead of ## if (inherits(weighting, "WeightFunction")) ## x <- weighting(x) ## use if (is.function(weighting)) x <- weighting(x) ## and hope for the best ... ## else if (is.character(weighting) && (length(weighting) == 2L)) attr(x, "weighting") <- weighting x } .SimpleTripletMatrix <- function(i, j, v, terms, corpus) { docs <- as.character(meta(corpus, "id", "local")) if (length(docs) != length(corpus)) { warning("invalid document identifiers") docs <- NULL } simple_triplet_matrix(i, j, v, nrow = length(terms), ncol = length(corpus), dimnames = list(Terms = terms, Docs = docs)) } filter_global_bounds <- function(m, bounds) { m <- as.simple_triplet_matrix(m) if (length(bounds) == 2L && is.numeric(bounds)) { rs <- row_sums(m > 0) m <- m[(rs >= bounds[1]) & (rs <= bounds[2]), ] } m } TermDocumentMatrix <- function(x, control = list()) UseMethod("TermDocumentMatrix", x) TermDocumentMatrix.SimpleCorpus <- function(x, control = list()) { stopifnot(is.list(control)) txt <- content(x) ## Conversion to lower case if (is.null(control$tolower) || isTRUE(control$tolower)) txt <- tolower(txt) ## Stopword filtering .stopwords <- if (isTRUE(control$stopwords)) stopwords(meta(x, "language")) else if (is.character(control$stopwords)) control$stopwords else character(0) .dictionary <- if (is.null(control$dictionary)) character(0) else control$dictionary ## Ensure local 
bounds bl <- control$bounds$local min_term_freq <- if (length(bl) == 2L && is.numeric(bl) && bl[1] >= 0) bl[1] else 0L max_term_freq <- if (length(bl) == 2L && is.numeric(bl) && bl[2] >= 0) min(bl[2], .Machine$integer.max) else .Machine$integer.max ## Filter out too short or too long terms wl <- control$wordLengths min_word_length <- if (is.numeric(wl[1]) && wl[1] >= 0) wl[1] else 3L max_word_length <- if (is.numeric(wl[2]) && wl[2] >= 0) min(wl[2], .Machine$integer.max) else .Machine$integer.max m <- tdm(txt, isTRUE(control$removeNumbers), .stopwords, .dictionary, as.integer(min_term_freq), as.integer(max_term_freq), as.integer(min_word_length), as.integer(max_word_length)) m <- .SimpleTripletMatrix(m$i, m$j, m$v, enc2utf8(m$terms), x) ## Stemming ## ## Ideally tdm() could perform stemming as well but there is no easy way to ## access the SnowballC::wordStem() function from C++ (via Rcpp) without ## significant overhead (as SnowballC does not export its internal C ## functions). ## ## Stemming afterwards is still quite performant as we already have all ## terms. However, there is some overhead involved as we need to recheck ## local bounds and word lengths. 
##
    if (isTRUE(control$stemming)) {
        ## Map each term to its Snowball stem and aggregate rows with the
        ## same stem by summing their frequencies.
        stems <- as.factor(SnowballC::wordStem(m$dimnames$Terms,
                                               meta(x, "language")))
        m <- rollup(m, "Terms", stems)
        ## Recheck local bounds
        ## No need to check lower local bound as rollup aggregates frequencies
        m[m > max_term_freq] <- 0
        ## Recheck word lengths
        terms_length <- nchar(rownames(m))
        m <- m[min_word_length <= terms_length &
                   terms_length <= max_word_length, ]
    }
    m <- filter_global_bounds(m, control$bounds$global)
    .TermDocumentMatrix(m, control$weighting)
}

## Term-document matrix for a PCorpus or VCorpus: count term frequencies
## per document (parallelized via tm_parLapply()) and assemble a sparse
## simple triplet matrix.
TermDocumentMatrix.PCorpus <-
TermDocumentMatrix.VCorpus <- function(x, control = list())
{
    stopifnot(is.list(control))
    tflist <- tm_parLapply(unname(content(x)), termFreq, control)
    v <- unlist(tflist)
    i <- names(v)
    ## Row labels: either all observed terms or, if supplied, the dictionary.
    terms <- sort(unique(as.character(if (is.null(control$dictionary)) i
                                      else control$dictionary)))
    i <- match(i, terms)
    ## Column index: document position, repeated once per term occurrence.
    j <- rep.int(seq_along(x), lengths(tflist))
    m <- .SimpleTripletMatrix(i, j, as.numeric(v), terms, x)
    m <- filter_global_bounds(m, control$bounds$global)
    .TermDocumentMatrix(m, control$weighting)
}

## A document-term matrix is the transpose of a term-document matrix.
DocumentTermMatrix <- function(x, control = list())
    t(TermDocumentMatrix(x, control))

## Coercions to term-document matrices.
as.TermDocumentMatrix <- function(x, ...)
    UseMethod("as.TermDocumentMatrix")
as.TermDocumentMatrix.TermDocumentMatrix <- function(x, ...)
    x
as.TermDocumentMatrix.DocumentTermMatrix <- function(x, ...)
    t(x)
## Turn a named term-frequency vector (as produced by termFreq() or
## tau::textcnt()) into a single-document term-document matrix.
as.TermDocumentMatrix.term_frequency <-
as.TermDocumentMatrix.textcnt <- function(x, ...)
{
    m <- simple_triplet_matrix(i = seq_along(x),
                               j = rep_len(1L, length(x)),
                               v = as.numeric(x),
                               nrow = length(x),
                               ncol = 1,
                               dimnames = list(Terms = names(x),
                                               Docs = NA_character_))
    .TermDocumentMatrix(m, weightTf)
}
as.TermDocumentMatrix.default <- function(x, weighting, ...)
    .TermDocumentMatrix(x, weighting)

## Coercions to document-term matrices.
as.DocumentTermMatrix <- function(x, ...)
    UseMethod("as.DocumentTermMatrix")
as.DocumentTermMatrix.DocumentTermMatrix <- function(x, ...)
    x
as.DocumentTermMatrix.TermDocumentMatrix <- function(x, ...)
t(x) as.DocumentTermMatrix.term_frequency <- as.DocumentTermMatrix.textcnt <- function(x, ...) t(as.TermDocumentMatrix(x)) as.DocumentTermMatrix.default <- function(x, weighting, ...) { x <- as.simple_triplet_matrix(x) t(.TermDocumentMatrix(t(x), weighting)) } t.TermDocumentMatrix <- t.DocumentTermMatrix <- function(x) { m <- NextMethod("t") attr(m, "weighting") <- attr(x, "weighting") class(m) <- if (inherits(x, "DocumentTermMatrix")) TermDocumentMatrix_classes else DocumentTermMatrix_classes m } termFreq <- function(doc, control = list()) { stopifnot(inherits(doc, "TextDocument") || is.character(doc), is.list(control)) ## Tokenize the corpus .tokenize <- control$tokenize if (is.null(.tokenize) || identical(.tokenize, "words")) .tokenize <- words else if (identical(.tokenize, "Boost")) .tokenize <- Boost_tokenizer else if (identical(.tokenize, "MC")) .tokenize <- MC_tokenizer else if (identical(.tokenize, "scan")) .tokenize <- scan_tokenizer else if (is.Span_Tokenizer(.tokenize)) .tokenize <- as.Token_Tokenizer(.tokenize) if (is.function(.tokenize)) txt <- .tokenize(doc) else stop("invalid tokenizer") ## Conversion to lower case .tolower <- control$tolower if (is.null(.tolower) || isTRUE(.tolower)) .tolower <- tolower if (is.function(.tolower)) txt <- .tolower(txt) ## Punctuation removal .removePunctuation <- control$removePunctuation if (isTRUE(.removePunctuation)) .removePunctuation <- removePunctuation else if (is.list(.removePunctuation)) .removePunctuation <- function(x) do.call(removePunctuation, c(list(x), control$removePunctuation)) ## Number removal .removeNumbers <- control$removeNumbers if (isTRUE(.removeNumbers)) .removeNumbers <- removeNumbers .language <- control$language if (inherits(doc, "TextDocument")) .language <- meta(doc, "language") if (is.null(.language)) .language <- "en" ## Stopword filtering .stopwords <- control$stopwords if (isTRUE(.stopwords)) .stopwords <- function(x) x[is.na(match(x, stopwords(.language)))] else if 
(is.character(.stopwords)) .stopwords <- function(x) x[is.na(match(x, control$stopwords))] ## Stemming .stemming <- control$stemming if (isTRUE(.stemming)) .stemming <- function(x) SnowballC::wordStem(x, .language) ## Default order for options which support reordering or <- c("removePunctuation", "removeNumbers", "stopwords", "stemming") ## Process control options in specified order nc <- names(control) n <- nc[!is.na(match(nc, or))] for (name in sprintf(".%s", c(n, setdiff(or, n)))) { g <- get(name) if (is.function(g)) txt <- g(txt) } ## If dictionary is set tabulate against it dictionary <- control$dictionary tab <- .table(if (is.null(dictionary)) txt else txt[!is.na(match(txt, dictionary))]) ## Ensure local bounds bl <- control$bounds$local if (length(bl) == 2L && is.numeric(bl)) tab <- tab[(tab >= bl[1]) & (tab <= bl[2]), drop = FALSE] ## Filter out too short or too long terms nc <- nchar(names(tab), type = "chars") wl <- control$wordLengths lb <- if (is.numeric(wl[1])) wl[1] else 3 ub <- if (is.numeric(wl[2])) wl[2] else Inf tab <- tab[(nc >= lb) & (nc <= ub), drop = FALSE] class(tab) <- c("term_frequency", class(tab)) tab } print.TermDocumentMatrix <- print.DocumentTermMatrix <- function(x, ...) 
{
    ## Print a concise summary (dimensions, sparsity, maximal term
    ## length, weighting) rather than the full matrix.
    format <- c("term", "document")
    if (inherits(x, "DocumentTermMatrix"))
        format <- rev(format)
    writeLines(sprintf("<<%s (%ss: %d, %ss: %d)>>",
                       class(x)[1], format[1L], nrow(x), format[2L], ncol(x)))
    writeLines(sprintf("Non-/sparse entries: %d/%.0f",
                       length(x$v), prod(dim(x)) - length(x$v)))
    ## Guard against division by zero for an empty matrix.
    sparsity <- if (!prod(dim(x))) 100 else
        round( (1 - length(x$v) / prod(dim(x))) * 100)
    writeLines(sprintf("Sparsity : %s%%", sparsity))
    writeLines(sprintf("Maximal term length: %s",
                       max(nchar(Terms(x), type = "chars"), 0)))
    writeLines(sprintf("Weighting : %s (%s)",
                       attr(x, "weighting")[1L], attr(x, "weighting")[2L]))
    ## Return invisibly, as is conventional for print methods.
    invisible(x)
}

## Show the summary plus a dense sample of the most frequent terms/docs.
inspect.TermDocumentMatrix <-
inspect.DocumentTermMatrix <- function(x)
{
    print(x)
    cat("Sample :\n")
    print(as.matrix(sample.TermDocumentMatrix(x)))
}

## Subscripting must preserve the weighting attribute and the matrix
## class (which NextMethod() on the simple triplet matrix drops).
`[.TermDocumentMatrix` <-
`[.DocumentTermMatrix` <- function(x, i, j, ..., drop)
{
    m <- NextMethod("[")
    attr(m, "weighting") <- attr(x, "weighting")
    class(m) <- if (inherits(x, "DocumentTermMatrix"))
        DocumentTermMatrix_classes
    else
        TermDocumentMatrix_classes
    m
}

## Replacing dimnames must re-establish the canonical dimension labels.
`dimnames<-.DocumentTermMatrix` <- function(x, value)
{
    x <- NextMethod("dimnames<-")
    dnx <- x$dimnames
    if (!is.null(dnx))
        names(dnx) <- c("Docs", "Terms")
    x$dimnames <- dnx
    x
}

`dimnames<-.TermDocumentMatrix` <- function(x, value)
{
    x <- NextMethod("dimnames<-")
    dnx <- x$dimnames
    if (!is.null(dnx))
        names(dnx) <- c("Terms", "Docs")
    x$dimnames <- dnx
    x
}

## Accessors for the number of documents and terms.
nDocs <- function(x) UseMethod("nDocs")
nTerms <- function(x) UseMethod("nTerms")
nDocs.DocumentTermMatrix <-
nTerms.TermDocumentMatrix <- function(x) x$nrow
nDocs.TermDocumentMatrix <-
nTerms.DocumentTermMatrix <- function(x) x$ncol

## Accessors for document and term labels; NA labels if dimnames unset.
Docs <- function(x) UseMethod("Docs")
Terms <- function(x) UseMethod("Terms")
Docs.DocumentTermMatrix <-
Terms.TermDocumentMatrix <- function(x)
{
    s <- x$dimnames[[1L]]
    if (is.null(s))
        s <- rep.int(NA_character_, x$nrow)
    s
}
Docs.TermDocumentMatrix <-
Terms.DocumentTermMatrix <- function(x)
{
    s <- x$dimnames[[2L]]
    if (is.null(s))
        s <- rep.int(NA_character_, x$ncol)
    s
}

## c() for term-frequency vectors: coerce each to a TDM and combine.
c.term_frequency <- function(..., recursive = FALSE)
{
    do.call("c", lapply(list(...), as.TermDocumentMatrix))
}

## Combine term-document matrices: documents are concatenated
## column-wise, the term sets are unified.
c.TermDocumentMatrix <- function(..., recursive = FALSE)
{
    m <- lapply(list(...), as.TermDocumentMatrix)
    if (length(m) == 1L)
        return(m[[1L]])
    weighting <- attr(m[[1L]], "weighting")
    allTermsNonUnique <- unlist(lapply(m, function(x) Terms(x)[x$i]))
    allTerms <- unique(allTermsNonUnique)
    allDocs <- unlist(lapply(m, Docs))
    ## Offset the column (document) indices of each matrix by the number
    ## of documents in all preceding matrices.
    cs <- cumsum(lapply(m, nDocs))
    cs <- c(0, cs[-length(cs)])
    j <- lapply(m, "[[", "j")
    m <- simple_triplet_matrix(i = match(allTermsNonUnique, allTerms),
                               j = unlist(j) + rep.int(cs, lengths(j)),
                               v = unlist(lapply(m, "[[", "v")),
                               nrow = length(allTerms),
                               ncol = length(allDocs),
                               dimnames = list(Terms = allTerms,
                                               Docs = allDocs))
    ##
    ## - We assume that all arguments have the same weighting
    ## - Even if all matrices have the same input weighting it might be
    ##   necessary to take additional steps (e.g., normalization for tf-idf or
    ##   check for (0,1)-range for binary tf)
    ##
    .TermDocumentMatrix(m, weighting)
}

c.DocumentTermMatrix <- function(..., recursive = FALSE)
{
    t(do.call("c", lapply(list(...), as.TermDocumentMatrix)))
}

## Return the terms whose overall frequency lies in [lowfreq, highfreq].
findFreqTerms <- function(x, lowfreq = 0, highfreq = Inf)
{
    stopifnot(inherits(x, c("DocumentTermMatrix", "TermDocumentMatrix")),
              is.numeric(lowfreq), is.numeric(highfreq))
    if (inherits(x, "DocumentTermMatrix"))
        x <- t(x)
    rs <- row_sums(x)
    names(rs[rs >= lowfreq & rs <= highfreq])
}

## Find terms whose correlation with the given terms is at least corlimit.
findAssocs <- function(x, terms, corlimit)
    UseMethod("findAssocs", x)
findAssocs.TermDocumentMatrix <- function(x, terms, corlimit)
    findAssocs(t(x), terms, corlimit)
findAssocs.DocumentTermMatrix <- function(x, terms, corlimit)
{
    stopifnot(is.character(terms), is.numeric(corlimit),
              corlimit >= 0, corlimit <= 1)
    j <- match(unique(terms), Terms(x), nomatch = 0L)
    ## cor() may warn for constant columns; those entries drop out below.
    suppressWarnings(
        findAssocs(crossapply_simple_triplet_matrix(x[, j], x[, -j], cor),
                   terms,
                   rep_len(corlimit, length(terms))))
}
findAssocs.matrix <- function(x, terms, corlimit)
{
## Correlations are only defined on numeric matrices.
    stopifnot(is.numeric(x))
    i <- match(terms, rownames(x), nomatch = 0L)
    names(i) <- terms
    ## Per requested term: keep associations reaching that term's limit,
    ## rounded to two digits and sorted decreasingly.
    Map(function(i, cl) {
        xi <- x[i, ]
        t <- sort(round(xi[which(xi >= cl)], 2), TRUE)
        if (!length(t))
            names(t) <- NULL
        t
    }, i, corlimit)
}

## Remove terms whose sparse factor (fraction of documents not
## containing the term) exceeds the threshold 'sparse' in (0, 1).
removeSparseTerms <- function(x, sparse)
{
    stopifnot(inherits(x, c("DocumentTermMatrix", "TermDocumentMatrix")),
              is.numeric(sparse), sparse > 0, sparse < 1)
    m <- if (inherits(x, "DocumentTermMatrix")) t(x) else x
    ## A term is kept iff it occurs in more than ncol * (1 - sparse) docs.
    t <- table(m$i) > m$ncol * (1 - sparse)
    termIndex <- as.numeric(names(t[t]))
    if (inherits(x, "DocumentTermMatrix"))
        x[, termIndex]
    else
        x[termIndex, ]
}

## Extract a sample containing the 'size' most frequent terms and
## documents (each sorted by name).
sample.TermDocumentMatrix <- function(x, size = 10)
{
    stopifnot(inherits(x, c("DocumentTermMatrix", "TermDocumentMatrix")),
              is.numeric(size), size >= 0)
    m <- if (inherits(x, "DocumentTermMatrix")) t(x) else x
    ## 0:k indexing also copes with an empty matrix (index 0 is dropped).
    terms <- sort(names(sort(row_sums(m), decreasing = TRUE)
                        [0:min(size, nTerms(m))]))
    docs <- sort(names(sort(col_sums(m), decreasing = TRUE)
                       [0:min(size, nDocs(m))]))
    if (inherits(x, "DocumentTermMatrix"))
        x[docs, terms]
    else
        x[terms, docs]
}

## Attach one category id per document and class the result accordingly.
CategorizedDocumentTermMatrix <- function(x, c)
{
    if (inherits(x, "TermDocumentMatrix"))
        x <- t(x)
    else if (!inherits(x, "DocumentTermMatrix"))
        stop("wrong class")
    if (length(c) != nDocs(x))
        stop("invalid category ids")
    attr(x, "Category") <- c
    class(x) <- c("CategorizedDocumentTermMatrix",
                  DocumentTermMatrix_classes)
    x
}

## Find the n most frequent terms.
findMostFreqTerms <- function(x, n = 6L, ...)
    UseMethod("findMostFreqTerms")

findMostFreqTerms.term_frequency <- function(x, n = 6L, ...)
{
    y <- x[order(x, decreasing = TRUE)[seq_len(n)]]
    ## Keep only strictly positive frequencies.
    y[y > 0]
}

findMostFreqTerms.DocumentTermMatrix <- function(x, n = 6L, INDEX = NULL, ...)
{ terms <- Terms(x) if (!is.null(INDEX)) x <- rollup(x, 1L, INDEX) f <- factor(x$i, seq_len(x$nrow)) js <- split(x$j, f) vs <- split(x$v, f) y <- Map(function(j, v, n) { p <- order(v, decreasing = TRUE)[seq_len(n)] v <- v[p] names(v) <- terms[j[p]] v }, js, vs, pmin(lengths(vs), n)) names(y) <- x$dimnames[[1L]] y } findMostFreqTerms.TermDocumentMatrix <- function(x, n = 6L, INDEX = NULL, ...) { terms <- Terms(x) if (!is.null(INDEX)) x <- rollup(x, 2L, INDEX) f <- factor(x$j, seq_len(x$ncol)) is <- split(x$i, f) vs <- split(x$v, f) y <- Map(function(i, v, n) { p <- order(v, decreasing = TRUE)[seq_len(n)] v <- v[p] names(v) <- terms[i[p]] v }, is, vs, pmin(lengths(vs), n)) names(y) <- x$dimnames[[2L]] y } tm/R/reader.R0000644000175100001440000001630513177046106012510 0ustar hornikusers## Author: Ingo Feinerer ## Readers FunctionGenerator <- function(x) { class(x) <- c("FunctionGenerator", "function") x } getReaders <- function() c("readDataframe", "readDOC", "readPDF", "readPlain", "readRCV1", "readRCV1asPlain", "readReut21578XML", "readReut21578XMLasPlain", "readTagged", "readXML") prepareReader <- function(readerControl, reader = NULL, ...) { if (is.null(readerControl$reader)) readerControl$reader <- reader if (inherits(readerControl$reader, "FunctionGenerator")) readerControl$reader <- readerControl$reader(...) 
if (is.null(readerControl$language)) readerControl$language <- "en" readerControl } processURI <- function(uri) { uri <- as.character(uri) if (identical(substr(uri, 1, 7), "file://")) uri <- substr(uri, 8, nchar(uri)) uri } readDataframe <- function(elem, language, id) { PlainTextDocument(elem$content[, "text"], id = elem$content[, "doc_id"], language = language) } # readDOC needs antiword installed to be able to extract the text readDOC <- function(engine = c("antiword", "executable"), AntiwordOptions = "") { stopifnot(is.character(engine), is.character(AntiwordOptions)) engine <- match.arg(engine) antiword <- switch(engine, antiword = antiword::antiword, executable = function(x) system2("antiword", c(AntiwordOptions, shQuote(normalizePath(x))), stdout = TRUE)) if (!is.function(antiword)) stop("invalid function for DOC extraction") function(elem, language, id) { uri <- processURI(elem$uri) content <- antiword(uri) PlainTextDocument(content, id = basename(elem$uri), language = language) } } class(readDOC) <- c("FunctionGenerator", "function") readPDF <- function(engine = c("pdftools", "xpdf", "Rpoppler", "ghostscript", "Rcampdf", "custom"), control = list(info = NULL, text = NULL)) { stopifnot(is.character(engine), is.list(control)) engine <- match.arg(engine) pdf_info <- switch(engine, pdftools = function(x) { i <- pdftools::pdf_info(x) c(i$keys, list(CreationDate = i$created)) }, xpdf = function(x) pdf_info_via_xpdf(x, control$info), Rpoppler = Rpoppler::PDF_info, ghostscript = pdf_info_via_gs, Rcampdf = Rcampdf::pdf_info, custom = control$info) pdf_text <- switch(engine, pdftools = pdftools::pdf_text, xpdf = function(x) system2("pdftotext", c(control$text, shQuote(x), "-"), stdout = TRUE), Rpoppler = Rpoppler::PDF_text, ghostscript = pdf_text_via_gs, Rcampdf = Rcampdf::pdf_text, custom = control$text) if (!is.function(pdf_info) || !is.function(pdf_text)) stop("invalid function for PDF extraction") function(elem, language, id) { uri <- processURI(elem$uri) meta 
<- pdf_info(uri) content <- pdf_text(uri) PlainTextDocument(content, meta$Author, meta$CreationDate, meta$Subject, meta$Title, basename(elem$uri), language, meta$Creator) } } class(readPDF) <- c("FunctionGenerator", "function") readPlain <- function(elem, language, id) { if (!is.null(elem$uri)) id <- basename(elem$uri) PlainTextDocument(elem$content, id = id, language = language) } readXML <- function(spec, doc) { stopifnot(is.list(spec), inherits(doc, "TextDocument")) function(elem, language, id) { content <- elem$content node <- if(inherits(content, "xml_node")) content else if(is.character(content)) read_xml(paste(elem$content, collapse = "\n")) else read_xml(content) content(doc) <- if ("content" %in% names(spec)) .xml_content(node, spec[["content"]]) else node for (n in setdiff(names(spec), "content")) meta(doc, n) <- .xml_content(node, spec[[n]]) if (!is.null(elem$uri)) id <- basename(elem$uri) if (!length(meta(doc, "id"))) meta(doc, "id") <- as.character(id) if (!length(meta(doc, "language"))) meta(doc, "language") <- as.character(language) doc } } class(readXML) <- c("FunctionGenerator", "function") RCV1Spec <- list(author = list("unevaluated", ""), datetimestamp = list("function", function(node) as.POSIXlt(xml_text(xml_find_all(node, "@date")), tz = "GMT")), description = list("unevaluated", ""), heading = list("node", "title"), id = list("node", "@itemid"), origin = list("unevaluated", "Reuters Corpus Volume 1"), publisher = list("node", "metadata/dc[@element='dc.publisher']/@value"), topics = list("node", "metadata/codes[@class='bip:topics:1.0']/code/@code"), industries = list("node", "metadata/codes[@class='bip:industries:1.0']/code/@code"), countries = list("node", "metadata/codes[@class='bip:countries:1.0']/code/@code")) readRCV1 <- readXML(spec = RCV1Spec, doc = XMLTextDocument()) readRCV1asPlain <- readXML(spec = c(RCV1Spec, list(content = list("node", "text"))), doc = PlainTextDocument()) Reut21578XMLSpec <- list(author = list("node", 
"TEXT/AUTHOR"), datetimestamp = list("function", function(node) strptime(xml_text(xml_find_all(node, "DATE")), format = "%d-%B-%Y %H:%M:%S", tz = "GMT")), description = list("unevaluated", ""), heading = list("node", "TEXT/TITLE"), id = list("node", "@NEWID"), topics = list("node", "@TOPICS"), lewissplit = list("node", "@LEWISSPLIT"), cgisplit = list("node", "@CGISPLIT"), oldid = list("node", "@OLDID"), origin = list("unevaluated", "Reuters-21578 XML"), topics_cat = list("node", "TOPICS/D"), places = list("node", "PLACES/D"), people = list("node", "PEOPLE/D"), orgs = list("node", "ORGS/D"), exchanges = list("node", "EXCHANGES/D")) readReut21578XML <- readXML(spec = Reut21578XMLSpec, doc = XMLTextDocument()) readReut21578XMLasPlain <- readXML(spec = c(Reut21578XMLSpec, list(content = list("node", "TEXT/BODY"))), doc = PlainTextDocument()) readTagged <- function(...) { args <- list(...) function(elem, language, id) { if (!is.null(elem$content)) { con <- textConnection(elem$content) on.exit(close(con)) } else con <- elem$uri if (!is.null(elem$uri)) id <- basename(elem$uri) a <- c(list(con = con, meta = list(id = id, language = language)), args) do.call(TaggedTextDocument, a) } } class(readTagged) <- c("FunctionGenerator", "function") tm/R/tokenizer.R0000644000175100001440000000150513176125210013245 0ustar hornikusersgetTokenizers <- function() c("Boost_tokenizer", "MC_tokenizer", "scan_tokenizer") # http://www.boost.org Boost_tokenizer <- Token_Tokenizer(function(x) enc2utf8(Boost_Tokenizer(as.character(x)))) # http://www.cs.utexas.edu/users/dml/software/mc/ MC_tokenizer <- Token_Tokenizer(function(x) { x <- as.character(x) ASCII_letters <- "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" id <- sprintf("[%s]+", ASCII_letters) http <- sprintf("(http://%s(\\.%s)*)", id, id) email <- sprintf("(%s@%s(\\.%s)*)", id, id, id) http_or_email <- sprintf("%s|%s", http, email) c(unlist(regmatches(x, gregexpr(http_or_email, x))), unlist(strsplit(gsub(http_or_email, "", x), 
sprintf("[^%s]", ASCII_letters)))) }) scan_tokenizer <- Token_Tokenizer(function(x) .Call(`_tm_scan`, x, 0L)) tm/vignettes/0000755000175100001440000000000013204065716012724 5ustar hornikuserstm/vignettes/extensions.Rnw0000644000175100001440000002727113177024075015626 0ustar hornikusers\documentclass[a4paper]{article} \usepackage[margin=2cm]{geometry} \usepackage[round]{natbib} \usepackage{url} \newcommand{\acronym}[1]{\textsc{#1}} \newcommand{\pkg}[1]{{\normalfont\fontseries{b}\selectfont #1}} \newcommand{\proglang}[1]{\textsf{#1}} \let\code\texttt %% \VignetteIndexEntry{Extensions} \begin{document} <>= library("tm") library("xml2") @ \title{Extensions\\How to Handle Custom File Formats} \author{Ingo Feinerer} \maketitle \section*{Introduction} The possibility to handle custom file formats is a substantial feature in any modern text mining infrastructure. \pkg{tm} has been designed aware of this aspect from the beginning on, and has modular components which allow for extensions. A general explanation of \pkg{tm}'s extension mechanism is described by~\citet[Sec.~3.3]{Feinerer_etal_2008}, with an updated description as follows. \section*{Sources} A source abstracts input locations and provides uniform methods for access. Each source must provide implementations for following interface functions: \begin{description} \item[close()] closes the source and returns it, \item[eoi()] returns \code{TRUE} if the end of input of the source is reached, \item[getElem()] fetches the element at the current position, \item[length()] gives the number of elements, \item[open()] opens the source and returns it, \item[reader()] returns a default reader for processing elements, \item[pGetElem()] (optional) retrieves all elements in parallel at once, and \item[stepNext()] increases the position in the source to the next element. 
\end{description} Retrieved elements must be encapsulated in a list with the named components \code{content} holding the document and \code{uri} pointing to the origin of the document (e.g., a file path or a \acronym{URL}; \code{NULL} if not applicable or unavailable). Custom sources are required to inherit from the virtual base class \code{Source} and typically do so by extending the functionality provided by the simple reference implementation \code{SimpleSource}. E.g., a simple source which accepts an \proglang{R} vector as input could be defined as <>= VecSource <- function(x) SimpleSource(length = length(x), content = as.character(x), class = "VecSource") @ which overrides a few defaults (see \code{?SimpleSource} for defaults) and stores the vector in the \code{content} component. The functions \code{close()}, \code{eoi()}, \code{open()}, and \code{stepNext()} have reasonable default methods already for the \code{SimpleSource} class: the identity function for \code{open()} and \code{close()}, incrementing a position counter for \code{stepNext()}, and comparing the current position with the number of available elements as claimed by \code{length()} for \code{eoi()}, respectively. So we only need custom methods for element access: <>= getElem.VecSource <- function(x) list(content = x$content[x$position], uri = NULL) pGetElem.VecSource <- function(x) lapply(x$content, function(y) list(content = y, uri = NULL)) @ \section*{Readers} Readers are functions for extracting textual content and metadata out of elements delivered by a source and for constructing a text document. Each reader must accept following arguments in its signature: \begin{description} \item[elem] a list with the named components \code{content} and \code{uri} (as delivered by a source via \code{getElem()} or \code{pGetElem()}), \item[language] a string giving the language, and \item[id] a character giving a unique identifier for the created text document. 
\end{description} The element \code{elem} is typically provided by a source whereas the language and the identifier are normally provided by a corpus constructor (for the case that \code{elem\$content} does not give information on these two essential items). In case a reader expects configuration arguments we can use a function generator. A function generator is indicated by inheriting from class \code{FunctionGenerator} and \code{function}. It allows us to process additional arguments, store them in an environment, return a reader function with the well-defined signature described above, and still be able to access the additional arguments via lexical scoping. All corpus constructors in package \pkg{tm} check the reader function for being a function generator and if so apply it to yield the reader with the expected signature. E.g., the reader function \code{readPlain()} is defined as <>= readPlain <- function(elem, language, id) PlainTextDocument(elem$content, id = id, language = language) @ For examples on readers using the function generator please have a look at \code{?readDOC} or \code{?readPDF}. However, for many cases, it is not necessary to define each detailed aspect of how to extend \pkg{tm}. Typical examples are \acronym{XML} files which are very common but can be rather easily handled via standard conforming \acronym{XML} parsers. The aim of the remainder in this document is to give an overview on how simpler, more user-friendly, forms of extension mechanisms can be applied in \pkg{tm}. \section*{Custom Data Formats} A general situation is that you have gathered together some information into a tabular data structure (like a data frame or a list matrix) that suffices to describe documents in a corpus. However, you do not have a distinct file format because you extracted the information out of various resources, e.g., as delivered by \code{readtext()} in package \pkg{readtext}. 
Now you want to use your information to build a corpus which is recognized by \pkg{tm}. We assume that your information is put together in a data frame. E.g., consider the following example: <>= df <- data.frame(doc_id = c("doc 1" , "doc 2" , "doc 3" ), text = c("content 1", "content 2", "content 3"), title = c("title 1" , "title 2" , "title 3" ), authors = c("author 1" , "author 2" , "author 3" ), topics = c("topic 1" , "topic 2" , "topic 3" ), stringsAsFactors = FALSE) @ We want to map the data frame rows to the relevant entries of a text document. An entry \code{text} in the mapping will be matched to fill the actual content of the text document, \code{doc\_id} will be used as document ID, all other fields will be used as metadata tags. So we can construct a corpus out of the data frame: <<>>= (corpus <- Corpus(DataframeSource(df))) corpus[[1]] meta(corpus[[1]]) @ \section*{Custom XML Sources} Many modern file formats already come in \acronym{XML} format which allows to extract information with any \acronym{XML} conforming parser, e.g., as implemented in \proglang{R} by the \pkg{xml2} package. Now assume we have some custom \acronym{XML} format which we want to access with \pkg{tm}. Then a viable way is to create a custom \acronym{XML} source which can be configured with only a few commands. E.g., have a look at the following example: <>= custom.xml <- system.file("texts", "custom.xml", package = "tm") print(readLines(custom.xml), quote = FALSE) @ As you see there is a top-level tag stating that there is a corpus, and several document tags below. In fact, this structure is very common in \acronym{XML} files found in text mining applications (e.g., both the Reuters-21578 and the Reuters Corpus Volume 1 data sets follow this general scheme). 
In \pkg{tm} we expect a source to deliver self-contained blocks of information to a reader function, each block containing all information necessary such that the reader can construct a (subclass of a) \code{TextDocument} from it. The \code{XMLSource()} function can now be used to construct a custom \acronym{XML} source. It has three arguments: \begin{description} \item[x] a character giving a uniform resource identifier, \item[parser] a function accepting an \acronym{XML} document (as delivered by \code{read\_xml()} in package \pkg{xml2}) as input and returning a \acronym{XML} elements/nodes (each element/node will then be delivered to the reader as a self-contained block), \item[reader] a reader function capable of turning \acronym{XML} elements/nodes as returned by the parser into a subclass of \code{TextDocument}. \end{description} E.g., a custom source which can cope with our custom \acronym{XML} format could be: <>= mySource <- function(x) XMLSource(x, parser = xml2::xml_children, reader = myXMLReader) @ As you notice in this example we also provide a custom reader function (\code{myXMLReader}). See the next section for details. \section*{Custom XML Readers} As we saw in the previous section we often need a custom reader function to extract information out of \acronym{XML} chunks (typically as delivered by some source). Fortunately, \pkg{tm} provides an easy way to define custom \acronym{XML} reader functions. All you need to do is to provide a so-called \emph{specification}. 
Let us start with an example which defines a reader function for the file format from the previous section: <>= myXMLReader <- readXML( spec = list(author = list("node", "writer"), content = list("node", "description"), datetimestamp = list("function", function(x) as.POSIXlt(Sys.time(), tz = "GMT")), description = list("node", "@short"), heading = list("node", "caption"), id = list("function", function(x) tempfile()), origin = list("unevaluated", "My private bibliography"), type = list("node", "type")), doc = PlainTextDocument()) @ Formally, \code{readXML()} is the relevant function which constructs a reader. The customization is done via the first argument \code{spec}, the second provides an empty instance of the document which should be returned (augmented with the extracted information out of the \acronym{XML} chunks). The specification must consist of a named list of lists each containing two character vectors. The constructed reader will map each list entry to the content or a metadatum of the text document as specified by the named list entry. Valid names include \code{content} to access the document's content, and character strings which are mapped to metadata entries. Each list entry must consist of two character vectors: the first describes the type of the second argument, and the second is the specification entry. Valid combinations are: \begin{description} \item[\code{type = "node", spec = "XPathExpression"}] the XPath (1.0) expression \code{spec} extracts information out of an \acronym{XML} node (as seen for \code{author}, \code{content}, \code{description}, \code{heading}, and \code{type} in our example specification). \item[\code{type = "function", spec = function(doc) \ldots}] The function \code{spec} is called, passing over the \acronym{XML} document (as delivered by \code{read\_xml()} from package \pkg{xml2}) as first argument (as seen for \code{datetimestamp} and \code{id}). 
As you notice in our example nobody forces us to actually use the passed over document, instead we can do anything we want (e.g., create a unique character vector via \code{tempfile()} to have a unique identification string). \item[\code{type = "unevaluated", spec = "String"}] the character vector \code{spec} is returned without modification (e.g., \code{origin} in our specification). \end{description} Now that we have all we need to cope with our custom file format, we can apply the source and reader function at any place in \pkg{tm} where a source or reader is expected, respectively. E.g., <<>>= corpus <- VCorpus(mySource(custom.xml)) @ constructs a corpus out of the information in our \acronym{XML} file: <<>>= corpus[[1]] meta(corpus[[1]]) @ \bibliographystyle{abbrvnat} \bibliography{references} \end{document} tm/vignettes/tm.Rnw0000644000175100001440000003350513155253051014036 0ustar hornikusers\documentclass[a4paper]{article} \usepackage[margin=2cm]{geometry} \usepackage[utf8]{inputenc} \usepackage[round]{natbib} \usepackage{url} \newcommand{\acronym}[1]{\textsc{#1}} \newcommand{\class}[1]{\mbox{\textsf{#1}}} \newcommand{\code}[1]{\mbox{\texttt{#1}}} \newcommand{\pkg}[1]{{\normalfont\fontseries{b}\selectfont #1}} \newcommand{\proglang}[1]{\textsf{#1}} %% \VignetteIndexEntry{Introduction to the tm Package} \begin{document} <>= library("tm") data("crude") @ \title{Introduction to the \pkg{tm} Package\\Text Mining in \proglang{R}} \author{Ingo Feinerer} \maketitle \section*{Introduction} This vignette gives a short introduction to text mining in \proglang{R} utilizing the text mining framework provided by the \pkg{tm} package. We present methods for data import, corpus handling, preprocessing, metadata management, and creation of term-document matrices. 
Our focus is on the main aspects of getting started with text mining in \proglang{R}---an in-depth description of the text mining infrastructure offered by \pkg{tm} was published in the \emph{Journal of Statistical Software}~\citep{Feinerer_etal_2008}. An introductory article on text mining in \proglang{R} was published in \emph{R News}~\citep{Rnews:Feinerer:2008}. \section*{Data Import} The main structure for managing documents in \pkg{tm} is a so-called \class{Corpus}, representing a collection of text documents. A corpus is an abstract concept, and there can exist several implementations in parallel. The default implementation is the so-called \class{VCorpus} (short for \emph{Volatile Corpus}) which realizes a semantics as known from most \proglang{R} objects: corpora are \proglang{R} objects held fully in memory. We denote this as volatile since once the \proglang{R} object is destroyed, the whole corpus is gone. Such a volatile corpus can be created via the constructor \code{VCorpus(x, readerControl)}. Another implementation is the \class{PCorpus} which implements a \emph{Permanent Corpus} semantics, i.e., the documents are physically stored outside of \proglang{R} (e.g., in a database), corresponding \proglang{R} objects are basically only pointers to external structures, and changes to the underlying corpus are reflected to all \proglang{R} objects associated with it. Compared to the volatile corpus the corpus encapsulated by a permanent corpus object is not destroyed if the corresponding \proglang{R} object is released. Within the corpus constructor, \code{x} must be a \class{Source} object which abstracts the input location. \pkg{tm} provides a set of predefined sources, e.g., \class{DirSource}, \class{VectorSource}, or \class{DataframeSource}, which handle a directory, a vector interpreting each component as document, or data frame like structures (like \acronym{CSV} files), respectively. 
Except \class{DirSource}, which is designed solely for directories on a file system, and \class{VectorSource}, which only accepts (character) vectors, most other implemented sources can take connections as input (a character string is interpreted as file path). \code{getSources()} lists available sources, and users can create their own sources. The second argument \code{readerControl} of the corpus constructor has to be a list with the named components \code{reader} and \code{language}. The first component \code{reader} constructs a text document from elements delivered by a source. The \pkg{tm} package ships with several readers (e.g., \code{readPlain()}, \code{readPDF()}, \code{readDOC()}, \ldots). See \code{getReaders()} for an up-to-date list of available readers. Each source has a default reader which can be overridden. E.g., for \code{DirSource} the default just reads in the input files and interprets their content as text. Finally, the second component \code{language} sets the texts' language (preferably using \acronym{ISO} 639-2 codes). In case of a permanent corpus, a third argument \code{dbControl} has to be a list with the named components \code{dbName} giving the filename holding the sourced out objects (i.e., the database), and \code{dbType} holding a valid database type as supported by package \pkg{filehash}. Activated database support reduces the memory demand, however, access gets slower since each operation is limited by the hard disk's read and write capabilities. 
So e.g., plain text files in the directory \code{txt} containing Latin (\code{lat}) texts by the Roman poet \emph{Ovid} can be read in with following code: <>= txt <- system.file("texts", "txt", package = "tm") (ovid <- VCorpus(DirSource(txt, encoding = "UTF-8"), readerControl = list(language = "lat"))) @ For simple examples \code{VectorSource} is quite useful, as it can create a corpus from character vectors, e.g.: <>= docs <- c("This is a text.", "This another one.") VCorpus(VectorSource(docs)) @ Finally we create a corpus for some Reuters documents as example for later use: <>= reut21578 <- system.file("texts", "crude", package = "tm") reuters <- VCorpus(DirSource(reut21578, mode = "binary"), readerControl = list(reader = readReut21578XMLasPlain)) @ \section*{Data Export} For the case you have created a corpus via manipulating other objects in \proglang{R}, thus do not have the texts already stored on a hard disk, and want to save the text documents to disk, you can simply use \code{writeCorpus()} <>= writeCorpus(ovid) @ which writes a character representation of the documents in a corpus to multiple files on disk. \section*{Inspecting Corpora} Custom \code{print()} methods are available which hide the raw amount of information (consider a corpus could consist of several thousand documents, like a database). \code{print()} gives a concise overview whereas more details are displayed with \code{inspect()}. <<>>= inspect(ovid[1:2]) @ Individual documents can be accessed via \code{[[}, either via the position in the corpus, or via their identifier. <>= meta(ovid[[2]], "id") identical(ovid[[2]], ovid[["ovid_2.txt"]]) @ A character representation of a document is available via \code{as.character()} which is also used when inspecting a document: <>= inspect(ovid[[2]]) lapply(ovid[1:2], as.character) @ \section*{Transformations} Once we have a corpus we typically want to modify the documents in it, e.g., stemming, stopword removal, et cetera. 
In \pkg{tm}, all this functionality is subsumed into the concept of a \emph{transformation}. Transformations are done via the \code{tm\_map()} function which applies (maps) a function to all elements of the corpus. Basically, all transformations work on single text documents and \code{tm\_map()} just applies them to all documents in a corpus. \subsection*{Eliminating Extra Whitespace} Extra whitespace is eliminated by: <<>>= reuters <- tm_map(reuters, stripWhitespace) @ \subsection*{Convert to Lower Case} Conversion to lower case by: <<>>= reuters <- tm_map(reuters, content_transformer(tolower)) @ We can use arbitrary character processing functions as transformations as long as the function returns a text document. In this case we use \code{content\_transformer()} which provides a convenience wrapper to access and set the content of a document. Consequently most text manipulation functions from base \proglang{R} can directly be used with this wrapper. This works for \code{tolower()} as used here but also e.g.\ for \code{gsub()} which comes quite handy for a broad range of text manipulation tasks. \subsection*{Remove Stopwords} Removal of stopwords by: <>= reuters <- tm_map(reuters, removeWords, stopwords("english")) @ \subsection*{Stemming} Stemming is done by: <>= tm_map(reuters, stemDocument) @ \section*{Filters} Often it is of special interest to filter out documents satisfying given properties. For this purpose the function \code{tm\_filter} is designed. It is possible to write custom filter functions which get applied to each document in the corpus. Alternatively, we can create indices based on selections and subset the corpus with them. E.g., the following statement filters out those documents having an \code{ID} equal to \code{"237"} and the string \code{"INDONESIA SEEN AT CROSSROADS OVER ECONOMIC CHANGE"} as their heading. 
<<>>= idx <- meta(reuters, "id") == '237' & meta(reuters, "heading") == 'INDONESIA SEEN AT CROSSROADS OVER ECONOMIC CHANGE' reuters[idx] @ \section*{Metadata Management} Metadata is used to annotate text documents or whole corpora with additional information. The easiest way to accomplish this with \pkg{tm} is to use the \code{meta()} function. A text document has a few predefined attributes like \code{author} but can be extended with an arbitrary number of additional user-defined metadata tags. These additional metadata tags are individually attached to a single text document. From a corpus perspective these metadata attachments are locally stored together with each individual text document. Alternatively to \code{meta()} the function \code{DublinCore()} provides a full mapping between Simple Dublin Core metadata and \pkg{tm} metadata structures and can be similarly used to get and set metadata information for text documents, e.g.: <>= DublinCore(crude[[1]], "Creator") <- "Ano Nymous" meta(crude[[1]]) @ For corpora the story is a bit more sophisticated. Corpora in \pkg{tm} have two types of metadata: one is the metadata on the corpus level (\code{corpus}), the other is the metadata related to the individual documents (\code{indexed}) in form of a data frame. The latter is often done for performance reasons (hence the named \code{indexed} for indexing) or because the metadata has an own entity but still relates directly to individual text documents, e.g., a classification result; the classifications directly relate to the documents but the set of classification levels forms an own entity. 
Both cases can be handled with \code{meta()}: <<>>= meta(crude, tag = "test", type = "corpus") <- "test meta" meta(crude, type = "corpus") meta(crude, "foo") <- letters[1:20] meta(crude) @ \section*{Standard Operators and Functions} Many standard operators and functions (\code{[}, \code{[<-}, \code{[[}, \code{[[<-}, \code{c()}, \code{lapply()}) are available for corpora with semantics similar to standard \proglang{R} routines. E.g., \code{c()} concatenates two (or more) corpora. Applied to several text documents it returns a corpus. The metadata is automatically updated, if corpora are concatenated (i.e., merged). \section*{Creating Term-Document Matrices} A common approach in text mining is to create a term-document matrix from a corpus. In the \pkg{tm} package the classes \class{TermDocumentMatrix} and \class{DocumentTermMatrix} (depending on whether you want terms as rows and documents as columns, or vice versa) employ sparse matrices for corpora. Inspecting a term-document matrix displays a sample, whereas \code{as.matrix()} yields the full matrix in dense format (which can be very memory consuming for large matrices). <<>>= dtm <- DocumentTermMatrix(reuters) inspect(dtm) @ \section*{Operations on Term-Document Matrices} Besides the fact that on this matrix a huge amount of \proglang{R} functions (like clustering, classifications, etc.) can be applied, this package brings some shortcuts. Imagine we want to find those terms that occur at least five times, then we can use the \code{findFreqTerms()} function: <<>>= findFreqTerms(dtm, 5) @ Or we want to find associations (i.e., terms which correlate) with at least $0.8$ correlation for the term \code{opec}, then we use \code{findAssocs()}: <<>>= findAssocs(dtm, "opec", 0.8) @ Term-document matrices tend to get very big already for normal sized data sets. Therefore we provide a method to remove \emph{sparse} terms, i.e., terms occurring only in very few documents. 
Normally, this reduces the matrix dramatically without losing significant relations inherent to the matrix: <<>>= inspect(removeSparseTerms(dtm, 0.4)) @ This function call removes those terms for which at least 40 percent of the elements are sparse (i.e., the term occurs 0 times in the corresponding document). \section*{Dictionary} A dictionary is a (multi-)set of strings. It is often used to denote relevant terms in text mining. We represent a dictionary with a character vector which may be passed to the \code{DocumentTermMatrix()} constructor as a control argument. Then the created matrix is tabulated against the dictionary, i.e., only terms from the dictionary appear in the matrix. This allows restricting the dimension of the matrix a priori and focusing on specific terms for distinct text mining contexts, e.g., <<>>= inspect(DocumentTermMatrix(reuters, list(dictionary = c("prices", "crude", "oil")))) @ \section*{Performance} Often you do not need all the generality, modularity and full range of features offered by \pkg{tm} as this sometimes comes at the price of performance. \class{SimpleCorpus} provides a corpus which is optimized for the most common usage scenario: importing plain texts from files in a directory or directly from a vector in \proglang{R}, preprocessing and transforming the texts, and finally exporting them to a term-document matrix. The aim is to boost performance and minimize memory pressure. It loads all documents into memory, and is designed for medium-sized to large data sets. 
However, it operates only under the following constraints: \begin{itemize} \item only \code{DirSource} and \code{VectorSource} are supported, \item no custom readers, i.e., each document is read in and stored as plain text (as a string, i.e., a character vector of length one), \item transformations applied via \code{tm\_map} must be able to process strings and return strings, \item no lazy transformations in \code{tm\_map}, \item no meta data for individual documents (i.e., no \code{"local"} in \code{meta()}). \end{itemize} \bibliographystyle{abbrvnat} \bibliography{references} \end{document} tm/vignettes/references.bib0000644000175100001440000000131311704521032015510 0ustar hornikusers@Article{Feinerer_etal_2008, author = {Ingo Feinerer and Kurt Hornik and David Meyer}, title = {Text Mining Infrastructure in {R}}, journal = {Journal of Statistical Software}, volume = 25, number = 5, pages = {1--54}, month = {March}, year = 2008, issn = {1548-7660}, coden = {JSSOBK}, url = {http://www.jstatsoft.org/v25/i05} } @Article{Rnews:Feinerer:2008, author = {Ingo Feinerer}, title = {An Introduction to Text Mining in {R}}, journal = {R News}, year = 2008, volume = 8, number = 2, pages = {19--22}, month = oct, url = {http://CRAN.R-project.org/doc/Rnews/}, pdf = {http://CRAN.R-project.org/doc/Rnews/Rnews_2008-2.pdf} } tm/MD50000644000175100001440000002625013204066220011220 0ustar hornikusersa0f88c7b6fb630f3505bc8ad42782a37 *DESCRIPTION 98ace665611aa6c2c6a431bb55cb5896 *NAMESPACE 5e8b4bb98870e34f2acdf214cb680794 *R/RcppExports.R 510a666ecf5fd65f67753e356193c9c0 *R/complete.R fe2db16e30315af31a01704b60c718f5 *R/corpus.R c1ac8a79992c42d3ec695df39b8c3bc9 *R/doc.R 203f9e89ba00c1991233a389aa5577a6 *R/filter.R b205235d27368949ee5ea0dd3a10b9d7 *R/foreign.R cb5367e831c1be819b9773304985724a *R/hpc.R 5caa4b8bbd9f0f2e4b09f8af8f1a6f67 *R/matrix.R c36f8ed69c326c2b027a670d2662e1d1 *R/meta.R 07d1407f6cfdbdbb6060ebfb11f97f6f *R/pdftools.R b9cd19804a89de8eca51394726256e68 *R/plot.R 
fd701389b291a843584167ab7385c453 *R/reader.R 5f6ff8b218e7679919b85230b11cdebb *R/score.R 25c6557d93d9ea9669a862ed6c69e9e1 *R/source.R dee7e0a8b245fd670436a019c54d904c *R/stopwords.R b2ec91910424e29c63d965419f177d2d *R/tokenizer.R 91f2faafc7c4f906bd9769ef2a97456f *R/transform.R 18a653f3d8c64fa2cba3e694972c1602 *R/utils.R c1de3acc3bc1bc9f64926b93c3be8301 *R/weight.R 188a363c8643f66c4270541fdc33e463 *build/vignette.rds 2461e30d7a1ca974ed3be4c174f986af *data/acq.rda 064f6ec0568118eee67e3140c332cd18 *data/crude.rda 1393d9426c77569eca1c952136019d84 *inst/CITATION fcf0ec32e555a6f0d8a69792e42256a6 *inst/NEWS.Rd ad6a6fe44b80541732690af3f36a4c32 *inst/doc/extensions.R d194109d976d7f242e64a8eab85026f8 *inst/doc/extensions.Rnw 0b178f9a6b6ca8691a12f500e080d4a2 *inst/doc/extensions.pdf 75f7ec6a9d04ef4326384185df892cad *inst/doc/tm.R e71ae9442d42f286eefa9d77a171c807 *inst/doc/tm.Rnw f87aacba1d209f606493002ef9d564d3 *inst/doc/tm.pdf 98f3b5f3d1f670032af4131a627c18d7 *inst/ghostscript/pdf_info.ps 7ec7b5de9c642afedf1159021c89f12a *inst/stopwords/SMART.dat 4c8fb2c1404c10540c267425fcc005f0 *inst/stopwords/catalan.dat 4e8d44fa90d87908846a2d92c2618b31 *inst/stopwords/danish.dat a638b876d5cbec644685d12d452a7407 *inst/stopwords/dutch.dat e181651a30ec45694b7fafc787f357dc *inst/stopwords/english.dat 1094269bf20052a5259983e23c69a552 *inst/stopwords/finnish.dat 29772f7c7dacf306981ad50c5484c4ad *inst/stopwords/french.dat 4a562db64979f200804127c3751a6efa *inst/stopwords/german.dat 1e1f45e67297e049bb22527d7efa8025 *inst/stopwords/hungarian.dat 7dfee49b4660f65f7bb935bef0c773bd *inst/stopwords/italian.dat 4cd3ddc90492cc5a3cbb9f0292d3844d *inst/stopwords/norwegian.dat d3483742365aa7d477512fd1810452c5 *inst/stopwords/portuguese.dat f6a262767ae1863b9e8cc92f78e3bb01 *inst/stopwords/romanian.dat 4bf4046fe7701b4940b8eb2c86f19c08 *inst/stopwords/russian.dat fddb7f14207d2649597b36e22b5eab18 *inst/stopwords/spanish.dat d3930c86664d4112ae772285dca85fd6 *inst/stopwords/swedish.dat 
4dc7bdaa3323e71845cf4c018e871048 *inst/texts/acq/reut-00001.xml a63b803ca46191dc3a30eda875d95136 *inst/texts/acq/reut-00002.xml 7638d681bcb7d2f3539b8be8a454dff9 *inst/texts/acq/reut-00003.xml f822ea4bdb0691950284856b51c87e41 *inst/texts/acq/reut-00004.xml 1f8f1f8699bb3883748fa29807477a55 *inst/texts/acq/reut-00005.xml f44aa9f0b51556f382cf8a91d7f36244 *inst/texts/acq/reut-00006.xml e0d5ea56a8f42146f5b7d3735da730dc *inst/texts/acq/reut-00007.xml b7560c91c1f18e919d7548d9d1b59843 *inst/texts/acq/reut-00008.xml 6b2913f0f666d7f84dd38ac05b326726 *inst/texts/acq/reut-00009.xml 5625c064bfff14db909a25a6719dc3f8 *inst/texts/acq/reut-00010.xml 047f38558920a11ebaeab94727465e58 *inst/texts/acq/reut-00011.xml eb26151fa8a7fcd2c87065b0ad8f0924 *inst/texts/acq/reut-00012.xml abdbeb14424b6f5994674e604a0a5590 *inst/texts/acq/reut-00013.xml 05b945b892bbb8d575c6ff6193bb17b8 *inst/texts/acq/reut-00014.xml e5159c22413cae49c015a631df3a74e2 *inst/texts/acq/reut-00015.xml cd87fc59bfcbe37c847bd1548537effa *inst/texts/acq/reut-00016.xml 75ec08b1337a6035d553f8344ece2c2a *inst/texts/acq/reut-00017.xml 908e51c4b6f9f4e65805adef7029c884 *inst/texts/acq/reut-00018.xml e67944c5bb9ef8e0fe811b1ead21199b *inst/texts/acq/reut-00020.xml 1d19206cd4478bfc03bc9335316f6816 *inst/texts/acq/reut-00021.xml 621a7e8ba27aac9b8040adc7fc1d11f9 *inst/texts/acq/reut-00022.xml 736bff1fabc3f07b35cd992e8630ed90 *inst/texts/acq/reut-00023.xml da2ddc7ac585134cb7fe80e812d3ac80 *inst/texts/acq/reut-00024.xml a04162294ae6ae69f3d1a74f0ad0b9b1 *inst/texts/acq/reut-00025.xml 5e757cb13baa266c292da3ff010f1434 *inst/texts/acq/reut-00026.xml 7974dd802d4ca66b7f7f51c355c8e558 *inst/texts/acq/reut-00027.xml 62368bea00c9a71f01293060708fc6a4 *inst/texts/acq/reut-00028.xml 7e06015b7518b608148002364989c4f7 *inst/texts/acq/reut-00029.xml f24469e27c9f16266db0e141892e97d1 *inst/texts/acq/reut-00030.xml acc36dbfdffe0362d39975db07569b85 *inst/texts/acq/reut-00031.xml 7e342636219116a2d428e2188b1dcb0b *inst/texts/acq/reut-00032.xml 
c40ce905c6896410a672bee72f132b46 *inst/texts/acq/reut-00034.xml ead5a03af44fb5cf4e896f039a122e4b *inst/texts/acq/reut-00035.xml 684ddc28a9bb0fbb6f49fa412b54231d *inst/texts/acq/reut-00036.xml 1be33a6347aa406b843132da98286506 *inst/texts/acq/reut-00039.xml 1bdf38586ab43a0f6996d3135ff1f48c *inst/texts/acq/reut-00040.xml b89e5d9aeba1b0e02cf3bf3fa729e346 *inst/texts/acq/reut-00042.xml 7c3703135baad41765ad1f58fcab0ba5 *inst/texts/acq/reut-00043.xml d5ab6f6dfe5fefb25422b258bcd339d0 *inst/texts/acq/reut-00045.xml 1af51ea6ba1898d33a84b680c1fa4d09 *inst/texts/acq/reut-00046.xml cb00fc7833f2eb9e3ac97c12d900dd4f *inst/texts/acq/reut-00047.xml e5b440d419fa528d4c996cd47e88c0b4 *inst/texts/acq/reut-00048.xml 4ed77929b16a0c6f3264272183b6c951 *inst/texts/acq/reut-00049.xml 7f6df11fcb6617c253921861e217c3c6 *inst/texts/acq/reut-00050.xml ba0a88d8b9caaa0d0fa8bba01bf2a9d9 *inst/texts/acq/reut-00051.xml c8b4ee7875ddba1c1d2886c3e32a7cb6 *inst/texts/acq/reut-00052.xml b0e4f9f398ba4e2ab847e1dc44c2594e *inst/texts/acq/reut-00053.xml ea25a8bf959fe2769e578474d5f0176f *inst/texts/acq/reut-00054.xml 574a5170c695ad0bbc91055ef8fdd2e9 *inst/texts/acq/reut-00055.xml 66cf87f5587906604d96c3f64ab77a9b *inst/texts/acq/reut-00056.xml e1c26b346a6683c393b2f420593b02e5 *inst/texts/crude/reut-00001.xml 401049764894ad7b37be02cee2e926f6 *inst/texts/crude/reut-00002.xml 15a57b39a4172799d7926c440548b1fd *inst/texts/crude/reut-00004.xml 95474b7494ce4835ed952374601f921e *inst/texts/crude/reut-00005.xml e91c3ec329c1f82fc27ea79d33650d32 *inst/texts/crude/reut-00006.xml 5344713574482c3d393766422bd72498 *inst/texts/crude/reut-00007.xml 5803359fee327a77342d4d16bc467271 *inst/texts/crude/reut-00008.xml c0f88331bbf3da5ec273838ac832e7fa *inst/texts/crude/reut-00009.xml ed3994f50fa16217a6c62dfae5909a03 *inst/texts/crude/reut-00010.xml c74f1b54db67c730bcc117536903dc52 *inst/texts/crude/reut-00011.xml 32cf0da1d923fd2aee4fe28200047c3b *inst/texts/crude/reut-00012.xml 42f6d47f40304ddc482e62bf1d1c3c21 
*inst/texts/crude/reut-00013.xml 51565e0b464e626cf1db1d812642e295 *inst/texts/crude/reut-00014.xml 8b107465269cd463e8d7deb470423dda *inst/texts/crude/reut-00015.xml 6b69f531b6953be522a58b0456820e04 *inst/texts/crude/reut-00016.xml 5deaf389a9067a5b6090c13195c0d254 *inst/texts/crude/reut-00018.xml 9e745c906a03765fb0b364ae78bbdcd5 *inst/texts/crude/reut-00019.xml 488f96e28466feeac3175f57724a1f8e *inst/texts/crude/reut-00021.xml da9f871a845a256e2c12ace2a2e2fb36 *inst/texts/crude/reut-00022.xml 2439e7823a1ff6403efd3108fa5ecc45 *inst/texts/crude/reut-00023.xml 7d9482d1fc4a624492dacf584a940b4c *inst/texts/custom.xml 717801d47bc20af5d69340eee342ce21 *inst/texts/loremipsum.txt e76c36aad136268277f2c036dc1c37cd *inst/texts/rcv1_2330.xml eda82aaa0c873d62be4905cb32dedb05 *inst/texts/reuters-21578.xml 5901120140c757daf5f21fba990e2bbe *inst/texts/txt/ovid_1.txt 2b5dc16305207ed29df7bbe0cc47abee *inst/texts/txt/ovid_2.txt 08197bca339b621d395220bd7ab719a7 *inst/texts/txt/ovid_3.txt 832ea34c305426cc653701df40750edf *inst/texts/txt/ovid_4.txt 3b3cb14d62de578684d6c59fa6dcba60 *inst/texts/txt/ovid_5.txt d44474e05cd96e80932106e24ed572a1 *man/Corpus.Rd bfc8d3be010d917643ffc4ff14c6778c *man/DataframeSource.Rd 1c104e63fd71cd63ad6e0da3669fbdf5 *man/DirSource.Rd 5871b5f9883ba4359e269bbfca27db37 *man/Docs.Rd ed2a81a46a4c6d22606272d51711b147 *man/PCorpus.Rd cbdbb32bebfa34b97869d4734b0c816c *man/PlainTextDocument.Rd f1c465f51d627af46612833ffcc17f59 *man/Reader.Rd b4d2dcdc0c2b16f38561637956a7a328 *man/SimpleCorpus.Rd 79170405ed1af7434fbfa37adebd56f7 *man/Source.Rd 0874f71fccd7c7d141f46f405b1ae105 *man/TextDocument.Rd c82a889b500268683904a4ad7fc9d3b1 *man/URISource.Rd 7c84cd5a42cdac47a1b0301e2b6459a6 *man/VCorpus.Rd 3fb4034c6df0b6277f07a028a958b932 *man/VectorSource.Rd 5a32dfd6e72da8d3c8569803d6761126 *man/WeightFunction.Rd 0b79ee972dac094d6f0ed9c1f4d2685f *man/XMLSource.Rd 18ad8795900f380383d1d970b7fbd31e *man/XMLTextDocument.Rd 2d25fcd9863b4ac7128c1d2a521e27f2 *man/ZipSource.Rd 
aa35e738196c054b9928a70c2178eef6 *man/Zipf_n_Heaps.Rd 0d4a3658b54b335bc8fd96d38b28cd94 *man/acq.Rd aa36762f11d31e840ba6115b9b913341 *man/combine.Rd 0f0ed4b165a6c3744b83c69abf59c7a9 *man/content_transformer.Rd 00d46e7ddb4fc8f5f1d03d8a632d9415 *man/crude.Rd f30ebc7d2c9ad750ef0e6037d1669827 *man/findAssocs.Rd 74d7ea8ee4c4ac46492bbc3b52a10dca *man/findFreqTerms.Rd 36e135250b446bbd0e677115bcf1a82a *man/findMostFreqTerms.Rd acc78a2ea2ca4ac0fe5194db189282ff *man/foreign.Rd be785d88b0821a06be0b4772868dc37c *man/getTokenizers.Rd 9ad9e3d7afb9815f04529a435f430a53 *man/getTransformations.Rd 5ccda6cc10a0093f08a7f707e5bac380 *man/hpc.Rd 6a72cef1df5795bb189bd1a0177e5d4d *man/inspect.Rd 1fbf7e471e7ed3472c91a014a4d40cba *man/matrix.Rd 33d7c410f4690b95bfe78b864f504b06 *man/meta.Rd a90444b9479d7cf70c0c07b5806d7aac *man/plot.Rd da970e1fa5602d3f934de219b681c61d *man/readDOC.Rd 13b3964279323a7d94ccab25ca7afaef *man/readDataframe.Rd 4ae0665813807e28f1b2e62752526e34 *man/readPDF.Rd d625f0434c021f98e4529ce1427703cf *man/readPlain.Rd 6da3dff5477fb944c5534f0c1fca32df *man/readRCV1.Rd 875a172667cb858efc9df4310362d1fd *man/readReut21578XML.Rd ec13c14161ee1c95f89ce75237aa3df7 *man/readTagged.Rd ce6a6feb64dd79693b7ceba7bdb4c6a0 *man/readXML.Rd 6425d73a8f5bea5db8903b2125dee10d *man/removeNumbers.Rd 3ac2234e01b41546a680622438dbde7e *man/removePunctuation.Rd ef0d87508b367cdd71f066244605407e *man/removeSparseTerms.Rd 2484a54292458f80e26f2956fc5d7501 *man/removeWords.Rd 5bdcaccf0076e98a2341078e61c59be5 *man/stemCompletion.Rd ce3570d40ff709d339fbe5ba16385607 *man/stemDocument.Rd 9d1c5303c355c85ac1923993e5fa73b2 *man/stopwords.Rd 15b8549fd381105839451d9b15c7efa3 *man/stripWhitespace.Rd a57bca15339139bb301c1034d1fc543a *man/termFreq.Rd 1c2142e8706936eff122b7a2c0717782 *man/tm_filter.Rd 29e0ffff4b61d1422fe7964e053a85bf *man/tm_map.Rd 6eb083c9b6f1b08700065fd58bf1f8be *man/tm_reduce.Rd 458b061071b9b320951c3b48adf16264 *man/tm_term_score.Rd 5b257009e1d5b3b928dd0283268e7a13 *man/tokenizer.Rd 
47bc8704437b53709120add15f205be0 *man/weightBin.Rd abe06433d8438326d1e03c8367312a59 *man/weightSMART.Rd 4e7d2dd30d4de494ba122cd3aff128ee *man/weightTf.Rd 88fbb7eda2e788887e1fe67cb7fd0855 *man/weightTfIdf.Rd 193b23f2d16e20a4944846725eebd155 *man/writeCorpus.Rd eed5d9ebacbdfb2831d1b194bdb34ebb *src/RcppExports.cpp bb554fd4dbfc74bad2ddb2abc7a08fdd *src/copy.c c808ad6ae870847ef123925bdfc44ae4 *src/init.c e8da9acc334a0411e1e33edff0f76ec6 *src/remove.c 51c1a9a7ab86f80ef98e1b4ef9c665ef *src/scan.c b3718cfabc91e3a803fa566fcc8f76b8 *src/tdm.cpp c4629b65d20fb20a2e7179934ccce720 *src/tokenizer.cpp f280e050264388e7c120d4869357efb7 *tests/testthat.R 7987b16eeb87d6c4e9787b85e5b764a4 *tests/testthat/test-Source.R 9aaa445dfcdaf1ed1cbbc282495f593e *tests/testthat/test-TermDocumentMatrix.R d194109d976d7f242e64a8eab85026f8 *vignettes/extensions.Rnw 3641da272a48168ad7b4ffef9fbf7d21 *vignettes/references.bib e71ae9442d42f286eefa9d77a171c807 *vignettes/tm.Rnw tm/build/0000755000175100001440000000000013204065716012013 5ustar hornikuserstm/build/vignette.rds0000644000175100001440000000034513204065716014354 0ustar hornikusers}P0P/+x1ƃV%JK` z-Lݝtg҃3ˡjhͩtf o)DHQ;QjŴA]h5X e|Hg1<cKvΥ9[>oto=kB/]x\ ~ Wz2 wF,c6QɞT꺮EW^Db<8PC&tm/DESCRIPTION0000644000175100001440000000236313204066220012415 0ustar hornikusersPackage: tm Title: Text Mining Package Version: 0.7-2 Date: 2017-11-17 Authors@R: c(person("Ingo", "Feinerer", role = c("aut", "cre"), email = "feinerer@logic.at"), person("Kurt", "Hornik", role = "aut", comment = c(ORCID = "0000-0003-4198-9911")), person("Artifex Software, Inc.", role = c("ctb", "cph"), comment = "pdf_info.ps taken from GPL Ghostscript")) Depends: R (>= 3.2.0), NLP (>= 0.1-6.2) Imports: Rcpp, parallel, slam (>= 0.1-37), stats, tools, utils, graphics, xml2 LinkingTo: BH, Rcpp Suggests: antiword, filehash, methods, pdftools, Rcampdf, Rgraphviz, Rpoppler, SnowballC, testthat, tm.lexicon.GeneralInquirer SystemRequirements: C++11 Description: A framework for text mining applications 
within R. License: GPL-3 URL: http://tm.r-forge.r-project.org/ Additional_repositories: http://datacube.wu.ac.at NeedsCompilation: yes Packaged: 2017-11-18 17:20:14 UTC; hornik Author: Ingo Feinerer [aut, cre], Kurt Hornik [aut] (), Artifex Software, Inc. [ctb, cph] (pdf_info.ps taken from GPL Ghostscript) Maintainer: Ingo Feinerer Repository: CRAN Date/Publication: 2017-11-18 17:23:28 UTC tm/man/0000755000175100001440000000000013176776017011502 5ustar hornikuserstm/man/meta.Rd0000644000175100001440000000633113176776017012722 0ustar hornikusers\name{meta} \alias{DublinCore} \alias{DublinCore<-} \alias{meta} \alias{meta.PCorpus} \alias{meta.SimpleCorpus} \alias{meta.VCorpus} \alias{meta<-.PCorpus} \alias{meta<-.SimpleCorpus} \alias{meta<-.VCorpus} \alias{meta.PlainTextDocument} \alias{meta<-.PlainTextDocument} \alias{meta.XMLTextDocument} \alias{meta<-.XMLTextDocument} \title{Metadata Management} \description{ Accessing and modifying metadata of text documents and corpora. } \usage{ \method{meta}{PCorpus}(x, tag = NULL, type = c("indexed", "corpus", "local"), \dots) \method{meta}{PCorpus}(x, tag, type = c("indexed", "corpus", "local"), \dots) <- value \method{meta}{SimpleCorpus}(x, tag = NULL, type = c("indexed", "corpus"), \dots) \method{meta}{SimpleCorpus}(x, tag, type = c("indexed", "corpus"), \dots) <- value \method{meta}{VCorpus}(x, tag = NULL, type = c("indexed", "corpus", "local"), \dots) \method{meta}{VCorpus}(x, tag, type = c("indexed", "corpus", "local"), \dots) <- value \method{meta}{PlainTextDocument}(x, tag = NULL, \dots) \method{meta}{PlainTextDocument}(x, tag = NULL, \dots) <- value \method{meta}{XMLTextDocument}(x, tag = NULL, \dots) \method{meta}{XMLTextDocument}(x, tag = NULL, \dots) <- value DublinCore(x, tag = NULL) DublinCore(x, tag) <- value } \arguments{ \item{x}{For \code{DublinCore} a \code{\link{TextDocument}}, and for \code{meta} a \code{\link{TextDocument}} or a \code{\link{Corpus}}.} \item{tag}{a character giving the name of a metadatum. 
No tag corresponds to all available metadata.} \item{type}{a character specifying the kind of corpus metadata (see \bold{Details}).} \item{\dots}{Not used.} \item{value}{replacement value.} } \details{ A corpus has two types of metadata. \emph{Corpus metadata} (\code{"corpus"}) contains corpus specific metadata in form of tag-value pairs. \emph{Document level metadata} (\code{"indexed"}) contains document specific metadata but is stored in the corpus as a data frame. Document level metadata is typically used for semantic reasons (e.g., classifications of documents form an own entity due to some high-level information like the range of possible values) or for performance reasons (single access instead of extracting metadata of each document). The latter can be seen as a form of indexing, hence the name \code{"indexed"}. \emph{Document metadata} (\code{"local"}) are tag-value pairs directly stored locally at the individual documents. \code{DublinCore} is a convenience wrapper to access and modify the metadata of a text document using the Simple Dublin Core schema (supporting the 15 metadata elements from the Dublin Core Metadata Element Set \url{http://dublincore.org/documents/dces/}). } \seealso{ \code{\link[NLP]{meta}} for metadata in package \pkg{NLP}. } \references{ Dublin Core Metadata Initiative. \url{http://dublincore.org/} } \examples{ data("crude") meta(crude[[1]]) DublinCore(crude[[1]]) meta(crude[[1]], tag = "topics") meta(crude[[1]], tag = "comment") <- "A short comment." 
meta(crude[[1]], tag = "topics") <- NULL DublinCore(crude[[1]], tag = "creator") <- "Ano Nymous" DublinCore(crude[[1]], tag = "format") <- "XML" DublinCore(crude[[1]]) meta(crude[[1]]) meta(crude) meta(crude, type = "corpus") meta(crude, "labels") <- 21:40 meta(crude) } tm/man/tm_map.Rd0000644000175100001440000000405013034203013013213 0ustar hornikusers\name{tm_map} \alias{tm_map} \alias{tm_map.VCorpus} \alias{tm_map.SimpleCorpus} \alias{tm_map.PCorpus} \title{Transformations on Corpora} \description{ Interface to apply transformation functions (also denoted as mappings) to corpora. } \usage{ \method{tm_map}{PCorpus}(x, FUN, \dots) \method{tm_map}{SimpleCorpus}(x, FUN, \dots) \method{tm_map}{VCorpus}(x, FUN, \dots, lazy = FALSE) } \arguments{ \item{x}{A corpus.} \item{FUN}{a transformation function taking a text document (a character vector when \code{x} is a \code{SimpleCorpus}) as input and returning a text document (a character vector of the same length as the input vector for \code{SimpleCorpus}). The function \code{\link{content_transformer}} can be used to create a wrapper to get and set the content of text documents.} \item{\dots}{arguments to \code{FUN}.} \item{lazy}{a logical. Lazy mappings are mappings which are delayed until the content is accessed. It is useful for large corpora if only few documents will be accessed. In such a case it avoids the computationally expensive application of the mapping to all elements in the corpus.} } \value{ A corpus with \code{FUN} applied to each document in \code{x}. In case of lazy mappings only internal flags are set. Access of individual documents triggers the execution of the corresponding transformation function. } \seealso{ \code{\link{getTransformations}} for available transformations. } \note{ Lazy transformations change \R's standard evaluation semantics. 
} \examples{ data("crude") ## Document access triggers the stemming function ## (i.e., all other documents are not stemmed yet) tm_map(crude, stemDocument, lazy = TRUE)[[1]] ## Use wrapper to apply character processing function tm_map(crude, content_transformer(tolower)) ## Generate a custom transformation function which takes the heading as new content headings <- function(x) PlainTextDocument(meta(x, "heading"), id = meta(x, "id"), language = meta(x, "language")) inspect(tm_map(crude, headings)) } tm/man/removeWords.Rd0000644000175100001440000000144712327511431014273 0ustar hornikusers\name{removeWords} \alias{removeWords} \alias{removeWords.character} \alias{removeWords.PlainTextDocument} \title{Remove Words from a Text Document} \description{ Remove words from a text document. } \usage{ \method{removeWords}{character}(x, words) \method{removeWords}{PlainTextDocument}(x, \dots) } \arguments{ \item{x}{A character or text document.} \item{words}{A character vector giving the words to be removed.} \item{\dots}{passed over argument \code{words}.} } \value{ The character or text document without the specified words. } \seealso{ \code{\link{getTransformations}} to list available transformation (mapping) functions. \code{\link[tau]{remove_stopwords}} provided by package \pkg{tau}. } \examples{ data("crude") crude[[1]] removeWords(crude[[1]], stopwords("english")) } tm/man/Docs.Rd0000644000175100001440000000131212324523350012636 0ustar hornikusers\name{Docs} \alias{Docs} \alias{nDocs} \alias{nTerms} \alias{Terms} \title{Access Document IDs and Terms} \description{ Accessing document IDs, terms, and their number of a term-document matrix or document-term matrix. } \usage{ Docs(x) nDocs(x) nTerms(x) Terms(x) } \arguments{ \item{x}{Either a \code{\link{TermDocumentMatrix}} or \code{\link{DocumentTermMatrix}}.} } \value{ For \code{Docs} and \code{Terms}, a character vector with document IDs and terms, respectively. 
For \code{nDocs} and \code{nTerms}, an integer with the number of document IDs and terms, respectively. } \examples{ data("crude") tdm <- TermDocumentMatrix(crude)[1:10,1:20] Docs(tdm) nDocs(tdm) nTerms(tdm) Terms(tdm) } tm/man/readXML.Rd0000644000175100001440000000517413155253051013255 0ustar hornikusers\name{readXML} \alias{readXML} \title{Read In an XML Document} \description{ Return a function which reads in an \acronym{XML} document. The structure of the \acronym{XML} document is described with a specification. } \usage{ readXML(spec, doc) } \arguments{ \item{spec}{A named list of lists each containing two components. The constructed reader will map each list entry to the content or metadatum of the text document as specified by the named list entry. Valid names include \code{content} to access the document's content, and character strings which are mapped to metadata entries. Each list entry must consist of two components: the first must be a string describing the type of the second argument, and the second is the specification entry. Valid combinations are: \describe{ \item{\code{type = "node", spec = "XPathExpression"}}{The XPath (1.0) expression \code{spec} extracts information from an \acronym{XML} node.} \item{\code{type = "function", spec = function(doc) \dots}}{The function \code{spec} is called, passing over the \acronym{XML} document (as delivered by \code{\link[xml2]{read_xml}} from package \pkg{xml2}) as first argument.} \item{\code{type = "unevaluated", spec = "String"}}{The character vector \code{spec} is returned without modification.} } } \item{doc}{An (empty) document of some subclass of \code{TextDocument}.} } \details{ Formally this function is a function generator, i.e., it returns a function (which reads in a text document) with a well-defined signature, but can access passed over arguments (e.g., the specification) via lexical scoping. 
} \value{ A function with the following formals: \describe{ \item{\code{elem}}{a named list with the component \code{content} which must hold the document to be read in.} \item{\code{language}}{a string giving the language.} \item{\code{id}}{a character giving a unique identifier for the created text document.} } The function returns \code{doc} augmented by the parsed information as described by \code{spec} out of the \acronym{XML} file in \code{elem$content}. The arguments \code{language} and \code{id} are used as fallback: \code{language} if no corresponding metadata entry is found in \code{elem$content}, and \code{id} if no corresponding metadata entry is found in \code{elem$content} and if \code{elem$uri} is null. } \seealso{ \code{\link{Reader}} for basic information on the reader infrastructure employed by package \pkg{tm}. Vignette 'Extensions: How to Handle Custom File Formats', and \code{\link{XMLSource}}. } tm/man/Corpus.Rd0000644000175100001440000000335412747047144013244 0ustar hornikusers\name{Corpus} \alias{Corpus} \title{Corpora} \description{ Representing and computing on corpora. } \details{ \emph{Corpora} are collections of documents containing (natural language) text. In packages which employ the infrastructure provided by package \pkg{tm}, such corpora are represented via the virtual S3 class \code{Corpus}: such packages then provide S3 corpus classes extending the virtual base class (such as \code{\link{VCorpus}} provided by package \pkg{tm} itself). All extension classes must provide accessors to extract subsets (\code{\link{[}}), individual documents (\code{\link{[[}}), and metadata (\code{\link{meta}}). The function \code{\link{length}} must return the number of documents, and \code{\link{as.list}} must construct a list holding the documents. A corpus can have two types of metadata (accessible via \code{\link{meta}}). \emph{Corpus metadata} contains corpus specific metadata in form of tag-value pairs. 
\emph{Document level metadata} contains document specific metadata but is stored in the corpus as a data frame. Document level metadata is typically used for semantic reasons (e.g., classifications of documents form an own entity due to some high-level information like the range of possible values) or for performance reasons (single access instead of extracting metadata of each document). The function \code{Corpus} is a convenience alias to \code{SimpleCorpus} or \code{VCorpus}, depending on the arguments provided. } \seealso{ \code{\link{SimpleCorpus}}, \code{\link{VCorpus}}, and \code{\link{PCorpus}} for the corpora classes provided by package \pkg{tm}. \code{\link[tm.plugin.dc]{DCorpus}} for a distributed corpus class provided by package \pkg{tm.plugin.dc}. } tm/man/PCorpus.Rd0000644000175100001440000000357513177025160013361 0ustar hornikusers\name{PCorpus} \alias{PCorpus} \title{Permanent Corpora} \description{ Create permanent corpora. } \usage{ PCorpus(x, readerControl = list(reader = reader(x), language = "en"), dbControl = list(dbName = "", dbType = "DB1")) } \arguments{ \item{x}{A \code{\link{Source}} object.} \item{readerControl}{a named list of control parameters for reading in content from \code{x}. \describe{ \item{\code{reader}}{a function capable of reading in and processing the format delivered by \code{x}.} \item{\code{language}}{a character giving the language (preferably as \acronym{IETF} language tags, see \link[NLP]{language} in package \pkg{NLP}). The default language is assumed to be English (\code{"en"}).} } } \item{dbControl}{a named list of control parameters for the underlying database storage provided by package \pkg{filehash}. \describe{ \item{\code{dbName}}{a character giving the filename for the database.} \item{\code{dbType}}{a character giving the database format (see \code{\link[filehash]{filehashOption}} for possible database formats).} } } } \value{ An object inheriting from \code{PCorpus} and \code{Corpus}. 
} \details{ A \emph{permanent corpus} stores documents outside of \R in a database. Since multiple \code{PCorpus} \R objects with the same underlying database can exist simultaneously in memory, changes in one get propagated to all corresponding objects (in contrast to the default \R semantics). } \seealso{ \code{\link[tm]{Corpus}} for basic information on the corpus infrastructure employed by package \pkg{tm}. \code{\link{VCorpus}} provides an implementation with volatile storage semantics. } \examples{ txt <- system.file("texts", "txt", package = "tm") \dontrun{PCorpus(DirSource(txt), dbControl = list(dbName = "pcorpus.db", dbType = "DB1"))} } tm/man/readDOC.Rd0000644000175100001440000000346713101143235013216 0ustar hornikusers\name{readDOC} \alias{readDOC} \title{Read In a MS Word Document} \description{ Return a function which reads in a Microsoft Word document extracting its text. } \usage{ readDOC(engine = c("antiword", "executable"), AntiwordOptions = "") } \arguments{ \item{engine}{a character string for the preferred \acronym{DOC} extraction engine (see \bold{Details}).} \item{AntiwordOptions}{Options passed over to \command{antiword} executable.} } \details{ Formally this function is a function generator, i.e., it returns a function (which reads in a text document) with a well-defined signature, but can access passed over arguments (e.g., options to \command{antiword}) via lexical scoping. Available \acronym{DOC} extraction engines are as follows. \describe{ \item{\code{"antiword"}}{(default) Antiword utility as provided by the function \code{\link[antiword]{antiword}} in package \pkg{antiword}.} \item{\code{"executable"}}{command line \command{antiword} executable which must be installed and accessible on your system. This can convert documents from Microsoft Word version 2, 6, 7, 97, 2000, 2002 and 2003 to plain text, and is available from \url{http://www.winfield.demon.nl/}. 
The character vector \code{AntiwordOptions} is passed over to the executable.} } } \value{ A \code{function} with the following formals: \describe{ \item{\code{elem}}{a list with the named component \code{uri} which must hold a valid file name.} \item{\code{language}}{a string giving the language.} \item{\code{id}}{Not used.} } The function returns a \code{\link{PlainTextDocument}} representing the text and metadata extracted from \code{elem$uri}. } \seealso{ \code{\link{Reader}} for basic information on the reader infrastructure employed by package \pkg{tm}. } tm/man/combine.Rd0000644000175100001440000000254112323504623013370 0ustar hornikusers\name{tm_combine} \alias{c.VCorpus} \alias{c.TextDocument} \alias{c.TermDocumentMatrix} \alias{c.term_frequency} \title{Combine Corpora, Documents, Term-Document Matrices, and Term Frequency Vectors} \description{ Combine several corpora into a single one, combine multiple documents into a corpus, combine multiple term-document matrices into a single one, or combine multiple term frequency vectors into a single term-document matrix. } \usage{ \method{c}{VCorpus}(\dots, recursive = FALSE) \method{c}{TextDocument}(\dots, recursive = FALSE) \method{c}{TermDocumentMatrix}(\dots, recursive = FALSE) \method{c}{term_frequency}(\dots, recursive = FALSE) } \arguments{ \item{\dots}{Corpora, text documents, term-document matrices, or term frequency vectors.} \item{recursive}{Not used.} } \seealso{ \code{\link{VCorpus}}, \code{\link{TextDocument}}, \code{\link{TermDocumentMatrix}}, and \code{\link{termFreq}}. 
} \examples{ data("acq") data("crude") meta(acq, "comment", type = "corpus") <- "Acquisitions" meta(crude, "comment", type = "corpus") <- "Crude oil" meta(acq, "acqLabels") <- 1:50 meta(acq, "jointLabels") <- 1:50 meta(crude, "crudeLabels") <- letters[1:20] meta(crude, "jointLabels") <- 1:20 c(acq, crude) meta(c(acq, crude), type = "corpus") meta(c(acq, crude)) c(acq[[30]], crude[[10]]) c(TermDocumentMatrix(acq), TermDocumentMatrix(crude)) } tm/man/getTokenizers.Rd0000644000175100001440000000052713150552514014614 0ustar hornikusers\name{getTokenizers} \alias{getTokenizers} \title{Tokenizers} \description{ Predefined tokenizers. } \usage{ getTokenizers() } \value{ A character vector with tokenizers provided by package \pkg{tm}. } \seealso{ \code{\link{Boost_tokenizer}}, \code{\link{MC_tokenizer}} and \code{\link{scan_tokenizer}}. } \examples{ getTokenizers() } tm/man/foreign.Rd0000644000175100001440000000353513065660374013423 0ustar hornikusers\name{foreign} \alias{read_dtm_Blei_et_al} \alias{read_dtm_MC} \title{Read Document-Term Matrices} \description{ Read document-term matrices stored in special file formats. } \usage{ read_dtm_Blei_et_al(file, vocab = NULL) read_dtm_MC(file, scalingtype = NULL) } \arguments{ \item{file}{a character string with the name of the file to read.} \item{vocab}{a character string with the name of a vocabulary file (giving the terms, one per line), or \code{NULL}.} \item{scalingtype}{a character string specifying the type of scaling to be used, or \code{NULL} (default), in which case the scaling will be inferred from the names of the files with non-zero entries found (see \bold{Details}).} } \details{ \code{read_dtm_Blei_et_al} reads the (List of Lists type sparse matrix) format employed by the Latent Dirichlet Allocation and Correlated Topic Model C codes by Blei et al (\url{http://www.cs.columbia.edu/~blei}). MC is a toolkit for creating vector models from text documents (see \url{http://www.cs.utexas.edu/users/dml/software/mc/}). 
It employs a variant of Compressed Column Storage (CCS) sparse matrix format, writing data into several files with suitable names: e.g., a file with \file{_dim} appended to the base file name stores the matrix dimensions. The non-zero entries are stored in a file the name of which indicates the scaling type used: e.g., \file{_tfx_nz} indicates scaling by term frequency (\samp{t}), inverse document frequency (\samp{f}) and no normalization (\samp{x}). See \file{README} in the MC sources for more information. \code{read_dtm_MC} reads such sparse matrix information with argument \code{file} giving the path with the base file name. } \value{ A \link[=DocumentTermMatrix]{document-term matrix}. } \seealso{ \code{\link[slam]{read_stm_MC}} in package \pkg{slam}. } \keyword{IO} tm/man/PlainTextDocument.Rd0000644000175100001440000000446612345630146015377 0ustar hornikusers\name{PlainTextDocument} \alias{PlainTextDocument} \title{Plain Text Documents} \description{ Create plain text documents. } \usage{ PlainTextDocument(x = character(0), author = character(0), datetimestamp = as.POSIXlt(Sys.time(), tz = "GMT"), description = character(0), heading = character(0), id = character(0), language = character(0), origin = character(0), \dots, meta = NULL, class = NULL) } \arguments{ \item{x}{A character giving the plain text content.} \item{author}{a character or an object of class \code{\link{person}} giving the author names.} \item{datetimestamp}{an object of class \code{\link{POSIXt}} or a character string giving the creation date/time information. If a character string, exactly one of the \acronym{ISO} 8601 formats defined by \url{http://www.w3.org/TR/NOTE-datetime} should be used. See \code{\link[NLP]{parse_ISO_8601_datetime}} in package \pkg{NLP} for processing such date/time information. 
} \item{description}{a character giving a description.} \item{heading}{a character giving the title or a short heading.} \item{id}{a character giving a unique identifier.} \item{language}{a character giving the language (preferably as \acronym{IETF} language tags, see \link[NLP]{language} in package \pkg{NLP}).} \item{origin}{a character giving information on the source and origin.} \item{\dots}{user-defined document metadata tag-value pairs.} \item{meta}{a named list or \code{NULL} (default) giving all metadata. If set all other metadata arguments are ignored.} \item{class}{a character vector or \code{NULL} (default) giving additional classes to be used for the created plain text document.} } \value{ An object inheriting from \code{class}, \code{PlainTextDocument} and \code{\link{TextDocument}}. } \seealso{ \code{\link{TextDocument}} for basic information on the text document infrastructure employed by package \pkg{tm}. } \examples{ (ptd <- PlainTextDocument("A simple plain text document", heading = "Plain text document", id = basename(tempfile()), language = "en")) meta(ptd) } tm/man/VectorSource.Rd0000644000175100001440000000116512324377361014410 0ustar hornikusers\name{VectorSource} \alias{VectorSource} \title{Vector Source} \description{ Create a vector source. } \usage{ VectorSource(x) } \arguments{ \item{x}{A vector giving the texts.} } \details{ A \emph{vector source} interprets each element of the vector \code{x} as a document. } \value{ An object inheriting from \code{VectorSource}, \code{\link{SimpleSource}}, and \code{\link{Source}}. } \seealso{ \code{\link{Source}} for basic information on the source infrastructure employed by package \pkg{tm}. 
} \examples{ docs <- c("This is a text.", "This another one.") (vs <- VectorSource(docs)) inspect(VCorpus(vs)) } tm/man/readTagged.Rd0000644000175100001440000000373413177025262014015 0ustar hornikusers\name{readTagged} \alias{readTagged} \title{Read In a POS-Tagged Word Text Document} \description{ Return a function which reads in a text document containing POS-tagged words. } \usage{ readTagged(\dots) } \arguments{ \item{\dots}{Arguments passed to \code{\link[NLP]{TaggedTextDocument}}.} } \details{ Formally this function is a function generator, i.e., it returns a function (which reads in a text document) with a well-defined signature, but can access passed over arguments (\code{\dots}) via lexical scoping. } \value{ A \code{function} with the following formals: \describe{ \item{\code{elem}}{a named list with the component \code{content} which must hold the document to be read in or the component \code{uri} holding a connection object or a character string.} \item{\code{language}}{a string giving the language.} \item{\code{id}}{a character giving a unique identifier for the created text document.} } The function returns a \code{\link[NLP]{TaggedTextDocument}} representing the text and metadata extracted from \code{elem$content} or \code{elem$uri}. The argument \code{id} is used as fallback if \code{elem$uri} is null. } \seealso{ \code{\link{Reader}} for basic information on the reader infrastructure employed by package \pkg{tm}. 
} \examples{ # See http://www.nltk.org/book/ch05.html or file ca01 in the Brown corpus x <- paste("The/at grand/jj jury/nn commented/vbd on/in a/at number/nn of/in", "other/ap topics/nns ,/, among/in them/ppo the/at Atlanta/np and/cc", "Fulton/np-tl County/nn-tl purchasing/vbg departments/nns which/wdt", "it/pps said/vbd ``/`` are/ber well/ql operated/vbn and/cc follow/vb", "generally/rb accepted/vbn practices/nns which/wdt inure/vb to/in the/at", "best/jjt interest/nn of/in both/abx governments/nns ''/'' ./.") vs <- VectorSource(x) elem <- getElem(stepNext(vs)) (doc <- readTagged()(elem, language = "en", id = "id1")) tagged_words(doc) } tm/man/matrix.Rd0000644000175100001440000000720713177025234013270 0ustar hornikusers\name{TermDocumentMatrix} \alias{TermDocumentMatrix} \alias{DocumentTermMatrix} \alias{as.TermDocumentMatrix} \alias{as.DocumentTermMatrix} \title{Term-Document Matrix} \description{ Constructs or coerces to a term-document matrix or a document-term matrix. } \usage{ TermDocumentMatrix(x, control = list()) DocumentTermMatrix(x, control = list()) as.TermDocumentMatrix(x, \dots) as.DocumentTermMatrix(x, \dots) } \arguments{ \item{x}{a corpus for the constructors and either a term-document matrix or a document-term matrix or a \link[slam:matrix]{simple triplet matrix} (package \pkg{slam}) or a \link[=termFreq]{term frequency vector} for the coercing functions.} \item{control}{a named list of control options. There are local options which are evaluated for each document and global options which are evaluated once for the constructed matrix. Available local options are documented in \code{\link{termFreq}} and are internally delegated to a \code{\link{termFreq}} call. This is different for a \code{\link{SimpleCorpus}}. In this case all options are processed in a fixed order in one pass to improve performance. It always uses the Boost (\url{http://www.boost.org}) Tokenizer (via \pkg{Rcpp}) and takes no custom functions as option arguments. 
Available global options are: \describe{ \item{\code{bounds}}{A list with a tag \code{global} whose value must be an integer vector of length 2. Terms that appear in less documents than the lower bound \code{bounds$global[1]} or in more documents than the upper bound \code{bounds$global[2]} are discarded. Defaults to \code{list(global = c(1, Inf))} (i.e., every term will be used).} \item{\code{weighting}}{A weighting function capable of handling a \code{TermDocumentMatrix}. It defaults to \code{weightTf} for term frequency weighting. Available weighting functions shipped with the \pkg{tm} package are \code{\link{weightTf}}, \code{\link{weightTfIdf}}, \code{\link{weightBin}}, and \code{\link{weightSMART}}.} }} \item{\dots}{the additional argument \code{weighting} (typically a \code{\link{WeightFunction}}) is allowed when coercing a simple triplet matrix to a term-document or document-term matrix.} } \value{ An object of class \code{TermDocumentMatrix} or class \code{DocumentTermMatrix} (both inheriting from a \link[slam:matrix]{simple triplet matrix} in package \pkg{slam}) containing a sparse term-document matrix or document-term matrix. The attribute \code{weighting} contains the weighting applied to the matrix. } \seealso{ \code{\link{termFreq}} for available local control options. 
} \examples{ data("crude") tdm <- TermDocumentMatrix(crude, control = list(removePunctuation = TRUE, stopwords = TRUE)) dtm <- DocumentTermMatrix(crude, control = list(weighting = function(x) weightTfIdf(x, normalize = FALSE), stopwords = TRUE)) inspect(tdm[202:205, 1:5]) inspect(tdm[c("price", "prices", "texas"), c("127", "144", "191", "194")]) inspect(dtm[1:5, 273:276]) s <- SimpleCorpus(VectorSource(unlist(lapply(crude, as.character)))) m <- TermDocumentMatrix(s, control = list(removeNumbers = TRUE, stopwords = TRUE, stemming = TRUE)) inspect(m[c("price", "texa"), c("127", "144", "191", "194")]) } tm/man/tm_filter.Rd0000644000175100001440000000214213007636267013750 0ustar hornikusers\name{tm_filter} \alias{tm_filter} \alias{tm_filter.PCorpus} \alias{tm_filter.SimpleCorpus} \alias{tm_filter.VCorpus} \alias{tm_index} \alias{tm_index.PCorpus} \alias{tm_index.SimpleCorpus} \alias{tm_index.VCorpus} \title{Filter and Index Functions on Corpora} \description{ Interface to apply filter and index functions to corpora. } \usage{ \method{tm_filter}{PCorpus}(x, FUN, \dots) \method{tm_filter}{SimpleCorpus}(x, FUN, \dots) \method{tm_filter}{VCorpus}(x, FUN, \dots) \method{tm_index}{PCorpus}(x, FUN, \dots) \method{tm_index}{SimpleCorpus}(x, FUN, \dots) \method{tm_index}{VCorpus}(x, FUN, \dots) } \arguments{ \item{x}{A corpus.} \item{FUN}{a filter function taking a text document or a string (if \code{x} is a \code{SimpleCorpus}) as input and returning a logical value.} \item{\dots}{arguments to \code{FUN}.} } \value{ \code{tm_filter} returns a corpus containing documents where \code{FUN} matches, whereas \code{tm_index} only returns the corresponding indices. } \examples{ data("crude") # Full-text search tm_filter(crude, FUN = function(x) any(grep("co[m]?pany", content(x)))) } tm/man/SimpleCorpus.Rd0000644000175100001440000000436313126626477014423 0ustar hornikusers\name{SimpleCorpus} \alias{SimpleCorpus} \title{Simple Corpora} \description{ Create simple corpora. 
However, it takes internally various shortcuts to boost performance and minimize memory pressure; consequently it operates only under the following constraints:
} \examples{ txt <- system.file("texts", "txt", package = "tm") (ovid <- SimpleCorpus(DirSource(txt, encoding = "UTF-8"), control = list(language = "lat"))) } tm/man/stripWhitespace.Rd0000644000175100001440000000122712324523350015131 0ustar hornikusers\name{stripWhitespace} \alias{stripWhitespace} \alias{stripWhitespace.PlainTextDocument} \title{Strip Whitespace from a Text Document} \description{ Strip extra whitespace from a text document. Multiple whitespace characters are collapsed to a single blank. } \usage{ \method{stripWhitespace}{PlainTextDocument}(x, \dots) } \arguments{ \item{x}{A text document.} \item{\dots}{Not used.} } \value{ The text document with multiple whitespace characters collapsed to a single blank. } \seealso{ \code{\link{getTransformations}} to list available transformation (mapping) functions. } \examples{ data("crude") crude[[1]] stripWhitespace(crude[[1]]) } tm/man/XMLSource.Rd0000644000175100001440000000161013177022735013600 0ustar hornikusers\name{XMLSource} \alias{XMLSource} \title{XML Source} \description{ Create an \acronym{XML} source. } \usage{ XMLSource(x, parser = xml_contents, reader) } \arguments{ \item{x}{a character giving a uniform resource identifier.} \item{parser}{a function accepting an \acronym{XML} document (as delivered by \code{\link[xml2]{read_xml}} in package \pkg{xml2}) as input and returning \acronym{XML} elements/nodes.} \item{reader}{a function capable of turning \acronym{XML} elements/nodes as returned by \code{parser} into a subclass of \code{\link{TextDocument}}.} } \value{ An object inheriting from \code{XMLSource}, \code{\link{SimpleSource}}, and \code{\link{Source}}. } \seealso{ \code{\link{Source}} for basic information on the source infrastructure employed by package \pkg{tm}. Vignette 'Extensions: How to Handle Custom File Formats', and \code{\link{readXML}}. 
} tm/man/tm_reduce.Rd0000644000175100001440000000163712355322342013731 0ustar hornikusers\name{tm_reduce} \alias{tm_reduce} \title{Combine Transformations} \description{ Fold multiple transformations (mappings) into a single one. } \usage{ tm_reduce(x, tmFuns, \dots) } \arguments{ \item{x}{A corpus.} \item{tmFuns}{A list of \pkg{tm} transformations.} \item{\dots}{Arguments to the individual transformations.} } \value{ A single \pkg{tm} transformation function obtained by folding \code{tmFuns} from right to left (via \code{Reduce(\dots, right = TRUE)}). } \seealso{ \code{Reduce} for \R's internal folding/accumulation mechanism, and \code{\link{getTransformations}} to list available transformation (mapping) functions. } \examples{ data(crude) crude[[1]] skipWords <- function(x) removeWords(x, c("it", "the")) funs <- list(stripWhitespace, skipWords, removePunctuation, content_transformer(tolower)) tm_map(crude, FUN = tm_reduce, tmFuns = funs)[[1]] } tm/man/readReut21578XML.Rd0000644000175100001440000000255413041567324014530 0ustar hornikusers\name{readReut21578XML} \alias{readReut21578XML} \alias{readReut21578XMLasPlain} \title{Read In a Reuters-21578 XML Document} \description{ Read in a Reuters-21578 \acronym{XML} document. } \usage{ readReut21578XML(elem, language, id) readReut21578XMLasPlain(elem, language, id) } \arguments{ \item{elem}{a named list with the component \code{content} which must hold the document to be read in.} \item{language}{a string giving the language.} \item{id}{Not used.} } \value{ An \code{\link{XMLTextDocument}} for \code{readReut21578XML}, or a \code{\link{PlainTextDocument}} for \code{readReut21578XMLasPlain}, representing the text and metadata extracted from \code{elem$content}. } \references{ Emms, Martin and Luz, Saturnino (2007). Machine Learning for Natural Language Processing. \emph{European Summer School of Logic, Language and Information, course reader}. 
\item{x}{A character vector of uniform resource identifiers (\acronym{URI}s).}
} \examples{ loremipsum <- system.file("texts", "loremipsum.txt", package = "tm") ovid <- system.file("texts", "txt", "ovid_1.txt", package = "tm") us <- URISource(sprintf("file://\%s", c(loremipsum, ovid))) inspect(VCorpus(us)) } tm/man/hpc.Rd0000644000175100001440000000665213037140514012533 0ustar hornikusers\name{hpc} \alias{tm_parLapply} \alias{tm_parLapply_engine} \title{Parallelized \sQuote{lapply}} \description{ Parallelize applying a function over a list or vector according to the registered parallelization engine. } \usage{ tm_parLapply(X, FUN, ...) tm_parLapply_engine(new) } \arguments{ \item{X}{A vector (atomic or list), or other objects suitable for the engine in use.} \item{FUN}{the function to be applied to each element of \code{X}.} \item{...}{optional arguments to \code{FUN}.} \item{new}{an object inheriting from class \code{cluster} as created by \code{\link[parallel:makeCluster]{makeCluster}()} from package \pkg{parallel}, or a function with formals \code{X}, \code{FUN} and \code{...}, or \code{NULL} corresponding to the default of using no parallelization engine.} } \details{ Parallelization can be employed to speed up some of the embarrassingly parallel computations performed in package \pkg{tm}, specifically \code{\link{tm_index}()}, \code{\link{tm_map}()} on a non-lazy-mapped \code{\link{VCorpus}}, and \code{\link{TermDocumentMatrix}()} on a \code{\link{VCorpus}} or \code{\link{PCorpus}}. Functions \code{tm_parLapply()} and \code{tm_parLapply_engine()} can be used to customize parallelization according to the available resources. \code{tm_parLapply_engine()} is used for getting (with no arguments) or setting (with argument \code{new}) the parallelization engine employed (see below for examples). If an engine is set to an object inheriting from class \code{cluster}, \code{tm_parLapply()} calls \code{\link[parallel:parLapply]{parLapply}()} with this cluster and the given arguments. 
If set to a function, \code{tm_parLapply()} calls the function with the given arguments. Otherwise, it simply calls \code{\link{lapply}()}. Hence, to achieve parallelization via \code{\link[parallel:parLapply]{parLapply}()} and a default cluster registered via \code{\link[parallel:setDefaultCluster]{setDefaultCluster}()}, one can use \preformatted{ tm_parLapply_engine(function(X, FUN, ...) parallel::parLapply(NULL, X, FUN, ...))} or re-register the cluster, say \code{cl}, using \preformatted{ tm_parLapply_engine(cl)} (note that there is no mechanism for programmatically getting the registered default cluster). Using \preformatted{ tm_parLapply_engine(function(X, FUN, ...) parallel::parLapplyLB(NULL, X, FUN, ...))} or \preformatted{ tm_parLapply_engine(function(X, FUN, ...) parallel::parLapplyLB(cl, X, FUN, ...))} gives load-balancing parallelization with the registered default or given cluster, respectively. To achieve parallelization via forking (on Unix-alike platforms), one can use the above with clusters created by \code{\link[parallel:makeForkCluster]{makeForkCluster}()}, or use \preformatted{ tm_parLapply_engine(parallel::mclapply)} or \preformatted{ tm_parLapply_engine(function(X, FUN, ...) parallel::mclapply(X, FUN, ..., mc.cores = n))} to use \code{\link[parallel:mclapply]{mclapply}()} with the default or given number \code{n} of cores. } \value{ A list the length of \code{X}, with the result of applying \code{FUN} together with the \code{...} arguments to each element of \code{X}. } \seealso{ \code{\link[parallel:makeCluster]{makeCluster}()}, \code{\link[parallel:parLapply]{parLapply}()}, \code{\link[parallel:parLapplyLB]{parLapplyLB}()}, and \code{\link[parallel:mclapply]{mclapply}()}. } tm/man/readDataframe.Rd0000644000175100001440000000206413110235235014467 0ustar hornikusers\name{readDataframe} \alias{readDataframe} \title{Read In a Text Document from a Data Frame} \description{ Read in a text document from a row in a data frame. 
} \usage{ readDataframe(elem, language, id) } \arguments{ \item{elem}{a named list with the component \code{content} which must hold a data frame with rows as the documents to be read in. The names of the columns holding the text content and the document identifier must be \code{"text"} and \code{"doc_id"}, respectively.} \item{language}{a string giving the language.} \item{id}{Not used.} } \value{ A \code{\link{PlainTextDocument}} representing \code{elem$content}. } \seealso{ \code{\link{Reader}} for basic information on the reader infrastructure employed by package \pkg{tm}. } \examples{ docs <- data.frame(doc_id = c("doc_1", "doc_2"), text = c("This is a text.", "This another one."), stringsAsFactors = FALSE) ds <- DataframeSource(docs) elem <- getElem(stepNext(ds)) result <- readDataframe(elem, "en", NULL) inspect(result) meta(result) } tm/man/removeSparseTerms.Rd0000644000175100001440000000147312262761010015442 0ustar hornikusers\name{removeSparseTerms} \alias{removeSparseTerms} \title{Remove Sparse Terms from a Term-Document Matrix} \description{ Remove sparse terms from a document-term or term-document matrix. } \usage{ removeSparseTerms(x, sparse) } \arguments{ \item{x}{A \code{\link{DocumentTermMatrix}} or a \code{\link{TermDocumentMatrix}}.} \item{sparse}{A numeric for the maximal allowed sparsity in the range from bigger zero to smaller one.} } \value{ A term-document matrix where those terms from \code{x} are removed which have at least a \code{sparse} percentage of empty (i.e., terms occurring 0 times in a document) elements. I.e., the resulting matrix contains only terms with a sparse factor of less than \code{sparse}. 
} \examples{ data("crude") tdm <- TermDocumentMatrix(crude) removeSparseTerms(tdm, 0.2) } tm/man/weightTfIdf.Rd0000644000175100001440000000267613025174645014200 0ustar hornikusers\name{weightTfIdf} \alias{weightTfIdf} \title{Weight by Term Frequency - Inverse Document Frequency} \description{ Weight a term-document matrix by term frequency - inverse document frequency. } \usage{ weightTfIdf(m, normalize = TRUE) } \arguments{ \item{m}{A \code{\link{TermDocumentMatrix}} in term frequency format.} \item{normalize}{A Boolean value indicating whether the term frequencies should be normalized.} } \details{ Formally this function is of class \code{WeightingFunction} with the additional attributes \code{name} and \code{acronym}. \emph{Term frequency} \eqn{\mathit{tf}_{i,j}} counts the number of occurrences \eqn{n_{i,j}} of a term \eqn{t_i} in a document \eqn{d_j}. In the case of normalization, the term frequency \eqn{\mathit{tf}_{i,j}} is divided by \eqn{\sum_k n_{k,j}}. \emph{Inverse document frequency} for a term \eqn{t_i} is defined as \deqn{\mathit{idf}_i = \log_2 \frac{|D|}{|\{d \mid t_i \in d\}|}} where \eqn{|D|} denotes the total number of documents and where \eqn{|\{d \mid t_i \in d\}|} is the number of documents where the term \eqn{t_i} appears. \emph{Term frequency - inverse document frequency} is now defined as \eqn{\mathit{tf}_{i,j} \cdot \mathit{idf}_i}. } \value{ The weighted matrix. } \references{ Gerard Salton and Christopher Buckley (1988). Term-weighting approaches in automatic text retrieval. \emph{Information Processing and Management}, \bold{24}/5, 513--523. } tm/man/Zipf_n_Heaps.Rd0000644000175100001440000000357612324523350014331 0ustar hornikusers\name{Zipf_n_Heaps} \alias{Zipf_plot} \alias{Heaps_plot} \title{Explore Corpus Term Frequency Characteristics} \description{ Explore Zipf's law and Heaps' law, two empirical laws in linguistics describing commonly observed characteristics of term frequency distributions in corpora. 
} \usage{ Zipf_plot(x, type = "l", \dots) Heaps_plot(x, type = "l", \dots) } \arguments{ \item{x}{a document-term matrix or term-document matrix with unweighted term frequencies.} \item{type}{a character string indicating the type of plot to be drawn, see \code{\link{plot}}.} \item{\dots}{further graphical parameters to be used for plotting.} } \details{ Zipf's law (e.g., \url{http://en.wikipedia.org/wiki/Zipf\%27s_law}) states that given some corpus of natural language utterances, the frequency of any word is inversely proportional to its rank in the frequency table, or, more generally, that the pmf of the term frequencies is of the form \eqn{c k^{-\beta}}, where \eqn{k} is the rank of the term (taken from the most to the least frequent one). We can conveniently explore the degree to which the law holds by plotting the logarithm of the frequency against the logarithm of the rank, and inspecting the goodness of fit of a linear model. Heaps' law (e.g., \url{http://en.wikipedia.org/wiki/Heaps\%27_law}) states that the vocabulary size \eqn{V} (i.e., the number of different terms employed) grows polynomially with the text size \eqn{T} (the total number of terms in the texts), so that \eqn{V = c T^\beta}. We can conveniently explore the degree to which the law holds by plotting \eqn{\log(V)} against \eqn{\log(T)}, and inspecting the goodness of fit of a linear model. } \value{ The coefficients of the fitted linear model. As a side effect, the corresponding plot is produced. } \examples{ data("acq") m <- DocumentTermMatrix(acq) Zipf_plot(m) Heaps_plot(m) } tm/man/crude.Rd0000644000175100001440000000213413041567324013061 0ustar hornikusers\name{crude} \docType{data} \alias{crude} \title{20 Exemplary News Articles from the Reuters-21578 Data Set of Topic crude} \description{ This data set holds 20 news articles with additional meta information from the Reuters-21578 data set. All documents belong to the topic \code{crude} dealing with crude oil. 
} \usage{data("crude")} \format{A \code{\link{VCorpus}} of 20 text documents.} \source{Reuters-21578 Text Categorization Collection Distribution 1.0 (\acronym{XML} format).} \references{ Emms, Martin and Luz, Saturnino (2007). Machine Learning for Natural Language Processing. \emph{European Summer School of Logic, Language and Information, course reader}. \url{https://www.scss.tcd.ie/~luzs/publications/mlfornlp.pdf} Lewis, David (1997) \emph{Reuters-21578 Text Categorization Collection Distribution 1.0}. \url{http://kdd.ics.uci.edu/databases/reuters21578/reuters21578.html} Luz, Saturnino \emph{\acronym{XML}-encoded version of Reuters-21578}. \url{https://www.scss.tcd.ie/~luzs/t/cs4ll4/sw/reuters21578-xml/} } \examples{ data("crude") crude } \keyword{datasets} tm/man/getTransformations.Rd0000644000175100001440000000107212335713251015645 0ustar hornikusers\name{getTransformations} \alias{getTransformations} \title{Transformations} \description{ Predefined transformations (mappings) which can be used with \code{\link{tm_map}}. } \usage{ getTransformations() } \value{ A character vector with transformations provided by package \pkg{tm}. } \seealso{ \code{\link{removeNumbers}}, \code{\link{removePunctuation}}, \code{\link{removeWords}}, \code{\link{stemDocument}}, and \code{\link{stripWhitespace}}. \code{\link{content_transformer}} to create custom transformations. } \examples{ getTransformations() } tm/man/content_transformer.Rd0000644000175100001440000000141212326160010016033 0ustar hornikusers\name{content_transformer} \alias{content_transformer} \title{Content Transformers} \description{ Create content transformers, i.e., functions which modify the content of an \R object. 
} \usage{ content_transformer(FUN) } \arguments{ \item{FUN}{a function.} } \value{ A function with two arguments: \describe{ \item{\code{x}}{an \R object with implemented content getter (\code{\link[NLP]{content}}) and setter (\code{\link{content<-}}) functions.} \item{\code{\dots}}{arguments passed over to \code{FUN}.} } } \seealso{ \code{\link{tm_map}} for an interface to apply transformations to corpora. } \examples{ data("crude") crude[[1]] (f <- content_transformer(function(x, pattern) gsub(pattern, "", x))) tm_map(crude, f, "[[:digit:]]+")[[1]] } tm/man/readPlain.Rd0000644000175100001440000000164612342614656013671 0ustar hornikusers\name{readPlain} \alias{readPlain} \title{Read In a Text Document} \description{ Read in a text document without knowledge about its internal structure and possible available metadata. } \usage{ readPlain(elem, language, id) } \arguments{ \item{elem}{a named list with the component \code{content} which must hold the document to be read in.} \item{language}{a string giving the language.} \item{id}{a character giving a unique identifier for the created text document.} } \value{ A \code{\link{PlainTextDocument}} representing \code{elem$content}. The argument \code{id} is used as fallback if \code{elem$uri} is null. } \seealso{ \code{\link{Reader}} for basic information on the reader infrastructure employed by package \pkg{tm}. } \examples{ docs <- c("This is a text.", "This another one.") vs <- VectorSource(docs) elem <- getElem(stepNext(vs)) (result <- readPlain(elem, "en", "id1")) meta(result) } tm/man/readPDF.Rd0000644000175100001440000000773513101143235013224 0ustar hornikusers\name{readPDF} \alias{readPDF} \title{Read In a PDF Document} \description{ Return a function which reads in a portable document format (\acronym{PDF}) document extracting both its text and its metadata. 
} \usage{ readPDF(engine = c("pdftools", "xpdf", "Rpoppler", "ghostscript", "Rcampdf", "custom"), control = list(info = NULL, text = NULL)) } \arguments{ \item{engine}{a character string for the preferred \acronym{PDF} extraction engine (see \bold{Details}).} \item{control}{a list of control options for the engine with the named components \code{info} and \code{text} (see \bold{Details}).} } \details{ Formally this function is a function generator, i.e., it returns a function (which reads in a text document) with a well-defined signature, but can access passed over arguments (e.g., the preferred \acronym{PDF} extraction \code{engine} and \code{control} options) via lexical scoping. Available \acronym{PDF} extraction engines are as follows. \describe{ \item{\code{"pdftools"}}{(default) Poppler \acronym{PDF} rendering library as provided by the functions \code{\link[pdftools]{pdf_info}} and \code{\link[pdftools]{pdf_text}} in package \pkg{pdftools}.} \item{\code{"xpdf"}}{command line \command{pdfinfo} and \command{pdftotext} executables which must be installed and accessible on your system. Suitable utilities are provided by the Xpdf (\url{http://www.foolabs.com/xpdf/}) \acronym{PDF} viewer or by the Poppler (\url{http://poppler.freedesktop.org/}) \acronym{PDF} rendering library.} \item{\code{"Rpoppler"}}{Poppler \acronym{PDF} rendering library as provided by the functions \code{\link[Rpoppler]{PDF_info}} and \code{\link[Rpoppler]{PDF_text}} in package \pkg{Rpoppler}.} \item{\code{"ghostscript"}}{Ghostscript using \file{pdf_info.ps} and \file{ps2ascii.ps}.} \item{\code{"Rcampdf"}}{Perl CAM::PDF \acronym{PDF} manipulation library as provided by the functions \code{pdf_info} and \code{pdf_text} in package \pkg{Rcampdf}, available from the repository at \url{http://datacube.wu.ac.at}.} \item{\code{"custom"}}{custom user-provided extraction engine.} } Control parameters for engine \code{"xpdf"} are as follows. 
\describe{ \item{\code{info}}{a character vector specifying options passed over to the \command{pdfinfo} executable.} \item{\code{text}}{a character vector specifying options passed over to the \command{pdftotext} executable.} } Control parameters for engine \code{"custom"} are as follows. \describe{ \item{\code{info}}{a function extracting metadata from a \acronym{PDF}. The function must accept a file path as first argument and must return a named list with the components \code{Author} (as character string), \code{CreationDate} (of class \code{POSIXlt}), \code{Subject} (as character string), \code{Title} (as character string), and \code{Creator} (as character string).} \item{\code{text}}{a function extracting content from a \acronym{PDF}. The function must accept a file path as first argument and must return a character vector.} } } \value{ A \code{function} with the following formals: \describe{ \item{\code{elem}}{a named list with the component \code{uri} which must hold a valid file name.} \item{\code{language}}{a string giving the language.} \item{\code{id}}{Not used.} } The function returns a \code{\link{PlainTextDocument}} representing the text and metadata extracted from \code{elem$uri}. } \seealso{ \code{\link{Reader}} for basic information on the reader infrastructure employed by package \pkg{tm}. 
} \examples{ uri <- sprintf("file://\%s", system.file(file.path("doc", "tm.pdf"), package = "tm")) pdf <- readPDF()(elem = list(uri = uri), language = "en", id = "id1") cat(content(pdf)[1]) VCorpus(URISource(uri, mode = ""), readerControl = list(reader = readPDF(engine = "ghostscript"))) } \keyword{file} tm/man/tm_term_score.Rd0000644000175100001440000000322113023472150014607 0ustar hornikusers\name{tm_term_score} \alias{tm_term_score} \alias{tm_term_score.DocumentTermMatrix} \alias{tm_term_score.term_frequency} \alias{tm_term_score.PlainTextDocument} \alias{tm_term_score.TermDocumentMatrix} \title{Compute Score for Matching Terms} \description{ Compute a score based on the number of matching terms. } \usage{ \method{tm_term_score}{DocumentTermMatrix}(x, terms, FUN = row_sums) \method{tm_term_score}{PlainTextDocument}(x, terms, FUN = function(x) sum(x, na.rm = TRUE)) \method{tm_term_score}{term_frequency}(x, terms, FUN = function(x) sum(x, na.rm = TRUE)) \method{tm_term_score}{TermDocumentMatrix}(x, terms, FUN = col_sums) } \arguments{ \item{x}{Either a \code{\link{PlainTextDocument}}, a term frequency as returned by \code{\link{termFreq}}, or a \code{\link{TermDocumentMatrix}}.} \item{terms}{A character vector of terms to be matched.} \item{FUN}{A function computing a score from the number of terms matching in \code{x}.} } \value{ A score as computed by \code{FUN} from the number of matching \code{terms} in \code{x}. 
} \examples{ data("acq") tm_term_score(acq[[1]], c("company", "change")) \dontrun{## Test for positive and negative sentiments ## install.packages("tm.lexicon.GeneralInquirer", repos="http://datacube.wu.ac.at", type="source") require("tm.lexicon.GeneralInquirer") sapply(acq[1:10], tm_term_score, terms_in_General_Inquirer_categories("Positiv")) sapply(acq[1:10], tm_term_score, terms_in_General_Inquirer_categories("Negativ")) tm_term_score(TermDocumentMatrix(acq[1:10], control = list(removePunctuation = TRUE)), terms_in_General_Inquirer_categories("Positiv"))} } tm/man/removeNumbers.Rd0000644000175100001440000000223013161434236014603 0ustar hornikusers\name{removeNumbers} \alias{removeNumbers} \alias{removeNumbers.character} \alias{removeNumbers.PlainTextDocument} \title{Remove Numbers from a Text Document} \description{ Remove numbers from a text document. } \usage{ \method{removeNumbers}{character}(x, ucp = FALSE, \dots) \method{removeNumbers}{PlainTextDocument}(x, \dots) } \arguments{ \item{x}{a character vector or text document.} \item{ucp}{a logical specifying whether to use Unicode character properties for determining digit characters. If \code{FALSE} (default), characters in the ASCII \code{[:digit:]} class (i.e., the decimal digits from 0 to 9) are taken; if \code{TRUE}, the characters with Unicode general category \code{Nd} (Decimal_Number).} \item{\dots}{arguments to be passed to or from methods; in particular, from the \code{PlainTextDocument} method to the \code{character} method.} } \value{ The text document without numbers. } \seealso{ \code{\link{getTransformations}} to list available transformation (mapping) functions. \url{http://unicode.org/reports/tr44/#General_Category_Values}. 
} \examples{ data("crude") crude[[1]] removeNumbers(crude[[1]]) } tm/man/findFreqTerms.Rd0000644000175100001440000000150712262761010014523 0ustar hornikusers\name{findFreqTerms} \alias{findFreqTerms} \title{Find Frequent Terms} \description{ Find frequent terms in a document-term or term-document matrix. } \usage{ findFreqTerms(x, lowfreq = 0, highfreq = Inf) } \arguments{ \item{x}{A \code{\link{DocumentTermMatrix}} or \code{\link{TermDocumentMatrix}}.} \item{lowfreq}{A numeric for the lower frequency bound.} \item{highfreq}{A numeric for the upper frequency bound.} } \value{ A character vector of terms in \code{x} which occur more or equal often than \code{lowfreq} times and less or equal often than \code{highfreq} times. } \details{This method works for all numeric weightings but is probably most meaningful for the standard term frequency (\code{tf}) weighting of \code{x}.} \examples{ data("crude") tdm <- TermDocumentMatrix(crude) findFreqTerms(tdm, 2, 3) } tm/man/DataframeSource.Rd0000644000175100001440000000247313110235235015020 0ustar hornikusers\name{DataframeSource} \alias{DataframeSource} \title{Data Frame Source} \description{ Create a data frame source. } \usage{ DataframeSource(x) } \arguments{ \item{x}{A data frame giving the texts and metadata.} } \details{ A \emph{data frame source} interprets each row of the data frame \code{x} as a document. The first column must be named \code{"doc_id"} and contain a unique string identifier for each document. The second column must be named \code{"text"} and contain a \code{"UTF-8"} encoded string representing the document's content. Optional additional columns are used as document level metadata. } \value{ An object inheriting from \code{DataframeSource}, \code{\link{SimpleSource}}, and \code{\link{Source}}. } \seealso{ \code{\link{Source}} for basic information on the source infrastructure employed by package \pkg{tm}, and \code{\link{meta}} for types of metadata. 
\code{\link[readtext]{readtext}} for reading in a text in multiple formats suitable to be processed by \code{DataframeSource}. } \examples{ docs <- data.frame(doc_id = c("doc_1", "doc_2"), text = c("This is a text.", "This another one."), dmeta1 = 1:2, dmeta2 = letters[1:2], stringsAsFactors = FALSE) (ds <- DataframeSource(docs)) x <- Corpus(ds) inspect(x) meta(x) } tm/man/weightBin.Rd0000644000175100001440000000061313025174645013701 0ustar hornikusers\name{weightBin} \alias{weightBin} \title{Weight Binary} \description{ Binary weight a term-document matrix. } \usage{ weightBin(m) } \arguments{ \item{m}{A \code{\link{TermDocumentMatrix}} in term frequency format.} } \details{ Formally this function is of class \code{WeightingFunction} with the additional attributes \code{name} and \code{acronym}. } \value{ The weighted matrix. } tm/man/weightTf.Rd0000644000175100001440000000101313025174645013535 0ustar hornikusers\name{weightTf} \alias{weightTf} \title{Weight by Term Frequency} \description{ Weight a term-document matrix by term frequency. } \usage{ weightTf(m) } \arguments{ \item{m}{A \code{\link{TermDocumentMatrix}} in term frequency format.} } \details{ Formally this function is of class \code{WeightingFunction} with the additional attributes \code{name} and \code{acronym}. This function acts as the identity function since the input matrix is already in term frequency format. } \value{ The weighted matrix. } tm/man/weightSMART.Rd0000644000175100001440000000654213025174645014066 0ustar hornikusers\name{weightSMART} \alias{weightSMART} \title{SMART Weightings} \encoding{UTF-8} \description{ Weight a term-document matrix according to a combination of weights specified in SMART notation. } \usage{ weightSMART(m, spec = "nnn", control = list()) } \arguments{ \item{m}{A \code{\link{TermDocumentMatrix}} in term frequency format.} \item{spec}{a character string consisting of three characters. 
The first letter specifies a term frequency schema, the second a document frequency schema, and the third a normalization schema. See \bold{Details} for available built-in schemata.} \item{control}{a list of control parameters. See \bold{Details}.} } \details{ Formally this function is of class \code{WeightingFunction} with the additional attributes \code{name} and \code{acronym}. The first letter of \code{spec} specifies a weighting schema for term frequencies of \code{m}: \describe{ \item{"n"}{(natural) \eqn{\mathit{tf}_{i,j}} counts the number of occurrences \eqn{n_{i,j}} of a term \eqn{t_i} in a document \eqn{d_j}. The input term-document matrix \code{m} is assumed to be in this standard term frequency format already.} \item{"l"}{(logarithm) is defined as \eqn{1 + \log_2(\mathit{tf}_{i,j})}.} \item{"a"}{(augmented) is defined as \eqn{0.5 + \frac{0.5 * \mathit{tf}_{i,j}}{\max_i(\mathit{tf}_{i,j})}}.} \item{"b"}{(boolean) is defined as 1 if \eqn{\mathit{tf}_{i,j} > 0} and 0 otherwise.} \item{"L"}{(log average) is defined as \eqn{\frac{1 + \log_2(\mathit{tf}_{i,j})}{1+\log_2(\mathrm{ave}_{i\in j}(\mathit{tf}_{i,j}))}}.} } The second letter of \code{spec} specifies a weighting schema of document frequencies for \code{m}: \describe{ \item{"n"}{(no) is defined as 1.} \item{"t"}{(idf) is defined as \eqn{\log_2 \frac{N}{\mathit{df}_t}} where \eqn{\mathit{df}_t} denotes how often term \eqn{t} occurs in all documents.} \item{"p"}{(prob idf) is defined as \eqn{\max(0, \log_2(\frac{N - \mathit{df}_t}{\mathit{df}_t}))}.} } The third letter of \code{spec} specifies a schema for normalization of \code{m}: \describe{ \item{"n"}{(none) is defined as 1.} \item{"c"}{(cosine) is defined as \eqn{\sqrt{\mathrm{col\_sums}(m ^ 2)}}.} \item{"u"}{(pivoted unique) is defined as \eqn{\mathit{slope} * \sqrt{\mathrm{col\_sums}(m ^ 2)} + (1 - \mathit{slope}) * \mathit{pivot}} where both \code{slope} and \code{pivot} must be set via named tags in the \code{control} list.} \item{"b"}{(byte 
size) is defined as \eqn{\frac{1}{\mathit{CharLength}^\alpha}}. The parameter \eqn{\alpha} must be set via the named tag \code{alpha} in the \code{control} list.} } The final result is defined by multiplication of the chosen term frequency component with the chosen document frequency component with the chosen normalization component. } \value{ The weighted matrix. } \references{ Christopher D. Manning and Prabhakar Raghavan and Hinrich Schütze (2008). \emph{Introduction to Information Retrieval}. Cambridge University Press, ISBN 0521865719. } \examples{ data("crude") TermDocumentMatrix(crude, control = list(removePunctuation = TRUE, stopwords = TRUE, weighting = function(x) weightSMART(x, spec = "ntc"))) } tm/man/removePunctuation.Rd0000644000175100001440000000375713161434267015524 0ustar hornikusers\name{removePunctuation} \alias{removePunctuation} \alias{removePunctuation.character} \alias{removePunctuation.PlainTextDocument} \title{Remove Punctuation Marks from a Text Document} \description{ Remove punctuation marks from a text document. } \usage{ \method{removePunctuation}{character}(x, preserve_intra_word_contractions = FALSE, preserve_intra_word_dashes = FALSE, ucp = FALSE, \dots) \method{removePunctuation}{PlainTextDocument}(x, \dots) } \arguments{ \item{x}{a character vector or text document.} \item{preserve_intra_word_contractions}{a logical specifying whether intra-word contractions should be kept.} \item{preserve_intra_word_dashes}{a logical specifying whether intra-word dashes should be kept.} \item{ucp}{a logical specifying whether to use Unicode character properties for determining punctuation characters. 
If \code{FALSE} (default), characters in the ASCII \code{[:punct:]} class are taken; if \code{TRUE}, the characters with Unicode general category \code{P} (Punctuation).} \item{\dots}{arguments to be passed to or from methods; in particular, from the \code{PlainTextDocument} method to the \code{character} method.} } \value{ The character or text document \code{x} without punctuation marks (besides intra-word contractions (\samp{'}) and intra-word dashes (\samp{-}) if \code{preserve_intra_word_contractions} and \code{preserve_intra_word_dashes} are set, respectively). } \seealso{ \code{\link{getTransformations}} to list available transformation (mapping) functions. \code{\link{regex}} shows the class \code{[:punct:]} of punctuation characters. \url{http://unicode.org/reports/tr44/#General_Category_Values}. } \examples{ data("crude") inspect(crude[[14]]) inspect(removePunctuation(crude[[14]])) inspect(removePunctuation(crude[[14]], preserve_intra_word_contractions = TRUE, preserve_intra_word_dashes = TRUE)) } tm/man/readRCV1.Rd0000644000175100001440000000236413155253051013326 0ustar hornikusers\name{readRCV1} \alias{readRCV1} \alias{readRCV1asPlain} \title{Read In a Reuters Corpus Volume 1 Document} \description{ Read in a Reuters Corpus Volume 1 \acronym{XML} document. } \usage{ readRCV1(elem, language, id) readRCV1asPlain(elem, language, id) } \arguments{ \item{elem}{a named list with the component \code{content} which must hold the document to be read in.} \item{language}{a string giving the language.} \item{id}{Not used.} } \value{ An \code{\link{XMLTextDocument}} for \code{readRCV1}, or a \code{\link{PlainTextDocument}} for \code{readRCV1asPlain}, representing the text and metadata extracted from \code{elem$content}. } \seealso{ \code{\link{Reader}} for basic information on the reader infrastructure employed by package \pkg{tm}. } \references{ Lewis, D. D.; Yang, Y.; Rose, T.; and Li, F (2004). RCV1: A New Benchmark Collection for Text Categorization Research. 
\emph{Journal of Machine Learning Research}, \bold{5}, 361--397. \url{http://www.jmlr.org/papers/volume5/lewis04a/lewis04a.pdf} } \examples{ f <- system.file("texts", "rcv1_2330.xml", package = "tm") f_bin <- readBin(f, raw(), file.info(f)$size) rcv1 <- readRCV1(elem = list(content = f_bin), language = "en", id = "id1") content(rcv1) meta(rcv1) } tm/man/VCorpus.Rd0000644000175100001440000000255313177025175013370 0ustar hornikusers\name{VCorpus} \alias{VCorpus} \alias{as.VCorpus} \title{Volatile Corpora} \description{ Create volatile corpora. } \usage{ VCorpus(x, readerControl = list(reader = reader(x), language = "en")) as.VCorpus(x) } \arguments{ \item{x}{For \code{VCorpus} a \code{\link{Source}} object, and for \code{as.VCorpus} an \R object.} \item{readerControl}{a named list of control parameters for reading in content from \code{x}. \describe{ \item{\code{reader}}{a function capable of reading in and processing the format delivered by \code{x}.} \item{\code{language}}{a character giving the language (preferably as \acronym{IETF} language tags, see \link[NLP]{language} in package \pkg{NLP}). The default language is assumed to be English (\code{"en"}).} } } } \value{ An object inheriting from \code{VCorpus} and \code{Corpus}. } \details{ A \emph{volatile corpus} is fully kept in memory and thus all changes only affect the corresponding \R object. } \seealso{ \code{\link{Corpus}} for basic information on the corpus infrastructure employed by package \pkg{tm}. \code{\link{PCorpus}} provides an implementation with permanent storage semantics. } \examples{ reut21578 <- system.file("texts", "crude", package = "tm") VCorpus(DirSource(reut21578, mode = "binary"), list(reader = readReut21578XMLasPlain)) } tm/man/stemCompletion.Rd0000644000175100001440000000270613177025300014757 0ustar hornikusers\name{stemCompletion} \alias{stemCompletion} \title{Complete Stems} \description{ Heuristically complete stemmed words. 
} \usage{ stemCompletion(x, dictionary, type = c("prevalent", "first", "longest", "none", "random", "shortest")) } \arguments{ \item{x}{A character vector of stems to be completed.} \item{dictionary}{A \code{\link{Corpus}} or character vector to be searched for possible completions.} \item{type}{A \code{character} naming the heuristics to be used: \describe{ \item{\code{prevalent}}{Default. Takes the most frequent match as completion.} \item{\code{first}}{Takes the first found completion.} \item{\code{longest}}{Takes the longest completion in terms of characters.} \item{\code{none}}{Is the identity.} \item{\code{random}}{Takes some completion.} \item{\code{shortest}}{Takes the shortest completion in terms of characters.} } } } \value{ A character vector with completed words. } \examples{ data("crude") stemCompletion(c("compan", "entit", "suppl"), crude) } \references{ Ingo Feinerer (2010). Analysis and Algorithms for Stemming Inversion. \emph{Information Retrieval Technology --- 6th Asia Information Retrieval Societies Conference, AIRS 2010, Taipei, Taiwan, December 1--3, 2010. Proceedings}, volume 6458 of \emph{Lecture Notes in Computer Science}, pages 290--299. Springer-Verlag, December 2010. } tm/man/TextDocument.Rd0000644000175100001440000000204013176776017014410 0ustar hornikusers\name{TextDocument} \alias{TextDocument} \title{Text Documents} \description{ Representing and computing on text documents. } \details{ \emph{Text documents} are documents containing (natural language) text. The \pkg{tm} package employs the infrastructure provided by package \pkg{NLP} and represents text documents via the virtual S3 class \code{TextDocument}. Actual S3 text document classes then extend the virtual base class (such as \code{\link{PlainTextDocument}}). 
All extension classes must provide an \code{\link{as.character}} method which extracts the natural language text in documents of the respective classes in a \dQuote{suitable} (not necessarily structured) form, as well as \code{\link{content}} and \code{\link{meta}} methods for accessing the (possibly raw) document content and metadata. } \seealso{ \code{\link{PlainTextDocument}}, and \code{\link{XMLTextDocument}} for the text document classes provided by package \pkg{tm}. \code{\link[NLP]{TextDocument}} for text documents in package \pkg{NLP}. } tm/man/acq.Rd0000644000175100001440000000213213041567324012521 0ustar hornikusers\name{acq} \docType{data} \alias{acq} \title{50 Exemplary News Articles from the Reuters-21578 Data Set of Topic acq} \description{ This dataset holds 50 news articles with additional meta information from the Reuters-21578 data set. All documents belong to the topic \code{acq} dealing with corporate acquisitions. } \usage{data("acq")} \format{A \code{\link{VCorpus}} of 50 text documents.} \source{Reuters-21578 Text Categorization Collection Distribution 1.0 (\acronym{XML} format).} \references{ Emms, Martin and Luz, Saturnino (2007). Machine Learning for Natural Language Processing. \emph{European Summer School of Logic, Language and Information, course reader}. \url{https://www.scss.tcd.ie/~luzs/publications/mlfornlp.pdf} Lewis, David (1997) \emph{Reuters-21578 Text Categorization Collection Distribution 1.0}. \url{http://kdd.ics.uci.edu/databases/reuters21578/reuters21578.html} Luz, Saturnino \emph{\acronym{XML}-encoded version of Reuters-21578}. 
\url{https://www.scss.tcd.ie/~luzs/t/cs4ll4/sw/reuters21578-xml/} } \examples{ data("acq") acq } \keyword{datasets} tm/man/writeCorpus.Rd0000644000175100001440000000147312327470624014314 0ustar hornikusers\name{writeCorpus} \alias{writeCorpus} \title{Write a Corpus to Disk} \description{ Write a plain text representation of a corpus to multiple files on disk corresponding to the individual documents in the corpus. } \usage{ writeCorpus(x, path = ".", filenames = NULL) } \arguments{ \item{x}{A corpus.} \item{path}{A character listing the directory to be written into.} \item{filenames}{Either \code{NULL} or a character vector. In case no filenames are provided, filenames are automatically generated by using the documents' identifiers in \code{x}.} } \details{ The plain text representation of the corpus is obtained by calling \code{as.character} on each document. } \examples{ data("crude") \dontrun{writeCorpus(crude, path = ".", filenames = paste(seq_along(crude), ".txt", sep = ""))} } tm/man/stemDocument.Rd0000644000175100001440000000127112743665047014440 0ustar hornikusers\name{stemDocument} \alias{stemDocument} \alias{stemDocument.character} \alias{stemDocument.PlainTextDocument} \title{Stem Words} \description{ Stem words in a text document using Porter's stemming algorithm. } \usage{ \method{stemDocument}{character}(x, language = "english") \method{stemDocument}{PlainTextDocument}(x, language = meta(x, "language")) } \arguments{ \item{x}{A character vector or text document.} \item{language}{A string giving the language for stemming.} } \details{ The argument \code{language} is passed over to \code{\link[SnowballC]{wordStem}} as the name of the Snowball stemmer. } \examples{ data("crude") inspect(crude[[1]]) inspect(stemDocument(crude[[1]])) } tm/man/Reader.Rd0000644000175100001440000000366313176776017013203 0ustar hornikusers\name{Reader} \alias{FunctionGenerator} \alias{Reader} \alias{getReaders} \title{Readers} \description{ Creating readers. 
} \usage{ getReaders() } \details{ \emph{Readers} are functions for extracting textual content and metadata out of elements delivered by a \code{\link{Source}}, and for constructing a \code{\link{TextDocument}}. A reader must accept following arguments in its signature: \describe{ \item{\code{elem}}{a named list with the components \code{content} and \code{uri} (as delivered by a \code{\link{Source}} via \code{\link{getElem}} or \code{\link{pGetElem}}).} \item{\code{language}}{a character string giving the language.} \item{\code{id}}{a character giving a unique identifier for the created text document.} } The element \code{elem} is typically provided by a source whereas the language and the identifier are normally provided by a corpus constructor (for the case that \code{elem$content} does not give information on these two essential items). In case a reader expects configuration arguments we can use a function generator. A function generator is indicated by inheriting from class \code{FunctionGenerator} and \code{function}. It allows us to process additional arguments, store them in an environment, return a reader function with the well-defined signature described above, and still be able to access the additional arguments via lexical scoping. All corpus constructors in package \pkg{tm} check the reader function for being a function generator and if so apply it to yield the reader with the expected signature. } \value{ For \code{getReaders()}, a character vector with readers provided by package \pkg{tm}. } \seealso{ \code{\link{readDOC}}, \code{\link{readPDF}}, \code{\link{readPlain}}, \code{\link{readRCV1}}, \code{\link{readRCV1asPlain}}, \code{\link{readReut21578XML}}, \code{\link{readReut21578XMLasPlain}}, and \code{\link{readXML}}. 
} tm/man/tokenizer.Rd0000644000175100001440000000346213164112376013775 0ustar hornikusers\name{tokenizer} \alias{Boost_tokenizer} \alias{MC_tokenizer} \alias{scan_tokenizer} \title{Tokenizers} \description{Tokenize a document or character vector.} \usage{ Boost_tokenizer(x) MC_tokenizer(x) scan_tokenizer(x) } \arguments{ \item{x}{A character vector, or an object that can be coerced to character by \code{as.character}.} } \value{ A character vector consisting of tokens obtained by tokenization of \code{x}. } \details{ The quality and correctness of a tokenization algorithm highly depends on the context and application scenario. Relevant factors are the language of the underlying text and the notions of whitespace (which can vary with the used encoding and the language) and punctuation marks. Consequently, for superior results you probably need a custom tokenization function. \describe{ \item{Boost_tokenizer}{Uses the Boost (\url{http://www.boost.org}) Tokenizer (via \pkg{Rcpp}).} \item{MC_tokenizer}{Implements the functionality of the tokenizer in the MC toolkit (\url{http://www.cs.utexas.edu/users/dml/software/mc/}).} \item{scan_tokenizer}{Simulates \code{scan(\dots, what = "character")}.} } } \seealso{ \code{\link{getTokenizers}} to list tokenizers provided by package \pkg{tm}. \code{\link[NLP]{Regexp_Tokenizer}} for tokenizers using regular expressions provided by package \pkg{NLP}. \code{\link[tau]{tokenize}} for a simple regular expression based tokenizer provided by package \pkg{tau}. \code{\link[tokenizers]{tokenizers}} for a collection of tokenizers provided by package \pkg{tokenizers}. 
} \examples{ data("crude") Boost_tokenizer(crude[[1]]) MC_tokenizer(crude[[1]]) scan_tokenizer(crude[[1]]) strsplit_space_tokenizer <- function(x) unlist(strsplit(as.character(x), "[[:space:]]+")) strsplit_space_tokenizer(crude[[1]]) } tm/man/termFreq.Rd0000644000175100001440000001064513177025323013550 0ustar hornikusers\name{termFreq} \alias{termFreq} \title{Term Frequency Vector} \description{ Generate a term frequency vector from a text document. } \usage{ termFreq(doc, control = list()) } \arguments{ \item{doc}{An object inheriting from \code{\link{TextDocument}} or a character vector.} \item{control}{A list of control options which override default settings. First, following two options are processed. \describe{ \item{\code{tokenize}}{A function tokenizing a \code{\link{TextDocument}} into single tokens, a \code{\link[NLP]{Span_Tokenizer}}, \code{\link[NLP]{Token_Tokenizer}}, or a string matching one of the predefined tokenization functions: \describe{ \item{\code{"Boost"}}{for \code{\link{Boost_tokenizer}}, or} \item{\code{"MC"}}{for \code{\link{MC_tokenizer}}, or} \item{\code{"scan"}}{for \code{\link{scan_tokenizer}}, or} \item{\code{"words"}}{for \code{\link{words}}.} } Defaults to \code{\link{words}}.} \item{\code{tolower}}{Either a logical value indicating whether characters should be translated to lower case or a custom function converting characters to lower case. Defaults to \code{\link{tolower}}.} } Next, a set of options which are sensitive to the order of occurrence in the \code{control} list. Options are processed in the same order as specified. User-specified options have precedence over the default ordering so that first all user-specified options and then all remaining options (with the default settings and in the order as listed below) are processed. 
\describe{ \item{\code{language}}{A character giving the language (preferably as \acronym{IETF} language tags, see \link[NLP]{language} in package \pkg{NLP}) to be used for \code{stopwords} and \code{stemming} if not provided by \code{doc}.} \item{\code{removePunctuation}}{A logical value indicating whether punctuation characters should be removed from \code{doc}, a custom function which performs punctuation removal, or a list of arguments for \code{\link{removePunctuation}}. Defaults to \code{FALSE}.} \item{\code{removeNumbers}}{A logical value indicating whether numbers should be removed from \code{doc} or a custom function for number removal. Defaults to \code{FALSE}.} \item{\code{stopwords}}{Either a Boolean value indicating stopword removal using default language specific stopword lists shipped with this package, a character vector holding custom stopwords, or a custom function for stopword removal. Defaults to \code{FALSE}.} \item{\code{stemming}}{Either a Boolean value indicating whether tokens should be stemmed or a custom stemming function. Defaults to \code{FALSE}.} } Finally, following options are processed in the given order. \describe{ \item{\code{dictionary}}{A character vector to be tabulated against. No other terms will be listed in the result. Defaults to \code{NULL} which means that all terms in \code{doc} are listed.} \item{\code{bounds}}{A list with a tag \code{local} whose value must be an integer vector of length 2. Terms that appear less often in \code{doc} than the lower bound \code{bounds$local[1]} or more often than the upper bound \code{bounds$local[2]} are discarded. Defaults to \code{list(local = c(1, Inf))} (i.e., every token will be used).} \item{\code{wordLengths}}{An integer vector of length 2. Words shorter than the minimum word length \code{wordLengths[1]} or longer than the maximum word length \code{wordLengths[2]} are discarded. 
Defaults to \code{c(3, Inf)}, i.e., a minimum word length of 3 characters.} } } } \value{ A table of class \code{c("term_frequency", "table")} with term frequencies as values and tokens as names. } \seealso{ \code{\link{getTokenizers}} } \examples{ data("crude") termFreq(crude[[14]]) strsplit_space_tokenizer <- function(x) unlist(strsplit(as.character(x), "[[:space:]]+")) ctrl <- list(tokenize = strsplit_space_tokenizer, removePunctuation = list(preserve_intra_word_dashes = TRUE), stopwords = c("reuter", "that"), stemming = TRUE, wordLengths = c(4, Inf)) termFreq(crude[[14]], control = ctrl) } \keyword{math} tm/man/findAssocs.Rd0000644000175100001440000000211712262463076014057 0ustar hornikusers\name{findAssocs} \alias{findAssocs} \alias{findAssocs.DocumentTermMatrix} \alias{findAssocs.TermDocumentMatrix} \title{Find Associations in a Term-Document Matrix} \description{ Find associations in a document-term or term-document matrix. } \usage{ \method{findAssocs}{DocumentTermMatrix}(x, terms, corlimit) \method{findAssocs}{TermDocumentMatrix}(x, terms, corlimit) } \arguments{ \item{x}{A \code{\link{DocumentTermMatrix}} or a \code{\link{TermDocumentMatrix}}.} \item{terms}{a character vector holding terms.} \item{corlimit}{a numeric vector (of the same length as \code{terms}; recycled otherwise) for the (inclusive) lower correlation limits of each term in the range from zero to one.} } \value{A named list. Each list component is named after a term in \code{terms} and contains a named numeric vector. 
Each vector holds matching terms from \code{x} and their rounded correlations satisfying the inclusive lower correlation limit of \code{corlimit}.} \examples{ data("crude") tdm <- TermDocumentMatrix(crude) findAssocs(tdm, c("oil", "opec", "xyz"), c(0.7, 0.75, 0.1)) } tm/man/Source.Rd0000644000175100001440000001055313176776017013235 0ustar hornikusers\name{Source} \alias{Source} \alias{SimpleSource} \alias{close.SimpleSource} \alias{eoi} \alias{eoi.SimpleSource} \alias{getMeta} \alias{getMeta.DataframeSource} \alias{getElem} \alias{getElem.DataframeSource} \alias{getElem.DirSource} \alias{getElem.URISource} \alias{getElem.VectorSource} \alias{getElem.XMLSource} \alias{getSources} \alias{length.SimpleSource} \alias{open.SimpleSource} \alias{pGetElem} \alias{pGetElem.DataframeSource} \alias{pGetElem.DirSource} \alias{pGetElem.URISource} \alias{pGetElem.VectorSource} \alias{reader} \alias{reader.SimpleSource} \alias{Source} \alias{stepNext} \alias{stepNext.SimpleSource} \title{Sources} \description{ Creating and accessing sources. } \usage{ SimpleSource(encoding = "", length = 0, position = 0, reader = readPlain, \dots, class) getSources() \method{close}{SimpleSource}(con, \dots) \method{eoi}{SimpleSource}(x) \method{getMeta}{DataframeSource}(x) \method{getElem}{DataframeSource}(x) \method{getElem}{DirSource}(x) \method{getElem}{URISource}(x) \method{getElem}{VectorSource}(x) \method{getElem}{XMLSource}(x) \method{length}{SimpleSource}(x) \method{open}{SimpleSource}(con, \dots) \method{pGetElem}{DataframeSource}(x) \method{pGetElem}{DirSource}(x) \method{pGetElem}{URISource}(x) \method{pGetElem}{VectorSource}(x) \method{reader}{SimpleSource}(x) \method{stepNext}{SimpleSource}(x) } \arguments{ \item{x}{A \code{Source}.} \item{con}{A \code{Source}.} \item{encoding}{a character giving the encoding of the elements delivered by the source.} \item{length}{a non-negative integer denoting the number of elements delivered by the source. 
If the length is unknown in advance set it to \code{0}.} \item{position}{a numeric indicating the current position in the source.} \item{reader}{a reader function (generator).} \item{\dots}{For \code{SimpleSource} tag-value pairs for storing additional information; not used otherwise.} \item{class}{a character vector giving additional classes to be used for the created source.} } \details{ \emph{Sources} abstract input locations, like a directory, a connection, or simply an \R vector, in order to acquire content in a uniform way. In packages which employ the infrastructure provided by package \pkg{tm}, such sources are represented via the virtual S3 class \code{Source}: such packages then provide S3 source classes extending the virtual base class (such as \code{\link{DirSource}} provided by package \pkg{tm} itself). All extension classes must provide implementations for the functions \code{close}, \code{eoi}, \code{getElem}, \code{length}, \code{open}, \code{reader}, and \code{stepNext}. For parallel element access the (optional) function \code{pGetElem} must be provided as well. If document level metadata is available, the (optional) function \code{getMeta} must be implemented. The functions \code{open} and \code{close} open and close the source, respectively. \code{eoi} indicates end of input. \code{getElem} fetches the element at the current position, whereas \code{pGetElem} retrieves all elements in parallel at once. The function \code{length} gives the number of elements. \code{reader} returns a default reader for processing elements. \code{stepNext} increases the position in the source to acquire the next element. The function \code{SimpleSource} provides a simple reference implementation and can be used when creating custom sources. } \value{ For \code{SimpleSource}, an object inheriting from \code{class}, \code{SimpleSource}, and \code{Source}. For \code{getSources}, a character vector with sources provided by package \pkg{tm}. 
\code{open} and \code{close} return the opened and closed source, respectively. For \code{eoi}, a logical indicating if the end of input of the source is reached. For \code{getElem} a named list with the components \code{content} holding the document and \code{uri} giving a uniform resource identifier (e.g., a file path or \acronym{URL}; \code{NULL} if not applicable or unavailable). For \code{pGetElem} a list of such named lists. For \code{length}, an integer for the number of elements. For \code{reader}, a function for the default reader. } \seealso{ \code{\link{DataframeSource}}, \code{\link{DirSource}}, \code{\link{URISource}}, \code{\link{VectorSource}}, and \code{\link{XMLSource}}. } tm/man/XMLTextDocument.Rd0000644000175100001440000000422513177023776014776 0ustar hornikusers\name{XMLTextDocument} \alias{XMLTextDocument} \title{XML Text Documents} \description{ Create \acronym{XML} text documents. } \usage{ XMLTextDocument(x = xml_missing(), author = character(0), datetimestamp = as.POSIXlt(Sys.time(), tz = "GMT"), description = character(0), heading = character(0), id = character(0), language = character(0), origin = character(0), \dots, meta = NULL) } \arguments{ \item{x}{An \code{\link[xml2:read_xml]{XMLDocument}}.} \item{author}{a character or an object of class \code{\link{person}} giving the author names.} \item{datetimestamp}{an object of class \code{\link{POSIXt}} or a character string giving the creation date/time information. If a character string, exactly one of the \acronym{ISO} 8601 formats defined by \url{http://www.w3.org/TR/NOTE-datetime} should be used. See \code{\link[NLP]{parse_ISO_8601_datetime}} in package \pkg{NLP} for processing such date/time information. 
} \item{description}{a character giving a description.} \item{heading}{a character giving the title or a short heading.} \item{id}{a character giving a unique identifier.} \item{language}{a character giving the language (preferably as \acronym{IETF} language tags, see \link[NLP]{language} in package \pkg{NLP}).} \item{origin}{a character giving information on the source and origin.} \item{\dots}{user-defined document metadata tag-value pairs.} \item{meta}{a named list or \code{NULL} (default) giving all metadata. If set all other metadata arguments are ignored.} } \value{ An object inheriting from \code{XMLTextDocument} and \code{\link{TextDocument}}. } \seealso{ \code{\link{TextDocument}} for basic information on the text document infrastructure employed by package \pkg{tm}. } \examples{ xml <- system.file("extdata", "order-doc.xml", package = "xml2") (xtd <- XMLTextDocument(xml2::read_xml(xml), heading = "XML text document", id = xml, language = "en")) content(xtd) meta(xtd) } tm/man/plot.Rd0000644000175100001440000000257612217235156012746 0ustar hornikusers\name{plot} \alias{plot.TermDocumentMatrix} \title{Visualize a Term-Document Matrix} \description{ Visualize correlations between terms of a term-document matrix. } \usage{ \method{plot}{TermDocumentMatrix}(x, terms = sample(Terms(x), 20), corThreshold = 0.7, weighting = FALSE, attrs = list(graph = list(rankdir = "BT"), node = list(shape = "rectangle", fixedsize = FALSE)), \dots) } \arguments{ \item{x}{A term-document matrix.} \item{terms}{Terms to be plotted. Defaults to 20 randomly chosen terms of the term-document matrix.} \item{corThreshold}{Do not plot correlations below this threshold. 
Defaults to \code{0.7}.} \item{weighting}{Define whether the line width corresponds to the correlation.} \item{attrs}{Argument passed to the plot method for class \code{\link[graph:graphNEL-class]{graphNEL}}.} \item{\dots}{Other arguments passed to the \code{\link[graph:graphNEL-class]{graphNEL}} plot method.} } \details{ Visualization requires that package \pkg{Rgraphviz} is available. } \examples{ \dontrun{data(crude) tdm <- TermDocumentMatrix(crude, control = list(removePunctuation = TRUE, removeNumbers = TRUE, stopwords = TRUE)) plot(tdm, corThreshold = 0.2, weighting = TRUE)} } tm/man/inspect.Rd0000644000175100001440000000120212623274522013417 0ustar hornikusers\name{inspect} \alias{inspect} \alias{inspect.PCorpus} \alias{inspect.TermDocumentMatrix} \alias{inspect.TextDocument} \alias{inspect.VCorpus} \title{Inspect Objects} \description{ Inspect, i.e., display detailed information on a corpus, a term-document matrix, or a text document. } \usage{ \method{inspect}{PCorpus}(x) \method{inspect}{VCorpus}(x) \method{inspect}{TermDocumentMatrix}(x) \method{inspect}{TextDocument}(x) } \arguments{ \item{x}{Either a corpus, a term-document matrix, or a text document.} } \examples{ data("crude") inspect(crude[1:3]) inspect(crude[[1]]) tdm <- TermDocumentMatrix(crude)[1:10, 1:10] inspect(tdm) } tm/man/stopwords.Rd0000644000175100001440000000355013164112376014025 0ustar hornikusers\name{stopwords} \alias{stopwords} \title{Stopwords} \description{ Return various kinds of stopwords with support for different languages. 
} \usage{ stopwords(kind = "en") } \arguments{ \item{kind}{A character string identifying the desired stopword list.} } \details{ Available stopword lists are: \describe{ \item{\code{catalan}}{Catalan stopwords (obtained from \url{http://latel.upf.edu/morgana/altres/pub/ca_stop.htm}),} \item{\code{romanian}}{Romanian stopwords (extracted from \url{http://snowball.tartarus.org/otherapps/romanian/romanian1.tgz}),} \item{\code{SMART}}{English stopwords from the SMART information retrieval system (as documented in Appendix 11 of \url{http://jmlr.csail.mit.edu/papers/volume5/lewis04a/}) (which coincides with the stopword list used by the MC toolkit (\url{http://www.cs.utexas.edu/users/dml/software/mc/})),} } and a set of stopword lists from the Snowball stemmer project in different languages (obtained from \samp{http://svn.tartarus.org/snowball/trunk/website/algorithms/*/stop.txt}). Supported languages are \code{danish}, \code{dutch}, \code{english}, \code{finnish}, \code{french}, \code{german}, \code{hungarian}, \code{italian}, \code{norwegian}, \code{portuguese}, \code{russian}, \code{spanish}, and \code{swedish}. Language names are case sensitive. Alternatively, their \acronym{IETF} language tags may be used. % % Earlier \pkg{tm} versions (before 2013-06-14) used merged stopword lists from % \url{http://www.ranks.nl/resources/stopwords.html} and the Snowball stemmer % project. } \value{A character vector containing the requested stopwords. An error is raised if no stopwords are available for the requested \code{kind}.} \examples{ stopwords("en") stopwords("SMART") stopwords("german") } \keyword{file} tm/man/DirSource.Rd0000644000175100001440000000336312326753523013666 0ustar hornikusers\name{DirSource} \alias{DirSource} \title{Directory Source} \description{ Create a directory source. 
} \usage{ DirSource(directory = ".", encoding = "", pattern = NULL, recursive = FALSE, ignore.case = FALSE, mode = "text") } \arguments{ \item{directory}{A character vector of full path names; the default corresponds to the working directory \code{getwd()}.} \item{encoding}{a character string describing the current encoding. It is passed to \code{\link{iconv}} to convert the input to UTF-8.} \item{pattern}{an optional regular expression. Only file names which match the regular expression will be returned.} \item{recursive}{logical. Should the listing recurse into directories?} \item{ignore.case}{logical. Should pattern-matching be case-insensitive?} \item{mode}{a character string specifying if and how files should be read in. Available modes are: \describe{ \item{\code{""}}{No read. In this case \code{\link{getElem}} and \code{\link{pGetElem}} only deliver \acronym{URI}s.} \item{\code{"binary"}}{Files are read in binary raw mode (via \code{\link{readBin}}).} \item{\code{"text"}}{Files are read as text (via \code{\link{readLines}}).} } } } \details{ A \emph{directory source} acquires a list of files via \code{\link{dir}} and interprets each file as a document. } \value{ An object inheriting from \code{DirSource}, \code{\link{SimpleSource}}, and \code{\link{Source}}. } \seealso{ \code{\link{Source}} for basic information on the source infrastructure employed by package \pkg{tm}. \code{\link{Encoding}} and \code{\link{iconv}} on encodings. } \examples{ DirSource(system.file("texts", "txt", package = "tm")) } tm/man/WeightFunction.Rd0000644000175100001440000000142612324523350014711 0ustar hornikusers\name{WeightFunction} \alias{WeightFunction} \title{Weighting Function} \description{ Construct a weighting function for term-document matrices. 
} \usage{ WeightFunction(x, name, acronym) } \arguments{ \item{x}{A function which takes a \code{\link{TermDocumentMatrix}} with term frequencies as input, weights the elements, and returns the weighted matrix.} \item{name}{A character naming the weighting function.} \item{acronym}{A character giving an acronym for the name of the weighting function.} } \value{ An object of class \code{WeightFunction} which extends the class \code{function} representing a weighting function. } \examples{ weightCutBin <- WeightFunction(function(m, cutoff) m > cutoff, "binary with cutoff", "bincut") } tm/man/ZipSource.Rd0000644000175100001440000000322113177024653013703 0ustar hornikusers\name{ZipSource} \alias{ZipSource} \title{ZIP File Source} \description{ Create a ZIP file source. } \usage{ ZipSource(zipfile, pattern = NULL, recursive = FALSE, ignore.case = FALSE, mode = "text") } \arguments{ \item{zipfile}{A character string with the full path name of a ZIP file.} \item{pattern}{an optional regular expression. Only file names in the ZIP file which match the regular expression will be returned.} \item{recursive}{logical. Should the listing recurse into directories?} \item{ignore.case}{logical. Should pattern-matching be case-insensitive?} \item{mode}{a character string specifying if and how files should be read in. Available modes are: \describe{ \item{\code{""}}{No read. In this case \code{\link{getElem}} and \code{\link{pGetElem}} only deliver \acronym{URI}s.} \item{\code{"binary"}}{Files are read in binary raw mode (via \code{\link{readBin}}).} \item{\code{"text"}}{Files are read as text (via \code{\link{readLines}}).} } } } \details{ A \emph{ZIP file source} extracts a compressed ZIP file via \code{\link{unzip}} and interprets each file as a document. } \value{ An object inheriting from \code{ZipSource}, \code{\link{SimpleSource}}, and \code{\link{Source}}. } \seealso{ \code{\link{Source}} for basic information on the source infrastructure employed by package \pkg{tm}. 
} \examples{ zipfile <- tempfile() files <- Sys.glob(file.path(system.file("texts", "txt", package = "tm"), "*")) zip(zipfile, files) zipfile <- paste0(zipfile, ".zip") Corpus(ZipSource(zipfile, recursive = TRUE))[[1]] file.remove(zipfile) } tm/man/findMostFreqTerms.Rd0000644000175100001440000000322713023461533015372 0ustar hornikusers\name{findMostFreqTerms} \alias{findMostFreqTerms} \alias{findMostFreqTerms.term_frequency} \alias{findMostFreqTerms.DocumentTermMatrix} \alias{findMostFreqTerms.TermDocumentMatrix} \title{Find Most Frequent Terms} \description{ Find most frequent terms in a document-term or term-document matrix, or a vector of term frequencies. } \usage{ findMostFreqTerms(x, n = 6L, ...) \S3method{findMostFreqTerms}{DocumentTermMatrix}(x, n = 6L, INDEX = NULL, ...) \S3method{findMostFreqTerms}{TermDocumentMatrix}(x, n = 6L, INDEX = NULL, ...) } \arguments{ \item{x}{A \code{\link{DocumentTermMatrix}} or \code{\link{TermDocumentMatrix}}, or a vector of term frequencies as obtained by \code{\link{termFreq}()}.} \item{n}{A single integer giving the maximal number of terms.} \item{INDEX}{an object specifying a grouping of documents for rollup, or \code{NULL} (default) in which case each document is considered individually.} \item{...}{arguments to be passed to or from methods.} } \value{ For the document-term or term-document matrix methods, a list with the named frequencies of the up to \code{n} most frequent terms occurring in each document (group). Otherwise, a single such vector of most frequent terms. } \details{ Only terms with positive frequencies are included in the results. } \examples{ data("crude") ## Term frequencies: tf <- termFreq(crude[[14L]]) findMostFreqTerms(tf) ## Document-term matrices: dtm <- DocumentTermMatrix(crude) ## Most frequent terms for each document: findMostFreqTerms(dtm) ## Most frequent terms for the first 10 the second 10 documents, ## respectively: findMostFreqTerms(dtm, INDEX = rep(1 : 2, each = 10L)) }