pax_global_header00006660000000000000000000000064134047477560014533gustar00rootroot0000000000000052 comment=cb98f11ddced07d66f82ffb0e9c49b90b871d5bd kiwix-lib-3.1.1/000077500000000000000000000000001340474775600134345ustar00rootroot00000000000000kiwix-lib-3.1.1/.clang-format000066400000000000000000000005101340474775600160030ustar00rootroot00000000000000BasedOnStyle: Google BinPackArguments: false BinPackParameters: false BreakBeforeBinaryOperators: All BreakBeforeBraces: Linux DerivePointerAlignment: false SpacesInContainerLiterals: false Standard: Cpp11 AllowShortFunctionsOnASingleLine: Inline AllowShortIfStatementsOnASingleLine: false AllowShortLoopsOnASingleLine: false kiwix-lib-3.1.1/.travis.yml000066400000000000000000000013101340474775600155400ustar00rootroot00000000000000language: cpp dist: trusty sudo: false cache: ccache before_install: - PATH=$PATH:$HOME/bin install: travis/install_deps.sh script: travis/compile.sh env: matrix: - PLATFORM="native_static" - PLATFORM="native_dyn" - PLATFORM="win32_static" - PLATFORM="win32_dyn" - PLATFORM="android_arm" - PLATFORM="android_arm64" addons: apt: packages: - cmake - python3-pip - libbz2-dev - ccache - zlib1g-dev - uuid-dev - libctpp2-dev - ctpp2-utils - libmicrohttpd-dev - g++-mingw-w64-i686 - gcc-mingw-w64-i686 - gcc-mingw-w64-base - mingw-w64-tools matrix: include: - env: PLATFORM="native_dyn" os: osx kiwix-lib-3.1.1/AUTHORS000066400000000000000000000011421340474775600145020ustar00rootroot00000000000000Automactic Ayoub DARDORY Cristian Patrasciuc Dattaz Elad Keyshawn Emmanuel Engelhart Isaac jleow00 Julian Harty Kiran Mathew Koshy Kunal Mehta Matthieu Gautier Rashiq Ahmad Renaud Gaudin Shivam Steve Wills Synhershko kiwix-lib-3.1.1/COPYING000066400000000000000000001043741340474775600145000ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. 
Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. 
For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. 
To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. 
The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. 
The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. 
You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. 
A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. 
d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. 
A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. 
Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>. kiwix-lib-3.1.1/ChangeLog000066400000000000000000000147741340474775600152230ustar00rootroot00000000000000kiwix-lib 3.1.1 =============== * The OPDS feed book's date must be the date of the book, not the date of the feed generation. * Convert the standard opds date to our format (YYYY-MM-DD) * Remove duplicate language attribute in the libxml dumper. * Create the data directory so that writing a file does not fail because the directory does not exist kiwix-lib 3.1.0 =============== * Add a method to get the favicon url of a book (if available). * Move dump code of library.xml in a specific class. * Add initial support for bookmarks kiwix-lib 3.0.3 =============== * Add the 'en' language to the mapping alpha2-code ('en') to alpha3-code ('eng'). * Correctly write the 'ArticleCount' and 'MediaCount' in the library.xml. * Correctly fill the book size for the zim file size. * Fix launch of aria2c. kiwix-lib 3.0.2 =============== * Use the correct path separator when computing relativePath on Windows. kiwix-lib 3.0.1 =============== * Small fix about parsing the opdsStream.
kiwix-lib 3.0.0 =============== * Change the downloader to use aria2 using a separate process (with rpc) instead of using the libaria2. This greatly simplifies the link process on Windows, as libaria2 no longer has to be linked. - kiwix-lib doesn't depend on libaria2 anymore. - kiwix-lib now depends on libcurl. * [API break] Library class API has been updated : - Books are referenced by id, not index. A lot of methods have been updated this way. - Books "list" is now private. - There is no more "current" book. - listBooksIds's filters have been updated. * [API break] Book class API has been updated : - Move the definition of Book to `book.h`. - Use getter/setter methods instead of public members. - Size (getSize/setSize) is now returned in bytes, not kB. - Depending on how the book has been initialized (opdsfeed), the faviconUrl may be stored in the book, the favicon being downloaded when using `getFavicon`. - The path (and indexPath) are always absolute paths. - Book now has a downloadId, corresponding to the aria2 download id (if it exists) * [API break] Manager class API has been updated : - The manager is mainly used to fill a Library from a "library.xml" file or opds feed. Other operations (such as removeBookById, setBookPath, filter, ...) have been removed. - The manager uses an intermediate class (LibraryManipulator) to add books to the library. This dependency injection allows caller code to hook the addition of a book to the library. - The manager works on an existing Library. It doesn't own an internal Library. * [API break] OpdsDumper class API has been updated : - dumpOPDSFeed method now takes the list of bookIds to dump instead of dumping all books in the library. - OpdsDumper can now dump openSearch result information (total result count, start index, ...). * [API break] Common tools API has been updated : - `base64_encode` and `base64_decode` take std::string as arguments. - New `download` function in networkTools.h using libcurl. - New `getDataDirectory` function in pathTools.
- Better `beautifyInteger` and `beautifyFileSize` functions. - New `nodeToString` function serializing a pugi::xml_node to a string. - New `converta2toa3` function to convert alpha2 language code to alpha3 language code. kiwix-lib 2.0.2 =============== * [Android] Forward C++ error messages to the Java world. * Follow redirection of favicon. * Make aria2 dependency optional. * Include unistd.h only on unix platform. kiwix-lib 2.0.1 =============== * Fix parsing of url. * Remove unused static resources. * Correctly decode reserved characters in URLs. * Explicitly use icu namespace to allow use of packaged icu lib. kiwix-lib 2.0.0 =============== * Introduce a new API to retrieve content from a reader. * Introduce the `Entry` class. * Reader's methods return an `Entry`. * Content and other information can be retrieved from the `Entry`. * Older Reader's methods are deprecated. * Add an `OPDSDumper` class to dump a whole `Library` as an OPDS feed. * Add a tool function to get the content of a file. * Add a tool function to create a temporary directory. * Add a `Downloader` class to download a file. * Allow the manager to populate a `Library` from an OPDS feed. * Try to locate libctpp2 in default system libdir and then fall back to the 'lib' directory. * Build kiwix-lib setting RPATH. * Build kiwix-lib without warning (werror=true) * Build kiwix-lib on macos. kiwix-lib 1.1.1 =============== * Correct the name of kiwix-lib (from `kiwixlib`) in meson.build to generate dist archive with the correct name. * Libzim version needs to be at least 3.2.0 kiwix-lib 1.1.0 =============== * Allow for more than 70 search results per page in html results rendering (kiwix/kiwix-tools#92) * Add a small api to do geo queries. * Add multi-search support in the JNI (#67) * Add an API to get only one part of an article. * Add an API to get direct location of an article content in the zim file. * Improve urlencoding * Fix pagination in html results rendering. * Compile using gcc-5 on Travis.
* Allow JNI to access search snippets. * JNI throws an exception instead of returning an invalid object if something goes wrong. * Add doctext documentation. (#116) * Various bug fixes. kiwix-lib 1.0.0 =============== * Correctly regenerate template resource using ctpp2c at compilation time. * Suggestions use the xapian database when available * Support multi-zim search in kiwix-lib (a search can now search several databases embedded in zims at the same time) * Fix some wording * Fix license issues * Add out argument to jni getContent* method to get the title of the article at the same time we get the content * Rename `compile_resources.py` script to `kiwix-compile-resources` * Use static lib when building for android or in "static mode" * Make the ResourceNotFound exception public kiwix-lib 0.2.0 =============== * Generate the snippet from the article content if the snippet is not directly in the database. This provides better snippets as they now depend on the query. * Use the stopwords and the language stored in the fulltext index database to parse the user query. * Remove the indexer functionality. * Move to C++11 standard. * Use the fulltext search of the zimlib. We still have the fulltext search code in kiwix-lib to be able to search in a fulltext index alongside a zim file. (To be removed in the future) * A few API changes * Change a lot of `Reader` methods to const methods. * Fix some crashes. kiwix-lib-3.1.1/README.md000066400000000000000000000102721340474775600147150ustar00rootroot00000000000000Kiwix library ============= The Kiwix library provides the Kiwix software core. It contains the code shared by all Kiwix ports (Windows, Linux, OSX, Android, ...). Disclaimer ---------- This document assumes you have a little knowledge about software compilation. If you experience difficulties with the dependencies or with the Kiwix library compilation itself, we recommend having a look at [kiwix-build](https://github.com/kiwix/kiwix-build).
Preamble -------- Although the Kiwix library can be (cross-)compiled on/for many systems, the following documentation explains how to do it on POSIX ones. It is primarily intended for GNU/Linux systems and has been tested on recent releases of Ubuntu and Fedora. Dependencies ------------ The Kiwix library relies on many third-party software libraries. They are prerequisites to the Kiwix library compilation. The following libraries need to be available: * ICU ................................... http://site.icu-project.org/ (package libicu-dev on Ubuntu) * ZIM ........................................ http://www.openzim.org/ (package libzim-dev on Ubuntu) * Pugixml ........................................ http://pugixml.org/ (package libpugixml-dev on Ubuntu) * ctpp2 ........................................ http://ctpp.havoc.ru/ (package libctpp2-dev on Ubuntu) * Xapian ......................................... https://xapian.org/ (package libxapian-dev on Ubuntu) * libaria2 .................................. https://aria2.github.io/ (no package on Ubuntu) These dependencies may or may not be packaged by your operating system. They may also be packaged but only in an older version. The compilation script will tell you if one of them is missing or too old. In the worst case, you will have to download and compile a bleeding-edge version by hand. If you want to install these dependencies locally, then use the kiwix-lib directory as install prefix. If you compile ctpp2 from source and want to compile the Kiwix library statically then you will probably need to rename ctpp2 static library from ctpp2-st.a to ctpp2.a. Environment ------------- The Kiwix library builds using [Meson](http://mesonbuild.com/) version 0.39 or higher. Meson itself relies on Ninja, pkg-config and a few other compilation tools. Install first the few common compilation tools: * Meson * Ninja * Pkg-config These tools should be packaged if you use a cutting edge operating system.
If not, have a look at the "Troubleshooting" section. Compilation ----------- Once all dependencies are installed, you can compile the Kiwix library with: ``` meson . build ninja -C build ``` By default, it will compile dynamically linked libraries. All binary files will be created in the "build" directory created automatically by Meson. If you want statically linked libraries, you can add `--default-library=static` option to the Meson command. Depending on your system, `ninja` may be called `ninja-build`. Installation ------------ If you want to install the Kiwix library and the headers you have just compiled on your system, here we go: ``` ninja -C build install ``` You might need to run the command as root (or using 'sudo'), depending on where you want to install the libraries. After the installation has succeeded, you may need to run ldconfig (as root). Uninstallation ------------ If you want to uninstall the Kiwix library: ``` ninja -C build uninstall ``` Like for the installation, you might need to run the command as root (or using 'sudo'). Troubleshooting --------------- If you need to install Meson "manually": ``` virtualenv -p python3 ./ # Create virtualenv source bin/activate # Activate the virtualenv pip3 install meson # Install Meson hash -r # Refresh bash paths ``` If you need to install Ninja "manually": ``` git clone https://github.com/ninja-build/ninja.git cd ninja git checkout release ./configure.py --bootstrap mkdir ../bin cp ninja ../bin cd .. ``` If the compilation still fails, you might need to get a more recent version of a dependency than the one packaged by your Linux distribution. Try then with a source tarball distributed by the problematic upstream project or even directly from the source code repository. License ------- GPLv3 or later, see COPYING for more details.
kiwix-lib-3.1.1/format_code.sh000077500000000000000000000015131340474775600162550ustar00rootroot00000000000000#!/usr/bin/bash files=( "include/library.h" "include/common/stringTools.h" "include/common/pathTools.h" "include/common/otherTools.h" "include/common/regexTools.h" "include/common/networkTools.h" "include/manager.h" "include/reader.h" "include/kiwix.h" "include/xapianSearcher.h" "include/searcher.h" "src/library.cpp" "src/android/kiwix.cpp" "src/android/org/kiwix/kiwixlib/JNIKiwixBool.java" "src/android/org/kiwix/kiwixlib/JNIKiwix.java" "src/android/org/kiwix/kiwixlib/JNIKiwixString.java" "src/android/org/kiwix/kiwixlib/JNIKiwixInt.java" "src/searcher.cpp" "src/common/pathTools.cpp" "src/common/regexTools.cpp" "src/common/otherTools.cpp" "src/common/networkTools.cpp" "src/common/stringTools.cpp" "src/xapianSearcher.cpp" "src/manager.cpp" "src/reader.cpp" ) for i in "${files[@]}" do echo $i clang-format -i -style=file $i done kiwix-lib-3.1.1/include/000077500000000000000000000000001340474775600150575ustar00rootroot00000000000000kiwix-lib-3.1.1/include/book.h000066400000000000000000000113171340474775600161650ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #ifndef KIWIX_BOOK_H #define KIWIX_BOOK_H #include namespace pugi { class xml_node; } namespace kiwix { enum supportedIndexType { UNKNOWN, XAPIAN }; class OPDSDumper; class Reader; /** * A class to store information about a book (a zim file) */ class Book { public: Book(); ~Book(); bool update(const Book& other); void update(const Reader& reader); void updateFromXml(const pugi::xml_node& node, const std::string& baseDir); void updateFromOpds(const pugi::xml_node& node, const std::string& urlHost); std::string getHumanReadableIdFromPath(); bool readOnly() const { return m_readOnly; } const std::string& getId() const { return m_id; } const std::string& getPath() const { return m_path; } bool isPathValid() const { return m_pathValid; } const std::string& getIndexPath() const { return m_indexPath; } const supportedIndexType& getIndexType() const { return m_indexType; } const std::string& getTitle() const { return m_title; } const std::string& getDescription() const { return m_description; } const std::string& getLanguage() const { return m_language; } const std::string& getCreator() const { return m_creator; } const std::string& getPublisher() const { return m_publisher; } const std::string& getDate() const { return m_date; } const std::string& getUrl() const { return m_url; } const std::string& getName() const { return m_name; } const std::string& getTags() const { return m_tags; } const std::string& getOrigId() const { return m_origId; } const uint64_t& getArticleCount() const { return m_articleCount; } const uint64_t& getMediaCount() const { return m_mediaCount; } const uint64_t& getSize() const { return m_size; } const std::string& getFavicon() const; const std::string& getFaviconUrl() const { return m_faviconUrl; } const std::string& getFaviconMimeType() const { return m_faviconMimeType; } const std::string& getDownloadId() const { return m_downloadId; } void setReadOnly(bool readOnly) { m_readOnly = readOnly; } void setId(const std::string& id) { m_id = id; } 
void setPath(const std::string& path); void setPathValid(bool valid) { m_pathValid = valid; } void setIndexPath(const std::string& indexPath); void setIndexType(supportedIndexType indexType) { m_indexType = indexType;} void setTitle(const std::string& title) { m_title = title; } void setDescription(const std::string& description) { m_description = description; } void setLanguage(const std::string& language) { m_language = language; } void setCreator(const std::string& creator) { m_creator = creator; } void setPublisher(const std::string& publisher) { m_publisher = publisher; } void setDate(const std::string& date) { m_date = date; } void setUrl(const std::string& url) { m_url = url; } void setName(const std::string& name) { m_name = name; } void setTags(const std::string& tags) { m_tags = tags; } void setOrigId(const std::string& origId) { m_origId = origId; } void setArticleCount(uint64_t articleCount) { m_articleCount = articleCount; } void setMediaCount(uint64_t mediaCount) { m_mediaCount = mediaCount; } void setSize(uint64_t size) { m_size = size; } void setFavicon(const std::string& favicon) { m_favicon = favicon; } void setFaviconMimeType(const std::string& faviconMimeType) { m_faviconMimeType = faviconMimeType; } void setDownloadId(const std::string& downloadId) { m_downloadId = downloadId; } protected: std::string m_id; std::string m_downloadId; std::string m_path; bool m_pathValid; std::string m_indexPath; supportedIndexType m_indexType; std::string m_title; std::string m_description; std::string m_language; std::string m_creator; std::string m_publisher; std::string m_date; std::string m_url; std::string m_name; std::string m_tags; std::string m_origId; uint64_t m_articleCount; uint64_t m_mediaCount; bool m_readOnly; uint64_t m_size; mutable std::string m_favicon; std::string m_faviconUrl; std::string m_faviconMimeType; }; } #endif kiwix-lib-3.1.1/include/bookmark.h000066400000000000000000000037341340474775600170440ustar00rootroot00000000000000/* * 
Copyright 2018 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #ifndef KIWIX_BOOKMARK_H #define KIWIX_BOOKMARK_H #include namespace pugi { class xml_node; } namespace kiwix { /** * A class to store information about a bookmark (an article in a book) */ class Bookmark { public: Bookmark(); ~Bookmark(); void updateFromXml(const pugi::xml_node& node); const std::string& getBookId() const { return m_bookId; } const std::string& getBookTitle() const { return m_bookTitle; } const std::string& getUrl() const { return m_url; } const std::string& getTitle() const { return m_title; } const std::string& getLanguage() const { return m_language; } const std::string& getDate() const { return m_date; } void setBookId(const std::string& bookId) { m_bookId = bookId; } void setBookTitle(const std::string& bookTitle) { m_bookTitle = bookTitle; } void setUrl(const std::string& url) { m_url = url; } void setTitle(const std::string& title) { m_title = title; } void setLanguage(const std::string& language) { m_language = language; } void setDate(const std::string& date) { m_date = date; } protected: std::string m_bookId; std::string m_bookTitle; std::string m_url; std::string m_title; std::string m_language; std::string m_date; }; } #endif 
kiwix-lib-3.1.1/include/common.h000066400000000000000000000006641340474775600165260ustar00rootroot00000000000000 #ifndef _KIWIX_COMMON_H_ #define _KIWIX_COMMON_H_ #include #ifdef __GNUC__ #define DEPRECATED __attribute__((deprecated)) #elif defined(_MSC_VER) #define DEPRECATED __declspec(deprecated) #else #praga message("WARNING: You need to implement DEPRECATED for this compiler") #define DEPRECATED #endif namespace kiwix { typedef zim::size_type size_type; typedef zim::offset_type offset_type; } #endif //_KIWIX_COMMON_H_ kiwix-lib-3.1.1/include/common/000077500000000000000000000000001340474775600163475ustar00rootroot00000000000000kiwix-lib-3.1.1/include/common/base64.h000066400000000000000000000001741340474775600176060ustar00rootroot00000000000000#include std::string base64_encode(const std::string& inString); std::string base64_decode(const std::string& s); kiwix-lib-3.1.1/include/common/networkTools.h000066400000000000000000000020041340474775600212260ustar00rootroot00000000000000/* * Copyright 2012 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #ifndef KIWIX_NETWORKTOOLS_H #define KIWIX_NETWORKTOOLS_H #include #include namespace kiwix { std::map getNetworkInterfaces(); std::string getBestPublicIp(); std::string download(const std::string& url); } #endif kiwix-lib-3.1.1/include/common/otherTools.h000066400000000000000000000020771340474775600206700ustar00rootroot00000000000000/* * Copyright 2014 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #ifndef KIWIX_OTHERTOOLS_H #define KIWIX_OTHERTOOLS_H #ifdef _WIN32 #include #else #include #endif #include namespace kiwix { void sleep(unsigned int milliseconds); std::string nodeToString(pugi::xml_node node); std::string converta2toa3(const std::string& a2code); } #endif kiwix-lib-3.1.1/include/common/pathTools.h000066400000000000000000000040761340474775600205040ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */

#ifndef KIWIX_PATHTOOLS_H
#define KIWIX_PATHTOOLS_H

// NOTE(review): the targets of the system includes below (twelve, plus one
// Windows-only) were lost when this copy of the file was produced. They
// cannot be recovered from the declarations alone; restore them from upstream.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#ifdef _WIN32
#include
#endif

// stringTools.h includes this header back; the include guards break the cycle.
#include "stringTools.h"

// NOTE(review): a using-directive at header scope leaks into every includer.
// Removing it would be a source-breaking change, so it is kept as is.
using namespace std;

/*
 * File-system path helpers. Only declarations are visible here; the
 * descriptions below are inferred from the names -- TODO confirm against
 * the implementation.
 */

/* Path classification and composition. */
bool isRelativePath(const string& path);
// NOTE(review): the next three take their strings by value, unlike the
// other functions; the signatures must match the out-of-line definitions.
string computeAbsolutePath(const string path, const string relativePath);
string computeRelativePath(const string path, const string absolutePath);
string removeLastPathElement(const string path,
                             const bool removePreSeparator = false,
                             const bool removePostSeparator = false);
string appendToDirectory(const string& directoryPath, const string& filename);

/* File queries. */
// NOTE(review): the unit of the returned size is not visible from this header.
unsigned int getFileSize(const string& path);
string getFileSizeAsString(const string& path);
string getFileContent(const string& path);
bool fileExists(const string& path);

/* Directory and file manipulation. */
bool makeDirectory(const string& path);
string makeTmpDirectory();
bool copyFile(const string& sourcePath, const string& destPath);
string getLastPathElement(const string& path);

/* Well-known locations. */
string getExecutablePath();
string getCurrentDirectory();
string getDataDirectory();

bool writeTextFile(const string& path, const string& content);

#endif
kiwix-lib-3.1.1/include/common/regexTools.h000066400000000000000000000025011340474775600206510ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */

#ifndef KIWIX_REGEXTOOLS_H
#define KIWIX_REGEXTOOLS_H

// NOTE(review): the targets of the four includes below were lost when this
// copy of the file was produced; restore them from upstream.
#include
#include
#include
#include

/*
 * Regular-expression helpers. Only declarations are visible here; the
 * regex dialect and matching semantics are defined by the implementation.
 */

/* Presumably reports whether `content` matches `regex` -- TODO confirm. */
bool matchRegex(const std::string& content, const std::string& regex);

/* Note the argument order: the replacement comes before the regex. */
std::string replaceRegex(const std::string& content,
                         const std::string& replacement,
                         const std::string& regex);

/* Note the argument order: here the regex comes before the replacement, */
/* and it is taken by value (must match the out-of-line definition).     */
std::string appendToFirstOccurence(const std::string& content,
                                   const std::string regex,
                                   const std::string& replacement);

#endif
kiwix-lib-3.1.1/include/common/stringTools.h000066400000000000000000000047041340474775600210540ustar00rootroot00000000000000/* * Copyright 2011-2012 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA.
*/ #ifndef KIWIX_STRINGTOOLS_H #define KIWIX_STRINGTOOLS_H #include #include #include #include #include #include #include #include "pathTools.h" namespace kiwix { #ifndef __ANDROID__ std::string beautifyInteger(uint64_t number); std::string beautifyFileSize(uint64_t number); void printStringInHexadecimal(const char* s); void printStringInHexadecimal(icu::UnicodeString s); void stringReplacement(std::string& str, const std::string& oldStr, const std::string& newStr); std::string encodeDiples(const std::string& str); #endif std::string removeAccents(const std::string& text); void loadICUExternalTables(); std::string urlEncode(const std::string& value, bool encodeReserved = false); std::string urlDecode(const std::string& value, bool component = false); std::vector split(const std::string&, const std::string&); std::vector split(const char*, const char*); std::vector split(const std::string&, const char*); std::vector split(const char*, const std::string&); std::string ucAll(const std::string& word); std::string lcAll(const std::string& word); std::string ucFirst(const std::string& word); std::string lcFirst(const std::string& word); std::string toTitle(const std::string& word); std::string normalize(const std::string& word); template std::string to_string(T value) { std::ostringstream oss; oss << value; return oss.str(); } template T extractFromString(const std::string& str) { std::istringstream iss(str); T ret; iss >> ret; return ret; } } //namespace kiwix #endif kiwix-lib-3.1.1/include/ctpp2/000077500000000000000000000000001340474775600161075ustar00rootroot00000000000000kiwix-lib-3.1.1/include/ctpp2/CTPP2VMStringLoader.hpp000066400000000000000000000036261340474775600222400ustar00rootroot00000000000000/* * Copyright 2013 Renaud Gaudin * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */

#ifndef _CTPP2_VM_STRING_LOADER_HPP__
#define _CTPP2_VM_STRING_LOADER_HPP__ 1

// NOTE(review): the targets of the thirteen includes below were lost when this
// copy of the file was produced; restore them from upstream.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/**
  @file CTPP2VMStringLoader.hpp
  @brief Load program core from a raw content buffer
*/

namespace CTPP // C++ Template Engine
{
// FWD
struct VMExecutable;

/**
  @class VMStringLoader CTPP2VMStringLoader.hpp
  @brief Load program core from a raw content buffer (the constructor takes
         a pointer and a size instead of a file name)
*/
class CTPP2DECL VMStringLoader:
  public VMLoader
{
public:
	/**
	  @brief Constructor
	  @param rawContent - pointer to the raw content
	  @param rawContentSize - size of rawContent
	  NOTE(review): presumably the content is a compiled template and the
	  size is in bytes -- confirm against the implementation.
	*/
	VMStringLoader(CCHAR_P rawContent, size_t rawContentSize);

	/**
	  @brief Get ready-to-run program
	*/
	const VMMemoryCore * GetCore() const;

	/**
	  @brief A destructor
	*/
	~VMStringLoader() throw();
private:
	/** Program core */
	VMExecutable   * oCore;
	/** Ready-to-run program */
	VMMemoryCore   * pVMMemoryCore;
};

} // namespace CTPP
#endif // _CTPP2_VM_STRING_LOADER_HPP__
// End.
kiwix-lib-3.1.1/include/downloader.h000066400000000000000000000054471340474775600174000ustar00rootroot00000000000000/* * Copyright 2018 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #ifndef KIWIX_DOWNLOADER_H #define KIWIX_DOWNLOADER_H #include #include #include #include #include namespace kiwix { class Aria2; struct DownloadedFile { DownloadedFile() : success(false) {} bool success; std::string path; }; class AriaError : public std::runtime_error { public: AriaError(const std::string& message) : std::runtime_error(message) {} }; class Download { public: typedef enum { K_ACTIVE, K_WAITING, K_PAUSED, K_ERROR, K_COMPLETE, K_REMOVED, K_UNKNOWN } StatusResult; Download() : m_status(K_UNKNOWN) {} Download(std::shared_ptr p_aria, std::string did) : mp_aria(p_aria), m_status(K_UNKNOWN), m_did(did) {}; void updateStatus(bool follow=false); StatusResult getStatus() { return m_status; } std::string getDid() { return m_did; } std::string getFollowedBy() { return m_followedBy; } uint64_t getTotalLength() { return m_totalLength; } uint64_t getCompletedLength() { return m_completedLength; } uint64_t getDownloadSpeed() { return m_downloadSpeed; } uint64_t getVerifiedLength() { return m_verifiedLength; } std::string getPath() { return m_path; } std::vector& getUris() { return m_uris; } protected: std::shared_ptr mp_aria; StatusResult m_status; std::string m_did = ""; std::string m_followedBy = ""; uint64_t m_totalLength; uint64_t m_completedLength; uint64_t m_downloadSpeed; uint64_t m_verifiedLength; std::vector m_uris; std::string m_path; }; /** * A tool to download things. 
* */ class Downloader { public: Downloader(); virtual ~Downloader(); void close(); Download* startDownload(const std::string& uri); Download* getDownload(const std::string& did); size_t getNbDownload() { return m_knownDownloads.size(); } std::vector getDownloadIds(); private: std::map> m_knownDownloads; std::shared_ptr mp_aria; }; } #endif kiwix-lib-3.1.1/include/entry.h000066400000000000000000000113521340474775600163730ustar00rootroot00000000000000/* * Copyright 2018 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #ifndef KIWIX_ENTRY_H #define KIWIX_ENTRY_H #include #include #include #include #include "common.h" /* NOTE(review): a using-directive at header scope leaks into every file that includes this header. */ using namespace std; namespace kiwix { /** * Exception type used to signal a missing entry (see the @throw notes below). */ class NoEntry : public std::exception {}; /** * An Entry represents an entry in a zim file. */ class Entry { public: /** * Default constructor. * * Construct an invalid entry. */ Entry() = default; /** * Construct an entry referring to a zim article. * * @param article a zim::Article object */ Entry(zim::Article article); virtual ~Entry() = default; /** * Get the path of the entry. * * The path is the "key" of an entry. * * @return the path of the entry. */ std::string getPath() const; /** * Get the title of the entry. * * @return the title of the entry. */ std::string getTitle() const; /** * Get the content of the entry.
* * The string is a copy of the content. * If you don't want to make a copy, use getBlob. * * @return the content of the entry. */ std::string getContent() const; /** * Get the blob of the entry. * * A blob refers to the content without copying it. * * @param offset The starting offset of the blob. * @return the blob of the entry. */ zim::Blob getBlob(offset_type offset = 0) const; /** * Get the blob of the entry. * * A blob refers to the content without copying it. * * @param offset The starting offset of the blob. * @param size The size of the blob. * @return the blob of the entry. */ zim::Blob getBlob(offset_type offset, size_type size) const; /** * Get the info for direct access to the content of the entry. * * Some entries (i.e. binary ones) have their content stored plain * in the zim file. Knowing the offset where the content is stored, * a user can read the content directly in the zim file, bypassing * kiwix-lib/libzim. * * @return A pair specifying where to read the content. * The string is the real file to read (may be different from the .zim * file if the zim is cut). * The offset is the offset to read in the file. * Return <"",0> if it is not possible to read directly. */ std::pair getDirectAccessInfo() const; /** * Get the size of the entry. * * @return the size of the entry. */ size_type getSize() const; /** * Get the mime_type of the entry. * * @return the mime_type of the entry. */ std::string getMimetype() const; /** * Get if the entry is a redirect entry. * * @return True if the entry is a redirect. */ bool isRedirect() const; /** * Get if the entry is a link target entry. * * @return True if the entry is a link target. */ bool isLinkTarget() const; /** * Get if the entry is a deleted entry. * * @return True if the entry is a deleted entry. */ bool isDeleted() const; /** * Get the entry pointed to by this entry. * * @return the entry pointed to. * @throw NoEntry if the entry is not a redirected entry.
*/ Entry getRedirectEntry() const; /** * Get the final entry pointed to by this entry. * * Follow the redirection until a "not redirecting" entry is found. * If the entry is not a redirected entry, return the entry itself. * * @return the final entry. */ Entry getFinalEntry() const; /** * Convert the entry to a boolean value. * * @return True if the entry is valid (article.good()). */ explicit operator bool() const { return good(); } private: zim::Article article; mutable zim::Article final_article; bool good() const { return article.good(); } }; } #endif // KIWIX_ENTRY_H kiwix-lib-3.1.1/include/kiwix.h000066400000000000000000000015001340474775600163570ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #ifndef KIWIX_H #define KIWIX_H #include "library.h" #endifkiwix-lib-3.1.1/include/library.h000066400000000000000000000132051340474775600166750ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version.
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #ifndef KIWIX_LIBRARY_H #define KIWIX_LIBRARY_H #include #include #include #include "book.h" #include "bookmark.h" #define KIWIX_LIBRARY_VERSION "20110515" namespace kiwix { class OPDSDumper; enum supportedListSortBy { UNSORTED, TITLE, SIZE, DATE, CREATOR, PUBLISHER }; enum supportedListMode { ALL = 0, LOCAL = 1, REMOTE = 1 << 1, NOLOCAL = 1 << 2, NOREMOTE = 1 << 3, VALID = 1 << 4, NOVALID = 1 << 5 }; /** * A Library stores several books and bookmarks. */ class Library { std::map m_books; std::vector m_bookmarks; public: Library(); ~Library(); /** * Add a book to the library. * * If a book already exists in the library with the same id, update * the existing book instead of adding a new one. * * @param book The book to add. * @return True if the book has been added. * False if a book has been updated. */ bool addBook(const Book& book); /** * Add a bookmark to the library. * * @param bookmark the bookmark to add. */ void addBookmark(const Bookmark& bookmark); /** * Remove a bookmark. * * @param zimId The zimId of the bookmark. * @param url The url of the bookmark. * @return True if the bookmark has been removed. */ bool removeBookmark(const std::string& zimId, const std::string& url); /** * Get a book by its id. * * NOTE(review): returns a reference; what happens for an unknown id is not * visible in this header. * * @param id the id of the book. * @return a reference to the book. */ Book& getBookById(const std::string& id); /** * Remove a book from the library. * * @param id the id of the book to remove. * @return True if the book was in the library and has been removed. */ bool removeBookById(const std::string& id); /** * Write the library to a file. * * @param path the path of the file to write to.
* @return True if the library has been correctly saved. */ bool writeToFile(const std::string& path); /** * Write the library bookmarks to a file. * * @param path the path of the file to write to. * @return True if the bookmarks have been correctly saved. */ bool writeBookmarksToFile(const std::string& path); /** * Get the number of books in the library. * * @param localBooks If we must count local books (books with a path). * @param remoteBooks If we must count remote books (books with an url) * @return The number of books. */ unsigned int getBookCount(const bool localBooks, const bool remoteBooks); /** * Get all languages of the books in the library. * * @return A list of languages. */ std::vector getBooksLanguages(); /** * Get all book creators of the books in the library. * * @return A list of book creators. */ std::vector getBooksCreators(); /** * Get all book publishers of the books in the library. * * @return A list of book publishers. */ std::vector getBooksPublishers(); /** * Get all bookmarks. * * @return A list of bookmarks */ const std::vector& getBookmarks() { return m_bookmarks; } /** * Get all book ids of the books in the library. * * @return A list of book ids. */ std::vector getBooksIds(); /** * Filter the library and return the ids of the matching books. * * This is equivalent to `listBooksIds(ALL, UNSORTED, search)`. * * @param search List only books with search in the title or description. * @return The list of bookIds corresponding to the query. */ std::vector filter(const std::string& search); /** * List books in the library. * * @param supportedListMode The mode of listing : * - LOCAL : list only local books (with a path). * - REMOTE : list only remote books (with an url). * - VALID : list only valid books (without a path or with a * path pointing to a valid zim file). * - NOLOCAL : list only books without valid path. * - NOREMOTE : list only books without url. * - NOVALID : list only books not valid.
* - ALL : Do not do any filter (LOCAL or REMOTE) * - Flags can be combined. * @param sortBy Attribute to sort by the book list. * @param search List only books with search in the title, description. * @param language List only books in this language. * @param creator List only books of this creator. * @param publisher List only books of this publisher. * @param maxSize Do not list book bigger than maxSize. * Set to 0 to cancel this filter. * @return The list of bookIds corresponding to the query. */ std::vector listBooksIds( int supportedListMode = ALL, supportedListSortBy sortBy = UNSORTED, const std::string& search = "", const std::string& language = "", const std::string& creator = "", const std::string& publisher = "", size_t maxSize = 0); /* The dumper classes read m_books / m_bookmarks directly. The XML dumper class is spelled LibXMLDumper (capital L) where it is defined, so the friend declaration must use that exact name to take effect. */ friend class OPDSDumper; friend class LibXMLDumper; }; } #endif kiwix-lib-3.1.1/include/libxml_dumper.h000066400000000000000000000037201340474775600200750ustar00rootroot00000000000000/* * Copyright 2018 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA.
*/ #ifndef KIWIX_LIBXML_DUMPER_H #define KIWIX_LIBXML_DUMPER_H #include #include #include #include "library.h" namespace kiwix { /** * A tool to dump a `Library` into a basic library.xml * * A default-constructed dumper has no library (null pointer) until * setLibrary() is called. */ class LibXMLDumper { public: LibXMLDumper() = default; LibXMLDumper(Library* library); ~LibXMLDumper(); /** * Dump the library.xml * * @param bookIds The ids of the books to dump. * @return The library.xml content. */ std::string dumpLibXMLContent(const std::vector& bookIds); /** * Dump the bookmarks of the library. * * @return The bookmark.xml content. */ std::string dumpLibXMLBookmark(); /** * Set the base directory used. * * @param baseDir the base directory to use. */ void setBaseDir(const std::string& baseDir) { this->baseDir = baseDir; } /** * Set the library to dump. * * @param library The library to dump. */ void setLibrary(Library* library) { this->library = library; } protected: kiwix::Library* library = nullptr; std::string baseDir; private: void handleBook(Book book, pugi::xml_node root_node); void handleBookmark(Bookmark bookmark, pugi::xml_node root_node); }; } #endif // KIWIX_LIBXML_DUMPER_H kiwix-lib-3.1.1/include/manager.h000066400000000000000000000167211340474775600166510ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA.
*/ #ifndef KIWIX_MANAGER_H #define KIWIX_MANAGER_H #include "book.h" #include "library.h" #include "reader.h" #include #include namespace pugi { class xml_document; } namespace kiwix { /** * Interface through which books and bookmarks are added to a library. */ class LibraryManipulator { public: virtual ~LibraryManipulator() {} virtual bool addBookToLibrary(Book book) = 0; virtual void addBookmarkToLibrary(Bookmark bookmark) = 0; }; /** * LibraryManipulator that forwards to Library::addBook and * Library::addBookmark on the library given at construction. * The library pointer is stored, not owned or checked here. */ class DefaultLibraryManipulator : public LibraryManipulator { public: DefaultLibraryManipulator(Library* library) : library(library) {} virtual ~DefaultLibraryManipulator() {} bool addBookToLibrary(Book book) { return library->addBook(book); } void addBookmarkToLibrary(Bookmark bookmark) { library->addBookmark(bookmark); } private: kiwix::Library* library; }; /** * A tool to manage a `Library`. * * A `Manager` works on a library through a `LibraryManipulator`. * NOTE(review): this comment used to mention a `cloneLibrary` method, but no * such method is declared in this class. */ class Manager { public: Manager(LibraryManipulator* manipulator); Manager(Library* library); ~Manager(); /** * Read a `library.xml` and add the books in the file to the library. * * @param path The path to the `library.xml`. * @param readOnly Set if the library path could be overwritten later with * updated content. * @return True if file has been properly parsed. */ bool readFile(const std::string& path, const bool readOnly = true); /** * Read a `library.xml` and add the books in the file to the library. * * @param nativePath The path of the `library.xml` * @param UTF8Path The utf8 version (?) of the path. Also the path where the * library will be written if readOnly is false. * @param readOnly Set if the library path could be overwritten later with * updated content. * @return True if file has been properly parsed. */ bool readFile(const std::string& nativePath, const std::string& UTF8Path, const bool readOnly = true); /** * Load a library content stored in the string. * * @param xml The content of the library xml * @param readOnly Set if the library path could be overwritten later with * updated content.
* @param libraryPath The library path (used to resolve relative path) * @return True if the content has been properly parsed. */ bool readXml(const std::string& xml, const bool readOnly = true, const std::string& libraryPath = ""); /** * Load a library content stored in a OPDS stream. * * @param content The content of the OPDS stream. * @param urlHost Host associated with the stream (presumably used to build * the urls of the books - TODO confirm). * @return True if the content has been properly parsed. */ bool readOpds(const std::string& content, const std::string& urlHost); /** * Load a bookmark file. * * @param path The path of the file to read. * @return True if the content has been properly parsed. */ bool readBookmarkFile(const std::string& path); /** * Add a book to the library. * * @param pathToOpen The path to the zim file to add. * @param pathToSave The path to store in the library in place of pathToOpen. * @param url The url of the book to store in the library. * @param checkMetaData Tell if we check metadata before adding book to the * library. * @return The id of the book if the book has been added to the library. * Else, an empty string. */ std::string addBookFromPathAndGetId(const std::string& pathToOpen, const std::string& pathToSave = "", const std::string& url = "", const bool checkMetaData = false); /** * Add a book to the library. * * @param pathToOpen The path to the zim file to add. * @param pathToSave The path to store in the library in place of pathToOpen. * @param url The url of the book to store in the library. * @param checkMetaData Tell if we check metadata before adding book to the * library. * @return True if the book has been added to the library. */ bool addBookFromPath(const std::string& pathToOpen, const std::string& pathToSave = "", const std::string& url = "", const bool checkMetaData = false); /** * Get the book corresponding to an id.
* * @param[in] id The id of the book * @param[out] book The book corresponding to the id. * @return True if the book has been found. */ bool getBookById(const std::string& id, Book& book); /** * Update the "last open date" of a book * * @param id the id of the book. * @return True if the book is in the library. */ bool updateBookLastOpenDateById(const std::string& id); /** * Remove (set to empty) paths of all books in the library. */ void removeBookPaths(); /** * List books in the library. * * NOTE(review): this comment says the list is made available in a public * vector member `bookIdList`, but no such member is declared in this class; * LASTOPEN is also not a value of the supportedListMode enum. * * @param mode The mode of listing : * - LASTOPEN sort by last opened book. * - LOCAL list only local file. * - REMOTE list only remote file. * @param sortBy Attribute to sort by the book list. * @param maxSize Do not list book bigger than maxSize MiB. * Set to 0 to cancel this filter. * @param language List only books in this language. * @param creator List only books of this creator. * @param publisher List only books of this publisher. * @param search List only books with search in the title, description or * language.
* @return True */ bool listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize, const std::string& language, const std::string& creator, const std::string& publisher, const std::string& search); std::string writableLibraryPath; bool m_hasSearchResult = false; uint64_t m_totalBooks = 0; uint64_t m_startIndex = 0; uint64_t m_itemsPerPage = 0; protected: kiwix::LibraryManipulator* manipulator; bool mustDeleteManipulator; bool readBookFromPath(const std::string& path, Book* book); bool parseXmlDom(const pugi::xml_document& doc, const bool readOnly, const std::string& libraryPath); bool parseOpdsDom(const pugi::xml_document& doc, const std::string& urlHost); private: void checkAndCleanBookPaths(Book& book, const std::string& libraryPath); }; } #endif kiwix-lib-3.1.1/include/meson.build000066400000000000000000000011211340474775600172140ustar00rootroot00000000000000headers = [ 'book.h', 'bookmark.h', 'common.h', 'library.h', 'manager.h', 'libxml_dumper.h', 'opds_dumper.h', 'downloader.h', 'reader.h', 'entry.h', 'searcher.h' ] if xapian_dep.found() headers += ['xapianSearcher.h'] endif install_headers(headers, subdir:'kiwix') install_headers( 'common/base64.h', 'common/networkTools.h', 'common/otherTools.h', 'common/pathTools.h', 'common/regexTools.h', 'common/stringTools.h', subdir:'kiwix/common' ) if has_ctpp2_dep install_headers( 'ctpp2/CTPP2VMStringLoader.hpp', subdir:'kiwix/ctpp2' ) endif kiwix-lib-3.1.1/include/opds_dumper.h000066400000000000000000000056741340474775600175650ustar00rootroot00000000000000/* * Copyright 2017 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version.
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #ifndef KIWIX_OPDS_DUMPER_H #define KIWIX_OPDS_DUMPER_H #include #include #include #include #include "common/base64.h" #include "common/pathTools.h" #include "common/regexTools.h" #include "library.h" #include "reader.h" using namespace std; namespace kiwix { /** * A tool to dump a `Library` into an OPDS stream. * * A default-constructed dumper has no library (null pointer) and zeroed * search counters until the corresponding setters are called. */ class OPDSDumper { public: OPDSDumper() = default; OPDSDumper(Library* library); ~OPDSDumper(); /** * Dump the OPDS feed. * * @param bookIds The ids of the books to dump. * @return The OPDS feed. */ std::string dumpOPDSFeed(const std::vector& bookIds); /** * Set the id of the opds stream. * * @param id the id to use. */ void setId(const std::string& id) { this->id = id;} /** * Set the title of the opds stream. * * @param title the title to use. */ void setTitle(const std::string& title) { this->title = title; } /** * Set the root location used when generating url. * * @param rootLocation the root location to use. */ void setRootLocation(const std::string& rootLocation) { this->rootLocation = rootLocation; } /** * Set the search description url. * * @param searchDescriptionUrl the search description url to use. */ void setSearchDescriptionUrl(const std::string& searchDescriptionUrl) { this->searchDescriptionUrl = searchDescriptionUrl; } /** * Set some information about the search results. * * @param totalResult the total number of results of the search. * @param startIndex the start index of the result. * @param count the number of results of the current set (or page).
*/ void setOpenSearchInfo(int totalResult, int startIndex, int count); /** * Set the library to dump. * * @param library The library to dump. */ void setLibrary(Library* library) { this->library = library; } protected: kiwix::Library* library = nullptr; std::string id; std::string title; std::string date; std::string rootLocation; std::string searchDescriptionUrl; int m_totalResults = 0; int m_startIndex = 0; int m_count = 0; bool m_isSearchResult = false; private: pugi::xml_node handleBook(Book book, pugi::xml_node root_node); }; } #endif // KIWIX_OPDS_DUMPER_H kiwix-lib-3.1.1/include/reader.h000066400000000000000000000350261340474775600165000ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #ifndef KIWIX_READER_H #define KIWIX_READER_H #include #include #include #include #include #include #include #include #include #include "common.h" #include "entry.h" #include "common/pathTools.h" #include "common/stringTools.h" using namespace std; namespace kiwix { /** * The Reader class gives access to the entries and metadata of a zim * file. */ class Reader { public: /** * Create a Reader to read a zim file specified by zimFilePath. * * @param zimFilePath The path to the zim file to read. * The zim file can be split (.zimaa, .zimab, ...).
* In this case, the file path must still point to the * unsplit path as if the file were not split * (.zim extension). */ Reader(const string zimFilePath); ~Reader(); /** * Get the number of "displayable" entries in the zim file. * * @return If the zim file has a /M/Counter metadata, return the number of * entries with the 'text/html' MIMEtype specified in the metadata. * Else return the number of entries in the 'A' namespace. */ unsigned int getArticleCount() const; /** * Get the number of media in the zim file. * * @return If the zim file has a /M/Counter metadata, return the number of * entries with the 'image/jpeg', 'image/gif' and 'image/png' in * the metadata. * Else return the number of entries in the 'I' namespace. */ unsigned int getMediaCount() const; /** * Get the number of all entries in the zim file. * * @return Return the number of all the entries, whatever their MIMEtype or * their namespace. */ unsigned int getGlobalCount() const; /** * Get the path of the zim file. * * @return the path of the zim file as given in the constructor. */ string getZimFilePath() const; /** * Get the Id of the zim file. * * @return The uuid stored in the zim file. */ string getId() const; /** * Get the url of a random page. * * Deprecated : Use `getRandomPage` instead. * * @return Url of a random page. The page is picked from all entries in * the 'A' namespace. * The main page is excluded from the potential results. */ DEPRECATED string getRandomPageUrl() const; /** * Get a random page. * * @return A random Entry. The entry is picked from all entries in * the 'A' namespace. * The main entry is excluded from the potential results. */ Entry getRandomPage() const; /** * Get the url of the first page. * * Deprecated : Use `getFirstPage` instead. * * @return Url of the first entry in the 'A' namespace. */ DEPRECATED string getFirstPageUrl() const; /** * Get the entry of the first page. * * @return The first entry in the 'A' namespace.
*/ Entry getFirstPage() const; /** * Get the url of the main page. * * Deprecated : Use `getMainPage` instead. * * @return Url of the main page as specified in the zim file. */ DEPRECATED string getMainPageUrl() const; /** * Get the entry of the main page. * * @return Entry of the main page as specified in the zim file. */ Entry getMainPage() const; /** * Get the content of a metadata. * * @param[in] name The name of the metadata. * @param[out] value The value will be set to the content of the metadata. * @return True if it was possible to get the content of the metadata. */ bool getMetatag(const string& name, string& value) const; /** * Get the title of the zim file. * * @return The title of zim file as specified in the zim metadata. * If no title has been set, return a title computed from the * file path. */ string getTitle() const; /** * Get the description of the zim file. * * @return The description of the zim file as specified in the zim metadata. * If no description has been set, return the subtitle. */ string getDescription() const; /** * Get the language of the zim file. * * @return The language of the zim file as specified in the zim metadata. */ string getLanguage() const; /** * Get the name of the zim file. * * @return The name of the zim file as specified in the zim metadata. */ string getName() const; /** * Get the tags of the zim file. * * @return The tags of the zim file as specified in the zim metadata. */ string getTags() const; /** * Get the date of the zim file. * * @return The date of the zim file as specified in the zim metadata. */ string getDate() const; /** * Get the creator of the zim file. * * @return The creator of the zim file as specified in the zim metadata. */ string getCreator() const; /** * Get the publisher of the zim file. * * @return The publisher of the zim file as specified in the zim metadata. */ string getPublisher() const; /** * Get the origId of the zim file.
* * The origId is only used in the case of patch zim file and is the Id * of the original zim file. * * @return The origId of the zim file as specified in the zim metadata. */ string getOrigId() const; /** * Get the favicon of the zim file. * * @param[out] content The content of the favicon. * @param[out] mimeType The mimeType of the favicon. * @return True if a favicon has been found. */ bool getFavicon(string& content, string& mimeType) const; /** * Get an entry associated to a path. * * @param path The path of the entry. * @return The entry. * @throw NoEntry If no entry corresponds to the path. */ Entry getEntryFromPath(const std::string& path) const; /** * Get an entry associated to a url encoded path. * * Equivalent to `getEntryFromPath(urlDecode(path));` * * @param path The url encoded path. * @return The entry. * @throw NoEntry If no entry corresponds to the path. */ Entry getEntryFromEncodedPath(const std::string& path) const; /** * Get an entry associated to a title. * * @param title The title. * @return The entry * @throw NoEntry If no entry corresponds to the title. */ Entry getEntryFromTitle(const std::string& title) const; /** * Get the url of a page specified by a title. * * @param[in] title the title of the page. * @param[out] url the url of the page. * @return True if the page can be found. */ DEPRECATED bool getPageUrlFromTitle(const string& title, string& url) const; /** * Get the mimetype of an entry specified by a url. * * @param[in] url the url of the entry. * @param[out] mimeType the mimeType of the entry. * @return True if the mimeType has been found. */ DEPRECATED bool getMimeTypeByUrl(const string& url, string& mimeType) const; /** * Get the content of an entry specified by a url. * * Alias to `getContentByEncodedUrl` */ DEPRECATED bool getContentByUrl(const string& url, string& content, string& title, unsigned int& contentLength, string& contentType) const; /** * Get the content of an entry specified by a url encoded url.
* * Equivalent to getContentByDecodedUrl(urlDecode(url), ...). */ DEPRECATED bool getContentByEncodedUrl(const string& url, string& content, string& title, unsigned int& contentLength, string& contentType, string& baseUrl) const; /** * Get the content of an entry specified by a url encoded url. * * Equivalent to getContentByEncodedUrl but without baseUrl. */ DEPRECATED bool getContentByEncodedUrl(const string& url, string& content, string& title, unsigned int& contentLength, string& contentType) const; /** * Get the content of an entry specified by a url. * * @param[in] url The url of the entry. * @param[out] content The content of the entry. * @param[out] title the title of the entry. * @param[out] contentLength The size of the entry (size of content). * @param[out] contentType The mimeType of the entry. * @param[out] baseUrl Return the true url of the entry. * If the specified entry is a redirection, contains * the url of the targeted entry. * @return True if the entry has been found. */ DEPRECATED bool getContentByDecodedUrl(const string& url, string& content, string& title, unsigned int& contentLength, string& contentType, string& baseUrl) const; /** * Get the content of an entry specified by a url. * * Equivalent to getContentByDecodedUrl but without the baseUrl. */ DEPRECATED bool getContentByDecodedUrl(const string& url, string& content, string& title, unsigned int& contentLength, string& contentType) const; /** * Search for entries with title starting with prefix (case sensitive). * * Suggestions are stored in an internal vector and can be retrieved using * `getNextSuggestion` method. * * @param prefix The prefix to search. * @param suggestionsCount How many suggestions to search for. * @param reset If true, remove previous suggestions in the internal vector. * If false, add suggestions to the internal vector * (until internal vector size is suggestionCount (or no more * suggestion)) * @return True if some suggestions were added to the internal vector.
*/ bool searchSuggestions(const string& prefix, unsigned int suggestionsCount, const bool reset = true); /** * Search for entries for the given prefix. * * If the zim file has an internal fulltext index, the suggestions will be * searched using it. * Else the suggestions will be searched using `searchSuggestions` while trying * to be smart about case sensitivity (using `getTitleVariants`). * * In any case, suggestions are stored in an internal vector and can be * retrieved using `getNextSuggestion` method. * The internal vector will be reset. * * @param prefix The prefix to search for. * @param suggestionsCount How many suggestions to search for. */ bool searchSuggestionsSmart(const string& prefix, unsigned int suggestionsCount); /** * Check if the url exists in the zim file. * * Deprecated : Use `pathExists` instead. * * @param url the url to check. * @return True if the url exists in the zim file. */ DEPRECATED bool urlExists(const string& url) const; /** * Check if the path exists in the zim file. * * @param path the path to check. * @return True if the path exists in the zim file. */ bool pathExists(const string& path) const; /** * Check if the zim file has an embedded fulltext index. * * @return True if the zim file has an embedded fulltext index * and is not split (else the fulltext is not accessible). */ bool hasFulltextIndex() const; /** * Get potential case title variations for a title. * * @param title a title. * @return the list of variations. */ std::vector getTitleVariants(const std::string& title) const; /** * Get the next suggestion title. * * @param[out] title the title of the suggestion. * @return True if title has been set. */ bool getNextSuggestion(string& title); /** * Get the next suggestion title and url. * * @param[out] title the title of the suggestion. * @param[out] url the url of the suggestion. * @return True if title and url have been set.
*/ bool getNextSuggestion(string& title, string& url); /** * Get if we can check zim file integrity (has a checksum). * * @return True if zim file has a checksum. */ bool canCheckIntegrity() const; /** * Check if the zim file is corrupted. * * @return True if zim file is corrupted. */ bool isCorrupted() const; /** * Parse a full url into a namespace and url. * * @param[in] url The full url ("/N/url"). * @param[out] ns The namespace (N). * @param[out] title The url (url). * @return True */ DEPRECATED bool parseUrl(const string& url, char* ns, string& title) const; /** * Return the total size of the zim file. * * If zim file is split, return the sum of all parts' size. * * @return Size of the zim file in KiB. */ unsigned int getFileSize() const; /** * Get the zim file handler. * * @return The libzim file handler. */ zim::File* getZimFileHandler() const; /** * Get the zim article object associated to a url. * * @param[in] url The url of the article. * @param[out] article The libzim article object. * @return True if the url is good (article.good()). */ DEPRECATED bool getArticleObjectByDecodedUrl(const string& url, zim::Article& article) const; protected: zim::File* zimFileHandler; zim::size_type firstArticleOffset; zim::size_type lastArticleOffset; zim::size_type nsACount; zim::size_type nsICount; std::string zimFilePath; std::vector> suggestions; std::vector>::iterator suggestionsOffset; private: std::map parseCounterMetadata() const; }; } #endif kiwix-lib-3.1.1/include/searcher.h000066400000000000000000000162301340474775600170260ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version.
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #ifndef KIWIX_SEARCHER_H #define KIWIX_SEARCHER_H #include #include #include #include #include #include #include #include #include #include "common/pathTools.h" #include "common/stringTools.h" #include "kiwix_config.h" using namespace std; namespace kiwix { class Reader; /** * Abstract interface to a single search result. * * Every accessor is pure virtual; concrete search back ends provide the * implementation. */ class Result { public: virtual ~Result(){}; virtual std::string get_url() = 0; virtual std::string get_title() = 0; virtual int get_score() = 0; virtual std::string get_snippet() = 0; virtual std::string get_content() = 0; virtual int get_wordCount() = 0; virtual int get_size() = 0; virtual int get_readerIndex() = 0; }; struct SearcherInternal; /** * The Searcher class is responsible for performing different kinds of search * using the fulltext index. * * Historically, there are two kinds of fulltext index: * - The legacy one is the external fulltext index: a directory stored outside * of the zim file. * - The new one is an embedded fulltext index in the zim file. * * The legacy external fulltext index has to be considered an obsolete format * with fewer functionalities: * - No multi zim search ; * - No geo_search ; * - No suggestions search ; * * To reflect this, there are two Searcher creation "APIs": * - One for the external fulltext index, using the constructor taking a * xapianDirectoryPath ; * - One for the embedded fulltext index, using an "empty" constructor and the * `add_reader` method. * * On top of that, the Searcher may (if compiled with ctpp2) be used to * generate a html page for the search result. This uses a template that needs a * humanReaderName.
This feature is only used by kiwix-serve and should be * moved outside of Searcher (and with a better API). If you don't use the html * rendering (getHtml method), you should simply ignore the different * humanReadableName attributes (or give an empty string). */ class Searcher { public: /** * The default constructor. * * @param humanReadableName The global zim's humanReadableName. * Used to generate pagination links. */ Searcher(const string& humanReadableName = ""); /** * The constructor for legacy external fulltext index. * * @param xapianDirectoryPath The path to the external index directory. * @param reader The reader associated to the external index. * It will be used to retrieve the article content or generate * the snippet. * @param humanReadableName The humanReadableName for the zim. */ Searcher(const string& xapianDirectoryPath, Reader* reader, const string& humanReadableName); ~Searcher(); /** * Add a reader (containing embedded fulltext index) to the search. * * @param reader The Reader for the zim containing the fulltext index. * @param humanReaderName The human readable name of the reader. * @return true if the reader has been added. * false if the reader cannot be added (no embedded fulltext index present) */ bool add_reader(Reader* reader, const std::string& humanReaderName); /** * Start a search on the zim associated to the Searcher. * * Search results should be retrieved using the getNextResult method. * * @param search The search query. * @param resultStart the start offset of the search results (used for pagination). * @param resultEnd the end offset of the search results (used for pagination). * @param verbose print some info on stdout if true. */ void search(std::string& search, unsigned int resultStart, unsigned int resultEnd, const bool verbose = false); /** * Start a geographic search. * The search returns results for entries in a disc of center latitude/longitude * and radius distance.
* * Search results should be retrieved using the getNextResult method. * * @param latitude The latitude of the center point. * @param longitude The longitude of the center point. * @param distance The radius of the disc. * @param resultStart the start offset of the search results (used for pagination). * @param resultEnd the end offset of the search results (used for pagination). * @param verbose print some info on stdout if true. */ void geo_search(float latitude, float longitude, float distance, unsigned int resultStart, unsigned int resultEnd, const bool verbose = false); /** * Start a suggestion search. * The search made depends on the "version" of the embedded index. * - If the index is recent enough and has a title namespace, the search is * made in the titles only. * - Else the search is made on the whole article content. * In any case, the search is made "partial" (as if adding '*' at the end of the query) * * @param search The search query. * @param verbose print some info on stdout if true. */ void suggestions(std::string& search, const bool verbose = false); /** * Get the next result of a started search. * This is the method to use to loop over the search results. */ Result* getNextResult(); /** * Restart the previous search. * Next call to getNextResult will return the first result. */ void restart_search(); /** * Get an estimation of the result count. */ unsigned int getEstimatedResultCount(); /** * Set protocol prefix. * Only used by getHtml. */ bool setProtocolPrefix(const std::string prefix); /** * Set search protocol prefix. * Only used by getHtml. */ bool setSearchProtocolPrefix(const std::string prefix); #ifdef ENABLE_CTPP2 /** * Generate the html page with the results of the search.
*/ string getHtml(); #endif protected: std::string beautifyInteger(const unsigned int number); void closeIndex(); void searchInIndex(string& search, const unsigned int resultStart, const unsigned int resultEnd, const bool verbose = false); std::vector readers; std::vector humanReaderNames; SearcherInternal* internal; std::string searchPattern; std::string protocolPrefix; std::string searchProtocolPrefix; unsigned int resultCountPerPage; unsigned int estimatedResultCount; unsigned int resultStart; unsigned int resultEnd; std::string contentHumanReadableId; private: void reset(); }; } #endif kiwix-lib-3.1.1/include/xapianSearcher.h000066400000000000000000000047341340474775600201750ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA.
*/ #ifndef KIWIX_XAPIAN_SEARCHER_H #define KIWIX_XAPIAN_SEARCHER_H #include #include "reader.h" #include "searcher.h" #include #include using namespace std; namespace kiwix { class XapianSearcher; /** * Result implementation backed by a Xapian match-set iterator. * * Keeps a pointer to the XapianSearcher it was created for; get_readerIndex() * always returns 0. */ class XapianResult : public Result { public: XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator); virtual ~XapianResult(){}; virtual std::string get_url(); virtual std::string get_title(); virtual int get_score(); virtual std::string get_snippet(); virtual std::string get_content(); virtual int get_wordCount(); virtual int get_size(); virtual int get_readerIndex() { return 0; }; private: XapianSearcher* searcher; Xapian::MSetIterator iterator; Xapian::Document document; }; /** * Exception type whose what() reports that the zim file has no fulltext index. * NOTE(review): what() is declared in the private section of the class; it is * still reachable through the base-class interface. */ class NoXapianIndexInZim : public exception { virtual const char* what() const throw() { return "There is no fulltext index in the zim file"; } }; /** * Searcher working on a Xapian index located at xapianDirectoryPath and * associated with a Reader. * * A search is started with searchInIndex(); results are then fetched one by * one with getNextResult(), and restart_search() starts the iteration again. */ class XapianSearcher { friend class XapianResult; public: XapianSearcher(const string& xapianDirectoryPath, Reader* reader); virtual ~XapianSearcher(){}; void searchInIndex(string& search, const unsigned int resultStart, const unsigned int resultEnd, const bool verbose = false); virtual Result* getNextResult(); void restart_search(); Xapian::MSet results; protected: void closeIndex(); void openIndex(const string& xapianDirectoryPath); void setup_queryParser(); Reader* reader; Xapian::Database readableDatabase; std::string language; std::string stopwords; Xapian::QueryParser queryParser; Xapian::Stem stemmer; Xapian::SimpleStopper stopper; Xapian::MSetIterator current_result; std::map valuesmap; }; } #endif kiwix-lib-3.1.1/kiwix.pc.in000066400000000000000000000004451340474775600155230ustar00rootroot00000000000000prefix=@prefix@ libdir=${prefix}/lib64 includedir=${prefix}/include Name: libkiwix Description: A library that contains a lot of things used by used by other kiwix programs Version: @version@ Requires: @requires@ Libs: -L${libdir} -lkiwix @extra_libs@ Cflags: -I${includedir}/ @extra_cflags@
kiwix-lib-3.1.1/meson.build000066400000000000000000000104111340474775600155730ustar00rootroot00000000000000
# Top-level build definition of kiwix-lib.
#
# Resolves the mandatory dependencies (threads, ICU, libzim, pugixml, libcurl),
# the optional ones (ctpp2, xapian), builds the sub-directories and finally
# generates the kiwix.pc pkg-config file.
project('kiwix-lib', 'cpp',
  version : '3.1.1',
  license : 'GPL',
  default_options : ['c_std=c11', 'cpp_std=c++11', 'werror=true'])

compiler = meson.get_compiler('cpp')
find_library_in_compiler = meson.version().version_compare('>=0.31.0')

# Android builds always link their dependencies statically.
static_deps = get_option('android') or get_option('default_library') == 'static'
if get_option('android')
  extra_libs = ['-llog']
else
  extra_libs = []
endif

thread_dep = dependency('threads')
libicu_dep = dependency('icu-i18n', static:static_deps)
libzim_dep = dependency('libzim', version : '>=4.0.0', static:static_deps)
pugixml_dep = dependency('pugixml', static:static_deps)
libcurl_dep = dependency('libcurl', static:static_deps)

# Flags exported to users of kiwix.pc through its Cflags entry.
extra_cflags = ''
if target_machine.system() == 'windows' and static_deps
  add_project_arguments('-DCURL_STATICLIB', language : 'cpp')
  extra_cflags += '-DCURL_STATICLIB'
endif

# ctpp2 is optional. It is searched either in the compiler default paths or
# under the prefix given with -Dctpp2-install-prefix.
ctpp2_include_path = ''
has_ctpp2_dep = false
ctpp2_prefix_install = get_option('ctpp2-install-prefix')
ctpp2_link_args = []
if ctpp2_prefix_install == ''
  if compiler.has_header('ctpp2/CTPP2Logger.hpp')
    if find_library_in_compiler
      ctpp2_lib = compiler.find_library('ctpp2')
    else
      ctpp2_lib = find_library('ctpp2')
    endif
    ctpp2_link_args = ['-lctpp2']
    if meson.is_cross_build() and host_machine.system() == 'windows'
      if find_library_in_compiler
        iconv_lib = compiler.find_library('iconv', required:false)
      else
        iconv_lib = find_library('iconv', required:false)
      endif
      if iconv_lib.found()
        ctpp2_link_args += ['-liconv']
      endif
    endif
    has_ctpp2_dep = true
    ctpp2_dep = declare_dependency(link_args:ctpp2_link_args)
  else
    message('ctpp2/CTPP2Logger.hpp not found. Compiling without CTPP2 support')
  endif
else
  if not find_library_in_compiler
    error('For custom ctpp2_prefix_install you need a meson version >=0.31.0')
  endif
  ctpp2_include_path = ctpp2_prefix_install + '/include'
  ctpp2_include_args = ['-I'+ctpp2_include_path]
  if compiler.has_header('ctpp2/CTPP2Logger.hpp', args:ctpp2_include_args)
    ctpp2_include_dir = include_directories(ctpp2_include_path, is_system:true)
    # Look in <prefix>/<libdir> first, then fall back to <prefix>/lib.
    ctpp2_lib_path = join_paths(ctpp2_prefix_install, get_option('libdir'))
    message(ctpp2_lib_path)
    ctpp2_lib = compiler.find_library('ctpp2', dirs:ctpp2_lib_path, required:false)
    if not ctpp2_lib.found()
      ctpp2_lib_path = join_paths(ctpp2_prefix_install, 'lib')
      message(ctpp2_lib_path)
      ctpp2_lib = compiler.find_library('ctpp2', dirs:ctpp2_lib_path)
    endif
    ctpp2_link_args = ['-L'+ctpp2_lib_path, '-lctpp2']
    if meson.is_cross_build() and host_machine.system() == 'windows'
      iconv_lib = compiler.find_library('iconv', required:false)
      if iconv_lib.found()
        ctpp2_link_args += ['-liconv']
      endif
    endif
    has_ctpp2_dep = true
    ctpp2_dep = declare_dependency(include_directories:ctpp2_include_dir, link_args:ctpp2_link_args)
  else
    message('ctpp2/CTPP2Logger.hpp not found. Compiling without CTPP2 support')
  endif
endif

xapian_dep = dependency('xapian-core', required:false, static:static_deps)

all_deps = [thread_dep, libicu_dep, libzim_dep, xapian_dep, pugixml_dep, libcurl_dep]
if has_ctpp2_dep
  all_deps += [ctpp2_dep]
endif

inc = include_directories('include')

conf = configuration_data()
conf.set('VERSION', '"@0@"'.format(meson.project_version()))
conf.set('ENABLE_CTPP2', has_ctpp2_dep)

# NOTE(review): this tests the *build* machine while the ctpp2 checks above
# test host_machine; when cross-compiling for Windows from another system the
# list stays empty. Confirm whether host_machine was intended.
if build_machine.system() == 'windows'
  extra_link_args = ['-lshlwapi', '-lwinmm']
else
  extra_link_args = []
endif

subdir('include')
subdir('scripts')
subdir('static')
subdir('src')
subdir('test')

pkg_requires = ['libzim', 'icu-i18n', 'pugixml', 'libcurl']
if xapian_dep.found()
  pkg_requires += ['xapian-core']
endif

if has_ctpp2_dep
  extra_libs += ctpp2_link_args
  if ctpp2_include_path != ''
    # Append: a plain assignment here would drop the -DCURL_STATICLIB flag
    # collected above for static Windows builds.
    extra_cflags += ' -I'+ctpp2_include_path
  endif
endif

pkg_conf = configuration_data()
pkg_conf.set('prefix', get_option('prefix'))
pkg_conf.set('requires', ' '.join(pkg_requires))
pkg_conf.set('extra_libs', ' '.join(extra_libs))
pkg_conf.set('extra_cflags', extra_cflags)
pkg_conf.set('version', meson.project_version())
configure_file(output : 'kiwix.pc',
               configuration : pkg_conf,
               input : 'kiwix.pc.in',
               install_dir: get_option('libdir')+'/pkgconfig'
              )
kiwix-lib-3.1.1/meson_options.txt000066400000000000000000000003421340474775600170700ustar00rootroot00000000000000
option('ctpp2-install-prefix', type : 'string', value : '',
  description : 'Prefix where ctpp libs has been installed')
option('android', type : 'boolean', value : false,
  description : 'Do we make a kiwix-lib for android')
kiwix-lib-3.1.1/scripts/000077500000000000000000000000001340474775600151235ustar00rootroot00000000000000kiwix-lib-3.1.1/scripts/ctpp2c.sh000077500000000000000000000001201340474775600166460ustar00rootroot00000000000000
#!/usr/bin/env bash

ctpp2c=$1
SOURCE=$(pwd)/$2
DEST=$3

$ctpp2c $SOURCE $DEST
kiwix-lib-3.1.1/scripts/kiwix-compile-resources000077500000000000000000000142431340474775600216460ustar00rootroot00000000000000#!/usr/bin/env python3 ''' Copyright 2016 Matthieu Gautier This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ''' import argparse import os.path import re def full_identifier(filename): parts = os.path.normpath(filename).split(os.sep) parts = [to_identifier(part) for part in parts] print(filename, parts) return parts def to_identifier(name): ident = re.sub(r'[^0-9a-zA-Z]', '_', name) if ident[0].isnumeric(): return "_"+ident return ident resource_impl_template = """ static const unsigned char {data_identifier}[] = {{ {resource_content} }}; namespace RESOURCE {{ {namespaces_open} const std::string {identifier} = init_resource("{env_identifier}", {data_identifier}, {resource_len}); {namespaces_close} }} """ resource_getter_template = """ if (name == "{common_name}") return RESOURCE::{identifier}; """ resource_decl_template = """{namespaces_open} extern const std::string {identifier}; {namespaces_close}""" class Resource: def __init__(self, base_dirs, filename): filename = filename.strip() self.filename = filename self.identifier = full_identifier(filename) found = False for base_dir in base_dirs: try: with open(os.path.join(base_dir, filename), 'rb') as f: self.data = f.read() found = True break except FileNotFoundError: continue if not found: 
raise Exception("Impossible to found {}".format(filename)) def dump_impl(self): nb_row = len(self.data)//16 + (1 if len(self.data) % 16 else 0) sliced = (self.data[i*16:(i+1)*16] for i in range(nb_row)) return resource_impl_template.format( data_identifier="_".join([""]+self.identifier), resource_content=",\n ".join(", ".join("{:#04x}".format(i) for i in r) for r in sliced), resource_len=len(self.data), namespaces_open=" ".join("namespace {} {{".format(id) for id in self.identifier[:-1]), namespaces_close=" ".join(["}"]*(len(self.identifier)-1)), identifier=self.identifier[-1], env_identifier="RES_"+"_".join(self.identifier)+"_PATH" ) def dump_getter(self): return resource_getter_template.format( common_name=self.filename, identifier="::".join(self.identifier) ) def dump_decl(self): return resource_decl_template.format( namespaces_open=" ".join("namespace {} {{".format(id) for id in self.identifier[:-1]), namespaces_close=" ".join(["}"]*(len(self.identifier)-1)), identifier=self.identifier[-1] ) master_c_template = """//This file is automaically generated. Do not modify it. #include #include #include "{include_file}" static std::string init_resource(const char* name, const unsigned char* content, int len) {{ char * resPath = getenv(name); if (NULL == resPath) return std::string(reinterpret_cast(content), len); std::ifstream ifs(resPath); if (!ifs.good()) return std::string(reinterpret_cast(content), len); return std::string( (std::istreambuf_iterator(ifs)), (std::istreambuf_iterator() )); }} const std::string& getResource_{basename}(const std::string& name) {{ {RESOURCES_GETTER} throw ResourceNotFound("Resource not found."); }} {RESOURCES} """ def gen_c_file(resources, basename): return master_c_template.format( RESOURCES="\n\n".join(r.dump_impl() for r in resources), RESOURCES_GETTER="\n\n".join(r.dump_getter() for r in resources), include_file=basename, basename=to_identifier(basename) ) master_h_template = """//This file is automaically generated. 
Do not modify it. #ifndef KIWIX_{BASENAME} #define KIWIX_{BASENAME} #include #include namespace RESOURCE {{ {RESOURCES} }}; class ResourceNotFound : public std::runtime_error {{ public: ResourceNotFound(const std::string& what_arg): std::runtime_error(what_arg) {{ }}; }}; const std::string& getResource_{basename}(const std::string& name); #define getResource(a) (getResource_{basename}(a)) #endif // KIWIX_{BASENAME} """ def gen_h_file(resources, basename): return master_h_template.format( RESOURCES="\n ".join(r.dump_decl() for r in resources), BASENAME=basename.upper(), basename=basename, ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--cxxfile', help='The Cpp file name to generate') parser.add_argument('--hfile', help='The h file name to generate') parser.add_argument('--source_dir', help="Additional directory where to look for resources.", action='append') parser.add_argument('resource_file', help='The list of resources to compile.') args = parser.parse_args() base_dir = os.path.dirname(os.path.realpath(args.resource_file)) source_dir = args.source_dir or [] with open(args.resource_file, 'r') as f: resources = [Resource([base_dir]+source_dir, filename) for filename in f.readlines()] h_identifier = to_identifier(os.path.basename(args.hfile)) with open(args.hfile, 'w') as f: f.write(gen_h_file(resources, h_identifier)) with open(args.cxxfile, 'w') as f: f.write(gen_c_file(resources, os.path.basename(args.hfile))) kiwix-lib-3.1.1/scripts/meson.build000066400000000000000000000002551340474775600172670ustar00rootroot00000000000000 res_compiler = find_program('kiwix-compile-resources') intermediate_ctpp2c = find_program('ctpp2c.sh') install_data(res_compiler.path(), install_dir:get_option('bindir')) 
kiwix-lib-3.1.1/src/000077500000000000000000000000001340474775600142235ustar00rootroot00000000000000kiwix-lib-3.1.1/src/android/000077500000000000000000000000001340474775600156435ustar00rootroot00000000000000kiwix-lib-3.1.1/src/android/AndroidManifest.xml000066400000000000000000000003711340474775600214350ustar00rootroot00000000000000 kiwix-lib-3.1.1/src/android/kiwix.cpp000066400000000000000000000025271340474775600175100ustar00rootroot00000000000000/* * Copyright (C) 2013 Emmanuel Engelhart * Copyright (C) 2017 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #include #include "org_kiwix_kiwixlib_JNIKiwix.h" #include #include #include "unicode/putil.h" #include "utils.h" pthread_mutex_t globalLock = PTHREAD_RECURSIVE_MUTEX_INITIALIZER; JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory( JNIEnv* env, jobject obj, jstring dirStr) { std::string cPath = jni2c(dirStr, env); Lock l; try { u_setDataDirectory(cPath.c_str()); } catch (...) 
{ std::cerr << "Unable to set data directory " << cPath << std::endl; } } kiwix-lib-3.1.1/src/android/kiwixreader.cpp000066400000000000000000000313561340474775600206750ustar00rootroot00000000000000/* * Copyright (C) 2013 Emmanuel Engelhart * Copyright (C) 2017 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #include #include #include #include "org_kiwix_kiwixlib_JNIKiwixReader.h" #include "common/base64.h" #include "reader.h" #include "utils.h" /* Kiwix Reader JNI functions */ JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReader( JNIEnv* env, jobject obj, jstring filename) { std::string cPath = jni2c(filename, env); __android_log_print(ANDROID_LOG_INFO, "kiwix", "Attempting to create reader with: %s", cPath.c_str()); Lock l; try { kiwix::Reader* reader = new kiwix::Reader(cPath); return reinterpret_cast(new Handle(reader)); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_WARN, "kiwix", "Error opening ZIM file"); __android_log_print(ANDROID_LOG_WARN, "kiwix", e.what()); return 0; } } JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_dispose(JNIEnv* env, jobject obj) { Handle::dispose(env, obj); } #define READER (Handle::getHandle(env, obj)) /* Kiwix library functions */ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getMainPage(JNIEnv* env, 
jobject obj) { jstring url; try { std::string cUrl = READER->getMainPage().getPath(); url = c2jni(cUrl, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM main page"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); url = NULL; } return url; } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getId(JNIEnv* env, jobject obj) { jstring id; try { std::string cId = READER->getId(); id = c2jni(cId, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM id"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); id = NULL; } return id; } JNIEXPORT jint JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getFileSize(JNIEnv* env, jobject obj) { jint size; try { int cSize = READER->getFileSize(); size = c2jni(cSize); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM file size"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); } return size; } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getCreator(JNIEnv* env, jobject obj) { jstring creator; try { std::string cCreator = READER->getCreator(); creator = c2jni(cCreator, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM creator"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); creator = NULL; } return creator; } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getPublisher(JNIEnv* env, jobject obj) { jstring publisher; try { std::string cPublisher = READER->getPublisher(); publisher = c2jni(cPublisher, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM publish"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); publisher = NULL; } return publisher; } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getName(JNIEnv* env, jobject obj) { jstring name; try { std::string cName 
= READER->getName(); name = c2jni(cName, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM name"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); name = NULL; } return name; } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getFavicon(JNIEnv* env, jobject obj) { jstring favicon; try { std::string cContent; std::string cMime; READER->getFavicon(cContent, cMime); favicon = c2jni( base64_encode(cContent), env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM favicon"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); favicon = NULL; } return favicon; } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getDate(JNIEnv* env, jobject obj) { jstring date; try { std::string cDate = READER->getDate(); date = c2jni(cDate, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM date"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); date = NULL; } return date; } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getLanguage(JNIEnv* env, jobject obj) { jstring language; try { std::string cLanguage = READER->getLanguage(); language = c2jni(cLanguage, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get ZIM language"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); language = NULL; } return language; } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getMimeType( JNIEnv* env, jobject obj, jstring url) { jstring mimeType; std::string cUrl = jni2c(url, env); try { auto entry = READER->getEntryFromEncodedPath(cUrl); auto cMimeType = entry.getMimetype(); mimeType = c2jni(cMimeType, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get mime-type for url: %s", cUrl.c_str()); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); mimeType = 
NULL; } return mimeType; } JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContent( JNIEnv* env, jobject obj, jstring url, jobject titleObj, jobject mimeTypeObj, jobject sizeObj) { /* Default values */ setStringObjValue("", titleObj, env); setStringObjValue("", mimeTypeObj, env); setIntObjValue(0, sizeObj, env); jbyteArray data = env->NewByteArray(0); /* Retrieve the content */ std::string cUrl = jni2c(url, env); unsigned int cSize = 0; try { auto entry = READER->getEntryFromEncodedPath(cUrl); entry = entry.getFinalEntry(); cSize = entry.getSize(); setIntObjValue(cSize, sizeObj, env); data = env->NewByteArray(cSize); env->SetByteArrayRegion( data, 0, cSize, reinterpret_cast(entry.getBlob().data())); setStringObjValue(entry.getMimetype(), mimeTypeObj, env); setStringObjValue(entry.getTitle(), titleObj, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get content for url: %s", cUrl.c_str()); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); } return data; } JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getContentPart( JNIEnv* env, jobject obj, jstring url, jint offset, jint len, jobject sizeObj) { jbyteArray data = env->NewByteArray(0); setIntObjValue(0, sizeObj, env); /* Default values */ /* Retrieve the content */ std::string cUrl = jni2c(url, env); unsigned int cOffset = jni2c(offset); unsigned int cLen = jni2c(len); try { auto entry = READER->getEntryFromEncodedPath(cUrl); entry = entry.getFinalEntry(); if (cLen == 0) { setIntObjValue(entry.getSize(), sizeObj, env); } else if (cOffset+cLen < entry.getSize()) { auto blob = entry.getBlob(cOffset, cLen); data = env->NewByteArray(cLen); env->SetByteArrayRegion( data, 0, cLen, reinterpret_cast(blob.data())); setIntObjValue(cLen, sizeObj, env); } } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get partial content for url: %s (%u : %u)", cUrl.c_str(), cOffset, cLen); 
__android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); } return data; } JNIEXPORT jobject JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getDirectAccessInformation( JNIEnv* env, jobject obj, jstring url) { jclass classPair = env->FindClass("org/kiwix/kiwixlib/Pair"); jmethodID midPairinit = env->GetMethodID(classPair, "", "()V"); jobject pair = env->NewObject(classPair, midPairinit); setPairObjValue("", 0, pair, env); std::string cUrl = jni2c(url, env); try { auto entry = READER->getEntryFromEncodedPath(cUrl); entry = entry.getFinalEntry(); auto part_info = entry.getDirectAccessInfo(); setPairObjValue(part_info.first, part_info.second, pair, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get direct access info for url: %s", cUrl.c_str()); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); } return pair; } JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_searchSuggestions(JNIEnv* env, jobject obj, jstring prefix, jint count) { jboolean retVal = JNI_FALSE; std::string cPrefix = jni2c(prefix, env); unsigned int cCount = jni2c(count); try { if (READER->searchSuggestionsSmart(cPrefix, cCount)) { retVal = JNI_TRUE; } } catch (std::exception& e) { __android_log_print(ANDROID_LOG_WARN, "kiwix", "Unable to get search results for pattern: %s", cPrefix.c_str()); __android_log_print(ANDROID_LOG_WARN, "kiwix", e.what()); } return retVal; } JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNextSuggestion(JNIEnv* env, jobject obj, jobject titleObj) { jboolean retVal = JNI_FALSE; std::string cTitle; try { if (READER->getNextSuggestion(cTitle)) { setStringObjValue(cTitle, titleObj, env); retVal = JNI_TRUE; } } catch (std::exception& e) { __android_log_print(ANDROID_LOG_WARN, "kiwix", "Unable to get next suggestion"); __android_log_print(ANDROID_LOG_WARN, "kiwix", e.what()); } return retVal; } JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getPageUrlFromTitle(JNIEnv* 
env, jobject obj, jstring title, jobject urlObj) { std::string cTitle = jni2c(title, env); try { auto entry = READER->getEntryFromTitle(cTitle); entry = entry.getFinalEntry(); setStringObjValue(entry.getPath(), urlObj, env); return JNI_TRUE; } catch (std::exception& e) { __android_log_print(ANDROID_LOG_WARN, "kiwix", "Unable to get url for title %s: ", cTitle.c_str()); __android_log_print(ANDROID_LOG_WARN, "kiwix", e.what()); } return JNI_FALSE; } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getTitle( JNIEnv* env, jobject obj) { jstring title; try { std::string cTitle = READER->getTitle(); title = c2jni(cTitle, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get zim title"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); title = NULL; } return title; } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getDescription(JNIEnv* env, jobject obj) { jstring description; try { std::string cDescription = READER->getDescription(); description = c2jni(cDescription, env); } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get zim description"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); description = NULL; } return description; } JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getRandomPage( JNIEnv* env, jobject obj, jobject urlObj) { jboolean retVal = JNI_FALSE; std::string cUrl; try { std::string cUrl = READER->getRandomPage().getPath(); setStringObjValue(cUrl, urlObj, env); retVal = JNI_TRUE; } catch (std::exception& e) { __android_log_print(ANDROID_LOG_ERROR, "kiwix", "Unable to get random page"); __android_log_print(ANDROID_LOG_ERROR, "kiwix", e.what()); } return retVal; } kiwix-lib-3.1.1/src/android/kiwixsearcher.cpp000066400000000000000000000075761340474775600212360ustar00rootroot00000000000000/* * Copyright (C) 2013 Emmanuel Engelhart * Copyright (C) 2017 Matthieu Gautier * * This program is free 
software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #include #include "org_kiwix_kiwixlib_JNIKiwixSearcher.h" #include "org_kiwix_kiwixlib_JNIKiwixSearcher_Result.h" #include "reader.h" #include "searcher.h" #include "utils.h" #define SEARCHER (Handle::getHandle(env, obj)) #define RESULT (Handle::getHandle(env, obj)) JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_dispose(JNIEnv* env, jobject obj) { Handle::dispose(env, obj); } /* Kiwix Reader JNI functions */ JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_getNativeHandle(JNIEnv* env, jobject obj) { kiwix::Searcher* searcher = new kiwix::Searcher(); return reinterpret_cast(new Handle(searcher)); } /* Kiwix library functions */ JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_addReader( JNIEnv* env, jobject obj, jobject reader) { auto searcher = SEARCHER; searcher->add_reader(*(Handle::getHandle(env, reader)), ""); } JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_search( JNIEnv* env, jobject obj, jstring query, jint count) { std::string cquery = jni2c(query, env); unsigned int ccount = jni2c(count); SEARCHER->search(cquery, 0, ccount); } JNIEXPORT jobject JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_getNextResult(JNIEnv* env, jobject obj) { jobject result = nullptr; kiwix::Result* cresult = SEARCHER->getNextResult(); if (cresult != nullptr) { 
jclass resultclass = env->FindClass("org/kiwix/kiwixlib/JNIKiwixSearcher$Result"); jmethodID ctor = env->GetMethodID( resultclass, "", "(Lorg/kiwix/kiwixlib/JNIKiwixSearcher;JLorg/kiwix/kiwixlib/JNIKiwixSearcher;)V"); result = env->NewObject(resultclass, ctor, obj, reinterpret_cast(new Handle(cresult)), obj); } return result; } JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_dispose( JNIEnv* env, jobject obj) { Handle::dispose(env, obj); } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getUrl(JNIEnv* env, jobject obj) { try { return c2jni(RESULT->get_url(), env); } catch (...) { return nullptr; } } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getTitle(JNIEnv* env, jobject obj) { try { return c2jni(RESULT->get_title(), env); } catch (...) { return nullptr; } } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getSnippet(JNIEnv* env, jobject obj) { return c2jni(RESULT->get_snippet(), env); } JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwixSearcher_00024Result_getContent(JNIEnv* env, jobject obj) { return c2jni(RESULT->get_content(), env); } kiwix-lib-3.1.1/src/android/meson.build000066400000000000000000000017461340474775600200150ustar00rootroot00000000000000 kiwix_jni = custom_target('jni', input: ['org/kiwix/kiwixlib/JNIKiwix.java', 'org/kiwix/kiwixlib/JNIKiwixReader.java', 'org/kiwix/kiwixlib/JNIKiwixSearcher.java', 'org/kiwix/kiwixlib/JNIKiwixInt.java', 'org/kiwix/kiwixlib/JNIKiwixString.java', 'org/kiwix/kiwixlib/JNIKiwixBool.java', 'org/kiwix/kiwixlib/JNIKiwixException.java', 'org/kiwix/kiwixlib/Pair.java'], output: ['org_kiwix_kiwixlib_JNIKiwix.h', 'org_kiwix_kiwixlib_JNIKiwixReader.h', 'org_kiwix_kiwixlib_JNIKiwixSearcher.h', 'org_kiwix_kiwixlib_JNIKiwixSearcher_Result.h'], command:['javac', '-d', '@OUTDIR@', '-h', '@OUTDIR@', '@INPUT@'] ) kiwix_sources += [ 'android/kiwix.cpp', 'android/kiwixreader.cpp', 
'android/kiwixsearcher.cpp', kiwix_jni] install_subdir('org', install_dir: 'kiwix-lib/java') install_subdir('res', install_dir: 'kiwix-lib') install_data('AndroidManifest.xml', install_dir: 'kiwix-lib') kiwix-lib-3.1.1/src/android/org/000077500000000000000000000000001340474775600164325ustar00rootroot00000000000000kiwix-lib-3.1.1/src/android/org/kiwix/000077500000000000000000000000001340474775600175655ustar00rootroot00000000000000kiwix-lib-3.1.1/src/android/org/kiwix/kiwixlib/000077500000000000000000000000001340474775600214075ustar00rootroot00000000000000kiwix-lib-3.1.1/src/android/org/kiwix/kiwixlib/JNIKiwix.java000066400000000000000000000020651340474775600237110ustar00rootroot00000000000000/* * Copyright (C) 2013 Emmanuel Engelhart * Copyright (C) 2017 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ package org.kiwix.kiwixlib; import org.kiwix.kiwixlib.JNIKiwixReader; import org.kiwix.kiwixlib.JNIKiwixString; public class JNIKiwix { static { System.loadLibrary("kiwix"); } public native void setDataDirectory(String icuDataDir); } kiwix-lib-3.1.1/src/android/org/kiwix/kiwixlib/JNIKiwixBool.java000066400000000000000000000015321340474775600245230ustar00rootroot00000000000000/* * Copyright (C) 2013 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ package org.kiwix.kiwixlib; public class JNIKiwixBool { public boolean value; } kiwix-lib-3.1.1/src/android/org/kiwix/kiwixlib/JNIKiwixException.java000066400000000000000000000016401340474775600255660ustar00rootroot00000000000000/* * Copyright (C) 2017 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ package org.kiwix.kiwixlib; public class JNIKiwixException extends Exception { public JNIKiwixException(String message) { super(message); } } kiwix-lib-3.1.1/src/android/org/kiwix/kiwixlib/JNIKiwixInt.java000066400000000000000000000015251340474775600243640ustar00rootroot00000000000000/* * Copyright (C) 2013 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ package org.kiwix.kiwixlib; public class JNIKiwixInt { public int value; } kiwix-lib-3.1.1/src/android/org/kiwix/kiwixlib/JNIKiwixReader.java000066400000000000000000000076251340474775600250430ustar00rootroot00000000000000/* * Copyright (C) 2013 Emmanuel Engelhart * Copyright (C) 2017 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ package org.kiwix.kiwixlib; import org.kiwix.kiwixlib.JNIKiwixException; import org.kiwix.kiwixlib.JNIKiwixString; import org.kiwix.kiwixlib.JNIKiwixInt; import org.kiwix.kiwixlib.JNIKiwixSearcher; import org.kiwix.kiwixlib.Pair; public class JNIKiwixReader { public native String getMainPage(); public native String getTitle(); public native String getId(); public native String getLanguage(); public native String getMimeType(String url); public native byte[] getContent(String url, JNIKiwixString title, JNIKiwixString mimeType, JNIKiwixInt size); /** * getContentPart. * * Get only a part of the content of the article. * Return a byte array of `len` size starting from offset `offset`. * Set `size` to the number of bytes read * (`len` if everything is ok, 0 in case of error). * If `len` == 0, no bytes are read but `size` is set to the total size of the * article. */ public native byte[] getContentPart(String url, int offest, int len, JNIKiwixInt size); /** * getDirectAccessInformation. * * Return information giving where the content is located in the zim file. * * Some contents (binary content) are stored uncompressed in the zim file. * Knowing this information, it could be interesting to directly open * the zim file (or zim part) and directly read the content from it (and so * bypassing the libzim). * * Return a `Pair` (filename, offset) where the content is located. * * If the content cannot be directly accessed (content is compressed or zim * file is cut in the middle of the content), the filename is an empty string * and offset is zero. 
*/ public native Pair getDirectAccessInformation(String url); public native boolean searchSuggestions(String prefix, int count); public native boolean getNextSuggestion(JNIKiwixString title); public native boolean getPageUrlFromTitle(String title, JNIKiwixString url); public native String getDescription(); public native String getDate(); public native String getFavicon(); public native String getCreator(); public native String getPublisher(); public native String getName(); public native int getFileSize(); public native int getArticleCount(); public native int getMediaCount(); public native boolean getRandomPage(JNIKiwixString url); public JNIKiwixSearcher search(String query, int count) { JNIKiwixSearcher searcher = new JNIKiwixSearcher(); searcher.addKiwixReader(this); searcher.search(query, count); return searcher; } public JNIKiwixReader(String filename) throws JNIKiwixException { nativeHandle = getNativeReader(filename); if (nativeHandle == 0) { throw new JNIKiwixException("Cannot open zimfile "+filename); } } public JNIKiwixReader() { } public native void dispose(); private native long getNativeReader(String filename); private long nativeHandle; } kiwix-lib-3.1.1/src/android/org/kiwix/kiwixlib/JNIKiwixSearcher.java000066400000000000000000000036151340474775600253700ustar00rootroot00000000000000/* * Copyright (C) 2013 Emmanuel Engelhart * Copyright (C) 2017 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */

package org.kiwix.kiwixlib;

import org.kiwix.kiwixlib.JNIKiwixReader;
import java.util.Vector;

/**
 * Java side of the native searcher.
 *
 * The constructor obtains a native handle through getNativeHandle() and
 * stores it in `nativeHandle`; dispose() is the native call that releases it.
 */
public class JNIKiwixSearcher
{
  /**
   * One search result, backed by a native handle.
   *
   * Instances are built by the native getNextResult() with the handle of the
   * native result and the searcher that produced it.
   */
  public class Result
  {
    /* Native handle of the result; read by the native methods below. */
    private long nativeHandle;
    /* Searcher this result comes from. */
    private JNIKiwixSearcher searcher;

    public Result(long handle, JNIKiwixSearcher _searcher)
    {
      nativeHandle = handle;
      searcher = _searcher;
    }

    public native String getUrl();
    public native String getTitle();
    public native String getContent();
    public native String getSnippet();
    /** Release the native result. */
    public native void dispose();
  }

  public JNIKiwixSearcher()
  {
    nativeHandle = getNativeHandle();
    usedReaders = new Vector();
  }

  /** Release the native searcher. */
  public native void dispose();

  private native long getNativeHandle();
  /* Native handle of the searcher; read by the native methods. */
  private long nativeHandle;
  /* Readers handed to addKiwixReader(), kept referenced from Java. */
  private Vector usedReaders;

  public native void addReader(JNIKiwixReader reader);

  /** Add `reader` to the native searcher and remember it in usedReaders. */
  public void addKiwixReader(JNIKiwixReader reader)
  {
    addReader(reader);
    usedReaders.addElement(reader);
  };

  public native void search(String query, int count);

  /** Return the next result of the last search. */
  public native Result getNextResult();

  /*
   * NOTE(review): no native implementation of hasMoreResult is visible in
   * kiwixsearcher.cpp; calling it presumably fails with UnsatisfiedLinkError
   * -- confirm against the native sources.
   */
  public native boolean hasMoreResult();
}
kiwix-lib-3.1.1/src/android/org/kiwix/kiwixlib/JNIKiwixString.java000066400000000000000000000015331340474775600250770ustar00rootroot00000000000000/* * Copyright (C) 2013 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ package org.kiwix.kiwixlib; public class JNIKiwixString { public String value; } kiwix-lib-3.1.1/src/android/org/kiwix/kiwixlib/Pair.java000066400000000000000000000015521340474775600231500ustar00rootroot00000000000000/* * Copyright (C) 2017 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ package org.kiwix.kiwixlib; public class Pair { public String filename; public int offset; } kiwix-lib-3.1.1/src/android/res/000077500000000000000000000000001340474775600164345ustar00rootroot00000000000000kiwix-lib-3.1.1/src/android/res/values/000077500000000000000000000000001340474775600177335ustar00rootroot00000000000000kiwix-lib-3.1.1/src/android/res/values/strings.xml000066400000000000000000000001061340474775600221430ustar00rootroot00000000000000 Kiwix Lib kiwix-lib-3.1.1/src/android/utils.h000066400000000000000000000104471340474775600171620ustar00rootroot00000000000000/* * Copyright (C) 2013 Emmanuel Engelhart * Copyright (C) 2017 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #ifndef _ANDROID_JNI_UTILS_H #define _ANDROID_JNI_UTILS_H #include #include #include extern pthread_mutex_t globalLock; inline jfieldID getHandleField(JNIEnv* env, jobject obj) { jclass c = env->GetObjectClass(obj); // J is the type signature for long: return env->GetFieldID(c, "nativeHandle", "J"); } class Lock { protected: pthread_mutex_t* lock; public: Lock() : lock(&globalLock) { pthread_mutex_lock(lock); } Lock(const Lock&) = delete; Lock& operator=(const Lock&) = delete; Lock(Lock&& other) : lock(&globalLock) { other.lock = nullptr; } virtual ~Lock() { if (lock) { pthread_mutex_unlock(lock); } } }; template class LockedHandle; template class Handle { protected: T* h; public: Handle(T* h) : h(h){}; // No destructor. This must and will be handled by dispose method. static LockedHandle getHandle(JNIEnv* env, jobject obj) { jlong handle = env->GetLongField(obj, getHandleField(env, obj)); return LockedHandle(reinterpret_cast*>(handle)); } static void dispose(JNIEnv* env, jobject obj) { auto lHandle = getHandle(env, obj); auto handle = lHandle.h; delete handle->h; delete handle; } friend class LockedHandle; }; template struct LockedHandle : public Lock { Handle* h; LockedHandle(Handle* h) : h(h) {} T* operator->() { return h->h; } T* operator*() { return h->h; } operator bool() const { return (h->h != nullptr); } }; /* c2jni type conversion functions */ inline jboolean c2jni(const bool& val) { return val ? 
JNI_TRUE : JNI_FALSE; } inline jstring c2jni(const std::string& val, JNIEnv* env) { return env->NewStringUTF(val.c_str()); } inline jint c2jni(const int val) { return (jint)val; } inline jint c2jni(const unsigned val) { return (unsigned)val; } /* jni2c type conversion functions */ inline bool jni2c(const jboolean& val) { return val == JNI_TRUE; } inline std::string jni2c(const jstring& val, JNIEnv* env) { const char* chars = env->GetStringUTFChars(val, 0); std::string ret(chars); env->ReleaseStringUTFChars(val, chars); return ret; } inline int jni2c(const jint val) { return (int)val; } /* Method to deal with variable passed by reference */ inline void setStringObjValue(const std::string& value, const jobject obj, JNIEnv* env) { jclass objClass = env->GetObjectClass(obj); jfieldID objFid = env->GetFieldID(objClass, "value", "Ljava/lang/String;"); env->SetObjectField(obj, objFid, c2jni(value, env)); } inline void setIntObjValue(const int value, const jobject obj, JNIEnv* env) { jclass objClass = env->GetObjectClass(obj); jfieldID objFid = env->GetFieldID(objClass, "value", "I"); env->SetIntField(obj, objFid, value); } inline void setBoolObjValue(const bool value, const jobject obj, JNIEnv* env) { jclass objClass = env->GetObjectClass(obj); jfieldID objFid = env->GetFieldID(objClass, "value", "Z"); env->SetIntField(obj, objFid, c2jni(value)); } inline void setPairObjValue(const std::string& filename, const int offset, const jobject obj, JNIEnv* env) { jclass objClass = env->GetObjectClass(obj); jfieldID filenameFid = env->GetFieldID(objClass, "filename", "Ljava/lang/String;"); env->SetObjectField(obj, filenameFid, c2jni(filename, env)); jfieldID offsetFid = env->GetFieldID(objClass, "offset", "I"); env->SetIntField(obj, offsetFid, offset); } #endif // _ANDROID_JNI_UTILS_H kiwix-lib-3.1.1/src/aria2.cpp000066400000000000000000000137261340474775600157360ustar00rootroot00000000000000 #include "aria2.h" #include "xmlrpc.h" #include #include #include #include #include 
#include // For AriaError #ifdef _WIN32 # define ARIA2_CMD "aria2c.exe" #else # define ARIA2_CMD "aria2c" #endif namespace kiwix { Aria2::Aria2(): mp_aria(nullptr), m_port(42042), m_secret("kiwixariarpc"), mp_curl(nullptr), m_lock(PTHREAD_MUTEX_INITIALIZER) { m_downloadDir = getDataDirectory(); makeDirectory(m_downloadDir); std::vector callCmd; std::string rpc_port = "--rpc-listen-port=" + to_string(m_port); std::string download_dir = "--dir=" + getDataDirectory(); std::string session_file = appendToDirectory(getDataDirectory(), "kiwix.session"); std::string session = "--save-session=" + session_file; std::string inputFile = "--input-file=" + session_file; // std::string log_dir = "--log=\"" + logDir + "\""; #ifdef _WIN32 int pid = GetCurrentProcessId(); #else pid_t pid = getpid(); #endif std::string stop_with_pid = "--stop-with-process=" + to_string(pid); std::string rpc_secret = "--rpc-secret=" + m_secret; m_secret = "token:"+m_secret; std::string aria2cmd = appendToDirectory( removeLastPathElement(getExecutablePath(), true, true), ARIA2_CMD); if (fileExists(aria2cmd)) { // A local aria2c exe exists (packaged with kiwix-desktop), use it. callCmd.push_back(aria2cmd.c_str()); } else { // Try to use a potential installed aria2c. 
callCmd.push_back(ARIA2_CMD); } callCmd.push_back("--enable-rpc"); callCmd.push_back(rpc_secret.c_str()); callCmd.push_back(rpc_port.c_str()); callCmd.push_back(download_dir.c_str()); if (fileExists(session_file)) { callCmd.push_back(inputFile.c_str()); } callCmd.push_back(session.c_str()); // callCmd.push_back(log_dir.c_str()); callCmd.push_back("--auto-save-interval=10"); callCmd.push_back(stop_with_pid.c_str()); callCmd.push_back("--allow-overwrite=true"); callCmd.push_back("--dht-entry-point=router.bittorrent.com:6881"); callCmd.push_back("--dht-entry-point6=router.bittorrent.com:6881"); callCmd.push_back("--quiet=true"); callCmd.push_back("--bt-enable-lpd=true"); callCmd.push_back("--always-resume=true"); callCmd.push_back("--max-concurrent-downloads=42"); callCmd.push_back("--rpc-max-request-size=6M"); callCmd.push_back("--file-allocation=none"); mp_aria = Subprocess::run(callCmd); mp_curl = curl_easy_init(); curl_easy_setopt(mp_curl, CURLOPT_URL, "http://localhost/rpc"); curl_easy_setopt(mp_curl, CURLOPT_PORT, m_port); curl_easy_setopt(mp_curl, CURLOPT_POST, 1L); int watchdog = 50; while(--watchdog) { std::this_thread::sleep_for(std::chrono::microseconds(10000)); auto res = curl_easy_perform(mp_curl); if (res == CURLE_OK) { break; } } if (!watchdog) { curl_easy_cleanup(mp_curl); throw std::runtime_error("Cannot connect to aria2c rpc"); } } Aria2::~Aria2() { curl_easy_cleanup(mp_curl); } void Aria2::close() { saveSession(); shutdown(); } size_t write_callback_to_iss(char* ptr, size_t size, size_t nmemb, void* userdata) { auto str = static_cast(userdata); str->write(ptr, nmemb); return nmemb; } std::string Aria2::doRequest(const MethodCall& methodCall) { pthread_mutex_lock(&m_lock); auto requestContent = methodCall.toString(); std::stringstream stringstream; CURLcode res; curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDSIZE, requestContent.size()); curl_easy_setopt(mp_curl, CURLOPT_POSTFIELDS, requestContent.c_str()); curl_easy_setopt(mp_curl, 
CURLOPT_WRITEFUNCTION, &write_callback_to_iss); curl_easy_setopt(mp_curl, CURLOPT_WRITEDATA, &stringstream); res = curl_easy_perform(mp_curl); if (res == CURLE_OK) { long response_code; curl_easy_getinfo(mp_curl, CURLINFO_RESPONSE_CODE, &response_code); pthread_mutex_unlock(&m_lock); if (response_code != 200) { throw std::runtime_error("Invalid return code from aria"); } auto responseContent = stringstream.str(); MethodResponse response(responseContent); if (response.isFault()) { throw AriaError(response.getFault().getFaultString()); } return responseContent; } pthread_mutex_unlock(&m_lock); throw std::runtime_error("Cannot perform request"); } std::string Aria2::addUri(const std::vector& uris) { MethodCall methodCall("aria2.addUri", m_secret); auto uriParams = methodCall.newParamValue().getArray(); for (auto& uri : uris) { uriParams.addValue().set(uri); } auto ret = doRequest(methodCall); MethodResponse response(ret); return response.getParamValue(0).getAsS(); } std::string Aria2::tellStatus(const std::string& gid, const std::vector& statusKey) { MethodCall methodCall("aria2.tellStatus", m_secret); methodCall.newParamValue().set(gid); if (!statusKey.empty()) { auto statusArray = methodCall.newParamValue().getArray(); for (auto& key : statusKey) { statusArray.addValue().set(key); } } return doRequest(methodCall); } std::vector Aria2::tellActive() { MethodCall methodCall("aria2.tellActive", m_secret); auto statusArray = methodCall.newParamValue().getArray(); statusArray.addValue().set(std::string("gid")); statusArray.addValue().set(std::string("following")); auto responseContent = doRequest(methodCall); MethodResponse response(responseContent); std::vector activeGID; int index = 0; while(true) { try { auto structNode = response.getParamValue(0).getArray().getValue(index++).getStruct(); auto gidNode = structNode.getMember("gid"); activeGID.push_back(gidNode.getValue().getAsS()); } catch (InvalidRPCNode& e) { break; } } return activeGID; } void Aria2::saveSession() { 
MethodCall methodCall("aria2.saveSession", m_secret); doRequest(methodCall); std::cout << "session saved" << std::endl; } void Aria2::shutdown() { MethodCall methodCall("aria2.shutdown", m_secret); doRequest(methodCall); } } // end namespace kiwix kiwix-lib-3.1.1/src/aria2.h000066400000000000000000000016161340474775600153760ustar00rootroot00000000000000 #ifndef KIWIXLIB_ARIA2_H_ #define KIWIXLIB_ARIA2_H_ #ifdef _WIN32 // winsock2.h need to be included before windows.h (included by curl.h) # include #endif #include "subprocess.h" #include "xmlrpc.h" #include #include #include namespace kiwix { class Aria2 { private: std::unique_ptr mp_aria; int m_port; std::string m_secret; std::string m_downloadDir; CURL* mp_curl; pthread_mutex_t m_lock; std::string doRequest(const MethodCall& methodCall); public: Aria2(); virtual ~Aria2(); void close(); std::string addUri(const std::vector& uri); std::string tellStatus(const std::string& gid, const std::vector& statusKey); std::vector tellActive(); void saveSession(); void shutdown(); }; }; //end namespace kiwix #endif // KIWIXLIB_ARIA2_H_ kiwix-lib-3.1.1/src/book.cpp000066400000000000000000000126621340474775600156700ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #include "book.h" #include "reader.h" #include "common/base64.h" #include "common/regexTools.h" #include "common/networkTools.h" #include namespace kiwix { /* Constructor */ Book::Book() : m_readOnly(false) { } /* Destructor */ Book::~Book() { } bool Book::update(const kiwix::Book& other) { if (m_readOnly) return false; m_readOnly = other.m_readOnly; if (m_path.empty()) { m_path = other.m_path; } if (m_url.empty()) { m_url = other.m_url; } if (m_tags.empty()) { m_tags = other.m_tags; } if (m_name.empty()) { m_name = other.m_name; } if (m_indexPath.empty()) { m_indexPath = other.m_indexPath; m_indexType = other.m_indexType; } if (m_faviconMimeType.empty()) { m_favicon = other.m_favicon; m_faviconMimeType = other.m_faviconMimeType; } return true; } void Book::update(const kiwix::Reader& reader) { m_path = reader.getZimFilePath(); m_id = reader.getId(); m_description = reader.getDescription(); m_language = reader.getLanguage(); m_date = reader.getDate(); m_creator = reader.getCreator(); m_publisher = reader.getPublisher(); m_title = reader.getTitle(); m_name = reader.getName(); m_tags = reader.getTags(); m_origId = reader.getOrigId(); m_articleCount = reader.getArticleCount(); m_mediaCount = reader.getMediaCount(); m_size = static_cast(reader.getFileSize()) << 10; reader.getFavicon(m_favicon, m_faviconMimeType); } #define ATTR(name) node.attribute(name).value() void Book::updateFromXml(const pugi::xml_node& node, const std::string& baseDir) { m_id = ATTR("id"); std::string path = ATTR("path"); if (isRelativePath(path)) { path = computeAbsolutePath(baseDir, path); } m_path = path; path = ATTR("indexPath"); if (!path.empty()) { if (isRelativePath(path)) { path = computeAbsolutePath(baseDir, path); } m_indexPath = path; m_indexType = XAPIAN; } m_title = ATTR("title"); m_name = ATTR("name"); m_tags = ATTR("tags"); m_description = ATTR("description"); m_language = ATTR("language"); m_date = ATTR("date"); m_creator = ATTR("creator"); m_publisher = ATTR("publisher"); 
m_url = ATTR("url"); m_origId = ATTR("origId"); m_articleCount = strtoull(ATTR("articleCount"), 0, 0); m_mediaCount = strtoull(ATTR("mediaCount"), 0, 0); m_size = strtoull(ATTR("size"), 0, 0) << 10; m_favicon = base64_decode(ATTR("favicon")); m_faviconMimeType = ATTR("faviconMimeType"); try { m_downloadId = ATTR("downloadId"); } catch(...) {} } #undef ATTR static std::string fromOpdsDate(const std::string& date) { //The opds date use the standard --
T::Z //and we want --
. That's easy, let's take the first 10 char return date.substr(0, 10); } #define VALUE(name) node.child(name).child_value() void Book::updateFromOpds(const pugi::xml_node& node, const std::string& urlHost) { m_id = VALUE("id"); if (!m_id.compare(0, 9, "urn:uuid:")) { m_id.erase(0, 9); } m_title = VALUE("title"); m_description = VALUE("description"); m_language = VALUE("language"); m_date = fromOpdsDate(VALUE("updated")); m_creator = node.child("author").child("name").child_value(); for(auto linkNode = node.child("link"); linkNode; linkNode = linkNode.next_sibling("link")) { std::string rel = linkNode.attribute("rel").value(); if (rel == "http://opds-spec.org/acquisition/open-access") { m_url = linkNode.attribute("href").value(); m_size = strtoull(linkNode.attribute("length").value(), 0, 0); } if (rel == "http://opds-spec.org/image/thumbnail") { m_faviconUrl = urlHost + linkNode.attribute("href").value(); m_faviconMimeType = linkNode.attribute("type").value(); } } } #undef VALUE std::string Book::getHumanReadableIdFromPath() { std::string id = m_path; if (!id.empty()) { kiwix::removeAccents(id); #ifdef _WIN32 id = replaceRegex(id, "", "^.*\\\\"); #else id = replaceRegex(id, "", "^.*/"); #endif id = replaceRegex(id, "", "\\.zim[a-z]*$"); id = replaceRegex(id, "_", " "); id = replaceRegex(id, "plus", "\\+"); } return id; } void Book::setPath(const std::string& path) { m_path = isRelativePath(path) ? computeAbsolutePath(getCurrentDirectory(), path) : path; } void Book::setIndexPath(const std::string& indexPath) { m_indexPath = isRelativePath(indexPath) ? computeAbsolutePath(getCurrentDirectory(), indexPath) : indexPath; } const std::string& Book::getFavicon() const { if (m_favicon.empty() && !m_faviconUrl.empty()) { try { m_favicon = download(m_faviconUrl); } catch(...) 
{ std::cerr << "Cannot download favicon from " << m_faviconUrl; } } return m_favicon; } } kiwix-lib-3.1.1/src/bookmark.cpp000066400000000000000000000024571340474775600165440ustar00rootroot00000000000000/* * Copyright 2018 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #include "bookmark.h" #include namespace kiwix { /* Constructor */ Bookmark::Bookmark() { } /* Destructor */ Bookmark::~Bookmark() { } void Bookmark::updateFromXml(const pugi::xml_node& node) { auto bookNode = node.child("book"); m_bookId = bookNode.child("id").child_value(); m_bookTitle = bookNode.child("title").child_value(); m_language = bookNode.child("language").child_value(); m_date = bookNode.child("date").child_value(); m_title = node.child("title").child_value(); m_url = node.child("url").child_value(); } } kiwix-lib-3.1.1/src/common/000077500000000000000000000000001340474775600155135ustar00rootroot00000000000000kiwix-lib-3.1.1/src/common/base64.cpp000066400000000000000000000073751340474775600173170ustar00rootroot00000000000000/* base64.cpp and base64.h Copyright (C) 2004-2008 René Nyffenegger This source code is provided 'as-is', without any express or implied warranty. In no event will the author be held liable for any damages arising from the use of this software. 
Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this source code must not be misrepresented; you must not claim that you wrote the original source code. If you use this source code in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original source code. 3. This notice may not be removed or altered from any source distribution. René Nyffenegger rene.nyffenegger@adp-gmbh.ch */ #include #include static const std::string base64_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789+/"; static inline bool is_base64(unsigned char c) { return (isalnum(c) || (c == '+') || (c == '/')); } std::string base64_encode(const std::string& inString) { std::string ret; auto in_len = inString.size(); const unsigned char* bytes_to_encode = reinterpret_cast(inString.data()); int i = 0; int j = 0; unsigned char char_array_3[3]; unsigned char char_array_4[4]; while (in_len--) { char_array_3[i++] = *(bytes_to_encode++); if (i == 3) { char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); char_array_4[3] = char_array_3[2] & 0x3f; for(i = 0; (i <4) ; i++) ret += base64_chars[char_array_4[i]]; i = 0; } } if (i) { for(j = i; j < 3; j++) char_array_3[j] = '\0'; char_array_4[0] = (char_array_3[0] & 0xfc) >> 2; char_array_4[1] = ((char_array_3[0] & 0x03) << 4) + ((char_array_3[1] & 0xf0) >> 4); char_array_4[2] = ((char_array_3[1] & 0x0f) << 2) + ((char_array_3[2] & 0xc0) >> 6); char_array_4[3] = char_array_3[2] & 0x3f; for (j = 0; (j < i + 1); j++) ret += base64_chars[char_array_4[j]]; while((i++ < 3)) ret 
+= '='; } return ret; } std::string base64_decode(std::string const& encoded_string) { int in_len = encoded_string.size(); int i = 0; int j = 0; int in_ = 0; unsigned char char_array_4[4], char_array_3[3]; std::string ret; while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) { char_array_4[i++] = encoded_string[in_]; in_++; if (i ==4) { for (i = 0; i <4; i++) char_array_4[i] = base64_chars.find(char_array_4[i]); char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; for (i = 0; (i < 3); i++) ret += char_array_3[i]; i = 0; } } if (i) { for (j = i; j <4; j++) char_array_4[j] = 0; for (j = 0; j <4; j++) char_array_4[j] = base64_chars.find(char_array_4[j]); char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; for (j = 0; (j < i - 1); j++) ret += char_array_3[j]; } return ret; } kiwix-lib-3.1.1/src/common/networkTools.cpp000066400000000000000000000141771340474775600207430ustar00rootroot00000000000000/* * Copyright 2012 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #include #ifdef _WIN32 #include #include #else #include #include #include #include #include #include #include #include #include #endif #include #include #include std::map kiwix::getNetworkInterfaces() { std::map interfaces; #ifdef _WIN32 SOCKET sd = WSASocket(AF_INET, SOCK_DGRAM, 0, 0, 0, 0); if (sd == (SOCKET)SOCKET_ERROR) { std::cerr << "Failed to get a socket. Error " << WSAGetLastError() << std::endl; return interfaces; } INTERFACE_INFO InterfaceList[20]; unsigned long nBytesReturned; if (WSAIoctl(sd, SIO_GET_INTERFACE_LIST, 0, 0, &InterfaceList, sizeof(InterfaceList), &nBytesReturned, 0, 0) == SOCKET_ERROR) { std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError() << std::endl; return interfaces; } int nNumInterfaces = nBytesReturned / sizeof(INTERFACE_INFO); for (int i = 0; i < nNumInterfaces; ++i) { sockaddr_in* pAddress; pAddress = (sockaddr_in*)&(InterfaceList[i].iiAddress); /* Add to the map */ std::string interfaceName = std::string(inet_ntoa(pAddress->sin_addr)); std::string interfaceIp = std::string(inet_ntoa(pAddress->sin_addr)); interfaces.insert( std::pair(interfaceName, interfaceIp)); } #else /* Get Network interfaces information */ char buf[16384]; struct ifconf ifconf; int fd = socket(PF_INET, SOCK_DGRAM, 0); /* Only IPV4 */ ifconf.ifc_len = sizeof buf; ifconf.ifc_buf = buf; if (ioctl(fd, SIOCGIFCONF, &ifconf) != 0) { perror("ioctl(SIOCGIFCONF)"); exit(EXIT_FAILURE); } /* Go through each interface */ int i; size_t len; struct ifreq* ifreq; ifreq = ifconf.ifc_req; for (i = 0; i < ifconf.ifc_len;) { if (ifreq->ifr_addr.sa_family == AF_INET) { /* Get the network interface ip */ char host[128] = {0}; const int error = getnameinfo(&(ifreq->ifr_addr), sizeof ifreq->ifr_addr, host, sizeof host, 0, 0, NI_NUMERICHOST); if 
(!error) { std::string interfaceName = std::string(ifreq->ifr_name); std::string interfaceIp = std::string(host); /* Add to the map */ interfaces.insert( std::pair(interfaceName, interfaceIp)); } else { perror("getnameinfo()"); } } /* some systems have ifr_addr.sa_len and adjust the length that * way, but not mine. weird */ #ifndef __linux__ len = IFNAMSIZ + ifreq->ifr_addr.sa_len; #else len = sizeof *ifreq; #endif ifreq = (struct ifreq*)((char*)ifreq + len); i += len; } #endif return interfaces; } std::string kiwix::getBestPublicIp() { std::map interfaces = kiwix::getNetworkInterfaces(); #ifndef _WIN32 const char* const prioritizedNames[] = {"eth0", "eth1", "wlan0", "wlan1", "en0", "en1"}; const int count = (sizeof prioritizedNames) / (sizeof prioritizedNames[0]); for (int i = 0; i < count; ++i) { std::map::const_iterator it = interfaces.find(prioritizedNames[i]); if (it != interfaces.end()) { return it->second; } } #endif for (std::map::iterator iter = interfaces.begin(); iter != interfaces.end(); ++iter) { std::string interfaceIp = iter->second; if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "192.168") { return interfaceIp; } } for (std::map::iterator iter = interfaces.begin(); iter != interfaces.end(); ++iter) { std::string interfaceIp = iter->second; if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "172.16.") { return interfaceIp; } } for (std::map::iterator iter = interfaces.begin(); iter != interfaces.end(); ++iter) { std::string interfaceIp = iter->second; if (interfaceIp.length() >= 3 && interfaceIp.substr(0, 3) == "10.") { return interfaceIp; } } return "127.0.0.1"; } size_t write_callback_to_iss(char* ptr, size_t size, size_t nmemb, void* userdata) { auto str = static_cast(userdata); str->write(ptr, nmemb); return nmemb; } std::string kiwix::download(const std::string& url) { auto curl = curl_easy_init(); std::stringstream ss; curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); curl_easy_setopt(curl, CURLOPT_HTTPGET, 1L); 
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &write_callback_to_iss); curl_easy_setopt(curl, CURLOPT_WRITEDATA, &ss); auto res = curl_easy_perform(curl); if (res != CURLE_OK) { curl_easy_cleanup(curl); throw std::runtime_error("Cannot perform request"); } long response_code; curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code); curl_easy_cleanup(curl); if (response_code != 200) { throw std::runtime_error("Invalid return code from server"); } return ss.str(); } kiwix-lib-3.1.1/src/common/otherTools.cpp000066400000000000000000000141431340474775600203640ustar00rootroot00000000000000/* * Copyright 2014 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #include #include static std::map codeisomapping { //a { "ad", "and" }, { "ae", "are" }, { "af", "afg" }, { "ag", "atg" }, { "ai", "aia" }, { "al", "alb" }, { "am", "arm" }, { "an", "ant" }, { "ao", "ago" }, { "aq", "ata" }, { "ar", "arg" }, { "as", "asm" }, { "at", "aut" }, { "au", "aus" }, { "aw", "abw" }, { "ax", "ala" }, { "az", "aze" }, //b { "ba", "bih" }, { "bb", "brb" }, { "bd", "bgd" }, { "be", "bel" }, { "bf", "bfa" }, { "bg", "bgr" }, { "bh", "bhr" }, { "bi", "bdi" }, { "bj", "ben" }, { "bl", "blm" }, { "bn", "brn" }, { "bm", "bmu" }, { "bo", "bol" }, { "br", "bra" }, { "bs", "bhs" }, { "bt", "btn" }, { "bv", "bvt" }, { "bw", "bwa" }, { "by", "blr" }, { "bz", "blz" }, //c { "ca", "can" }, { "cc", "cck" }, { "cd", "cod" }, { "cf", "caf" }, { "cg", "cog" }, { "ch", "che" }, { "ci", "civ" }, { "ck", "cok" }, { "cl", "chl" }, { "cm", "cmr" }, { "cn", "chn" }, { "co", "col" }, { "cr", "cri" }, { "cu", "cub" }, { "cv", "cpv" }, { "cx", "cxr" }, { "cy", "cyp" }, { "cz", "cze" }, //d { "de", "deu" }, { "dj", "dji" }, { "dk", "dnk" }, { "dm", "dma" }, { "do", "dom" }, { "dz", "dza" }, //e { "ec", "ecu" }, { "ee", "est" }, { "eg", "egy" }, { "eh", "esh" }, { "en", "eng" }, { "er", "eri" }, { "es", "esp" }, { "et", "eth" }, //f { "fi", "fin" }, { "fj", "fji" }, { "fk", "flk" }, { "fm", "fsm" }, { "fo", "fro" }, { "fr", "fra" }, //g { "ga", "gab" }, { "gb", "gbr" }, { "gd", "grd" }, { "ge", "geo" }, { "gf", "guf" }, { "gg", "ggy" }, { "gh", "gha" }, { "gi", "gib" }, { "gl", "grl" }, { "gm", "gmb" }, { "gn", "gin" }, { "gp", "glp" }, { "gq", "gnq" }, { "gr", "grc" }, { "gs", "sgs" }, { "gt", "gtm" }, { "gu", "gum" }, { "gw", "gnb" }, { "gy", "guy" }, //h { "hk", "hkg" }, { "hm", "hmd" }, { "hn", "hnd" }, { "hr", "hrv" }, { "ht", "hti" }, { "hu", "hun" }, //i { "id", "idn" }, { "ie", "irl" }, { "il", "isr" }, { "im", "imn" }, { "in", "ind" }, { "io", "iot" }, { "iq", "irq" }, { "ir", "irn" }, { "is", "isl" }, { "it", "ita" }, //j { "je", "jey" }, { "jm", "jam" }, { 
"jo", "jor" }, { "jp", "jpn" }, //k { "ke", "ken" }, { "kg", "kgz" }, { "kh", "khm" }, { "ki", "kir" }, { "km", "com" }, { "kn", "kna" }, { "kp", "prk" }, { "kr", "kor" }, { "kw", "kwt" }, { "ky", "cym" }, { "kz", "kaz" }, //l { "la", "lao" }, { "lb", "lbn" }, { "lc", "lca" }, { "li", "lie" }, { "lk", "lka" }, { "lr", "lbr" }, { "ls", "lso" }, { "lt", "ltu" }, { "lu", "lux" }, { "lv", "lva" }, { "ly", "lby" }, //m { "ma", "mar" }, { "mc", "mco" }, { "md", "mda" }, { "me", "mne" }, { "mf", "maf" }, { "mg", "mdg" }, { "mh", "mhl" }, { "mk", "mkd" }, { "ml", "mli" }, { "mm", "mmr" }, { "mn", "mng" }, { "mo", "mac" }, { "mp", "mnp" }, { "mq", "mtq" }, { "mr", "mrt" }, { "ms", "msr" }, { "mt", "mlt" }, { "mu", "mus" }, { "mv", "mdv" }, { "mw", "mwi" }, { "mx", "mex" }, { "my", "mys" }, { "mz", "moz" }, //n { "na", "nam" }, { "nc", "ncl" }, { "ne", "ner" }, { "nf", "nfk" }, { "ng", "nga" }, { "ni", "nic" }, { "nl", "nld" }, { "no", "nor" }, { "np", "npl" }, { "nr", "nru" }, { "nu", "niu" }, { "nz", "nzl" }, //o { "om", "omn" }, //p { "pa", "pan" }, { "pe", "per" }, { "pf", "pyf" }, { "pg", "png" }, { "ph", "phl" }, { "pk", "pak" }, { "pl", "pol" }, { "pm", "spm" }, { "pn", "pcn" }, { "pr", "pri" }, { "ps", "pse" }, { "pt", "prt" }, { "pw", "plw" }, { "py", "pry" }, //q { "qa", "qat" }, //r { "re", "reu" }, { "ro", "rou" }, { "rs", "srb" }, { "ru", "rus" }, { "rw", "rwa" }, //s { "sa", "sau" }, { "sb", "slb" }, { "sc", "syc" }, { "sd", "sdn" }, { "se", "swe" }, { "sg", "sgp" }, { "sh", "shn" }, { "si", "svn" }, { "sj", "sjm" }, { "sk", "svk" }, { "sl", "sle" }, { "sm", "smr" }, { "sn", "sen" }, { "so", "som" }, { "sr", "sur" }, { "ss", "ssd" }, { "st", "stp" }, { "sv", "slv" }, { "sy", "syr" }, { "sz", "swz" }, //t { "tc", "tca" }, { "td", "tcd" }, { "tf", "atf" }, { "tg", "tgo" }, { "th", "tha" }, { "tj", "tjk" }, { "tk", "tkl" }, { "tl", "tls" }, { "tm", "tkm" }, { "tn", "tun" }, { "to", "ton" }, { "tr", "tur" }, { "tt", "tto" }, { "tv", "tuv" }, { "tw", "twn" }, { 
"tz", "tza" }, //u { "ua", "ukr" }, { "ug", "uga" }, { "um", "umi" }, { "us", "usa" }, { "uy", "ury" }, { "uz", "uzb" }, //v { "va", "vat" }, { "vc", "vct" }, { "ve", "ven" }, { "vg", "vgb" }, { "vi", "vir" }, { "vn", "vnm" }, { "vu", "vut" }, //w { "wf", "wlf" }, { "ws", "wsm" }, //y { "ye", "yem" }, { "yt", "myt" }, // z { "za", "zaf" }, { "zm", "zmb" }, { "zw", "zwe" } }; void kiwix::sleep(unsigned int milliseconds) { #ifdef _WIN32 Sleep(milliseconds); #else usleep(1000 * milliseconds); #endif } struct XmlStringWriter: pugi::xml_writer { std::string result; virtual void write(const void* data, size_t size){ result.append(static_cast(data), size); } }; std::string kiwix::nodeToString(pugi::xml_node node) { XmlStringWriter writer; node.print(writer, " "); return writer.result; } std::string kiwix::converta2toa3(const std::string& a2code){ return codeisomapping.at(a2code); } kiwix-lib-3.1.1/src/common/pathTools.cpp000066400000000000000000000202021340474775600201700ustar00rootroot00000000000000/* * Copyright 2011-2014 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #include #ifdef __APPLE__ #include #include #elif _WIN32 #include #include #include "shlwapi.h" #define getcwd _getcwd // stupid MSFT "deprecation" warning #endif #ifdef _WIN32 #else #include #endif #ifdef _WIN32 const std::string SEPARATOR("\\"); #else const std::string SEPARATOR("/"); #include #endif #include #ifndef PATH_MAX #define PATH_MAX 1024 #endif bool isRelativePath(const string& path) { #ifdef _WIN32 return path.empty() || path.substr(1, 2) == ":\\" ? false : true; #else return path.empty() || path.substr(0, 1) == "/" ? false : true; #endif } string computeRelativePath(const string path, const string absolutePath) { std::vector pathParts = kiwix::split(path, SEPARATOR); std::vector absolutePathParts = kiwix::split(absolutePath, SEPARATOR); unsigned int commonCount = 0; while (commonCount < pathParts.size() && commonCount < absolutePathParts.size() && pathParts[commonCount] == absolutePathParts[commonCount]) { commonCount++; } string relativePath; #ifdef _WIN32 /* On Windows you have a token more because the root is represented by a letter */ if (commonCount == 0) { relativePath = ".." + SEPARATOR; } #endif for (unsigned int i = commonCount; i < pathParts.size(); i++) { relativePath += ".." + SEPARATOR; } for (unsigned int i = commonCount; i < absolutePathParts.size(); i++) { relativePath += absolutePathParts[i]; relativePath += i + 1 < absolutePathParts.size() ? SEPARATOR : ""; } return relativePath; } /* Warning: the relative path must be with slashes */ string computeAbsolutePath(const string path, const string relativePath) { string absolutePath; if (path.empty()) { char* path = NULL; size_t size = 0; #ifdef _WIN32 path = _getcwd(path, size); #else path = getcwd(path, size); #endif absolutePath = string(path) + SEPARATOR; } else { absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR ? 
path : path + SEPARATOR; } #if _WIN32 char* cRelativePath = _strdup(relativePath.c_str()); #else char* cRelativePath = strdup(relativePath.c_str()); #endif char* token = strtok(cRelativePath, "/"); while (token != NULL) { if (string(token) == "..") { absolutePath = removeLastPathElement(absolutePath, true, false); token = strtok(NULL, "/"); } else if (strcmp(token, ".") && strcmp(token, "")) { absolutePath += string(token); token = strtok(NULL, "/"); if (token != NULL) { absolutePath += SEPARATOR; } } else { token = strtok(NULL, "/"); } } return absolutePath; } string removeLastPathElement(const string path, const bool removePreSeparator, const bool removePostSeparator) { string newPath = path; size_t offset = newPath.find_last_of(SEPARATOR); if (removePreSeparator && #ifndef _WIN32 offset != newPath.find_first_of(SEPARATOR) && #endif offset == newPath.length() - 1) { newPath = newPath.substr(0, offset); offset = newPath.find_last_of(SEPARATOR); } newPath = removePostSeparator ? newPath.substr(0, offset) : newPath.substr(0, offset + 1); return newPath; } string appendToDirectory(const string& directoryPath, const string& filename) { string newPath = directoryPath + SEPARATOR + filename; return newPath; } string getLastPathElement(const string& path) { return path.substr(path.find_last_of(SEPARATOR) + 1); } unsigned int getFileSize(const string& path) { #ifdef _WIN32 struct _stat filestatus; _stat(path.c_str(), &filestatus); #else struct stat filestatus; stat(path.c_str(), &filestatus); #endif return filestatus.st_size / 1024; } string getFileSizeAsString(const string& path) { ostringstream convert; convert << getFileSize(path); return convert.str(); } string getFileContent(const string& path) { std::ifstream f(path, std::ios::in|std::ios::ate); std::string content; if (f.is_open()) { auto size = f.tellg(); content.reserve(size); f.seekg(0, std::ios::beg); content.assign((std::istreambuf_iterator(f)), std::istreambuf_iterator()); } return content; } bool 
fileExists(const string& path) { #ifdef _WIN32 return PathFileExists(path.c_str()); #else bool flag = false; fstream fin; fin.open(path.c_str(), ios::in); if (fin.is_open()) { flag = true; } fin.close(); return flag; #endif } bool makeDirectory(const string& path) { #ifdef _WIN32 int status = _mkdir(path.c_str()); #else int status = mkdir(path.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); #endif return status == 0; } string makeTmpDirectory() { #ifdef _WIN32 char cbase[MAX_PATH+1]; int base_len = GetTempPath(MAX_PATH+1, cbase); UUID uuid; UuidCreate(&uuid); char* dir_name; UuidToString(&uuid, reinterpret_cast(&dir_name)); string dir(cbase, base_len); dir += dir_name; _mkdir(dir.c_str()); RpcStringFree(reinterpret_cast(&dir_name)); #else string base = "/tmp"; auto _template = base + "/kiwix-lib_XXXXXX"; char* _template_array = new char[_template.size()+1]; memcpy(_template_array, _template.c_str(), _template.size()); string dir = mkdtemp(_template_array); delete[] _template_array; #endif return dir; } /* Try to create a link and if does not work then make a copy */ bool copyFile(const string& sourcePath, const string& destPath) { try { #ifndef _WIN32 if (link(sourcePath.c_str(), destPath.c_str()) != 0) { #endif std::ifstream infile(sourcePath.c_str(), std::ios_base::binary); std::ofstream outfile(destPath.c_str(), std::ios_base::binary); outfile << infile.rdbuf(); #ifndef _WIN32 } #endif } catch (exception& e) { cerr << e.what() << endl; return false; } return true; } string getExecutablePath() { char binRootPath[PATH_MAX]; #ifdef _WIN32 GetModuleFileName(NULL, binRootPath, PATH_MAX); return std::string(binRootPath); #elif __APPLE__ uint32_t max = (uint32_t)PATH_MAX; _NSGetExecutablePath(binRootPath, &max); return std::string(binRootPath); #else ssize_t size = readlink("/proc/self/exe", binRootPath, PATH_MAX); if (size != -1) { return std::string(binRootPath, size); } #endif return ""; } bool writeTextFile(const string& path, const string& content) { 
std::ofstream file; file.open(path.c_str()); file << content; file.close(); return true; } string getCurrentDirectory() { char* a_cwd = getcwd(NULL, 0); string s_cwd(a_cwd); free(a_cwd); return s_cwd; } string getDataDirectory() { #ifdef _WIN32 char* cDataDir = ::getenv("APPDATA"); #else char* cDataDir = ::getenv("KIWIX_DATA_DIR"); #endif std::string dataDir = cDataDir==nullptr ? "" : cDataDir; if (!dataDir.empty()) return dataDir; #ifdef _WIN32 cDataDir = ::getenv("USERPROFILE"); dataDir = cDataDir==nullptr ? getCurrentDirectory() : cDataDir; #else cDataDir = ::getenv("XDG_DATA_HOME"); dataDir = cDataDir==nullptr ? "" : cDataDir; if (dataDir.empty()) { cDataDir = ::getenv("HOME"); dataDir = cDataDir==nullptr ? getCurrentDirectory() : cDataDir; dataDir = appendToDirectory(dataDir, ".local"); dataDir = appendToDirectory(dataDir, "share"); } #endif return appendToDirectory(dataDir, "kiwix"); } kiwix-lib-3.1.1/src/common/regexTools.cpp000066400000000000000000000053141340474775600203550ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #include std::map regexCache; icu::RegexMatcher* buildRegex(const std::string& regex) { icu::RegexMatcher* matcher; auto itr = regexCache.find(regex); /* Regex is in cache */ if (itr != regexCache.end()) { matcher = itr->second; } /* Regex needs to be parsed (and cached) */ else { UErrorCode status = U_ZERO_ERROR; icu::UnicodeString uregex(regex.c_str()); matcher = new icu::RegexMatcher(uregex, UREGEX_CASE_INSENSITIVE, status); regexCache[regex] = matcher; } return matcher; } /* todo */ void freeRegexCache() { } bool matchRegex(const std::string& content, const std::string& regex) { ucnv_setDefaultName("UTF-8"); icu::UnicodeString ucontent(content.c_str()); auto matcher = buildRegex(regex); matcher->reset(ucontent); return matcher->find(); } std::string replaceRegex(const std::string& content, const std::string& replacement, const std::string& regex) { ucnv_setDefaultName("UTF-8"); icu::UnicodeString ucontent(content.c_str()); icu::UnicodeString ureplacement(replacement.c_str()); auto matcher = buildRegex(regex); matcher->reset(ucontent); UErrorCode status = U_ZERO_ERROR; auto uresult = matcher->replaceAll(ureplacement, status); std::string tmp; uresult.toUTF8String(tmp); return tmp; } std::string appendToFirstOccurence(const std::string& content, const std::string regex, const std::string& replacement) { ucnv_setDefaultName("UTF-8"); icu::UnicodeString ucontent(content.c_str()); icu::UnicodeString ureplacement(replacement.c_str()); auto matcher = buildRegex(regex); matcher->reset(ucontent); if (matcher->find()) { UErrorCode status = U_ZERO_ERROR; ucontent.insert(matcher->end(status), ureplacement); std::string tmp; ucontent.toUTF8String(tmp); return tmp; } return content; } kiwix-lib-3.1.1/src/common/stringTools.cpp000066400000000000000000000212321340474775600205460ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as 
published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #include #include #include #include #include #include #include /* tell ICU where to find its dat file (tables) */ void kiwix::loadICUExternalTables() { #ifdef __APPLE__ std::string executablePath = getExecutablePath(); std::string executableDirectory = removeLastPathElement(executablePath); std::string datPath = computeAbsolutePath(executableDirectory, "icudt58l.dat"); try { u_setDataDirectory(datPath.c_str()); } catch (exception& e) { std::cerr << e.what() << std::endl; } #endif } std::string kiwix::removeAccents(const std::string& text) { loadICUExternalTables(); ucnv_setDefaultName("UTF-8"); UErrorCode status = U_ZERO_ERROR; auto removeAccentsTrans = icu::Transliterator::createInstance( "Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status); icu::UnicodeString ustring(text.c_str()); removeAccentsTrans->transliterate(ustring); delete removeAccentsTrans; std::string unaccentedText; ustring.toUTF8String(unaccentedText); return unaccentedText; } #ifndef __ANDROID__ /* Prepare integer for display */ std::string kiwix::beautifyInteger(uint64_t number) { std::stringstream numberStream; numberStream << number; std::string numberString = numberStream.str(); signed int offset = numberString.size() - 3; while (offset > 0) { numberString.insert(offset, ","); offset -= 3; } return numberString; } std::string kiwix::beautifyFileSize(uint64_t number) { std::stringstream ss; ss << std::fixed << std::setprecision(2); if 
(number>>30) ss << (number/(1024.0*1024*1024)) << " GB"; else if (number>>20) ss << (number/(1024.0*1024)) << " MB"; else if (number>>10) ss << (number/1024.0) << " KB"; else ss << number << " B"; return ss.str(); } void kiwix::printStringInHexadecimal(icu::UnicodeString s) { std::cout << std::showbase << std::hex; for (int i = 0; i < s.length(); i++) { char c = (char)((s.getTerminatedBuffer())[i]); if (c & 0x80) { std::cout << (c & 0xffff) << " "; } else { std::cout << c << " "; } } std::cout << std::endl; } void kiwix::printStringInHexadecimal(const char* s) { std::cout << std::showbase << std::hex; for (char const* pc = s; *pc; ++pc) { if (*pc & 0x80) { std::cout << (*pc & 0xffff); } else { std::cout << *pc; } std::cout << ' '; } std::cout << std::endl; } void kiwix::stringReplacement(std::string& str, const std::string& oldStr, const std::string& newStr) { size_t pos = 0; while ((pos = str.find(oldStr, pos)) != std::string::npos) { str.replace(pos, oldStr.length(), newStr); pos += newStr.length(); } } /* Encode string to avoid XSS attacks */ std::string kiwix::encodeDiples(const std::string& str) { std::string result = str; kiwix::stringReplacement(result, "<", "<"); kiwix::stringReplacement(result, ">", ">"); return result; } #endif /* urlEncode() based on javascript encodeURI() & encodeURIComponent(). 
Mostly code from rstudio/httpuv (GPLv3) */ bool isReservedUrlChar(char c) { switch (c) { case ';': case ',': case '/': case '?': case ':': case '@': case '&': case '=': case '+': case '$': return true; default: return false; } } bool needsEscape(char c, bool encodeReserved) { if (c >= 'a' && c <= 'z') return false; if (c >= 'A' && c <= 'Z') return false; if (c >= '0' && c <= '9') return false; if (isReservedUrlChar(c)) return encodeReserved; switch (c) { case '-': case '_': case '.': case '!': case '~': case '*': case '\'': case '(': case ')': return false; } return true; } int hexToInt(char c) { switch (c) { case '0': return 0; case '1': return 1; case '2': return 2; case '3': return 3; case '4': return 4; case '5': return 5; case '6': return 6; case '7': return 7; case '8': return 8; case '9': return 9; case 'A': case 'a': return 10; case 'B': case 'b': return 11; case 'C': case 'c': return 12; case 'D': case 'd': return 13; case 'E': case 'e': return 14; case 'F': case 'f': return 15; default: return -1; } } std::string kiwix::urlEncode(const std::string& value, bool encodeReserved) { std::ostringstream os; os << std::hex << std::uppercase; for (std::string::const_iterator it = value.begin(); it != value.end(); it++) { if (!needsEscape(*it, encodeReserved)) { os << *it; } else { os << '%' << std::setw(2) << static_cast(static_cast(*it)); } } return os.str(); } std::string kiwix::urlDecode(const std::string& value, bool component) { std::ostringstream os; for (std::string::const_iterator it = value.begin(); it != value.end(); it++) { // If there aren't enough characters left for this to be a // valid escape code, just use the character and move on if (it > value.end() - 3) { os << *it; continue; } if (*it == '%') { char hi = *(++it); char lo = *(++it); int iHi = hexToInt(hi); int iLo = hexToInt(lo); if (iHi < 0 || iLo < 0) { // Invalid escape sequence os << '%' << hi << lo; continue; } char c = (char)(iHi << 4 | iLo); if (!component && isReservedUrlChar(c)) { os 
<< '%' << hi << lo; } else { os << c; } } else { os << *it; } } return os.str(); } /* Split string in a token array */ std::vector kiwix::split(const std::string& str, const std::string& delims = " *-") { std::string::size_type lastPos = str.find_first_not_of(delims, 0); std::string::size_type pos = str.find_first_of(delims, lastPos); std::vector tokens; while (std::string::npos != pos || std::string::npos != lastPos) { tokens.push_back(str.substr(lastPos, pos - lastPos)); lastPos = str.find_first_not_of(delims, pos); pos = str.find_first_of(delims, lastPos); } return tokens; } std::vector kiwix::split(const char* lhs, const char* rhs) { const std::string m1(lhs), m2(rhs); return split(m1, m2); } std::vector kiwix::split(const char* lhs, const std::string& rhs) { return split(lhs, rhs.c_str()); } std::vector kiwix::split(const std::string& lhs, const char* rhs) { return split(lhs.c_str(), rhs); } std::string kiwix::ucFirst(const std::string& word) { if (word.empty()) { return ""; } std::string result; icu::UnicodeString unicodeWord(word.c_str()); auto unicodeFirstLetter = icu::UnicodeString(unicodeWord, 0, 1).toUpper(); unicodeWord.replace(0, 1, unicodeFirstLetter); unicodeWord.toUTF8String(result); return result; } std::string kiwix::ucAll(const std::string& word) { if (word.empty()) { return ""; } std::string result; icu::UnicodeString unicodeWord(word.c_str()); unicodeWord.toUpper().toUTF8String(result); return result; } std::string kiwix::lcFirst(const std::string& word) { if (word.empty()) { return ""; } std::string result; icu::UnicodeString unicodeWord(word.c_str()); auto unicodeFirstLetter = icu::UnicodeString(unicodeWord, 0, 1).toLower(); unicodeWord.replace(0, 1, unicodeFirstLetter); unicodeWord.toUTF8String(result); return result; } std::string kiwix::lcAll(const std::string& word) { if (word.empty()) { return ""; } std::string result; icu::UnicodeString unicodeWord(word.c_str()); unicodeWord.toLower().toUTF8String(result); return result; } std::string 
kiwix::toTitle(const std::string& word) { if (word.empty()) { return ""; } std::string result; icu::UnicodeString unicodeWord(word.c_str()); unicodeWord = unicodeWord.toTitle(0); unicodeWord.toUTF8String(result); return result; } std::string kiwix::normalize(const std::string& word) { return kiwix::lcAll(word); } kiwix-lib-3.1.1/src/config.h.in000066400000000000000000000000611340474775600162430ustar00rootroot00000000000000 #mesondefine VERSION #mesondefine ENABLE_CTPP2 kiwix-lib-3.1.1/src/ctpp2/000077500000000000000000000000001340474775600152535ustar00rootroot00000000000000kiwix-lib-3.1.1/src/ctpp2/CTPP2VMStringLoader.cpp000066400000000000000000000142671340474775600214020ustar00rootroot00000000000000/* * Copyright 2013 Renaud Gaudin * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #include namespace CTPP // C++ Template Engine { // // Convert byte order // static void ConvertExecutable(VMExecutable * oCore) { // Code entry point oCore -> entry_point = Swap32(oCore -> entry_point); // Offset of code segment oCore -> code_offset = Swap32(oCore -> code_offset); // Code segment size oCore -> code_size = Swap32(oCore -> code_size); // Offset of static text segment oCore -> syscalls_offset = Swap32(oCore -> syscalls_offset); // Static text segment size oCore -> syscalls_data_size = Swap32(oCore -> syscalls_data_size); // Offset of static text index segment oCore -> syscalls_index_offset = Swap32(oCore -> syscalls_index_offset); // Static text index segment size oCore -> syscalls_index_size = Swap32(oCore -> syscalls_index_size); // Offset of static data segment oCore -> static_data_offset = Swap32(oCore -> static_data_offset); // Static data segment size oCore -> static_data_data_size = Swap32(oCore -> static_data_data_size); // Offset of static text segment oCore -> static_text_offset = Swap32(oCore -> static_text_offset); // Static text segment size oCore -> static_text_data_size = Swap32(oCore -> static_text_data_size); // Offset of static text index segment oCore -> static_text_index_offset = Swap32(oCore -> static_text_index_offset); // Static text index segment size oCore -> static_text_index_size = Swap32(oCore -> static_text_index_size); // Version 2.2+ // Offset of static data bit index oCore -> static_data_bit_index_offset = Swap32(oCore -> static_data_bit_index_offset); /// Offset of static data bit index oCore -> static_data_bit_index_size = Swap32(oCore -> static_data_bit_index_size); // Platform oCore -> platform = Swap64(oCore -> platform); // Ugly-jolly hack! // ... dereferencing type-punned pointer will break strict-aliasing rules ... 
UINT_64 iTMP; memcpy(&iTMP, &(oCore -> ieee754double), sizeof(UINT_64)); iTMP = Swap64(iTMP); memcpy(&(oCore -> ieee754double), &iTMP, sizeof(UINT_64)); // Cyclic Redundancy Check oCore -> crc = 0; // Convert data structures // Convert code segment VMInstruction * pInstructions = const_cast(VMExecutable::GetCodeSeg(oCore)); UINT_32 iI = 0; UINT_32 iSteps = oCore -> code_size / sizeof(VMInstruction); for(iI = 0; iI < iSteps; ++iI) { pInstructions -> instruction = Swap32(pInstructions -> instruction); pInstructions -> argument = Swap32(pInstructions -> argument); pInstructions -> reserved = Swap64(pInstructions -> reserved); ++pInstructions; } // Convert syscalls index TextDataIndex * pTextIndex = const_cast(VMExecutable::GetSyscallsIndexSeg(oCore)); iSteps = oCore -> syscalls_index_size / sizeof(TextDataIndex); for(iI = 0; iI < iSteps; ++iI) { pTextIndex -> offset = Swap32(pTextIndex -> offset); pTextIndex -> length = Swap32(pTextIndex -> length); ++pTextIndex; } // Convert static text index pTextIndex = const_cast(VMExecutable::GetStaticTextIndexSeg(oCore)); iSteps = oCore -> static_text_index_size / sizeof(TextDataIndex); for(iI = 0; iI < iSteps; ++iI) { pTextIndex -> offset = Swap32(pTextIndex -> offset); pTextIndex -> length = Swap32(pTextIndex -> length); ++pTextIndex; } // Convert static data StaticDataVar * pStaticDataVar = const_cast(VMExecutable::GetStaticDataSeg(oCore)); iSteps = oCore -> static_data_data_size / sizeof(StaticDataVar); for(iI = 0; iI < iSteps; ++iI) { (*pStaticDataVar).i_data = Swap64((*pStaticDataVar).i_data); ++pStaticDataVar; } } // // Constructor // VMStringLoader::VMStringLoader(CCHAR_P rawContent, size_t rawContentSize) { oCore = (VMExecutable *)malloc(rawContentSize + 1); memcpy(oCore, rawContent, rawContentSize); if (oCore -> magic[0] == 'C' && oCore -> magic[1] == 'T' && oCore -> magic[2] == 'P' && oCore -> magic[3] == 'P') { // Check version if (oCore -> version[0] >= 1) { // Platform-dependent data (byte order) if (oCore -> 
platform == 0x4142434445464748ull) { #ifdef _DEBUG fprintf(stderr, "Big/Little Endian conversion: Nothing to do\n"); #endif // Nothing to do, only check crc UINT_32 iCRC = oCore -> crc; oCore -> crc = 0; // Calculate CRC of file // KELSON: next line used to refer to oStat.st_size // changed it to rawContentSize if (iCRC != crc32((UCCHAR_P)oCore, rawContentSize)) { free(oCore); throw CTPPLogicError("CRC checksum invalid"); } } // Platform-dependent data (byte order) else if (oCore -> platform == 0x4847464544434241ull) { // Need to reconvert data #ifdef _DEBUG fprintf(stderr, "Big/Little Endian conversion: Need to reconvert core\n"); #endif ConvertExecutable(oCore); } else { free(oCore); throw CTPPLogicError("Conversion of middle-end architecture does not supported."); } // Check IEEE 754 format if (oCore -> ieee754double != 15839800103804824402926068484019465486336.0) { free(oCore); throw CTPPLogicError("IEEE 754 format is broken, cannot convert file"); } } pVMMemoryCore = new VMMemoryCore(oCore); } else { free(oCore); throw CTPPLogicError("Not an CTPP bytecode file."); } } // // Get ready-to-run program // const VMMemoryCore * VMStringLoader::GetCore() const { return pVMMemoryCore; } // // A destructor // VMStringLoader::~VMStringLoader() throw() { delete pVMMemoryCore; free(oCore); } } // namespace CTPP // End. kiwix-lib-3.1.1/src/downloader.cpp000066400000000000000000000115011340474775600170630ustar00rootroot00000000000000/* * Copyright 2018 Matthieu Gautier * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #include "downloader.h" #include "common/pathTools.h" #include #include #include #include #include "aria2.h" #include "xmlrpc.h" #include "common/otherTools.h" #include namespace kiwix { void Download::updateStatus(bool follow) { static std::vector statusKey = {"status", "files", "totalLength", "completedLength", "followedBy", "downloadSpeed", "verifiedLength"}; std::string strStatus; if(follow && !m_followedBy.empty()) { strStatus = mp_aria->tellStatus(m_followedBy, statusKey); } else { strStatus = mp_aria->tellStatus(m_did, statusKey); } // std::cout << strStatus << std::endl; MethodResponse response(strStatus); if (response.isFault()) { m_status = Download::K_UNKNOWN; return; } auto structNode = response.getParams().getParam(0).getValue().getStruct(); auto _status = structNode.getMember("status").getValue().getAsS(); auto status = _status == "active" ? Download::K_ACTIVE : _status == "waiting" ? Download::K_WAITING : _status == "paused" ? Download::K_PAUSED : _status == "error" ? Download::K_ERROR : _status == "complete" ? Download::K_COMPLETE : _status == "removed" ? 
Download::K_REMOVED : Download::K_UNKNOWN; if (status == K_COMPLETE) { try { auto followedByMember = structNode.getMember("followedBy"); m_followedBy = followedByMember.getValue().getArray().getValue(0).getAsS(); if (follow) { status = K_ACTIVE; updateStatus(true); return; } } catch (InvalidRPCNode& e) { } } m_status = status; m_totalLength = extractFromString(structNode.getMember("totalLength").getValue().getAsS()); m_completedLength = extractFromString(structNode.getMember("completedLength").getValue().getAsS()); m_downloadSpeed = extractFromString(structNode.getMember("downloadSpeed").getValue().getAsS()); try { auto verifiedLengthValue = structNode.getMember("verifiedLength").getValue(); m_verifiedLength = extractFromString(verifiedLengthValue.getAsS()); } catch (InvalidRPCNode& e) { m_verifiedLength = 0; } auto filesMember = structNode.getMember("files"); auto fileStruct = filesMember.getValue().getArray().getValue(0).getStruct(); m_path = fileStruct.getMember("path").getValue().getAsS(); auto urisArray = fileStruct.getMember("uris").getValue().getArray(); int index = 0; m_uris.clear(); while(true) { try { auto uriNode = urisArray.getValue(index++).getStruct().getMember("uri"); m_uris.push_back(uriNode.getValue().getAsS()); } catch(InvalidRPCNode& e) { break; } } } /* Constructor */ Downloader::Downloader() : mp_aria(new Aria2()) { for (auto gid : mp_aria->tellActive()) { m_knownDownloads[gid] = std::unique_ptr(new Download(mp_aria, gid)); m_knownDownloads[gid]->updateStatus(); } } /* Destructor */ Downloader::~Downloader() { } void Downloader::close() { mp_aria->close(); } std::vector Downloader::getDownloadIds() { std::vector ret; for(auto& p:m_knownDownloads) { ret.push_back(p.first); } return ret; } Download* Downloader::startDownload(const std::string& uri) { for (auto& p: m_knownDownloads) { auto& d = p.second; auto& uris = d->getUris(); if (std::find(uris.begin(), uris.end(), uri) != uris.end()) return d.get(); } std::vector uris = {uri}; auto gid = 
mp_aria->addUri(uris); m_knownDownloads[gid] = std::unique_ptr(new Download(mp_aria, gid)); return m_knownDownloads[gid].get(); } Download* Downloader::getDownload(const std::string& did) { try { return m_knownDownloads.at(did).get(); } catch(exception& e) { for (auto gid : mp_aria->tellActive()) { if (gid == did) { m_knownDownloads[gid] = std::unique_ptr(new Download(mp_aria, gid)); return m_knownDownloads[gid].get(); } } throw e; } } } kiwix-lib-3.1.1/src/entry.cpp000066400000000000000000000055631340474775600161010ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #include "reader.h" #include #include namespace kiwix { Entry::Entry(zim::Article article) : article(article) { } #define RETURN_IF_INVALID(WHAT) if(!good()) { return (WHAT); } std::string Entry::getPath() const { RETURN_IF_INVALID(""); return article.getLongUrl(); } std::string Entry::getTitle() const { RETURN_IF_INVALID(""); return article.getTitle(); } std::string Entry::getContent() const { RETURN_IF_INVALID(""); return article.getData(); } zim::Blob Entry::getBlob(offset_type offset) const { RETURN_IF_INVALID(zim::Blob()); return article.getData(offset); } zim::Blob Entry::getBlob(offset_type offset, size_type size) const { RETURN_IF_INVALID(zim::Blob()); return article.getData(offset, size); } std::pair Entry::getDirectAccessInfo() const { RETURN_IF_INVALID(std::make_pair("", 0)); return article.getDirectAccessInformation(); } size_type Entry::getSize() const { RETURN_IF_INVALID(0); return article.getArticleSize(); } std::string Entry::getMimetype() const { RETURN_IF_INVALID(""); try { return article.getMimeType(); } catch (exception& e) { return "application/octet-stream"; } } bool Entry::isRedirect() const { RETURN_IF_INVALID(false); return article.isRedirect(); } bool Entry::isLinkTarget() const { RETURN_IF_INVALID(false); return article.isLinktarget(); } bool Entry::isDeleted() const { RETURN_IF_INVALID(false); return article.isDeleted(); } Entry Entry::getRedirectEntry() const { RETURN_IF_INVALID(Entry()); if ( !article.isRedirect() ) { throw NoEntry(); } auto targeted_article = article.getRedirectArticle(); if ( !targeted_article.good()) { throw NoEntry(); } return targeted_article; } Entry Entry::getFinalEntry() const { RETURN_IF_INVALID(Entry()); if (final_article.good()) { return final_article; } int loopCounter = 42; final_article = article; while (final_article.isRedirect() && loopCounter--) { final_article = final_article.getRedirectArticle(); if ( !final_article.good()) { throw NoEntry(); } } return final_article; } } 
kiwix-lib-3.1.1/src/library.cpp000066400000000000000000000167631340474775600164100ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #include "library.h" #include "book.h" #include "libxml_dumper.h" #include "common/base64.h" #include "common/regexTools.h" #include "common/pathTools.h" #include #include namespace kiwix { /* Constructor */ Library::Library() { } /* Destructor */ Library::~Library() { } bool Library::addBook(const Book& book) { /* Try to find it */ try { auto& oldbook = m_books.at(book.getId()); oldbook.update(book); return false; } catch (std::out_of_range&) { m_books[book.getId()] = book; return true; } } void Library::addBookmark(const Bookmark& bookmark) { m_bookmarks.push_back(bookmark); } bool Library::removeBookmark(const std::string& zimId, const std::string& url) { for(auto it=m_bookmarks.begin(); it!=m_bookmarks.end(); it++) { if (it->getBookId() == zimId && it->getUrl() == url) { m_bookmarks.erase(it); return true; } } return false; } bool Library::removeBookById(const std::string& id) { return m_books.erase(id) == 1; } Book& Library::getBookById(const std::string& id) { return m_books.at(id); } unsigned int Library::getBookCount(const bool localBooks, const bool remoteBooks) { unsigned int result = 0; for (auto& pair: m_books) { auto& book = 
pair.second; if ((!book.getPath().empty() && localBooks) || (book.getPath().empty() && remoteBooks)) { result++; } } return result; } bool Library::writeToFile(const std::string& path) { auto baseDir = removeLastPathElement(path, true, false); LibXMLDumper dumper(this); dumper.setBaseDir(baseDir); return writeTextFile(path, dumper.dumpLibXMLContent(getBooksIds())); } bool Library::writeBookmarksToFile(const std::string& path) { LibXMLDumper dumper(this); return writeTextFile(path, dumper.dumpLibXMLBookmark()); } std::vector Library::getBooksLanguages() { std::vector booksLanguages; std::map booksLanguagesMap; for (auto& pair: m_books) { auto& book = pair.second; auto& language = book.getLanguage(); if (booksLanguagesMap.find(language) == booksLanguagesMap.end()) { if (book.getOrigId().empty()) { booksLanguagesMap[language] = true; booksLanguages.push_back(language); } } } return booksLanguages; } std::vector Library::getBooksCreators() { std::vector booksCreators; std::map booksCreatorsMap; for (auto& pair: m_books) { auto& book = pair.second; auto& creator = book.getCreator(); if (booksCreatorsMap.find(creator) == booksCreatorsMap.end()) { if (book.getOrigId().empty()) { booksCreatorsMap[creator] = true; booksCreators.push_back(creator); } } } return booksCreators; } std::vector Library::getBooksPublishers() { std::vector booksPublishers; std::map booksPublishersMap; for (auto& pair:m_books) { auto& book = pair.second; auto& publisher = book.getPublisher(); if (booksPublishersMap.find(publisher) == booksPublishersMap.end()) { if (book.getOrigId().empty()) { booksPublishersMap[publisher] = true; booksPublishers.push_back(publisher); } } } return booksPublishers; } std::vector Library::getBooksIds() { std::vector bookIds; for (auto& pair: m_books) { bookIds.push_back(pair.first); } return bookIds; } std::vector Library::filter(const std::string& search) { if (search.empty()) { return getBooksIds(); } std::vector bookIds; for(auto& pair:m_books) { auto& book = 
pair.second; if (matchRegex(book.getTitle(), "\\Q" + search + "\\E") || matchRegex(book.getDescription(), "\\Q" + search + "\\E")) { bookIds.push_back(pair.first); } } return bookIds; } template struct Comparator { Library* lib; Comparator(Library* lib) : lib(lib) {} bool operator() (const std::string& id1, const std::string& id2) { return get_keys(id1) < get_keys(id2); } std::string get_keys(const std::string& id); unsigned int get_keyi(const std::string& id); }; template<> std::string Comparator::get_keys(const std::string& id) { return lib->getBookById(id).getTitle(); } template<> unsigned int Comparator<SIZE>::get_keyi(const std::string& id) { return lib->getBookById(id).getSize(); } template<> bool Comparator<SIZE>::operator() (const std::string& id1, const std::string& id2) { return get_keyi(id1) < get_keyi(id2); } template<> std::string Comparator<DATE>::get_keys(const std::string& id) { return lib->getBookById(id).getDate(); } template<> std::string Comparator<CREATOR>::get_keys(const std::string& id) { return lib->getBookById(id).getCreator(); } template<> std::string Comparator<PUBLISHER>::get_keys(const std::string& id) { return lib->getBookById(id).getPublisher(); } std::vector<std::string> Library::listBooksIds( int mode, supportedListSortBy sortBy, const std::string& search, const std::string& language, const std::string& creator, const std::string& publisher, size_t maxSize) { std::vector<std::string> bookIds; for(auto& pair:m_books) { auto& book = pair.second; auto local = !book.getPath().empty(); if (mode & LOCAL && !local) continue; if (mode & NOLOCAL && local) continue; auto valid = book.isPathValid(); if (mode & VALID && !valid) continue; if (mode & NOVALID && valid) continue; auto remote = !book.getUrl().empty(); if (mode & REMOTE && !remote) continue; if (mode & NOREMOTE && remote) continue; if (maxSize != 0 && book.getSize() > maxSize) continue; if (!language.empty() && book.getLanguage() != language) continue; if (!publisher.empty() && 
book.getPublisher() != publisher) continue; if (!creator.empty() && book.getCreator() != creator) continue; if (!search.empty() && !(matchRegex(book.getTitle(), "\\Q" + search + "\\E") || matchRegex(book.getDescription(), "\\Q" + search + "\\E"))) continue; bookIds.push_back(pair.first); } switch(sortBy) { case TITLE: std::sort(bookIds.begin(), bookIds.end(), Comparator<TITLE>(this)); break; case SIZE: std::sort(bookIds.begin(), bookIds.end(), Comparator<SIZE>(this)); break; case DATE: std::sort(bookIds.begin(), bookIds.end(), Comparator<DATE>(this)); break; case CREATOR: std::sort(bookIds.begin(), bookIds.end(), Comparator<CREATOR>(this)); break; case PUBLISHER: std::sort(bookIds.begin(), bookIds.end(), Comparator<PUBLISHER>(this)); break; default: break; } return bookIds; } } �������������kiwix-lib-3.1.1/src/libxml_dumper.cpp���������������������������������������������������������������0000664�0000000�0000000�00000011352�13404747756�0017574�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Copyright 2017 Matthieu Gautier <mgautier@kymeria.fr> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #include "libxml_dumper.h" #include "book.h" #include <common/base64.h> #include <common/stringTools.h> #include <common/otherTools.h> namespace kiwix { /* Constructor */ LibXMLDumper::LibXMLDumper(Library* library) : library(library) { } /* Destructor */ LibXMLDumper::~LibXMLDumper() { } #define ADD_ATTRIBUTE(node, name, value) { (node).append_attribute((name)) = (value).c_str(); } #define ADD_ATTR_NOT_EMPTY(node, name, value) { if (!(value).empty()) ADD_ATTRIBUTE(node, name, value); } void LibXMLDumper::handleBook(Book book, pugi::xml_node root_node) { if (book.readOnly()) return; auto entry_node = root_node.append_child("book"); ADD_ATTRIBUTE(entry_node, "id", book.getId()); if (!book.getPath().empty()) { ADD_ATTRIBUTE(entry_node, "path", computeRelativePath(baseDir, book.getPath())); } if (!book.getIndexPath().empty()) { ADD_ATTRIBUTE(entry_node, "indexPath", computeRelativePath(baseDir, book.getIndexPath())); entry_node.append_attribute("indexType") = "xapian"; } if (book.getOrigId().empty()) { ADD_ATTR_NOT_EMPTY(entry_node, "title", book.getTitle()); ADD_ATTR_NOT_EMPTY(entry_node, "name", book.getName()); ADD_ATTR_NOT_EMPTY(entry_node, "tags", book.getTags()); ADD_ATTR_NOT_EMPTY(entry_node, "description", book.getDescription()); ADD_ATTR_NOT_EMPTY(entry_node, "language", book.getLanguage()); ADD_ATTR_NOT_EMPTY(entry_node, "creator", book.getCreator()); ADD_ATTR_NOT_EMPTY(entry_node, "publisher", book.getPublisher()); ADD_ATTR_NOT_EMPTY(entry_node, "faviconMimeType", book.getFaviconMimeType()); if (!book.getFavicon().empty()) ADD_ATTRIBUTE(entry_node, "favicon", base64_encode(book.getFavicon())); } else { ADD_ATTRIBUTE(entry_node, "origId", book.getOrigId()); } ADD_ATTR_NOT_EMPTY(entry_node, "date", book.getDate()); ADD_ATTR_NOT_EMPTY(entry_node, 
"url", book.getUrl()); if (book.getArticleCount()) ADD_ATTRIBUTE(entry_node, "articleCount", to_string(book.getArticleCount())); if (book.getMediaCount()) ADD_ATTRIBUTE(entry_node, "mediaCount", to_string(book.getMediaCount())); if (book.getSize()) ADD_ATTRIBUTE(entry_node, "size", to_string(book.getSize()>>10)); ADD_ATTR_NOT_EMPTY(entry_node, "downloadId", book.getDownloadId()); } #define ADD_TEXT_ENTRY(node, child, value) (node).append_child((child)).append_child(pugi::node_pcdata).set_value((value).c_str()) void LibXMLDumper::handleBookmark(Bookmark bookmark, pugi::xml_node root_node) { auto entry_node = root_node.append_child("bookmark"); auto book_node = entry_node.append_child("book"); try { auto book = library->getBookById(bookmark.getBookId()); ADD_TEXT_ENTRY(book_node, "id", book.getId()); ADD_TEXT_ENTRY(book_node, "title", book.getTitle()); ADD_TEXT_ENTRY(book_node, "language", book.getLanguage()); ADD_TEXT_ENTRY(book_node, "date", book.getDate()); } catch (...) { ADD_TEXT_ENTRY(book_node, "id", bookmark.getBookId()); ADD_TEXT_ENTRY(book_node, "title", bookmark.getBookTitle()); ADD_TEXT_ENTRY(book_node, "language", bookmark.getLanguage()); ADD_TEXT_ENTRY(book_node, "date", bookmark.getDate()); } ADD_TEXT_ENTRY(entry_node, "title", bookmark.getTitle()); ADD_TEXT_ENTRY(entry_node, "url", bookmark.getUrl()); } std::string LibXMLDumper::dumpLibXMLContent(const std::vector<std::string>& bookIds) { pugi::xml_document doc; /* Add the library node */ pugi::xml_node libraryNode = doc.append_child("library"); libraryNode.append_attribute("version") = KIWIX_LIBRARY_VERSION; if (library) { for (auto& bookId: bookIds) { handleBook(library->getBookById(bookId), libraryNode); } } return nodeToString(libraryNode); } std::string LibXMLDumper::dumpLibXMLBookmark() { pugi::xml_document doc; /* Add the library node */ pugi::xml_node bookmarksNode = doc.append_child("bookmarks"); if (library) { for (auto& bookmark: library->getBookmarks()) { handleBookmark(bookmark, 
bookmarksNode); } } return nodeToString(bookmarksNode); } } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������kiwix-lib-3.1.1/src/manager.cpp���������������������������������������������������������������������0000664�0000000�0000000�00000015010�13404747756�0016336�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #include "manager.h" #include <pugixml.hpp> namespace kiwix { /* Constructor */ Manager::Manager(LibraryManipulator* manipulator): writableLibraryPath(""), manipulator(manipulator), mustDeleteManipulator(false) { } Manager::Manager(Library* library) : writableLibraryPath(""), manipulator(new DefaultLibraryManipulator(library)), mustDeleteManipulator(true) { } /* Destructor */ Manager::~Manager() { if (mustDeleteManipulator) { delete manipulator; } } bool Manager::parseXmlDom(const pugi::xml_document& doc, const bool readOnly, const std::string& libraryPath) { pugi::xml_node libraryNode = doc.child("library"); std::string libraryVersion = libraryNode.attribute("version").value(); for (pugi::xml_node bookNode = libraryNode.child("book"); bookNode; bookNode = bookNode.next_sibling("book")) { kiwix::Book book; book.setReadOnly(readOnly); book.updateFromXml(bookNode, removeLastPathElement(libraryPath, true, false)); /* Update the book properties with the new importer */ if (libraryVersion.empty() || atoi(libraryVersion.c_str()) <= atoi(KIWIX_LIBRARY_VERSION)) { if (!book.getPath().empty()) { this->readBookFromPath(book.getPath(), &book); } } manipulator->addBookToLibrary(book); } return true; } bool Manager::readXml(const std::string& xml, const bool readOnly, const std::string& libraryPath) { pugi::xml_document doc; pugi::xml_parse_result result = doc.load_buffer_inplace((void*)xml.data(), xml.size()); if (result) { this->parseXmlDom(doc, readOnly, libraryPath); } return true; } bool Manager::parseOpdsDom(const pugi::xml_document& doc, const std::string& urlHost) { pugi::xml_node libraryNode = doc.child("feed"); try { m_totalBooks = strtoull(libraryNode.child("totalResults").child_value(), 0, 0); m_startIndex = strtoull(libraryNode.child("startIndex").child_value(), 0, 0); m_itemsPerPage = strtoull(libraryNode.child("itemsPerPage").child_value(), 0, 0); m_hasSearchResult = true; } catch(...) 
{ m_hasSearchResult = false; } for (pugi::xml_node entryNode = libraryNode.child("entry"); entryNode; entryNode = entryNode.next_sibling("entry")) { kiwix::Book book; book.setReadOnly(false); book.updateFromOpds(entryNode, urlHost); /* Update the book properties with the new importer */ manipulator->addBookToLibrary(book); } return true; } bool Manager::readOpds(const std::string& content, const std::string& urlHost) { pugi::xml_document doc; pugi::xml_parse_result result = doc.load_buffer_inplace((void*)content.data(), content.size()); if (result) { this->parseOpdsDom(doc, urlHost); return true; } return false; } bool Manager::readFile(const std::string& path, const bool readOnly) { return this->readFile(path, path, readOnly); } bool Manager::readFile(const std::string& nativePath, const std::string& UTF8Path, const bool readOnly) { bool retVal = true; pugi::xml_document doc; pugi::xml_parse_result result = doc.load_file(nativePath.c_str()); if (result) { this->parseXmlDom(doc, readOnly, UTF8Path); } else { retVal = false; } /* This has to be set (although if the file does not exists) to be * able to know where to save the library if new content are * available */ if (!readOnly) { this->writableLibraryPath = UTF8Path; } return retVal; } /* Add a book to the library. Return empty string if failed, book id otherwise */ std::string Manager::addBookFromPathAndGetId(const std::string& pathToOpen, const std::string& pathToSave, const std::string& url, const bool checkMetaData) { kiwix::Book book; if (this->readBookFromPath(pathToOpen, &book)) { if (pathToSave != pathToOpen) { book.setPath(isRelativePath(pathToSave) ? 
computeAbsolutePath( removeLastPathElement(writableLibraryPath, true, false), pathToSave) : pathToSave); } if (!checkMetaData || (checkMetaData && !book.getTitle().empty() && !book.getLanguage().empty() && !book.getDate().empty())) { book.setUrl(url); manipulator->addBookToLibrary(book); return book.getId(); } } return ""; } /* Wrapper over Manager::addBookFromPath which return a bool instead of a string */ bool Manager::addBookFromPath(const std::string& pathToOpen, const std::string& pathToSave, const std::string& url, const bool checkMetaData) { return !( this->addBookFromPathAndGetId(pathToOpen, pathToSave, url, checkMetaData) .empty()); } bool Manager::readBookFromPath(const std::string& path, kiwix::Book* book) { try { kiwix::Reader reader(path); book->update(reader); book->setPathValid(true); } catch (const std::exception& e) { std::cerr << "Invalid " << path << " : " << e.what() << std::endl; book->setPathValid(false); return false; } return true; } bool Manager::readBookmarkFile(const std::string& path) { pugi::xml_document doc; pugi::xml_parse_result result = doc.load_file(path.c_str()); if (!result) { return false; } pugi::xml_node libraryNode = doc.child("bookmarks"); for (pugi::xml_node node = libraryNode.child("bookmark"); node; node = node.next_sibling("bookmark")) { kiwix::Bookmark bookmark; bookmark.updateFromXml(node); manipulator->addBookmarkToLibrary(bookmark); } return true; } } 
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������kiwix-lib-3.1.1/src/meson.build���������������������������������������������������������������������0000664�0000000�0000000�00000002632�13404747756�0016370�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������kiwix_sources = [ 'book.cpp', 'bookmark.cpp', 'library.cpp', 'manager.cpp', 'libxml_dumper.cpp', 'opds_dumper.cpp', 'downloader.cpp', 'reader.cpp', 'entry.cpp', 'searcher.cpp', 'subprocess.cpp', 'aria2.cpp', 'common/base64.cpp', 'common/pathTools.cpp', 'common/regexTools.cpp', 'common/stringTools.cpp', 'common/networkTools.cpp', 'common/otherTools.cpp', 'xapian/htmlparse.cc', 'xapian/myhtmlparse.cc' ] kiwix_sources += lib_resources if host_machine.system() == 'windows' kiwix_sources += 'subprocess_windows.cpp' else kiwix_sources += 'subprocess_unix.cpp' endif if xapian_dep.found() kiwix_sources += ['xapianSearcher.cpp'] endif if get_option('android') subdir('android') install_dir = 'kiwix-lib/jniLibs/' + meson.get_cross_property('android_abi') else install_dir = get_option('libdir') endif if has_ctpp2_dep kiwix_sources += ['ctpp2/CTPP2VMStringLoader.cpp'] endif config_h = configure_file(output : 'kiwix_config.h', configuration : conf, input : 'config.h.in') 
install_headers(config_h, subdir:'kiwix') kiwixlib = library('kiwix', kiwix_sources, include_directories : inc, dependencies : all_deps, version: meson.project_version(), install: true, install_dir: install_dir, install_rpath: '$ORIGIN') ������������������������������������������������������������������������������������������������������kiwix-lib-3.1.1/src/opds_dumper.cpp�����������������������������������������������������������������0000664�0000000�0000000�00000011723�13404747756�0017254�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Copyright 2017 Matthieu Gautier <mgautier@kymeria.fr> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #include "opds_dumper.h" #include "book.h" #include <common/otherTools.h> namespace kiwix { /* Constructor */ OPDSDumper::OPDSDumper(Library* library) : library(library) { } /* Destructor */ OPDSDumper::~OPDSDumper() { } std::string gen_date_str() { auto now = time(0); auto tm = localtime(&now); std::stringstream is; is << std::setw(2) << std::setfill('0') << 1900+tm->tm_year << "-" << std::setw(2) << std::setfill('0') << tm->tm_mon << "-" << std::setw(2) << std::setfill('0') << tm->tm_mday << "T" << std::setw(2) << std::setfill('0') << tm->tm_hour << ":" << std::setw(2) << std::setfill('0') << tm->tm_min << ":" << std::setw(2) << std::setfill('0') << tm->tm_sec << "Z"; return is.str(); } static std::string gen_date_from_yyyy_mm_dd(const std::string& date) { std::stringstream is; is << date << "T00:00::00:Z"; return is.str(); } void OPDSDumper::setOpenSearchInfo(int totalResults, int startIndex, int count) { m_totalResults = totalResults; m_startIndex = startIndex, m_count = count; m_isSearchResult = true; } #define ADD_TEXT_ENTRY(node, child, value) (node).append_child((child)).append_child(pugi::node_pcdata).set_value((value).c_str()) pugi::xml_node OPDSDumper::handleBook(Book book, pugi::xml_node root_node) { auto entry_node = root_node.append_child("entry"); ADD_TEXT_ENTRY(entry_node, "title", book.getTitle()); ADD_TEXT_ENTRY(entry_node, "id", "urn:uuid:"+book.getId()); ADD_TEXT_ENTRY(entry_node, "icon", rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath()); ADD_TEXT_ENTRY(entry_node, "updated", gen_date_from_yyyy_mm_dd(book.getDate())); ADD_TEXT_ENTRY(entry_node, "summary", book.getDescription()); auto content_node = entry_node.append_child("link"); content_node.append_attribute("type") = "text/html"; content_node.append_attribute("href") = (rootLocation + "/" + book.getHumanReadableIdFromPath()).c_str(); auto author_node = entry_node.append_child("author"); ADD_TEXT_ENTRY(author_node, "name", book.getCreator()); if (! 
book.getUrl().empty()) { auto acquisition_link = entry_node.append_child("link"); acquisition_link.append_attribute("rel") = "http://opds-spec.org/acquisition/open-access"; acquisition_link.append_attribute("type") = "application/x-zim"; acquisition_link.append_attribute("href") = book.getUrl().c_str(); acquisition_link.append_attribute("length") = to_string(book.getSize()).c_str(); } if (! book.getFaviconMimeType().empty() ) { auto image_link = entry_node.append_child("link"); image_link.append_attribute("rel") = "http://opds-spec.org/image/thumbnail"; image_link.append_attribute("type") = book.getFaviconMimeType().c_str(); image_link.append_attribute("href") = (rootLocation + "/meta?name=favicon&content=" + book.getHumanReadableIdFromPath()).c_str(); } return entry_node; } string OPDSDumper::dumpOPDSFeed(const std::vector<std::string>& bookIds) { date = gen_date_str(); pugi::xml_document doc; auto root_node = doc.append_child("feed"); root_node.append_attribute("xmlns") = "http://www.w3.org/2005/Atom"; root_node.append_attribute("xmlns:opds") = "http://opds-spec.org/2010/catalog"; ADD_TEXT_ENTRY(root_node, "id", id); ADD_TEXT_ENTRY(root_node, "title", title); ADD_TEXT_ENTRY(root_node, "updated", date); if (m_isSearchResult) { ADD_TEXT_ENTRY(root_node, "totalResults", to_string(m_totalResults)); ADD_TEXT_ENTRY(root_node, "startIndex", to_string(m_startIndex)); ADD_TEXT_ENTRY(root_node, "itemsPerPage", to_string(m_count)); } auto self_link_node = root_node.append_child("link"); self_link_node.append_attribute("rel") = "self"; self_link_node.append_attribute("href") = ""; self_link_node.append_attribute("type") = "application/atom+xml"; if (!searchDescriptionUrl.empty() ) { auto search_link = root_node.append_child("link"); search_link.append_attribute("rel") = "search"; search_link.append_attribute("type") = "application/opensearchdescription+xml"; search_link.append_attribute("href") = searchDescriptionUrl.c_str(); } if (library) { for (auto& bookId: bookIds) { 
handleBook(library->getBookById(bookId), root_node); } } return nodeToString(root_node); } } ���������������������������������������������kiwix-lib-3.1.1/src/reader.cpp����������������������������������������������������������������������0000664�0000000�0000000�00000053027�13404747756�0016200�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������/* * Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ #include "reader.h" #include <time.h> #include <zim/search.h> inline char hi(char v) { char hex[] = "0123456789abcdef"; return hex[(v >> 4) & 0xf]; } inline char lo(char v) { char hex[] = "0123456789abcdef"; return hex[v & 0xf]; } std::string hexUUID(std::string in) { std::ostringstream out; for (unsigned n = 0; n < 4; ++n) { out << hi(in[n]) << lo(in[n]); } out << '-'; for (unsigned n = 4; n < 6; ++n) { out << hi(in[n]) << lo(in[n]); } out << '-'; for (unsigned n = 6; n < 8; ++n) { out << hi(in[n]) << lo(in[n]); } out << '-'; for (unsigned n = 8; n < 10; ++n) { out << hi(in[n]) << lo(in[n]); } out << '-'; for (unsigned n = 10; n < 16; ++n) { out << hi(in[n]) << lo(in[n]); } std::string op = out.str(); return op; } namespace kiwix { /* Constructor */ Reader::Reader(const string zimFilePath) : zimFileHandler(NULL) { string tmpZimFilePath = zimFilePath; /* Remove potential trailing zimaa */ size_t found = tmpZimFilePath.rfind("zimaa"); if (found != string::npos && tmpZimFilePath.size() > 5 && found == tmpZimFilePath.size() - 5) { tmpZimFilePath.resize(tmpZimFilePath.size() - 2); } this->zimFileHandler = new zim::File(tmpZimFilePath); if (this->zimFileHandler != NULL) { this->firstArticleOffset = this->zimFileHandler->getNamespaceBeginOffset('A'); this->lastArticleOffset = this->zimFileHandler->getNamespaceEndOffset('A'); this->nsACount = this->zimFileHandler->getNamespaceCount('A'); this->nsICount = this->zimFileHandler->getNamespaceCount('I'); this->zimFilePath = zimFilePath; } /* initialize random seed: */ srand(time(NULL)); } /* Destructor */ Reader::~Reader() { if (this->zimFileHandler != NULL) { delete this->zimFileHandler; } } zim::File* Reader::getZimFileHandler() const { return this->zimFileHandler; } std::map<const std::string, unsigned int> Reader::parseCounterMetadata() const { std::map<const std::string, unsigned int> counters; string mimeType, item, counterString; unsigned int counter; zim::Article article = this->zimFileHandler->getArticle('M', 
"Counter"); if (article.good()) { stringstream ssContent(article.getData()); while (getline(ssContent, item, ';')) { stringstream ssItem(item); getline(ssItem, mimeType, '='); getline(ssItem, counterString, '='); if (!counterString.empty() && !mimeType.empty()) { sscanf(counterString.c_str(), "%u", &counter); counters.insert(pair<string, int>(mimeType, counter)); } } } return counters; } /* Get the count of articles which can be indexed/displayed */ unsigned int Reader::getArticleCount() const { std::map<const std::string, unsigned int> counterMap = this->parseCounterMetadata(); unsigned int counter = 0; if (counterMap.empty()) { counter = this->nsACount; } else { auto it = counterMap.find("text/html"); if (it != counterMap.end()) { counter = it->second; } } return counter; } /* Get the count of medias content in the ZIM file */ unsigned int Reader::getMediaCount() const { std::map<const std::string, unsigned int> counterMap = this->parseCounterMetadata(); unsigned int counter = 0; if (counterMap.empty()) { counter = this->nsICount; } else { auto it = counterMap.find("image/jpeg"); if (it != counterMap.end()) { counter += it->second; } it = counterMap.find("image/gif"); if (it != counterMap.end()) { counter += it->second; } it = counterMap.find("image/png"); if (it != counterMap.end()) { counter += it->second; } } return counter; } /* Get the total of all items of a ZIM file, redirects included */ unsigned int Reader::getGlobalCount() const { return this->zimFileHandler->getCountArticles(); } /* Return the UID of the ZIM file */ string Reader::getId() const { std::ostringstream s; s << this->zimFileHandler->getFileheader().getUuid(); return s.str(); } /* Return a page url from a title */ bool Reader::getPageUrlFromTitle(const string& title, string& url) const { try { auto entry = getEntryFromTitle(title); entry = entry.getFinalEntry(); url = entry.getPath(); return true; } catch (NoEntry& e) { return false; } } /* Return an URL from a title */ string 
Reader::getRandomPageUrl() const { return getRandomPage().getPath(); } Entry Reader::getRandomPage() const { if (!this->zimFileHandler) { throw NoEntry(); } zim::Article article; std::string mainPagePath = this->getMainPage().getPath(); int watchdog = 42; do { auto idx = this->firstArticleOffset + (zim::size_type)((double)rand() / ((double)RAND_MAX + 1) * this->nsACount); article = zimFileHandler->getArticle(idx); if (!watchdog--) { throw NoEntry(); } } while (!article.good() && article.getLongUrl() == mainPagePath); return article; } /* Return the welcome page URL */ string Reader::getMainPageUrl() const { return getMainPage().getPath(); } Entry Reader::getMainPage() const { if (!this->zimFileHandler) { throw NoEntry(); } string url = ""; zim::Article article; if (this->zimFileHandler->getFileheader().hasMainPage()) { article = zimFileHandler->getArticle( this->zimFileHandler->getFileheader().getMainPage()); } if (!article.good()) { return getFirstPage(); } return article; } bool Reader::getFavicon(string& content, string& mimeType) const { static const char* const paths[] = {"-/favicon.png", "I/favicon.png", "I/favicon", "-/favicon"}; for (auto &path: paths) { try { auto entry = getEntryFromPath(path); entry = entry.getFinalEntry(); content = entry.getContent(); mimeType = entry.getMimetype(); return true; } catch(NoEntry& e) {}; } return false; } string Reader::getZimFilePath() const { return this->zimFilePath; } /* Return a metatag value */ bool Reader::getMetatag(const string& name, string& value) const { try { auto entry = getEntryFromPath("M/"+name); value = entry.getContent(); return true; } catch(NoEntry& e) { return false; } } string Reader::getTitle() const { string value; this->getMetatag("Title", value); if (value.empty()) { value = getLastPathElement(zimFileHandler->getFilename()); std::replace(value.begin(), value.end(), '_', ' '); size_t pos = value.find(".zim"); value = value.substr(0, pos); } return value; } string Reader::getName() const { string 
value; this->getMetatag("Name", value); return value; } string Reader::getTags() const { string value; this->getMetatag("Tags", value); return value; } string Reader::getDescription() const { string value; this->getMetatag("Description", value); /* Mediawiki Collection tends to use the "Subtitle" name */ if (value.empty()) { this->getMetatag("Subtitle", value); } return value; } string Reader::getLanguage() const { string value; this->getMetatag("Language", value); return value; } string Reader::getDate() const { string value; this->getMetatag("Date", value); return value; } string Reader::getCreator() const { string value; this->getMetatag("Creator", value); return value; } string Reader::getPublisher() const { string value; this->getMetatag("Publisher", value); return value; } string Reader::getOrigId() const { string value; this->getMetatag("startfileuid", value); if (value.empty()) { return ""; } std::string id = value; std::string origID; std::string temp = ""; unsigned int k = 0; char tempArray[16] = ""; for (unsigned int i = 0; i < id.size(); i++) { if (id[i] == '\n') { tempArray[k] = atoi(temp.c_str()); temp = ""; k++; } else { temp += id[i]; } } origID = hexUUID(tempArray); return origID; } /* Return the first page URL */ string Reader::getFirstPageUrl() const { return getFirstPage().getPath(); } Entry Reader::getFirstPage() const { if (!this->zimFileHandler) { throw NoEntry(); } auto firstPageOffset = zimFileHandler->getNamespaceBeginOffset('A'); auto article = zimFileHandler->getArticle(firstPageOffset); if (! 
article.good()) { throw NoEntry(); } return article; } bool _parseUrl(const string& url, char* ns, string& title) { /* Offset to visit the url */ unsigned int urlLength = url.size(); unsigned int offset = 0; /* Ignore the first '/' */ if (url[offset] == '/') offset++; if (url[offset] == '/' || offset >= urlLength) return false; /* Get namespace */ *ns = url[offset++]; if (url[offset] != '/' || offset >= urlLength) return false; offset++; if ( offset >= urlLength) return false; /* Get content title */ title = url.substr(offset, urlLength - offset); return true; } bool Reader::parseUrl(const string& url, char* ns, string& title) const { return _parseUrl(url, ns, title); } Entry Reader::getEntryFromPath(const std::string& path) const { char ns = 0; std::string short_url; if (!this->zimFileHandler) { throw NoEntry(); } _parseUrl(path, &ns, short_url); if (short_url.empty() && ns == 0) { return getMainPage(); } auto article = zimFileHandler->getArticle(ns, short_url); if (!article.good()) { throw NoEntry(); } return article; } Entry Reader::getEntryFromEncodedPath(const std::string& path) const { return getEntryFromPath(urlDecode(path, true)); } Entry Reader::getEntryFromTitle(const std::string& title) const { if (!this->zimFileHandler) { throw NoEntry(); } auto article = this->zimFileHandler->getArticleByTitle('A', title); if (!article.good()) { throw NoEntry(); } return article; } /* Return article by url */ bool Reader::getArticleObjectByDecodedUrl(const string& url, zim::Article& article) const { if (this->zimFileHandler == NULL) { return false; } /* Parse the url */ char ns = 0; string urlStr; _parseUrl(url, &ns, urlStr); /* Main page */ if (urlStr.empty() && ns == 0) { _parseUrl(this->getMainPage().getPath(), &ns, urlStr); } /* Extract the content from the zim file */ article = zimFileHandler->getArticle(ns, urlStr); return article.good(); } /* Return the mimeType without the content */ bool Reader::getMimeTypeByUrl(const string& url, string& mimeType) const { try 
{ auto entry = getEntryFromPath(url); mimeType = entry.getMimetype(); return true; } catch (NoEntry& e) { mimeType = ""; return false; } } bool get_content_by_decoded_url(const Reader& reader, const string& url, string& content, string& title, unsigned int& contentLength, string& contentType, string& baseUrl) { content = ""; contentType = ""; contentLength = 0; try { auto entry = reader.getEntryFromPath(url); entry = entry.getFinalEntry(); baseUrl = entry.getPath(); contentType = entry.getMimetype(); content = entry.getContent(); contentLength = entry.getSize(); title = entry.getTitle(); /* Try to set a stub HTML header/footer if necesssary */ if (contentType.find("text/html") != string::npos && content.find("<body") == std::string::npos && content.find("<BODY") == std::string::npos) { content = "<html><head><title>" + title + "" + content + ""; } return true; } catch (NoEntry& e) { return false; } } /* Get a content from a zim file */ bool Reader::getContentByUrl(const string& url, string& content, string& title, unsigned int& contentLength, string& contentType) const { std::string stubRedirectUrl; return get_content_by_decoded_url(*this, kiwix::urlDecode(url), content, title, contentLength, contentType, stubRedirectUrl); } bool Reader::getContentByEncodedUrl(const string& url, string& content, string& title, unsigned int& contentLength, string& contentType, string& baseUrl) const { return get_content_by_decoded_url(*this, kiwix::urlDecode(url), content, title, contentLength, contentType, baseUrl); } bool Reader::getContentByEncodedUrl(const string& url, string& content, string& title, unsigned int& contentLength, string& contentType) const { std::string stubRedirectUrl; return get_content_by_decoded_url(*this, kiwix::urlDecode(url), content, title, contentLength, contentType, stubRedirectUrl); } bool Reader::getContentByDecodedUrl(const string& url, string& content, string& title, unsigned int& contentLength, string& contentType) const { std::string 
stubRedirectUrl; return get_content_by_decoded_url(*this, url, content, title, contentLength, contentType, stubRedirectUrl); } bool Reader::getContentByDecodedUrl(const string& url, string& content, string& title, unsigned int& contentLength, string& contentType, string& baseUrl) const { return get_content_by_decoded_url(*this, url, content, title, contentLength, contentType, baseUrl); } /* Check if an article exists */ bool Reader::urlExists(const string& url) const { return pathExists(url); } bool Reader::pathExists(const string& path) const { if (!zimFileHandler) { return false; } char ns = 0; string titleStr; _parseUrl(path, &ns, titleStr); zim::File::const_iterator findItr = zimFileHandler->find(ns, titleStr); return findItr != zimFileHandler->end() && findItr->getUrl() == titleStr; } /* Does the ZIM file has a fulltext index */ bool Reader::hasFulltextIndex() const { if (!zimFileHandler || zimFileHandler->is_multiPart() ) { return false; } return ( pathExists("Z//fulltextIndex/xapian") || pathExists("X/fulltext/xapian")); } /* Search titles by prefix */ bool Reader::searchSuggestions(const string& prefix, unsigned int suggestionsCount, const bool reset) { bool retVal = false; zim::File::const_iterator articleItr; /* Reset the suggestions otherwise check if the suggestions number is less * than the suggestionsCount */ if (reset) { this->suggestions.clear(); this->suggestionsOffset = this->suggestions.begin(); } else { if (this->suggestions.size() > suggestionsCount) { return false; } } /* Return if no prefix */ if (prefix.size() == 0) { return false; } for (articleItr = zimFileHandler->findByTitle('A', prefix); articleItr != zimFileHandler->end() && articleItr->getTitle().compare(0, prefix.size(), prefix) == 0 && this->suggestions.size() < suggestionsCount; ++articleItr) { /* Extract the interesting part of article title & url */ std::string normalizedArticleTitle = kiwix::normalize(articleItr->getTitle()); std::string articleFinalUrl = "/A/" + 
articleItr->getUrl(); if (articleItr->isRedirect()) { zim::Article article = *articleItr; unsigned int loopCounter = 0; while (article.isRedirect() && loopCounter++ < 42) { article = article.getRedirectArticle(); } articleFinalUrl = "/A/" + article.getUrl(); } /* Go through all already found suggestions and skip if this article is already in the suggestions list (with an other title) */ bool insert = true; std::vector>::iterator suggestionItr; for (suggestionItr = this->suggestions.begin(); suggestionItr != this->suggestions.end(); suggestionItr++) { int result = normalizedArticleTitle.compare((*suggestionItr)[2]); if (result == 0 && articleFinalUrl.compare((*suggestionItr)[1]) == 0) { insert = false; break; } else if (result < 0) { break; } } /* Insert if possible */ if (insert) { std::vector suggestion; suggestion.push_back(articleItr->getTitle()); suggestion.push_back(articleFinalUrl); suggestion.push_back(normalizedArticleTitle); this->suggestions.insert(suggestionItr, suggestion); } /* Suggestions where found */ retVal = true; } /* Set the cursor to the begining */ this->suggestionsOffset = this->suggestions.begin(); return retVal; } std::vector Reader::getTitleVariants( const std::string& title) const { std::vector variants; variants.push_back(title); variants.push_back(kiwix::ucFirst(title)); variants.push_back(kiwix::lcFirst(title)); variants.push_back(kiwix::toTitle(title)); return variants; } /* Try also a few variations of the prefix to have better results */ bool Reader::searchSuggestionsSmart(const string& prefix, unsigned int suggestionsCount) { std::vector variants = this->getTitleVariants(prefix); bool retVal; this->suggestions.clear(); this->suggestionsOffset = this->suggestions.begin(); /* Try to search in the title using fulltext search database */ const zim::Search* suggestionSearch = this->getZimFileHandler()->suggestions(prefix, 0, suggestionsCount); if (suggestionSearch->get_matches_estimated()) { for (auto current = 
suggestionSearch->begin(); current != suggestionSearch->end(); current++) { std::vector suggestion; suggestion.push_back(current->getTitle()); suggestion.push_back("/A/" + current->getUrl()); suggestion.push_back(kiwix::normalize(current->getTitle())); this->suggestions.push_back(suggestion); } this->suggestionsOffset = this->suggestions.begin(); retVal = true; } else { for (std::vector::iterator variantsItr = variants.begin(); variantsItr != variants.end(); variantsItr++) { retVal = this->searchSuggestions(*variantsItr, suggestionsCount, false) || retVal; } } return retVal; } /* Get next suggestion */ bool Reader::getNextSuggestion(string& title) { if (this->suggestionsOffset != this->suggestions.end()) { /* title */ title = (*(this->suggestionsOffset))[0]; /* increment the cursor for the next call */ this->suggestionsOffset++; return true; } return false; } bool Reader::getNextSuggestion(string& title, string& url) { if (this->suggestionsOffset != this->suggestions.end()) { /* title */ title = (*(this->suggestionsOffset))[0]; url = (*(this->suggestionsOffset))[1]; /* increment the cursor for the next call */ this->suggestionsOffset++; return true; } return false; } /* Check if the file has as checksum */ bool Reader::canCheckIntegrity() const { return this->zimFileHandler->getChecksum() != ""; } /* Return true if corrupted, false otherwise */ bool Reader::isCorrupted() const { try { if (this->zimFileHandler->verify() == true) { return false; } } catch (exception& e) { cerr << e.what() << endl; return true; } return true; } /* Return the file size, works also for splitted files */ unsigned int Reader::getFileSize() const { zim::File* file = this->getZimFileHandler(); zim::size_type size = 0; if (file != NULL) { size = file->getFilesize(); } return (size / 1024); } } kiwix-lib-3.1.1/src/searcher.cpp000066400000000000000000000306031340474775600165250ustar00rootroot00000000000000/* * Copyright 2011 Emmanuel Engelhart * * This program is free software; you can 
redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ #include #include "searcher.h" #include "reader.h" #include "xapianSearcher.h" #include #ifdef ENABLE_CTPP2 #include #include #include #include "ctpp2/CTPP2VMStringLoader.hpp" #include "kiwixlib-resources.h" using namespace CTPP; #endif #define MAX_SEARCH_LEN 140 namespace kiwix { class _Result : public Result { public: _Result(zim::Search::iterator& iterator); virtual ~_Result(){}; virtual std::string get_url(); virtual std::string get_title(); virtual int get_score(); virtual std::string get_snippet(); virtual std::string get_content(); virtual int get_wordCount(); virtual int get_size(); virtual int get_readerIndex(); private: zim::Search::iterator iterator; }; struct SearcherInternal { const zim::Search* _search; XapianSearcher* _xapianSearcher; zim::Search::iterator current_iterator; SearcherInternal() : _search(NULL), _xapianSearcher(NULL) {} ~SearcherInternal() { if (_search != NULL) { delete _search; } if (_xapianSearcher != NULL) { delete _xapianSearcher; } } }; /* Constructor */ Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader, const string& humanReadableName) : internal(new SearcherInternal()), searchPattern(""), protocolPrefix("zim://"), searchProtocolPrefix("search://?"), resultCountPerPage(0), estimatedResultCount(0), resultStart(0), resultEnd(0), contentHumanReadableId(humanReadableName) { 
loadICUExternalTables(); if (!reader || !reader->hasFulltextIndex()) { internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader); } this->humanReaderNames.push_back(humanReadableName); } Searcher::Searcher(const std::string& humanReadableName) : internal(new SearcherInternal()), searchPattern(""), protocolPrefix("zim://"), searchProtocolPrefix("search://?"), resultCountPerPage(0), estimatedResultCount(0), resultStart(0), resultEnd(0), contentHumanReadableId(humanReadableName) { loadICUExternalTables(); } /* Destructor */ Searcher::~Searcher() { delete internal; } bool Searcher::add_reader(Reader* reader, const std::string& humanReadableName) { if (!reader->hasFulltextIndex()) { return false; } this->readers.push_back(reader); this->humanReaderNames.push_back(humanReadableName); return true; } /* Search strings in the database */ void Searcher::search(std::string& search, unsigned int resultStart, unsigned int resultEnd, const bool verbose) { this->reset(); if (verbose == true) { cout << "Performing query `" << search << "'" << endl; } /* If resultEnd & resultStart inverted */ if (resultStart > resultEnd) { resultEnd += resultStart; resultStart = resultEnd - resultStart; resultEnd -= resultStart; } /* Try to find results */ if (resultStart != resultEnd) { /* Avoid big researches */ this->resultCountPerPage = resultEnd - resultStart; if (this->resultCountPerPage > MAX_SEARCH_LEN) { resultEnd = resultStart + MAX_SEARCH_LEN; this->resultCountPerPage = MAX_SEARCH_LEN; } /* Perform the search */ this->searchPattern = search; this->resultStart = resultStart; this->resultEnd = resultEnd; string unaccentedSearch = removeAccents(search); if (internal->_xapianSearcher) { internal->_xapianSearcher->searchInIndex( unaccentedSearch, resultStart, resultEnd, verbose); this->estimatedResultCount = internal->_xapianSearcher->results.get_matches_estimated(); } else { std::vector zims; for (auto current = this->readers.begin(); current != this->readers.end(); 
current++) { if ( (*current)->hasFulltextIndex() ) { zims.push_back((*current)->getZimFileHandler()); } } zim::Search* search = new zim::Search(zims); search->set_query(unaccentedSearch); search->set_range(resultStart, resultEnd); internal->_search = search; internal->current_iterator = internal->_search->begin(); this->estimatedResultCount = internal->_search->get_matches_estimated(); } } return; } void Searcher::geo_search(float latitude, float longitude, float distance, unsigned int resultStart, unsigned int resultEnd, const bool verbose) { this->reset(); if (verbose == true) { cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl; } /* If resultEnd & resultStart inverted */ if (resultStart > resultEnd) { resultEnd += resultStart; resultStart = resultEnd - resultStart; resultEnd -= resultStart; } /* Try to find results */ if (resultStart == resultEnd) { return; } if (internal->_xapianSearcher) { return; } /* Avoid big researches */ this->resultCountPerPage = resultEnd - resultStart; if (this->resultCountPerPage > MAX_SEARCH_LEN) { resultEnd = resultStart + MAX_SEARCH_LEN; this->resultCountPerPage = MAX_SEARCH_LEN; } /* Perform the search */ std::ostringstream oss; oss << "Articles located less than " << distance << " meters of " << latitude << ";" << longitude; this->searchPattern = oss.str(); this->resultStart = resultStart; this->resultEnd = resultEnd; std::vector zims; for (auto current = this->readers.begin(); current != this->readers.end(); current++) { zims.push_back((*current)->getZimFileHandler()); } zim::Search* search = new zim::Search(zims); search->set_query(""); search->set_georange(latitude, longitude, distance); search->set_range(resultStart, resultEnd); internal->_search = search; internal->current_iterator = internal->_search->begin(); this->estimatedResultCount = internal->_search->get_matches_estimated(); } void Searcher::restart_search() { if (internal->_xapianSearcher) { 
internal->_xapianSearcher->restart_search(); } else if (internal->_search) { internal->current_iterator = internal->_search->begin(); } } Result* Searcher::getNextResult() { if (internal->_xapianSearcher) { return internal->_xapianSearcher->getNextResult(); } else if (internal->_search && internal->current_iterator != internal->_search->end()) { Result* result = new _Result(internal->current_iterator); internal->current_iterator++; return result; } return NULL; } /* Reset the results */ void Searcher::reset() { this->estimatedResultCount = 0; this->searchPattern = ""; return; } void Searcher::suggestions(std::string& search, const bool verbose) { this->reset(); if (verbose == true) { cout << "Performing suggestion query `" << search << "`" << endl; } this->searchPattern = search; this->resultStart = 0; this->resultEnd = 10; string unaccentedSearch = removeAccents(search); if (internal->_xapianSearcher) { /* [TODO] Suggestion on a external database ? * We do not support that. */ this->estimatedResultCount = 0; } else { std::vector zims; for (auto current = this->readers.begin(); current != this->readers.end(); current++) { zims.push_back((*current)->getZimFileHandler()); } zim::Search* search = new zim::Search(zims); search->set_query(unaccentedSearch); search->set_range(resultStart, resultEnd); search->set_suggestion_mode(true); internal->_search = search; internal->current_iterator = internal->_search->begin(); this->estimatedResultCount = internal->_search->get_matches_estimated(); } } /* Return the result count estimation */ unsigned int Searcher::getEstimatedResultCount() { return this->estimatedResultCount; } bool Searcher::setProtocolPrefix(const std::string prefix) { this->protocolPrefix = prefix; return true; } bool Searcher::setSearchProtocolPrefix(const std::string prefix) { this->searchProtocolPrefix = prefix; return true; } _Result::_Result(zim::Search::iterator& iterator) : iterator(iterator) { } std::string _Result::get_url() { return 
iterator.get_url(); } std::string _Result::get_title() { return iterator.get_title(); } int _Result::get_score() { return iterator.get_score(); } std::string _Result::get_snippet() { return iterator.get_snippet(); } std::string _Result::get_content() { if (iterator->good()) { return iterator->getData(); } return ""; } int _Result::get_size() { return iterator.get_size(); } int _Result::get_wordCount() { return iterator.get_wordCount(); } int _Result::get_readerIndex() { return iterator.get_fileIndex(); } #ifdef ENABLE_CTPP2 string Searcher::getHtml() { SimpleVM oSimpleVM( 1024, //iIMaxFunctions (default value) 4096, //iIMaxArgStackSize (default value) 4096, //iIMaxCodeStackSize (default value) 10240 * 2 //iIMaxSteps (default*2) ); // Fill data CDT oData; CDT resultsCDT(CDT::ARRAY_VAL); this->restart_search(); Result* p_result = NULL; while ((p_result = this->getNextResult())) { CDT result; result["title"] = p_result->get_title(); result["url"] = p_result->get_url(); result["snippet"] = p_result->get_snippet(); result["contentId"] = humanReaderNames[p_result->get_readerIndex()]; if (p_result->get_size() >= 0) { result["size"] = kiwix::beautifyInteger(p_result->get_size()); } if (p_result->get_wordCount() >= 0) { result["wordCount"] = kiwix::beautifyInteger(p_result->get_wordCount()); } resultsCDT.PushBack(result); delete p_result; } this->restart_search(); oData["results"] = resultsCDT; // pages CDT pagesCDT(CDT::ARRAY_VAL); unsigned int pageStart = this->resultStart / this->resultCountPerPage >= 5 ? 
this->resultStart / this->resultCountPerPage - 4 : 0; unsigned int pageCount = this->estimatedResultCount / this->resultCountPerPage + 1 - pageStart; if (pageCount > 10) { pageCount = 10; } else if (pageCount == 1) { pageCount = 0; } for (unsigned int i = pageStart; i < pageStart + pageCount; i++) { CDT page; page["label"] = i + 1; page["start"] = i * this->resultCountPerPage; page["end"] = (i + 1) * this->resultCountPerPage; if (i * this->resultCountPerPage == this->resultStart) { page["selected"] = true; } pagesCDT.PushBack(page); } oData["pages"] = pagesCDT; oData["count"] = kiwix::beautifyInteger(this->estimatedResultCount); oData["searchPattern"] = kiwix::encodeDiples(this->searchPattern); oData["searchPatternEncoded"] = urlEncode(this->searchPattern); oData["resultStart"] = this->resultStart + 1; oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount ? this->estimatedResultCount : this->resultEnd); oData["resultRange"] = this->resultCountPerPage; oData["resultLastPageStart"] = this->estimatedResultCount > this->resultCountPerPage ? 
std::round(this->estimatedResultCount / this->resultCountPerPage) * this->resultCountPerPage : 0; oData["protocolPrefix"] = this->protocolPrefix; oData["searchProtocolPrefix"] = this->searchProtocolPrefix; oData["contentId"] = this->contentHumanReadableId; std::string template_ct2 = RESOURCE::results_ct2; VMStringLoader oLoader(template_ct2.c_str(), template_ct2.size()); FileLogger oLogger(stderr); // DEBUG only (write output to stdout) // oSimpleVM.Run(oData, oLoader, stdout, oLogger); std::string sResult; oSimpleVM.Run(oData, oLoader, sResult, oLogger); return sResult; } #endif } kiwix-lib-3.1.1/src/subprocess.cpp000066400000000000000000000013461340474775600171230ustar00rootroot00000000000000 #include "subprocess.h" #ifdef _WIN32 # include "subprocess_windows.h" #else # include "subprocess_unix.h" #endif Subprocess::Subprocess(std::unique_ptr impl, commandLine_t& commandLine) : mp_impl(std::move(impl)) { mp_impl->run(commandLine); } Subprocess::~Subprocess() { mp_impl->kill(); } std::unique_ptr Subprocess::run(commandLine_t& commandLine) { #ifdef _WIN32 auto impl = std::unique_ptr(new WinImpl); #else auto impl = std::unique_ptr(new UnixImpl); #endif return std::unique_ptr(new Subprocess(std::move(impl), commandLine)); } bool Subprocess::isRunning() { return mp_impl->isRunning(); } bool Subprocess::kill() { return mp_impl->kill(); } kiwix-lib-3.1.1/src/subprocess.h000066400000000000000000000013521340474775600165650ustar00rootroot00000000000000 #ifndef KIWIX_SUBPROCESS_H_ #define KIWIX_SUBPROCESS_H_ #include #include #include typedef std::vector commandLine_t; class SubprocessImpl { public: virtual void run(commandLine_t& commandLine) = 0; virtual bool kill() = 0; virtual bool isRunning() = 0; virtual ~SubprocessImpl() = default; }; class Subprocess { private: // Impl depends of the system (window, unix, ...) 
std::unique_ptr mp_impl; Subprocess(std::unique_ptr impl, commandLine_t& commandLine); public: static std::unique_ptr run(commandLine_t& commandLine); ~Subprocess(); bool isRunning(); bool kill(); }; #endif // KIWIX_SUBPROCESS_H_ kiwix-lib-3.1.1/src/subprocess_unix.cpp000066400000000000000000000033461340474775600201700ustar00rootroot00000000000000 #include "subprocess_unix.h" #include #include #include #include #include #include UnixImpl::UnixImpl(): m_pid(0), m_running(false), m_mutex(PTHREAD_MUTEX_INITIALIZER), m_waitingThread() { } UnixImpl::~UnixImpl() { kill(); // Android has no pthread_cancel :( #ifdef __ANDROID__ pthread_kill(m_waitingThread, SIGUSR1); #else pthread_cancel(m_waitingThread); #endif } #ifdef __ANDROID__ void thread_exit_handler(int sig) { pthread_exit(0); } #endif void* UnixImpl::waitForPID(void* _self) { #ifdef __ANDROID__ struct sigaction actions; memset(&actions, 0, sizeof(actions)); sigemptyset(&actions.sa_mask); actions.sa_flags = 0; actions.sa_handler = thread_exit_handler; sigaction(SIGUSR1, &actions, NULL); #endif UnixImpl* self = static_cast(_self); waitpid(self->m_pid, NULL, WEXITED); pthread_mutex_lock(&self->m_mutex); self->m_running = false; pthread_mutex_unlock(&self->m_mutex); return self; } void UnixImpl::run(commandLine_t& commandLine) { const char* binary = commandLine[0]; int pid = fork(); switch(pid) { case -1: std::cerr << "cannot fork" << std::endl; break; case 0: commandLine.push_back(NULL); if (execvp(binary, const_cast(commandLine.data()))) { perror("Cannot launch\n"); exit(-1); } break; default: m_pid = pid; m_running = true; pthread_create(&m_waitingThread, NULL, waitForPID, this); break; } } bool UnixImpl::kill() { return (::kill(m_pid, SIGKILL) == 0); } bool UnixImpl::isRunning() { pthread_mutex_lock(&m_mutex); bool ret = m_running; pthread_mutex_unlock(&m_mutex); return ret; } kiwix-lib-3.1.1/src/subprocess_unix.h000066400000000000000000000007341340474775600176330ustar00rootroot00000000000000#ifndef 
KIWIX_SUBPROCESS_UNIX_H_ #define KIWIX_SUBPROCESS_UNIX_H_ #include "subprocess.h" #include class UnixImpl : public SubprocessImpl { private: int m_pid; bool m_running; pthread_mutex_t m_mutex; pthread_t m_waitingThread; public: UnixImpl(); virtual ~UnixImpl(); void run(commandLine_t& commandLine); bool kill(); bool isRunning(); static void* waitForPID(void* self); }; #endif //KIWIX_SUBPROCESS_UNIX_H_ kiwix-lib-3.1.1/src/subprocess_windows.cpp000066400000000000000000000037131340474775600206750ustar00rootroot00000000000000 #include "subprocess_windows.h" #include #include #include #include WinImpl::WinImpl(): m_pid(0), m_running(false), m_handle(INVALID_HANDLE_VALUE) { InitializeCriticalSection(&m_criticalSection); } WinImpl::~WinImpl() { kill(); CloseHandle(m_handle); DeleteCriticalSection(&m_criticalSection); } DWORD WINAPI WinImpl::waitForPID(void* _self) { WinImpl* self = static_cast(_self); WaitForSingleObject(self->m_handle, INFINITE); EnterCriticalSection(&self->m_criticalSection); self->m_running = false; LeaveCriticalSection(&self->m_criticalSection); return 0; } std::unique_ptr toWideChar(const std::string& value) { auto size = MultiByteToWideChar(CP_UTF8, 0, value.c_str(), -1, nullptr, 0); auto wdata = std::unique_ptr(new wchar_t[size]); auto ret = MultiByteToWideChar(CP_UTF8, 0, value.c_str(), -1, wdata.get(), size); if (0 == ret) { std::ostringstream oss; oss << "Cannot convert to wchar : " << GetLastError(); throw std::runtime_error(oss.str()); } return wdata; } void WinImpl::run(commandLine_t& commandLine) { STARTUPINFOW startInfo = {0}; PROCESS_INFORMATION procInfo; startInfo.cb = sizeof(startInfo); std::ostringstream oss; for(auto& item: commandLine) { oss << item << " "; } auto wCommandLine = toWideChar(oss.str()); if (CreateProcessW( NULL, wCommandLine.get(), NULL, NULL, false, CREATE_NO_WINDOW, NULL, NULL, &startInfo, &procInfo)) { m_pid = procInfo.dwProcessId; m_handle = procInfo.hProcess; CloseHandle(procInfo.hThread); m_running = true; 
CreateThread(NULL, 0, &waitForPID, this, 0, NULL ); } } bool WinImpl::kill() { return TerminateProcess(m_handle, 0); } bool WinImpl::isRunning() { EnterCriticalSection(&m_criticalSection); bool ret = m_running; LeaveCriticalSection(&m_criticalSection); return ret; } kiwix-lib-3.1.1/src/subprocess_windows.h000066400000000000000000000007771340474775600203510ustar00rootroot00000000000000#ifndef KIWIX_SUBPROCESS_WINDOWS_H_ #define KIWIX_SUBPROCESS_WINDOWS_H_ #include "subprocess.h" #include #include class WinImpl : public SubprocessImpl { private: int m_pid; bool m_running; HANDLE m_handle; CRITICAL_SECTION m_criticalSection; public: WinImpl(); virtual ~WinImpl(); void run(commandLine_t& commandLine); bool kill(); bool isRunning(); static DWORD WINAPI waitForPID(void* self); }; #endif //KIWIX_SUBPROCESS_WINDOWS_H_ kiwix-lib-3.1.1/src/xapian/000077500000000000000000000000001340474775600155035ustar00rootroot00000000000000kiwix-lib-3.1.1/src/xapian/htmlparse.cc000066400000000000000000000232701340474775600200150ustar00rootroot00000000000000/* htmlparse.cc: simple HTML parser for omega indexer * * Copyright 1999,2000,2001 BrightStation PLC * Copyright 2001 Ananova Ltd * Copyright 2002,2006,2007,2008 Olly Betts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 * USA */ // #include #include "htmlparse.h" #include // #include "utf8convert.h" #include #include #include #include #include using namespace std; inline void lowercase_string(string &str) { for (string::iterator i = str.begin(); i != str.end(); ++i) { *i = tolower(static_cast(*i)); } } map HtmlParser::named_ents; inline static bool p_notdigit(char c) { return !isdigit(static_cast(c)); } inline static bool p_notxdigit(char c) { return !isxdigit(static_cast(c)); } inline static bool p_notalnum(char c) { return !isalnum(static_cast(c)); } inline static bool p_notwhitespace(char c) { return !isspace(static_cast(c)); } inline static bool p_nottag(char c) { return !isalnum(static_cast(c)) && c != '.' && c != '-' && c != ':'; // ':' for XML namespaces. } inline static bool p_whitespacegt(char c) { return isspace(static_cast(c)) || c == '>'; } inline static bool p_whitespaceeqgt(char c) { return isspace(static_cast(c)) || c == '=' || c == '>'; } bool HtmlParser::get_parameter(const string & param, string & value) { map::const_iterator i = parameters.find(param); if (i == parameters.end()) return false; value = i->second; return true; } HtmlParser::HtmlParser() { static const struct ent { const char *n; unsigned int v; } ents[] = { #include "namedentities.h" { NULL, 0 } }; if (named_ents.empty()) { const struct ent *i = ents; while (i->n) { named_ents[string(i->n)] = i->v; ++i; } } } void HtmlParser::decode_entities(string &s) { // We need a const_iterator version of s.end() - otherwise the // find() and find_if() templates don't work... 
string::const_iterator amp = s.begin(), s_end = s.end(); while ((amp = find(amp, s_end, '&')) != s_end) { unsigned int val = 0; string::const_iterator end, p = amp + 1; if (p != s_end && *p == '#') { p++; if (p != s_end && (*p == 'x' || *p == 'X')) { // hex p++; end = find_if(p, s_end, p_notxdigit); sscanf(s.substr(p - s.begin(), end - p).c_str(), "%x", &val); } else { // number end = find_if(p, s_end, p_notdigit); val = atoi(s.substr(p - s.begin(), end - p).c_str()); } } else { end = find_if(p, s_end, p_notalnum); string code = s.substr(p - s.begin(), end - p); map::const_iterator i; i = named_ents.find(code); if (i != named_ents.end()) val = i->second; } if (end < s_end && *end == ';') end++; if (val) { string::size_type amp_pos = amp - s.begin(); if (val < 0x80) { s.replace(amp_pos, end - amp, 1u, char(val)); } else { // Convert unicode value val to UTF-8. char seq[4]; unsigned len = Xapian::Unicode::nonascii_to_utf8(val, seq); s.replace(amp_pos, end - amp, seq, len); } s_end = s.end(); // We've modified the string, so the iterators are no longer // valid... amp = s.begin() + amp_pos + 1; } else { amp = end; } } } void HtmlParser::parse_html(const string &body) { in_script = false; parameters.clear(); string::const_iterator start = body.begin(); while (true) { // Skip through until we find an HTML tag, a comment, or the end of // document. Ignore isolated occurrences of `<' which don't start // a tag or comment. string::const_iterator p = start; while (true) { p = find(p, body.end(), '<'); if (p == body.end()) break; unsigned char ch = *(p + 1); // Tag, closing tag, or comment (or SGML declaration). if ((!in_script && isalpha(ch)) || ch == '/' || ch == '!') break; if (ch == '?') { // PHP code or XML declaration. // XML declaration is only valid at the start of the first line. // FIXME: need to deal with BOMs... 
if (p != body.begin() || body.size() < 20) break; // XML declaration looks something like this: // if (p[2] != 'x' || p[3] != 'm' || p[4] != 'l') break; if (strchr(" \t\r\n", p[5]) == NULL) break; string::const_iterator decl_end = find(p + 6, body.end(), '?'); if (decl_end == body.end()) break; // Default charset for XML is UTF-8. charset = "UTF-8"; string decl(p + 6, decl_end); size_t enc = decl.find("encoding"); if (enc == string::npos) break; enc = decl.find_first_not_of(" \t\r\n", enc + 8); if (enc == string::npos || enc == decl.size()) break; if (decl[enc] != '=') break; enc = decl.find_first_not_of(" \t\r\n", enc + 1); if (enc == string::npos || enc == decl.size()) break; if (decl[enc] != '"' && decl[enc] != '\'') break; char quote = decl[enc++]; size_t enc_end = decl.find(quote, enc); if (enc != string::npos) charset = decl.substr(enc, enc_end - enc); break; } p++; } // Process text up to start of tag. if (p > start) { string text = body.substr(start - body.begin(), p - start); // convert_to_utf8(text, charset); decode_entities(text); process_text(text); } if (p == body.end()) break; start = p + 1; if (start == body.end()) break; if (*start == '!') { if (++start == body.end()) break; if (++start == body.end()) break; // comment or SGML declaration if (*(start - 1) == '-' && *start == '-') { ++start; string::const_iterator close = find(start, body.end(), '>'); // An unterminated comment swallows rest of document // (like Netscape, but unlike MSIE IIRC) if (close == body.end()) break; p = close; // look for --> while (p != body.end() && (*(p - 1) != '-' || *(p - 2) != '-')) p = find(p + 1, body.end(), '>'); if (p != body.end()) { // Check for htdig's "ignore this bit" comments. if (p - start == 15 && string(start, p - 2) == "htdig_noindex") { string::size_type i; i = body.find("", p + 1 - body.begin()); if (i == string::npos) break; start = body.begin() + i + 21; continue; } // If we found --> skip to there. 
start = p; } else { // Otherwise skip to the first > we found (as Netscape does). start = close; } } else { // just an SGML declaration, perhaps giving the DTD - ignore it start = find(start - 1, body.end(), '>'); if (start == body.end()) break; } ++start; } else if (*start == '?') { if (++start == body.end()) break; // PHP - swallow until ?> or EOF start = find(start + 1, body.end(), '>'); // look for ?> while (start != body.end() && *(start - 1) != '?') start = find(start + 1, body.end(), '>'); // unterminated PHP swallows rest of document (rather arbitrarily // but it avoids polluting the database when things go wrong) if (start != body.end()) ++start; } else { // opening or closing tag int closing = 0; if (*start == '/') { closing = 1; start = find_if(start + 1, body.end(), p_notwhitespace); } p = start; start = find_if(start, body.end(), p_nottag); string tag = body.substr(p - body.begin(), start - p); // convert tagname to lowercase lowercase_string(tag); if (closing) { closing_tag(tag); if (in_script && tag == "script") in_script = false; /* ignore any bogus parameters on closing tags */ p = find(start, body.end(), '>'); if (p == body.end()) break; start = p + 1; } else { // FIXME: parse parameters lazily. 
while (start < body.end() && *start != '>') { string name, value; p = find_if(start, body.end(), p_whitespaceeqgt); name.assign(body, start - body.begin(), p - start); p = find_if(p, body.end(), p_notwhitespace); start = p; if (start != body.end() && *start == '=') { start = find_if(start + 1, body.end(), p_notwhitespace); p = body.end(); int quote = *start; if (quote == '"' || quote == '\'') { start++; p = find(start, body.end(), quote); } if (p == body.end()) { // unquoted or no closing quote p = find_if(start, body.end(), p_whitespacegt); } value.assign(body, start - body.begin(), p - start); start = find_if(p, body.end(), p_notwhitespace); if (!name.empty()) { // convert parameter name to lowercase lowercase_string(name); // in case of multiple entries, use the first // (as Netscape does) parameters.insert(make_pair(name, value)); } } } #if 0 cout << "<" << tag; map::const_iterator x; for (x = parameters.begin(); x != parameters.end(); x++) { cout << " " << x->first << "=\"" << x->second << "\""; } cout << ">\n"; #endif opening_tag(tag); parameters.clear(); // In