pax_global_header00006660000000000000000000000064135546712310014521gustar00rootroot0000000000000052 comment=67a76c57caad7c07868a0fbd910806b0a025cc23 wikipedia2text-0.14/000077500000000000000000000000001355467123100144025ustar00rootroot00000000000000wikipedia2text-0.14/License000066400000000000000000000023671355467123100157170ustar00rootroot00000000000000Copyright (c) 2006,2007,2008 C.Brabandt All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY The AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. wikipedia2text-0.14/README.md000066400000000000000000000011461355467123100156630ustar00rootroot00000000000000# wikipedia2text > A Shell script to query the Wikipedia. This script fetches Wikipedia articles (currently supports around 30 Wikipedia languages) and displays them as plain text in a pager or just sends the text to standard out. Alternatively it opens the Wikipedia article in a (possibly GUI) web browser or just shows the URL of the appropriate Wikipedia article. ## Installation copy wikipedia2text into your `$PATH` alternatively, on debian or derivitatives, you can install it using your packages manager: `sudo apt-get install wikipedia2text` ## License & Copyright © Christian Brabandt, License: BSD wikipedia2text-0.14/wikipedia2text000077500000000000000000000273221355467123100172730ustar00rootroot00000000000000#!/bin/bash -- # # Shell script to query the Wikipedia. # # It can be used to output Wikipedia articles to the console, but can also # just open the article in any browser. # # Author: Christian Brabandt # License: BSD VERSION=0.14 set -e function display_help(){ #{{{1 cat << EOF NAME This script uses text-browser to query and render Wikipedia articles. The output will be printed to standard out. SYNOPSIS `basename $0` [-BCnNoOpPsSuU] [-b prog] [-c patt] [-i patt] [-l lang] [-X browseroptions] query `basename $0` -o [-b prog] [-l lang] query `basename $0` [-h] `basename $0` -v|-r -n do not colorize -N simple colorization (alias -C) -p display using a pager -P don't use pager -o open Wikipedia article -O don't open in browser -s display only a summary -S display whole article -u output the query URL -U open URL in browser -v display version -h display help -t show available sections -r open Random Page -d debug mode -i patt colorize pattern (case insensitive) -I patt colorize pattern (case-sensitive, alias -c) -b prog use prog as browser (by default to invoke elinks, links2, links, lynx or w3m, if found) -l lang use language (currently supported are: af, als, ca, cs, da, de, en, eo, es, fi, fr, hu, ia, is, it, la, lb, nds, nl, nn, no, pl, pt, rm, ro, simple, sk, sl, sv, tr) -T custom print custom section (anything in html h2 tag) -W url use url as base-URL for wikipedia (e.g. use a different Wiki, Querying this URL will happen by appending the search term. -X "options" pass through options to browser, e.g., "-width 180" (warnings: must be in quotes; browser specific, not checked) Query can be any term to search for at Wikipedia. Special characters will be taken care of. Note that only one query term is supported, however this term can consist of one or more words. Configuration can also be controlled by creating a runcontrol file .`basename $0`rc your home directory. Note that when requesting to open the article in a browser, other parameters will be ignored. The same holds for the options -h and -v. EOF } function getVersion(){ #{{{1 cat <&2 exit 3 } function colorize(){ #{{{1 if [ "${IGNCASE}" = "true" ]; then OUTPUT=$(echo -e "`cat`"|sed -s "s|\(${PATT}\)|\\\033\[0;31m\1\\\033\[0m|gi") else OUTPUT=$(echo -e "`cat`"|sed -s "s|\(${PATT}\)|\\\033\[0;31m\1\\\033\[0m|g") fi echo -e "${OUTPUT}" } function uri_decode(){ #{{{1 echo -e "$*" |perl -MURI::Escape -lne 's/ /_/g;s/"//g;print uri_escape($_);' } function localize(){ #{{{1 # Per default we use the english localized version of # Wikipedia LOCAL=$(echo ${LOCAL:="en"}) if [ "${LOCAL}" = "de" ]; then MARKER='^\s*Kategorien\?:' MARKER2='Bearbeiten' RANDOMP='Spezial:Zufällige Seite' elif [ "${LOCAL}" = "en" -o "${LOCAL}" = "simple" ]; then MARKER='^\s*Categories:\|^\s*Category:' MARKER2='edit' RANDOMP='Special:Random' elif [ "${LOCAL}" = "fr" ]; then MARKER='^\s*Catégories :' MARKER2='modifier' RANDOMP='Special:Random' elif [ "${LOCAL}" = "nl" ]; then MARKER='^\s*Categorie:' MARKER2='bewerk' RANDOMP='Speciaal:Willekeurig' elif [ "${LOCAL}" = "sv" ]; then MARKER='^\s*Kategorier:' MARKER2='redigera' RANDOMP='Special:Random' elif [ "${LOCAL}" = "es" ]; then MARKER='^\s*Categorías:' MARKER2='editar' RANDOMP='Especial:Random' elif [ "${LOCAL}" = "pt" ]; then MARKER='^\s*Categorias:' MARKER2='editar' RANDOMP='Especial:Random' elif [ "${LOCAL}" = "pl" ]; then MARKER='^\s*Kategorie:' MARKER2='Edytuj' RANDOMP='Specjalna:Losowa_strona' elif [ "${LOCAL}" = "it" ]; then MARKER='^\s*Categorie:' MARKER2='modifica' RANDOMP='Speciale:PaginaCasuale' elif [ "${LOCAL}" = "da" ]; then MARKER='^\s*Kategori:' MARKER2='\(redigér\|rediger\)' RANDOMP='Speciel:Tilfældig_side' elif [ "${LOCAL}" = "eo" ]; then MARKER='^\s*Kategorio:' MARKER2='redaktu' RANDOMP='Speciala:Random' elif [ "${LOCAL}" = "no" ]; then MARKER='^\s*Kategorier:' MARKER2='rediger' RANDOMP='Spesial:Tilfeldig_side' elif [ "${LOCAL}" = "nn" ]; then MARKER='^\s*Kategoriar:' MARKER2='endre' RANDOMP='Special:Random' elif [ "${LOCAL}" = "fi" ]; then MARKER='^\s*Luokat:' MARKER2='muokkaa' RANDOMP='Toiminnot:Satunnainen_sivu' elif [ "${LOCAL}" = "ca" ]; then MARKER='^\s*Categoria:' MARKER2='edita' RANDOMP='Especial:Random' elif [ "${LOCAL}" = "ro" ]; then MARKER='^\s*Categorii:' MARKER2='modifica' RANDOMP='Special:Random' elif [ "${LOCAL}" = "cs" ]; then MARKER='^\s*Kategorie:' MARKER2='editovat' RANDOMP='Speciální:Random' elif [ "${LOCAL}" = "sk" ]; then MARKER='^\s*Kategórie:' MARKER2='úprava' RANDOMP='Špeciálne:Random' elif [ "${LOCAL}" = "sl" ]; then MARKER='^\s*Kategorije:' MARKER2='spremeni' RANDOMP='Posebno:Random' elif [ "${LOCAL}" = "lb" ]; then MARKER='^\s*Kategorie:' MARKER2='Änneren' RANDOMP='Special:Random' elif [ "${LOCAL}" = "la" ]; then MARKER='^\s*Categoriae:' MARKER2='recensere' RANDOMP='Specialis:Random' elif [ "${LOCAL}" = "rm" ]; then MARKER='Views' MARKER2='edit' RANDOMP='Special:Random' elif [ "${LOCAL}" = "ia" ]; then MARKER='Views' MARKER2='modificar' RANDOMP='Special:Random' elif [ "${LOCAL}" = "is" ]; then MARKER='^\s*Flokkar:' MARKER2='breyta' RANDOMP='Kerfissíða:Random' elif [ "${LOCAL}" = "hu" ]; then MARKER='^\s*Kategóriák:' MARKER2='szerkesztés' RANDOMP='Speciális:Lap_találomra' elif [ "${LOCAL}" = "tr" ]; then MARKER='^\s*Sayfa kategorisi:' MARKER2='degistir' RANDOMP='Özel:Random' elif [ "${LOCAL}" = "af" ]; then MARKER='^\s*Kategorieë: ' MARKER2='wysig' RANDOMP='Spesiaal:Random' elif [ "${LOCAL}" = "nds" ]; then MARKER='^\s*Kategorien:' MARKER2='Ännern' RANDOMP='Spezial:Random' elif [ "${LOCAL}" = "als" ]; then MARKER='^\s*Kategorie:' MARKER2='ändere' RANDOMP='Spezial:Zufällige_Seite' else MARKER='\(Views\|References\|Visible links\)' RANDOMP='Special:Random' fi } function stripOutput(){ #{{{1 # Now comes the magic: Strip everything from Marker to end, # cause this is only the linkdump SED='sed -e "s|\^\?\\[[0-9]*\\]||g" -e "s|\\[IMG\\]||g" -e "/${MARKER}/,$ D" ' if [ -n "${MARKER2}" ]; then echo "`cat`"| eval ${SED} -e '"s#\[${MARKER2}\]##g"' else echo "`cat`"| eval ${SED} fi } function openurl(){ #{{{1 "${BROWSER}" "${URL}" } function summary() { #{{{1 TMPFILE="/tmp/wiki-sum_$$.html" if [ "${COLOR}" = "true" ]; then summaryCommand="curl -s -L ${URL} | grep \/table -A400 | grep -v \/table | grep \
$TMPFILE && w3m -dump $TMPFILE | stripOutput | colorize && rm $TMPFILE" else summaryCommand="curl -s -L ${URL} | grep \/table -A400 | grep -v \/table | grep \
$TMPFILE && w3m -dump $TMPFILE | stripOutput && rm $TMPFILE" fi eval ${summaryCommand} } function print_sections() { #{{{1 TMPFILE="/tmp/wiki-sections_$$.html" Command="curl -s -L ${URL} | grep '\( $TMPFILE && w3m -dump $TMPFILE | stripOutput && rm $TMPFILE" eval ${Command} } function print_section_detail() { #{{{1 TMPFILE="/tmp/wiki-section_$$.html" Command="curl -s -L ${URL} | sed -n -e '/\(<\/\?html\)\|\(<\/\?body\)\|\(/p\" | sed -e 's/^.*

$TMPFILE && w3m -dump $TMPFILE | stripOutput" if [ "${COLOR}" = "true" ]; then eval "${Command} | colorize" else eval "${Command}" fi rm $TMPFILE } function getInfo(){ #{{{1 getInfoCommand="${BROWSER} ${BROWSEROPTIONS} -dump ${URL} | stripOutput" if [ "${COLOR}" = "true" ]; then getInfoCommand="${getInfoCommand} | colorize" fi eval ${getInfoCommand} } # First read in the Run configuration File, if one is found #{{{1 if [ -r ~/.`basename $0`rc ]; then source ~/.`basename $0`rc ABROWSER=${BROWSER} fi # Process commandline parameters {{{1 while getopts "BCdnNoOpPsStuvhrUl:b:c:i:I:B:T:W:X:-help" ARGS do case ${ARGS} in b) ABROWSER=${OPTARG} ;; d) DEBUG="true" ;; B) ABROWSER='' ;; c) IGNCASE="false";COLOR="true"; PATT=${OPTARG} ;; C) COLOR="true" ;; i) IGNCASE="true";COLOR="true"; PATT=${OPTARG} ;; I) IGNCASE="false";COLOR="true"; PATT=${OPTARG} ;; l) LOCAL=${OPTARG} ;; n) COLOR="false" ;; N) COLOR="true" ;; o) USEBROWSER="true" ;; O) USEBROWSER="false" ;; p) PAGER="true" ;; P) PAGER="false" ;; r) RAND="true" ;; s) SHORT="true" ;; S) SHORT="false" ;; t) SECTION="show" ;; T) SECTION=$OPTARG ;; u) OUTPUTURL="true" ;; U) OPENURL="true";; v) getVersion; exit 0 ;; W) WURL=${OPTARG} ;; X) BROWSEROPTIONS=${OPTARG} ;; h) display_help; exit 0 ;; -help) display_help; exit 0 ;; *) display_help; exit 1 ;; esac done shift `expr ${OPTIND} - 1` # Init some variables {{{1 localize # Setting Up some Variables, to determine, what actually to do if [ -z "$1" -a -z "${RAND}" ]; then display_help exit 1; fi IGNCASE=$(echo ${IGNCASE:="false"}) PAGER=$(echo ${PAGER:="false"}) OPENURL=$(echo ${OPENURL:="false"}) RAND=$(echo ${RAND:="false"}) if [ "$PAGER" = "true" ]; then { PAGER=$(which less) || PAGER=$(which more) ; } || errorExit "No Pager found!" ; fi PAGER=$(echo ${PAGER/less/less -Rr}) COLOR=$(echo ${COLOR:="false"}) if [ "$COLOR" = "true" -a -z "${PATT}" ]; then PATT="$*" fi if [ "$OPENURL" = "true" ]; then URL="$*" fi # Check for Alternative Browser if [ -n "${ABROWSER}" ]; then BROWSER=$(which "${ABROWSER}") || errorExit "${ABROWSER} not found" elif [ -n "${BROWSER}" ]; then BROWSER=$(which "${BROWSER}") || errorExit "${BROWSER} not found" else { BROWSER=$(which w3m) || BROWSER=$(which elinks) || BROWSER=$(which links2) || BROWSER=$(which lynx) || BROWSER=$(which links.main) || BROWSER=$(which links) ; } || errorExit "No Browser found" fi # Open page in Browser? USEBROWSER=$(echo ${USEBROWSER:="false"}) # Output only a summary? SHORT=$(echo ${SHORT:="false"}) # custom Section SECTION=$(echo ${SECTION:=""}) # Output only the URL? OUTPUTURL=$(echo ${OUTPUTURL:="false"}) # Now we do some input sanitizing. {{{1 ARGUMENT="$(uri_decode "$*")" LOCAL="$(echo "${LOCAL}"|tr '[:upper:]' '[:lower:]')" # Random page? if [ "${RAND}" = "true" ]; then ARGUMENT="$(uri_decode "${RANDOMP}")" fi if [ -z "${URL}" ]; then URL="http://${LOCAL}.wikipedia.org/wiki/${ARGUMENT}" fi if [ -n "${WURL}" ]; then WURL="$(echo "${WURL%%/}")" case "${WURL}" in http://*) URL="${WURL}"/wiki/"${ARGUMENT}" ;; *) URL="http://""${WURL}"/wiki/"${ARGUMENT}" ;; esac; # unset $LOCAL to force using an english-locale # this is used to strip the tags [edit], eg. LOCAL="en" fi # Debug mode? {{{1 if [ "${DEBUG:=false}" = "true" ]; then printf "PAGER: $PAGER Browser: $BROWSER Local: $LOCAL COLOR: $COLOR PATT: $PATT IGNCASE: $IGNCASE URL: $URL Summary: $SHORT\n" fi # Depending on some Variables, we do some different things here {{{1 if [ "${USEBROWSER}" = "true" ]; then openurl exit 0; fi if [ "${SHORT}" = "true" ]; then summary exit 0; fi if [ "${SECTION}" = "show" ]; then print_sections exit 0; elif [ -n "${SECTION}" ]; then print_section_detail ${SECTION} exit 0; fi if [ "${OUTPUTURL}" = "true" ]; then if [ "${COLOR}" = "false" ]; then echo "${URL}" echo "${BROWSER}" "${BROWSEROPTIONS}" -dump "${URL}" else echo -e "\033[0;34m${URL}\033[0m" fi exit 0; fi if [ "$PAGER" != "false" ]; then getInfo | ${PAGER} else getInfo fi # Vim Modeline {{{1 # vim: ft=sh et sts=-1 sw=0 ts=2