#!/bin/bash --
#
# Shell script to query the Wikipedia.
#
# It can be used to output Wikipedia articles to the console, but can also
# just open the article in any browser.
#
# Author: Christian Brabandt
# License: BSD
#
# Copyright (c) 2006,2007,2008 C.Brabandt
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY The AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
# EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
# OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

VERSION=0.11
# SVN-ID: $Id: wikipedia2text 31 2009-07-21 15:42:44Z cb $:

set -e

# Name the script was invoked as; used in the help text and to locate the
# run-control file (~/.<name>rc).
PROGNAME=${0##*/}

# Print the usage text to standard out.
function display_help(){
  cat << EOF
NAME
    This script uses text-browser to query and render Wikipedia
    articles. The output will be printed to standard out.

SYNOPSIS
    ${PROGNAME} [-BCnNoOpPsSuU] [-b prog] [-c patt] [-i patt] [-l lang] query
    ${PROGNAME} -o [-b prog] [-l lang] query
    ${PROGNAME} [-h]
    ${PROGNAME} -v|-r

    -n          do not colorize
    -N          simple colorization (alias -C)
    -p          display using a pager
    -P          don't use pager
    -o          open Wikipedia article in browser
    -O          don't open in browser
    -s          display only a summary
    -S          display whole article
    -u          Just output the query URL
    -U          open URL in browser
    -v          display version
    -h          display help
    -r          open Random Page
    -i patt     colorize pattern (case insensitive)
    -I patt     colorize pattern (case-sensitive, alias -c)
    -b prog     use prog as browser (by default to invoke elinks, links2,
                links, lynx or w3m, if found)
    -l lang     use language (currently supported are: af, als, ca, cs, da,
                de, en, eo, es, fi, fr, hu, ia, is, it, la, lb, nds, nl, nn,
                no, pl, pt, rm, ro, simple, sk, sl, sv, tr)
    -W url      use url as base-URL for wikipedia (e.g. use a different Wiki,
                Querying this URL will happen by appending the search term.

    Query can be any term to search for at Wikipedia. Special characters
    will be taken care of. Note that only one query term is supported,
    however this term can consist of one or more words.

    Configuration can also be controlled by creating a runcontrol file
    .${PROGNAME}rc your home directory.

    Note that when requesting to open the article in a browser, other
    parameters will be ignored. The same holds for the options -h and -v.
EOF
}

# Print the program name and version to standard out.
# NOTE(review): the original here-doc text of this function was lost when the
# file was extracted; only the use of VERSION is assumed here - confirm the
# wording against the upstream release.
function getVersion(){
  printf '%s %s\n' "${PROGNAME}" "${VERSION}"
}

# Print the given message on standard error and terminate with status 3.
function errorExit(){
  printf '%s\n' "$*" >&2
  exit 3
}

# Filter: highlight every match of ${PATT} on stdin in red.
# Globals: PATT (read), IGNCASE (read; "true" => case-insensitive match).
function colorize(){
  local flags="g"
  local coloured
  if [[ "${IGNCASE}" = "true" ]]; then
    flags="gi"
  fi
  # sed inserts the escape codes as literal "\033[..." text; the final
  # echo -e turns them into real escape sequences.
  coloured=$(echo -e "$(cat)" \
    | sed -s "s|\(${PATT}\)|\\\033\[0;31m\1\\\033\[0m|${flags}")
  echo -e "${coloured}"
}

# Turn the arguments into a URL path component: blanks become underscores,
# double quotes are dropped and the rest is percent-escaped.
# (Despite its name this function *encodes*; the name is kept for
# compatibility.)  Requires perl with the URI::Escape module.
function uri_decode(){
  echo -e "$*" | perl -MURI::Escape -lne 's/ /_/g;s/"//g;print uri_escape($_);'
}

# Select the language dependent strings.
# Globals: LOCAL   (read/written) language code, defaults to "en"
#          MARKER  (written) sed regex; from the first matching line to the
#                  end of the dump everything is dropped (link dump)
#          MARKER2 (written, except for unknown languages) label of the
#                  "[edit]" section links that are removed
#          RANDOMP (written) title of the "random article" special page
function localize(){
  # Per default we use the english localized version of Wikipedia
  : "${LOCAL:=en}"
  case "${LOCAL}" in
    de)  MARKER='^\s*Kategorien\?:';    MARKER2='Bearbeiten';  RANDOMP='Spezial:Zufällige Seite' ;;
    en|simple)
         MARKER='^\s*Categories:\|^\s*Category:'
         MARKER2='edit';                RANDOMP='Special:Random' ;;
    fr)  MARKER='^\s*Catégories :';     MARKER2='modifier';    RANDOMP='Special:Random' ;;
    nl)  MARKER='^\s*Categorie:';       MARKER2='bewerk';      RANDOMP='Speciaal:Willekeurig' ;;
    sv)  MARKER='^\s*Kategorier:';      MARKER2='redigera';    RANDOMP='Special:Random' ;;
    es)  MARKER='^\s*Categorías:';      MARKER2='editar';      RANDOMP='Especial:Random' ;;
    pt)  MARKER='^\s*Categorias:';      MARKER2='editar';      RANDOMP='Especial:Random' ;;
    pl)  MARKER='^\s*Kategorie:';       MARKER2='Edytuj';      RANDOMP='Specjalna:Losowa_strona' ;;
    it)  MARKER='^\s*Categorie:';       MARKER2='modifica';    RANDOMP='Speciale:PaginaCasuale' ;;
    da)  MARKER='^\s*Kategori:';        MARKER2='\(redigér\|rediger\)'
                                                               RANDOMP='Speciel:Tilfældig_side' ;;
    eo)  MARKER='^\s*Kategorio:';       MARKER2='redaktu';     RANDOMP='Speciala:Random' ;;
    no)  MARKER='^\s*Kategorier:';      MARKER2='rediger';     RANDOMP='Spesial:Tilfeldig_side' ;;
    nn)  MARKER='^\s*Kategoriar:';      MARKER2='endre';       RANDOMP='Special:Random' ;;
    fi)  MARKER='^\s*Luokat:';          MARKER2='muokkaa';     RANDOMP='Toiminnot:Satunnainen_sivu' ;;
    ca)  MARKER='^\s*Categoria:';       MARKER2='edita';       RANDOMP='Especial:Random' ;;
    ro)  MARKER='^\s*Categorii:';       MARKER2='modifica';    RANDOMP='Special:Random' ;;
    cs)  MARKER='^\s*Kategorie:';       MARKER2='editovat';    RANDOMP='Speciální:Random' ;;
    sk)  MARKER='^\s*Kategórie:';       MARKER2='úprava';      RANDOMP='Špeciálne:Random' ;;
    sl)  MARKER='^\s*Kategorije:';      MARKER2='spremeni';    RANDOMP='Posebno:Random' ;;
    lb)  MARKER='^\s*Kategorie:';       MARKER2='Änneren';     RANDOMP='Special:Random' ;;
    la)  MARKER='^\s*Categoriae:';      MARKER2='recensere';   RANDOMP='Specialis:Random' ;;
    rm)  MARKER='Views';                MARKER2='edit';        RANDOMP='Special:Random' ;;
    ia)  MARKER='Views';                MARKER2='modificar';   RANDOMP='Special:Random' ;;
    is)  MARKER='^\s*Flokkar:';         MARKER2='breyta';      RANDOMP='Kerfissíða:Random' ;;
    hu)  MARKER='^\s*Kategóriák:';      MARKER2='szerkesztés'; RANDOMP='Speciális:Lap_találomra' ;;
    tr)  MARKER='^\s*Sayfa kategorisi:'; MARKER2='degistir';   RANDOMP='Özel:Random' ;;
    af)  MARKER='^\s*Kategorieë: ';     MARKER2='wysig';       RANDOMP='Spesiaal:Random' ;;
    nds) MARKER='^\s*Kategorien:';      MARKER2='Ännern';      RANDOMP='Spezial:Random' ;;
    als) MARKER='^\s*Kategorie:';       MARKER2='ändere';      RANDOMP='Spezial:Zufällige_Seite' ;;
    *)   # Unknown language: generic markers, MARKER2 is left untouched.
         MARKER='\(Views\|References\|Visible links\)'
         RANDOMP='Special:Random' ;;
  esac
}

# Filter: clean up a browser text dump read from stdin.
# Removes the "[123]" link references and "[IMG]" tags, drops everything from
# the first line matching ${MARKER} to the end (that is only the link dump)
# and, if MARKER2 is set, removes the "[<MARKER2>]" edit links.
# Globals: MARKER (read), MARKER2 (read).
function stripOutput(){
  # The sed program is passed as an argument array instead of being built as
  # a string and run through eval, so it is not subject to word splitting or
  # pathname expansion.
  local -a sed_args=(
    -e 's|\[[0-9]*\]||g'
    -e 's|\[IMG\]||g'
    -e "/${MARKER}/,\$ D"
  )
  if [[ -n "${MARKER2}" ]]; then
    sed_args+=(-e "s#\[${MARKER2}\]##g")
  fi
  sed "${sed_args[@]}"
}

# Hand ${URL} over to ${BROWSER} for interactive viewing.
function openurl(){
  "${BROWSER}" "${URL}"
}

# Print a short extract of the article: lines 6-22 of the dump (without
# lines containing "copyright"), cleaned and, if COLOR is "true", colorized.
function summary(){
  if [[ "${COLOR}" = "true" ]]; then
    "${BROWSER}" -dump "${URL}" | grep -v copyright | head -n 22 \
      | tail -n 17 | stripOutput | colorize
  else
    "${BROWSER}" -dump "${URL}" | grep -v copyright | head -n 22 \
      | tail -n 17 | stripOutput
  fi
}

# Print the whole article, cleaned and colorized unless COLOR is "false".
function getInfo(){
  if [[ "${COLOR}" = "false" ]]; then
    "${BROWSER}" -dump "${URL}" | stripOutput
  else
    "${BROWSER}" -dump "${URL}" | stripOutput | colorize
  fi
}

# First read in the Run configuration File, if one is found.  A BROWSER set
# there is treated like a browser requested with -b.
if [[ -r ~/."${PROGNAME}"rc ]]; then
  source ~/."${PROGNAME}"rc
  ABROWSER=${BROWSER}
fi

# Process commandline parameters.
# "I:" was missing from the option string although -I is documented and
# handled below; the duplicate "B:" entry has been dropped.
while getopts "BCnNoOpPsSuvhrUl:b:c:i:I:W:" ARGS; do
  case "${ARGS}" in
    b) ABROWSER=${OPTARG} ;;
    B) ABROWSER='' ;;            # ignore configured browser, auto-detect
    c) IGNCASE="false"; COLOR="true"; PATT=${OPTARG} ;;
    C) COLOR="true" ;;
    i) IGNCASE="true"; COLOR="true"; PATT=${OPTARG} ;;
    I) IGNCASE="false"; COLOR="true"; PATT=${OPTARG} ;;
    l) LOCAL=${OPTARG} ;;
    n) COLOR="false" ;;
    N) COLOR="true" ;;
    o) USEBROWSER="true" ;;
    O) USEBROWSER="false" ;;
    p) PAGER="true" ;;
    P) PAGER="false" ;;
    r) RAND="true" ;;
    s) SHORT="true" ;;
    S) SHORT="false" ;;
    u) OUTPUTURL="true" ;;
    U) OPENURL="true" ;;
    v) getVersion; exit 0 ;;
    W) WURL=${OPTARG} ;;
    h) display_help; exit 0 ;;
    *) display_help; exit 1 ;;
  esac
done
shift $((OPTIND - 1))

# Now we do some input sanitizing: normalise the language code *before* the
# markers are chosen, so that e.g. "-l DE" selects the german markers.
LOCAL="$(echo "${LOCAL}" | tr '[:upper:]' '[:lower:]')"
localize

# Setting Up some Variables, to determine, what actually to do
if [[ -z "$1" && -z "${RAND}" ]]; then
  display_help
  exit 1
fi

: "${IGNCASE:=false}"
: "${PAGER:=false}"
: "${OPENURL:=false}"
: "${RAND:=false}"

if [[ "${PAGER}" = "true" ]]; then
  { PAGER=$(type -P less) || PAGER=$(type -P more) ; } \
    || errorExit "No Pager found!"
fi
# Let less pass the colour escape sequences through.
PAGER=${PAGER/less/less -Rr}

: "${COLOR:=false}"
# Without an explicit pattern the query itself is highlighted.
if [[ "${COLOR}" = "true" && -z "${PATT}" ]]; then
  PATT="$*"
fi

if [[ "${OPENURL}" = "true" ]]; then
  URL="$*"
fi

# Check for Alternative Browser.  Only ABROWSER (from -b or the rc file)
# selects a specific program; a BROWSER variable that merely exists in the
# environment must not trigger a lookup of an empty program name.
if [[ -n "${ABROWSER}" ]]; then
  BROWSER=$(type -P "${ABROWSER}") || errorExit "${ABROWSER} not found"
else
  { BROWSER=$(type -P w3m) ||
    BROWSER=$(type -P elinks) ||
    BROWSER=$(type -P links2) ||
    BROWSER=$(type -P lynx) ||
    BROWSER=$(type -P links.main) ||
    BROWSER=$(type -P links) ; } || errorExit "No Browser found"
fi

# Open page in Browser?
: "${USEBROWSER:=false}"
# Output only a summary?
: "${SHORT:=false}"
# Output only the URL?
: "${OUTPUTURL:=false}"

ARGUMENT="$(uri_decode "$*")"

# Random page?
if [[ "${RAND}" = "true" ]]; then
  ARGUMENT="$(uri_decode "${RANDOMP}")"
fi

if [[ -z "${URL}" ]]; then
  URL="http://${LOCAL}.wikipedia.org/wiki/${ARGUMENT}"
fi

if [[ -n "${WURL}" ]]; then
  WURL="${WURL%%/}"
  case "${WURL}" in
    http://*|https://*) URL="${WURL}/wiki/${ARGUMENT}" ;;
    *)                  URL="http://${WURL}/wiki/${ARGUMENT}" ;;
  esac
  # Force the english locale for foreign wikis; this is used to strip the
  # tags [edit], eg.  The markers have to be re-selected for it to apply.
  LOCAL="en"
  localize
fi

# Depending on some Variables, we do some different things here
if [[ "${USEBROWSER}" = "true" ]]; then
  openurl
  exit 0
fi

if [[ "${SHORT}" = "true" ]]; then
  summary
  exit 0
fi

if [[ "${OUTPUTURL}" = "true" ]]; then
  if [[ "${COLOR}" = "false" ]]; then
    echo "${URL}"
  else
    echo -e "\033[0;34m${URL}\033[0m"
  fi
  exit 0
fi

if [[ "${PAGER}" != "false" ]]; then
  # PAGER may carry options ("less -Rr"), hence deliberately unquoted.
  getInfo | ${PAGER}
else
  getInfo
fi

# vim: ft=sh