diff --git a/.gitignore b/.gitignore index c3c4354d6..b9de5208e 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ Changelog.linkchecker* /todo /alexa*.log /testresults.txt +/linkchecker.prof diff --git a/Makefile b/Makefile index 291091fff..3392c124c 100644 --- a/Makefile +++ b/Makefile @@ -18,11 +18,11 @@ DEBORIGFILE:=$(DEBUILDDIR)/$(LAPPNAME)_$(VERSION).orig.tar.xz DEBPACKAGEDIR:=$(DEBUILDDIR)/$(APPNAME)-$(VERSION) FILESCHECK_URL:=http://localhost/~calvin/ SRCDIR:=${HOME}/src -PY_FILES_DIRS:=linkcheck tests *.py linkchecker linkchecker-nagios linkchecker-gui cgi-bin config doc +PY_FILES_DIRS:=linkcheck tests *.py linkchecker linkchecker-nagios linkchecker-gui cgi-bin config doc/examples MYPY_FILES_DIRS:=linkcheck/HtmlParser linkcheck/checker \ linkcheck/cache linkcheck/configuration linkcheck/director \ linkcheck/htmlutil linkcheck/logger linkcheck/network \ - linkcheck/bookmarks \ + linkcheck/bookmarks linkcheck/plugins linkcheck/parser \ linkcheck/gui/__init__.py \ linkcheck/gui/checker.py \ linkcheck/gui/contextmenu.py \ @@ -192,7 +192,7 @@ filescheck: localbuild done update-copyright: - update-copyright --holder="Bastian Kleineidam" + update-copyright --holder="Bastian Kleineidam" $(PY_FILES_DIRS) releasecheck: check update-certificates @if egrep -i "xx\.|xxxx|\.xx" doc/changelog.txt > /dev/null; then \ diff --git a/config/create.sql b/config/create.sql index ca6ad8d2c..8dab13de7 100644 --- a/config/create.sql +++ b/config/create.sql @@ -17,7 +17,7 @@ create table linksdb ( name varchar(256), checktime int, dltime int, - dlsize int, + size int, cached int, level int not null, modified varchar(256) diff --git a/config/linkcheckerrc b/config/linkcheckerrc index d48c2cf4e..36d5c8749 100644 --- a/config/linkcheckerrc +++ b/config/linkcheckerrc @@ -131,32 +131,18 @@ #threads=100 # connection timeout in seconds #timeout=60 -# check anchors? 
-#anchors=0 +# Time to wait for checks to finish after the user aborts the first time +# (with Ctrl-C or the abort button). +#aborttimeout=300 +# The recursion level determines how many times links inside pages are followed. #recursionlevel=1 -# supply a regular expression for which warnings are printed if found -# in any HTML files. -#warningregex=(Oracle DB Error|Page Not Found|badsite\.example\.com) # Basic NNTP server. Overrides NNTP_SERVER environment variable. -# warn if size info exceeds given maximum of bytes -#warnsizebytes=2000 #nntpserver= -# check HTML or CSS syntax with the W3C online validator -#checkhtml=1 -#checkcss=1 -# scan URL content for viruses with ClamAV -#scanvirus=1 -# ClamAV config file -#clamavconf=/etc/clamav/clamd.conf -# Send and store cookies -#cookies=1 # parse a cookiefile for initial cookie data #cookiefile=/path/to/cookies.txt # User-Agent header string to send to HTTP web servers +# Note that robots.txt are always checked with the original User-Agent. #useragent=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) -# Pause the given number of seconds between two subsequent connection -# requests to the same host. -#pause=0 # When checking finishes, write a memory dump to a temporary file. # The memory dump is written both when checking finishes normally # and when checking gets canceled. @@ -175,22 +161,16 @@ # Check SSL certificates. Set to an absolute pathname for a custom # CA cert bundle to use. Set to zero to disable SSL certificate verification. #sslverify=1 -# Check that SSL certificates are at least the given number of days valid. -# The number must not be negative. -# If the number of days is zero a warning is printed only for certificates -# that are already expired. -# The default number of days is 14. -#sslcertwarndays=14 # Stop checking new URLs after the given number of seconds. Same as if the # user hits Ctrl-C after X seconds. #maxrunseconds=600 # Maximum number of URLs to check. 
New URLs will not be queued after the # given number of URLs is checked. #maxnumurls=153 -# Maximum number of connections to one single host for different connection types. -#maxconnectionshttp=10 -#maxconnectionshttps=10 -#maxconnectionsftp=2 +# Maximum number of requests per second to one host. +#maxrequestspersecond=10 +# Allowed URL schemes as a comma-separated list. +#allowedschemes=http,https ##################### filtering options ########################## [filtering] @@ -211,11 +191,12 @@ # recognized warnings). Add a comma-separated list of warnings here # that prevent a valid URL from being logged. Note that the warning # will be logged in invalid URLs. -#ignorewarnings=url-unicode-domain,anchor-not-found +#ignorewarnings=url-unicode-domain # Regular expression to add more URLs recognized as internal links. # Default is that URLs given on the command line are internal. - #internlinks=^http://www\.example\.net/ +# Check external links +#checkextern=1 ##################### password authentication ########################## @@ -247,3 +228,30 @@ #loginextrafields= # name1:value1 # name 2:value 2 + +############################ Plugins ################################### +# +# uncomment sections to enable plugins + +# Check HTML anchors +#[AnchorCheck] + +# Add country info to URLs +#[LocationInfo] + +# Run W3C syntax checks +#[CssSyntaxCheck] +#[HtmlSyntaxCheck] + +# Search for regular expression in page contents +#[RegexCheck] +#warningregex=Oracle Error + +# Search for viruses in page contents +#[VirusCheck] +#clamavconf=/etc/clamav/clamd.conf + +# Check that SSL certificates are at least the given number of days valid. +#[SslCertificateCheck] +#sslcertwarndays=14 + diff --git a/doc/changelog.txt b/doc/changelog.txt index e47fe32b7..982ce7336 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -1,3 +1,34 @@ +8.7 "" (released xx.xx.2014) + +Features: +- checking: Support connection and content check plugins. 
+- checking: Move lots of custom checks like Antivirus and syntax + checks into plugins (see upgrading.txt for more info). +- checking: Add options to limit the number of requests per second, + allowed URL schemes and maximum file or download size. + +Changes: +- checking: Use the Python requests module for HTTP and HTTPS requests. +- logging: Removed download, domains and robots.txt statistics. +- logging: HTML output is now in HTML5. +- checking: Removed 301 warning since 301 redirects are used + a lot without updating the old URL links. +- checking: Disallowed access by robots.txt is an info now, not + a warning. Otherwise it produces a lot of warnings which + is counter-productive. +- checking: Do not check SMTP connections for mailto: URLs anymore. + It resulted in lots of false warnings since spam prevention + usually disallows direct SMTP connections from unrecognized + client IPs. +- checking: Only internal URLs are checked as default. To check + external urls use --check-extern. + +Fixes: +- logging: Status was printed every second regardless of the + configured wait time. +- checking: Several speed and memory usage improvements. 
+ + 8.6 "About Time" (released 8.1.2014) Changes: diff --git a/doc/de.po b/doc/de.po index 614ce82e5..3224b262e 100644 --- a/doc/de.po +++ b/doc/de.po @@ -5,7 +5,7 @@ msgid "" msgstr "" "Project-Id-Version: linkchecker 3.4\n" -"POT-Creation-Date: 2014-01-08 22:34+0100\n" +"POT-Creation-Date: 2014-02-28 22:57+0100\n" "PO-Revision-Date: 2013-12-12 21:51+0100\n" "Last-Translator: Bastian Kleineidam \n" "Language-Team: de \n" @@ -90,7 +90,7 @@ msgstr "LinkChecker beinhaltet" #: en/linkchecker.1:17 en/linkchecker.1:19 en/linkchecker.1:21 #: en/linkchecker.1:23 en/linkchecker.1:25 en/linkchecker.1:27 #: en/linkchecker.1:29 en/linkchecker.1:31 en/linkchecker.1:33 -#: en/linkchecker.1:477 en/linkchecker.1:481 en/linkchecker.1:483 +#: en/linkchecker.1:457 en/linkchecker.1:461 en/linkchecker.1:463 #, no-wrap msgid "\\(bu" msgstr "\\(bu" @@ -172,12 +172,15 @@ msgstr "BEISPIELE" # type: Plain text #. type: Plain text -#: en/linkchecker.1:39 -#, no-wrap +#: en/linkchecker.1:38 +#, fuzzy, no-wrap +#| msgid "" +#| "The most common use checks the given domain recursively, plus any\n" +#| "URL pointing outside of the domain:\n" +#| " B\n" msgid "" -"The most common use checks the given domain recursively, plus any\n" -"URL pointing outside of the domain:\n" -" B\n" +"The most common use checks the given domain recursively:\n" +" B\n" msgstr "" "Der häufigste Gebrauchsfall prüft die angegebene Domäne rekursiv,\n" "inklusive aller einzelnen nach außen zeigenden Verknüpfungen:\n" @@ -185,7 +188,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:42 +#: en/linkchecker.1:41 msgid "" "Beware that this checks the whole site which can have thousands of URLs. " "Use the B<-r> option to restrict the recursion depth." @@ -196,18 +199,21 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:45 -#, no-wrap +#: en/linkchecker.1:44 +#, fuzzy, no-wrap +#| msgid "" +#| "Don't check B URLs. 
All other links are checked as usual:\n" +#| " B\n" msgid "" -"Don't check B URLs. All other links are checked as usual:\n" -" B\n" +"Don't check URLs with B in its name. All other links are checked as usual:\n" +" B\n" msgstr "" "Prüfe keine B URLs. Alle anderen Verknüpfungen werden wie üblich geprüft:\n" " B\n" # type: Plain text #. type: Plain text -#: en/linkchecker.1:48 +#: en/linkchecker.1:47 #, no-wrap msgid "" "Checking a local HTML file on Unix:\n" @@ -218,7 +224,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:51 +#: en/linkchecker.1:50 #, no-wrap msgid "" "Checking a local HTML file on Windows:\n" @@ -229,7 +235,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:54 +#: en/linkchecker.1:53 #, no-wrap msgid "" "You can skip the B url part if the domain starts with B:\n" @@ -240,18 +246,21 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:57 -#, no-wrap +#: en/linkchecker.1:56 +#, fuzzy, no-wrap +#| msgid "" +#| "You can skip the B url part if the domain starts with B:\n" +#| " B\n" msgid "" "You can skip the B url part if the domain starts with B:\n" -" B\n" +" B\n" msgstr "" "Sie können den B URL Anteil weglassen wenn die Domäne mit B beginnt:\n" " B\n" # type: Plain text #. type: Plain text -#: en/linkchecker.1:60 +#: en/linkchecker.1:59 #, no-wrap msgid "" "Generate a sitemap graph and convert it with the graphviz dot utility:\n" @@ -262,28 +271,28 @@ msgstr "" # type: SH #. type: SH -#: en/linkchecker.1:61 +#: en/linkchecker.1:60 #, no-wrap msgid "OPTIONS" msgstr "OPTIONEN" # type: SS #. type: SS -#: en/linkchecker.1:62 +#: en/linkchecker.1:61 #, no-wrap msgid "General options" msgstr "Allgemeine Optionen" # type: TP #. type: TP -#: en/linkchecker.1:63 +#: en/linkchecker.1:62 #, no-wrap msgid "B<-f>I, B<--config=>I" msgstr "B<-f>I, B<--config=>I" # type: Plain text #. type: Plain text -#: en/linkchecker.1:67 +#: en/linkchecker.1:66 msgid "" "Use I as configuration file. 
As default LinkChecker uses B<~/." "linkchecker/linkcheckerrc>." @@ -293,27 +302,27 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:67 +#: en/linkchecker.1:66 #, no-wrap msgid "B<-h>, B<--help>" msgstr "B<-h>, B<--help>" # type: Plain text #. type: Plain text -#: en/linkchecker.1:70 +#: en/linkchecker.1:69 msgid "Help me! Print usage information for this program." msgstr "Hilfe! Gebe Gebrauchsanweisung für dieses Programm aus." # type: TP #. type: TP -#: en/linkchecker.1:70 +#: en/linkchecker.1:69 #, no-wrap msgid "B<--stdin>" msgstr "B<--stdin>" # type: Plain text #. type: Plain text -#: en/linkchecker.1:73 +#: en/linkchecker.1:72 msgid "Read list of white-space separated URLs to check from stdin." msgstr "" "Lese Liste von URLs zum Prüfen von der Standardeingabe, getrennt durch " @@ -321,14 +330,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:73 +#: en/linkchecker.1:72 #, no-wrap msgid "B<-t>I, B<--threads=>I" msgstr "B<-t>I, B<--threads=>I" # type: Plain text #. type: Plain text -#: en/linkchecker.1:77 en/linkcheckerrc.5:95 +#: en/linkchecker.1:76 en/linkcheckerrc.5:47 msgid "" "Generate no more than the given number of threads. Default number of threads " "is 100. To disable threading specify a non-positive number." @@ -339,77 +348,50 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:77 +#: en/linkchecker.1:76 #, no-wrap msgid "B<-V>, B<--version>" msgstr "B<-V>, B<--version>" # type: Plain text #. type: Plain text -#: en/linkchecker.1:80 +#: en/linkchecker.1:79 msgid "Print version and exit." msgstr "Gebe die Version aus und beende das Programm." -# type: SS -#. type: SS -#: en/linkchecker.1:81 -#, no-wrap -msgid "Output options" -msgstr "Ausgabeoptionen" - -# type: TP -#. type: TP -#: en/linkchecker.1:82 -#, no-wrap -msgid "B<--check-css>" -msgstr "B<--check-css>" - -# type: Plain text -#. type: Plain text -#: en/linkchecker.1:85 en/linkcheckerrc.5:23 -msgid "Check syntax of CSS URLs with the W3C online validator." 
-msgstr "Prüfe Syntax von CSS URLs mit dem W3C Online Validator." - # type: TP #. type: TP -#: en/linkchecker.1:85 -#, no-wrap -msgid "B<--check-html>" -msgstr "B<--check-html>" +#: en/linkchecker.1:79 +#, fuzzy, no-wrap +#| msgid "B<--stdin>" +msgid "B<--list-plugins>" +msgstr "B<--stdin>" # type: Plain text #. type: Plain text -#: en/linkchecker.1:88 en/linkcheckerrc.5:28 -msgid "Check syntax of HTML URLs with the W3C online validator." -msgstr "Prüfe Syntax von HTML URLs mit dem W3C Online Validator." +#: en/linkchecker.1:82 +#, fuzzy +#| msgid "Print version and exit." +msgid "Print available check plugins and exit." +msgstr "Gebe die Version aus und beende das Programm." -# type: TP -#. type: TP -#: en/linkchecker.1:88 +# type: SS +#. type: SS +#: en/linkchecker.1:83 #, no-wrap -msgid "B<--complete>" -msgstr "B<--complete>" - -# type: Plain text -#. type: Plain text -#: en/linkchecker.1:91 -msgid "" -"Log all URLs, including duplicates. Default is to log duplicate URLs only " -"once." -msgstr "" -"Gebe alle geprüften URLs aus. Standard ist es, doppelte URLs nur einmal " -"auszugeben." +msgid "Output options" +msgstr "Ausgabeoptionen" # type: TP #. type: TP -#: en/linkchecker.1:91 +#: en/linkchecker.1:84 #, no-wrap msgid "B<-D>I, B<--debug=>I" msgstr "B<-D>I, B<--debug=>I" # type: Plain text #. type: Plain text -#: en/linkchecker.1:101 +#: en/linkchecker.1:94 msgid "" "Print debugging output for the given logger. Available loggers are " "B, B, B, B, B and B. Specifying " @@ -425,14 +407,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:101 +#: en/linkchecker.1:94 #, no-wrap msgid "B<-F>I[BI][BI], B<--file-output=>I[BI][BI]" msgstr "B<-F>I[BI][BI], B<--file-output=>I[BI][BI]" # type: Plain text #. type: Plain text -#: en/linkchecker.1:110 +#: en/linkchecker.1:103 msgid "" "Output to a file BI, B<$HOME/.linkchecker/blacklist> " "for B output, or I if specified. The I " @@ -448,7 +430,7 @@ msgstr "" # type: Plain text #. 
type: Plain text -#: en/linkchecker.1:120 +#: en/linkchecker.1:113 msgid "" "The I and I parts of the B output type will be " "ignored, else if the file already exists, it will be overwritten. You can " @@ -469,40 +451,40 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:120 +#: en/linkchecker.1:113 #, no-wrap msgid "B<--no-status>" msgstr "B<--no-status>" # type: Plain text #. type: Plain text -#: en/linkchecker.1:123 +#: en/linkchecker.1:116 msgid "Do not print check status messages." msgstr "Gebe keine Statusmeldungen aus." # type: TP #. type: TP -#: en/linkchecker.1:123 +#: en/linkchecker.1:116 #, no-wrap msgid "B<--no-warnings>" msgstr "B<--no-warnings>" # type: Plain text #. type: Plain text -#: en/linkchecker.1:126 +#: en/linkchecker.1:119 msgid "Don't log warnings. Default is to log warnings." msgstr "Gebe keine Warnungen aus. Standard ist die Ausgabe von Warnungen." # type: TP #. type: TP -#: en/linkchecker.1:126 +#: en/linkchecker.1:119 #, no-wrap msgid "B<-o>I[BI], B<--output=>I[BI]" msgstr "B<-o>I[BI], B<--output=>I[BI]" # type: Plain text #. type: Plain text -#: en/linkchecker.1:133 +#: en/linkchecker.1:126 msgid "" "Specify output type as B, B, B, B, B, B, " "B, B, B or B. Default type is B. The " @@ -514,7 +496,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:137 +#: en/linkchecker.1:130 msgid "" "The I specifies the output encoding, the default is that of your " "locale. Valid encodings are listed at B, B<--quiet>" msgstr "B<-q>, B<--quiet>" # type: Plain text #. type: Plain text -#: en/linkchecker.1:141 +#: en/linkchecker.1:134 msgid "" "Quiet operation, an alias for B<-o none>. This is only useful with B<-F>." msgstr "" @@ -542,40 +524,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:141 -#, no-wrap -msgid "B<--scan-virus>" -msgstr "B<--scan-virus>" - -# type: Plain text -#. type: Plain text -#: en/linkchecker.1:144 en/linkcheckerrc.5:89 -msgid "Scan content of URLs for viruses with ClamAV." 
-msgstr "Prüfe Inhalt von URLs auf Viren mit ClamAV." - -# type: TP -#. type: TP -#: en/linkchecker.1:144 -#, no-wrap -msgid "B<--trace>" -msgstr "B<--trace>" - -# type: Plain text -#. type: Plain text -#: en/linkchecker.1:147 -msgid "Print tracing information." -msgstr "Trace-Information ausgeben." - -# type: TP -#. type: TP -#: en/linkchecker.1:147 +#: en/linkchecker.1:134 #, no-wrap msgid "B<-v>, B<--verbose>" msgstr "B<-v>, B<--verbose>" # type: Plain text #. type: Plain text -#: en/linkchecker.1:150 +#: en/linkchecker.1:137 msgid "Log all checked URLs. Default is to log only errors and warnings." msgstr "" "Gebe alle geprüften URLs aus. Standard ist es, nur fehlerhafte URLs und " @@ -583,14 +539,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:150 +#: en/linkchecker.1:137 #, no-wrap msgid "B<-W>I, B<--warning-regex=>I" msgstr "B<-W>I, B<--warning-regex=>I" # type: Plain text #. type: Plain text -#: en/linkchecker.1:155 en/linkcheckerrc.5:115 +#: en/linkchecker.1:142 msgid "" "Define a regular expression which prints a warning if it matches any content " "of the checked link. This applies only to valid pages, so we can get their " @@ -602,7 +558,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:158 +#: en/linkchecker.1:145 msgid "" "Use this to check for pages that contain some form of error, for example " "\"This page has moved\" or \"Oracle Application error\"." @@ -613,7 +569,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:161 +#: en/linkchecker.1:148 en/linkcheckerrc.5:467 msgid "" "Note that multiple values can be combined in the regular expression, for " "example \"(This page has moved|Oracle Application error)\"." @@ -623,81 +579,27 @@ msgstr "" "Applikationsfehler)\"." #. type: Plain text -#: en/linkchecker.1:163 en/linkchecker.1:190 en/linkchecker.1:203 +#: en/linkchecker.1:150 en/linkchecker.1:165 en/linkchecker.1:178 msgid "See section B for more info." 
msgstr "Siehe Abschnitt B für weitere Infos." -# type: TP -#. type: TP -#: en/linkchecker.1:163 -#, no-wrap -msgid "B<--warning-size-bytes=>I" -msgstr "B<--warning-size-bytes=>I" - -# type: Plain text -#. type: Plain text -#: en/linkchecker.1:167 en/linkcheckerrc.5:124 -msgid "" -"Print a warning if content size info is available and exceeds the given " -"number of I." -msgstr "" -"Gebe eine Warnung aus, wenn die Inhaltsgröße bekannt ist und die angegebene " -"Anzahl von Bytes übersteigt." - # type: SS #. type: SS -#: en/linkchecker.1:168 +#: en/linkchecker.1:150 #, no-wrap msgid "Checking options" msgstr "Optionen zum Prüfen" # type: TP #. type: TP -#: en/linkchecker.1:169 -#, no-wrap -msgid "B<-a>, B<--anchors>" -msgstr "B<-a>, B<--anchors>" - -# type: Plain text -#. type: Plain text -#: en/linkchecker.1:173 en/linkcheckerrc.5:18 -msgid "" -"Check HTTP anchor references. Default is not to check anchors. This option " -"enables logging of the warning B." -msgstr "" -"Prüfe HTTP Ankerverweise. Standard ist, Ankerverweise nicht zu prüfen. Diese " -"Option aktiviert die Ausgabe der Warnung B." - -# type: TP -#. type: TP -#: en/linkchecker.1:173 -#, no-wrap -msgid "B<-C>, B<--cookies>" -msgstr "B<-C>, B<--cookies>" - -# type: Plain text -#. type: Plain text -#: en/linkchecker.1:179 -msgid "" -"Accept and send HTTP cookies according to RFC 2109. Only cookies which are " -"sent back to the originating server are accepted. Sent and accepted cookies " -"are provided as additional logging information." -msgstr "" -"Akzeptiere und sende HTTP Cookies nach der RFC 2109. Lediglich Cookies, die " -"zum ursprünglichen Server zurückgesendet werden, werden akzeptiert. " -"Gesendete und akzeptierte Cookies werden als zusätzlicheLoginformation " -"aufgeführt." - -# type: TP -#. type: TP -#: en/linkchecker.1:179 +#: en/linkchecker.1:151 #, no-wrap msgid "B<--cookiefile=>I" msgstr "B<--cookiefile=>I" # type: Plain text #. 
type: Plain text -#: en/linkchecker.1:183 +#: en/linkchecker.1:155 msgid "" "Read a file with initial cookie data. The cookie data format is explained " "below." @@ -707,14 +609,27 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:183 +#: en/linkchecker.1:155 +#, fuzzy, no-wrap +#| msgid "B<--check-html>" +msgid "B<--check-extern>" +msgstr "B<--check-html>" + +#. type: Plain text +#: en/linkchecker.1:158 +msgid "Check external URLs." +msgstr "" + +# type: TP +#. type: TP +#: en/linkchecker.1:158 #, no-wrap msgid "B<--ignore-url=>I" msgstr "B<--ignore-url=>I" # type: Plain text #. type: Plain text -#: en/linkchecker.1:186 +#: en/linkchecker.1:161 msgid "" "URLs matching the given regular expression will be ignored and not checked." msgstr "" @@ -723,20 +638,20 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:188 en/linkchecker.1:201 +#: en/linkchecker.1:163 en/linkchecker.1:176 msgid "This option can be given multiple times." msgstr "Diese Option kann mehrmals angegeben werden." # type: TP #. type: TP -#: en/linkchecker.1:190 +#: en/linkchecker.1:165 #, no-wrap msgid "B<-N>I, B<--nntp-server=>I" msgstr "B<-N>I, B<--nntp-server=>I" # type: Plain text #. type: Plain text -#: en/linkchecker.1:195 en/linkcheckerrc.5:71 +#: en/linkchecker.1:170 en/linkcheckerrc.5:34 msgid "" "Specify an NNTP server for B links. Default is the environment " "variable B. If no host is given, only the syntax of the link is " @@ -748,14 +663,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:195 +#: en/linkchecker.1:170 #, no-wrap msgid "B<--no-follow-url=>I" msgstr "B<--no-follow-url=>I" # type: Plain text #. type: Plain text -#: en/linkchecker.1:199 +#: en/linkchecker.1:174 msgid "" "Check but do not recurse into URLs matching the given regular expression." msgstr "" @@ -764,14 +679,14 @@ msgstr "" # type: TP #. 
type: TP -#: en/linkchecker.1:203 +#: en/linkchecker.1:178 #, no-wrap msgid "B<-p>, B<--password>" msgstr "B<-p>, B<--password>" # type: Plain text #. type: Plain text -#: en/linkchecker.1:208 +#: en/linkchecker.1:183 msgid "" "Read a password from console and use it for HTTP and FTP authorization. For " "FTP the default password is B. For HTTP there is no default " @@ -783,32 +698,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:208 -#, no-wrap -msgid "B<-P>I, B<--pause=>I" -msgstr "B<-P>I, B<--pause=>I" - -# type: Plain text -#. type: Plain text -#: en/linkchecker.1:212 -msgid "" -"Pause the given number of seconds between two subsequent connection requests " -"to the same host. Default is no pause between requests." -msgstr "" -"Pausiere die angegebene Anzahl von Sekunden zwischen zwei aufeinander " -"folgenden Verbindungen zum demselben Rechner. Standard ist keine Pause " -"zwischen Verbindungen." - -# type: TP -#. type: TP -#: en/linkchecker.1:212 +#: en/linkchecker.1:183 #, no-wrap msgid "B<-r>I, B<--recursion-level=>I" msgstr "B<-r>I, B<--recursion-level=>I" # type: Plain text #. type: Plain text -#: en/linkchecker.1:217 en/linkcheckerrc.5:84 +#: en/linkchecker.1:188 en/linkcheckerrc.5:41 msgid "" "Check recursively all links up to given depth. A negative depth will enable " "infinite recursion. Default depth is infinite." @@ -818,14 +715,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:217 +#: en/linkchecker.1:188 #, no-wrap msgid "B<--timeout=>I" msgstr "B<--timeout=>I" # type: Plain text #. type: Plain text -#: en/linkchecker.1:221 en/linkcheckerrc.5:101 +#: en/linkchecker.1:192 en/linkcheckerrc.5:53 msgid "" "Set the timeout for connection attempts in seconds. The default timeout is " "60 seconds." @@ -835,14 +732,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:221 +#: en/linkchecker.1:192 #, no-wrap msgid "B<-u>I, B<--user=>I" msgstr "B<-u>I, B<--user=>I" # type: Plain text #. 
type: Plain text -#: en/linkchecker.1:226 +#: en/linkchecker.1:197 msgid "" "Try the given username for HTTP and FTP authorization. For FTP the default " "username is B. For HTTP there is no default username. See also B<-" @@ -854,13 +751,13 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:226 +#: en/linkchecker.1:197 #, no-wrap msgid "B<--user-agent=>I" msgstr "B<--user-agent=>I" #. type: Plain text -#: en/linkchecker.1:231 en/linkcheckerrc.5:108 +#: en/linkchecker.1:202 en/linkcheckerrc.5:67 msgid "" "Specify the User-Agent string to send to the HTTP server, for example " "\"Mozilla/4.0\". The default is \"LinkChecker/X.Y\" where X.Y is the current " @@ -872,14 +769,14 @@ msgstr "" # type: SH #. type: SH -#: en/linkchecker.1:232 +#: en/linkchecker.1:203 #, no-wrap msgid "CONFIGURATION FILES" msgstr "KONFIGURATIONSDATEIEN" # type: Plain text #. type: Plain text -#: en/linkchecker.1:236 +#: en/linkchecker.1:207 msgid "" "Configuration files can specify all options above. They can also specify " "some options that cannot be set on the command line. See B" @@ -891,14 +788,14 @@ msgstr "" # type: SH #. type: SH -#: en/linkchecker.1:237 +#: en/linkchecker.1:208 #, no-wrap msgid "OUTPUT TYPES" msgstr "AUSGABETYPEN" # type: Plain text #. type: Plain text -#: en/linkchecker.1:241 +#: en/linkchecker.1:212 msgid "" "Note that by default only errors and warnings are logged. You should use " "the B<--verbose> option to get the complete URL list, especially when " @@ -910,27 +807,27 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:242 +#: en/linkchecker.1:213 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkchecker.1:245 +#: en/linkchecker.1:216 msgid "Standard text logger, logging URLs in keyword: argument fashion." msgstr "Standard Textausgabe in \"Schlüssel: Wert\"-Form." # type: TP #. type: TP -#: en/linkchecker.1:245 +#: en/linkchecker.1:216 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. 
type: Plain text -#: en/linkchecker.1:250 +#: en/linkchecker.1:221 msgid "" "Log URLs in keyword: argument fashion, formatted as HTML. Additionally has " "links to the referenced pages. Invalid URLs have HTML and CSS syntax check " @@ -942,79 +839,79 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:250 +#: en/linkchecker.1:221 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkchecker.1:253 +#: en/linkchecker.1:224 msgid "Log check result in CSV format with one URL per line." msgstr "Gebe Prüfresultat in CSV-Format aus mit einer URL pro Zeile." # type: TP #. type: TP -#: en/linkchecker.1:253 +#: en/linkchecker.1:224 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkchecker.1:256 +#: en/linkchecker.1:227 msgid "Log parent-child relations between linked URLs as a GML sitemap graph." msgstr "" "Gebe Vater-Kind Beziehungen zwischen verknüpften URLs als GML Graphen aus." # type: TP #. type: TP -#: en/linkchecker.1:256 +#: en/linkchecker.1:227 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkchecker.1:259 +#: en/linkchecker.1:230 msgid "Log parent-child relations between linked URLs as a DOT sitemap graph." msgstr "" "Gebe Vater-Kind Beziehungen zwischen verknüpften URLs als DOT Graphen aus." # type: TP #. type: TP -#: en/linkchecker.1:259 +#: en/linkchecker.1:230 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkchecker.1:262 +#: en/linkchecker.1:233 msgid "Log check result as a GraphXML sitemap graph." msgstr "Gebe Prüfresultat als GraphXML-Datei aus." # type: TP #. type: TP -#: en/linkchecker.1:262 +#: en/linkchecker.1:233 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkchecker.1:265 +#: en/linkchecker.1:236 msgid "Log check result as machine-readable XML." msgstr "Gebe Prüfresultat als maschinenlesbare XML-Datei aus." #. 
type: TP -#: en/linkchecker.1:265 +#: en/linkchecker.1:236 #, no-wrap msgid "B" msgstr "B" #. type: Plain text -#: en/linkchecker.1:269 +#: en/linkchecker.1:240 msgid "" "Log check result as an XML sitemap whose protocol is documented at B." @@ -1024,14 +921,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:269 +#: en/linkchecker.1:240 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkchecker.1:273 +#: en/linkchecker.1:244 msgid "" "Log check result as SQL script with INSERT commands. An example script to " "create the initial SQL table is included as create.sql." @@ -1042,14 +939,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:273 +#: en/linkchecker.1:244 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkchecker.1:278 +#: en/linkchecker.1:249 msgid "" "Suitable for cron jobs. Logs the check result into a file B<~/.linkchecker/" "blacklist> which only contains entries with invalid URLs and the number of " @@ -1061,27 +958,27 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:278 +#: en/linkchecker.1:249 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkchecker.1:281 +#: en/linkchecker.1:252 msgid "Logs nothing. Suitable for debugging or checking the exit code." msgstr "Gibt nichts aus. Für Debugging oder Prüfen des Rückgabewerts geeignet." # type: SH #. type: SH -#: en/linkchecker.1:282 +#: en/linkchecker.1:253 #, no-wrap msgid "REGULAR EXPRESSIONS" msgstr "REGULÄRE AUSDRÜCKE" # type: Plain text #. type: Plain text -#: en/linkchecker.1:285 +#: en/linkchecker.1:256 msgid "" "LinkChecker accepts Python regular expressions. See B for an introduction." @@ -1091,7 +988,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:288 +#: en/linkchecker.1:259 msgid "" "An addition is that a leading exclamation mark negates the regular " "expression." @@ -1101,14 +998,14 @@ msgstr "" # type: SH #. 
type: SH -#: en/linkchecker.1:289 +#: en/linkchecker.1:260 #, no-wrap msgid "COOKIE FILES" msgstr "COOKIE-DATEIEN" # type: Plain text #. type: Plain text -#: en/linkchecker.1:292 +#: en/linkchecker.1:263 msgid "" "A cookie file contains standard HTTP header (RFC 2616) data with the " "following possible names:" @@ -1118,66 +1015,53 @@ msgstr "" # type: TP #. type: TP -#: en/linkchecker.1:293 -#, no-wrap -msgid "B (optional)" -msgstr "B (optional)" - -# type: Plain text -#. type: Plain text -#: en/linkchecker.1:296 -msgid "Sets the scheme the cookies are valid for; default scheme is B." -msgstr "" -"Setzt das Schema für das die Cookies gültig sind; Standardschema ist B." - -# type: TP -#. type: TP -#: en/linkchecker.1:296 +#: en/linkchecker.1:264 #, no-wrap msgid "B (required)" msgstr "B (erforderlich)" # type: Plain text #. type: Plain text -#: en/linkchecker.1:299 +#: en/linkchecker.1:267 msgid "Sets the domain the cookies are valid for." msgstr "Setzt die Domäne für die die Cookies gültig sind." # type: TP #. type: TP -#: en/linkchecker.1:299 +#: en/linkchecker.1:267 #, no-wrap msgid "B (optional)" msgstr "B (optional)" # type: Plain text #. type: Plain text -#: en/linkchecker.1:302 +#: en/linkchecker.1:270 msgid "Gives the path the cookies are value for; default path is B." msgstr "Gibt den Pfad für den die Cookies gültig sind; Standardpfad ist B." # type: TP #. type: TP -#: en/linkchecker.1:302 -#, no-wrap -msgid "B (optional)" -msgstr "B (optional)" +#: en/linkchecker.1:270 +#, fuzzy, no-wrap +#| msgid "B (required)" +msgid "B (required)" +msgstr "B (erforderlich)" # type: Plain text #. type: Plain text -#: en/linkchecker.1:305 +#: en/linkchecker.1:273 msgid "Set cookie name/value. Can be given more than once." msgstr "Setzt den Cookie Name/Wert. Kann mehrmals angegeben werden." # type: Plain text #. type: Plain text -#: en/linkchecker.1:307 +#: en/linkchecker.1:275 msgid "Multiple entries are separated by a blank line." 
msgstr "Mehrere Einträge sind durch eine Leerzeile zu trennen." # type: Plain text #. type: Plain text -#: en/linkchecker.1:311 +#: en/linkchecker.1:279 msgid "" "The example below will send two cookies to all URLs starting with B and one to all URLs starting with B, B)" msgstr "HTTP Verknüpfungen (B, B)" #. type: Plain text -#: en/linkchecker.1:358 +#: en/linkchecker.1:324 +#, fuzzy +#| msgid "" +#| "After connecting to the given HTTP server the given path or query is " +#| "requested. All redirections are followed, and if user/password is given " +#| "it will be used as authorization when necessary. Permanently moved pages " +#| "issue a warning. All final HTTP status codes other than 2xx are errors." msgid "" "After connecting to the given HTTP server the given path or query is " "requested. All redirections are followed, and if user/password is given it " -"will be used as authorization when necessary. Permanently moved pages issue " -"a warning. All final HTTP status codes other than 2xx are errors." +"will be used as authorization when necessary. All final HTTP status codes " +"other than 2xx are errors." msgstr "" "Nach Verbinden zu dem gegebenen HTTP-Server wird der eingegebene Pfad oder " "Query angefordert. Alle Umleitungen werden verfolgt, und falls ein Benutzer/" @@ -1330,18 +1223,18 @@ msgstr "" "Fehler ausgegeben." #. type: Plain text -#: en/linkchecker.1:360 +#: en/linkchecker.1:326 msgid "HTML page contents are checked for recursion." msgstr "Der Inhalt von HTML-Seiten wird rekursiv geprüft." #. type: TP -#: en/linkchecker.1:360 +#: en/linkchecker.1:326 #, no-wrap msgid "Local files (B)" msgstr "Lokale Dateien (B)" #. type: Plain text -#: en/linkchecker.1:365 +#: en/linkchecker.1:331 msgid "" "A regular, readable file that can be opened is valid. A readable directory " "is also valid. All other files, for example device files, unreadable or non-" @@ -1353,18 +1246,18 @@ msgstr "" "Fehler." #. 
type: Plain text -#: en/linkchecker.1:367 +#: en/linkchecker.1:333 msgid "HTML or other parseable file contents are checked for recursion." msgstr "HTML- oder andere untersuchbare Dateiinhalte werden rekursiv geprüft." #. type: TP -#: en/linkchecker.1:367 +#: en/linkchecker.1:333 #, no-wrap msgid "Mail links (B)" msgstr "Mail-Links (B)" #. type: Plain text -#: en/linkchecker.1:372 +#: en/linkchecker.1:338 msgid "" "A mailto: link eventually resolves to a list of email addresses. If one " "address fails, the whole list will fail. For each mail address we check the " @@ -1375,7 +1268,7 @@ msgstr "" "Mail-Adresse werden die folgenden Dinge geprüft:" #. type: Plain text -#: en/linkchecker.1:382 +#: en/linkchecker.1:348 #, no-wrap msgid "" " 1) Check the adress syntax, both of the part before and after\n" @@ -1396,19 +1289,19 @@ msgstr "" " 4) Versuche, die Adresse mit dem VRFY-Befehl zu verifizieren. Falls eine Antwort kommt, wird die verifizierte Adresse als Information ausgegeben.\n" #. type: TP -#: en/linkchecker.1:382 +#: en/linkchecker.1:348 #, no-wrap msgid "FTP links (B)" msgstr "FTP-Links (B)" #. type: Plain text -#: en/linkchecker.1:386 +#: en/linkchecker.1:352 #, no-wrap msgid " For FTP links we do:\n" msgstr "Für FTP-Links wird Folgendes geprüft:\n" #. type: Plain text -#: en/linkchecker.1:392 +#: en/linkchecker.1:358 #, no-wrap msgid "" " 1) connect to the specified host\n" @@ -1423,13 +1316,13 @@ msgstr "" " 4) Liste die Dateien im Verzeichnis auf mit dem NLST-Befehl\n" #. type: TP -#: en/linkchecker.1:393 +#: en/linkchecker.1:359 #, no-wrap msgid "Telnet links (``telnet:``)" msgstr "Telnet-Links (``telnet:``)" #. type: Plain text -#: en/linkchecker.1:398 +#: en/linkchecker.1:364 #, no-wrap msgid "" " We try to connect and if user/password are given, login to the\n" @@ -1437,13 +1330,13 @@ msgid "" msgstr " Versuche, zu dem angegeben Telnetrechner zu verginden und falls Benutzer/Passwort angegeben sind, wird versucht, sich anzumelden.\n" #. 
type: TP -#: en/linkchecker.1:399 +#: en/linkchecker.1:365 #, no-wrap msgid "NNTP links (``news:``, ``snews:``, ``nntp``)" msgstr "NNTP-Links (``news:``, ``snews:``, ``nntp``)" #. type: Plain text -#: en/linkchecker.1:404 +#: en/linkchecker.1:370 #, no-wrap msgid "" " We try to connect to the given NNTP server. If a news group or\n" @@ -1451,13 +1344,13 @@ msgid "" msgstr " Versuche, zu dem angegebenen NNTP-Rechner eine Verbindung aufzubaucne. Falls eine Nachrichtengruppe oder ein bestimmter Artikel angegeben ist, wird versucht, diese Gruppe oder diesen Artikel vom Rechner anzufragen.\n" #. type: TP -#: en/linkchecker.1:405 +#: en/linkchecker.1:371 #, no-wrap msgid "Unsupported links (``javascript:``, etc.)" msgstr "Nicht unterstützte Links (``javascript:``, etc.)" #. type: Plain text -#: en/linkchecker.1:410 +#: en/linkchecker.1:376 #, no-wrap msgid "" " An unsupported link will only print a warning. No further checking\n" @@ -1465,7 +1358,7 @@ msgid "" msgstr " Ein nicht unterstützter Link wird nur eine Warnung ausgeben. Weitere Prüfungen werden nicht durchgeführt.\n" #. type: Plain text -#: en/linkchecker.1:414 +#: en/linkchecker.1:380 #, no-wrap msgid "" " The complete list of recognized, but unsupported links can be found\n" @@ -1475,15 +1368,48 @@ msgstr "" " Die komplette Liste von erkannten, aber nicht unterstützten Links ist in der\n" " Quelldatei B. Die bekanntesten davon dürften JavaScript-Links sein.\n" +#. type: SH +#: en/linkchecker.1:381 en/linkcheckerrc.5:444 +#, no-wrap +msgid "PLUGINS" +msgstr "" + +#. type: Plain text +#: en/linkchecker.1:383 +msgid "There are two plugin types: connection and content plugins." +msgstr "" + +#. type: Plain text +#: en/linkchecker.1:386 +msgid "" +"Connection plugins are run after a successful connection to the URL host." +msgstr "" + +#. 
type: Plain text +#: en/linkchecker.1:390 +msgid "" +"Content plugins are run if the URL type has content (mailto: URLs have no " +"content for example) and if the check is not forbidden (ie. by HTTP robots." +"txt)." +msgstr "" + +#. type: Plain text +#: en/linkchecker.1:394 +msgid "" +"See B for a list of plugins and their " +"documentation. All plugins are enabled via the B(5) " +"configuration file." +msgstr "" + # type: SH #. type: SH -#: en/linkchecker.1:415 +#: en/linkchecker.1:395 #, no-wrap msgid "RECURSION" msgstr "Rekursion" #. type: Plain text -#: en/linkchecker.1:418 +#: en/linkchecker.1:398 msgid "" "Before descending recursively into a URL, it has to fulfill several " "conditions. They are checked in this order:" @@ -1492,12 +1418,12 @@ msgstr "" "erfüllen. Diese werden in folgender Reihenfolge geprüft:" #. type: Plain text -#: en/linkchecker.1:420 +#: en/linkchecker.1:400 msgid "1. A URL must be valid." msgstr "1. Eine URL muss gültig sein." #. type: Plain text -#: en/linkchecker.1:426 +#: en/linkchecker.1:406 #, no-wrap msgid "" "2. A URL must be parseable. This currently includes HTML files,\n" @@ -1508,7 +1434,7 @@ msgid "" msgstr "2. Der URL-Inhalt muss analysierbar sein. Dies beinhaltet zur Zeit HTML-Dateien, Opera Lesezeichen, und Verzeichnisse. Falls ein Dateityp nicht erkannt wird, (zum Beispiel weil er keine bekannte HTML-Dateierweiterung besitzt, und der Inhalt nicht nach HTML aussieht), wird der Inhalt als nicht analysierbar angesehen.\n" #. type: Plain text -#: en/linkchecker.1:429 +#: en/linkchecker.1:409 #, no-wrap msgid "" "3. The URL content must be retrievable. This is usually the case\n" @@ -1516,7 +1442,7 @@ msgid "" msgstr "3. Der URL-Inhalt muss ladbar sein. Dies ist normalerweise der Fall, mit Ausnahme von mailto: oder unbekannten URL-Typen.\n" #. type: Plain text -#: en/linkchecker.1:432 +#: en/linkchecker.1:412 #, no-wrap msgid "" "4. The maximum recursion level must not be exceeded. 
It is configured\n" @@ -1524,7 +1450,7 @@ msgid "" msgstr "4. Die maximale Rekursionstiefe darf nicht überschritten werden. Diese wird mit der Option B<--recursion-level> konfiguriert und ist standardmäßig nicht limitiert.\n" #. type: Plain text -#: en/linkchecker.1:435 +#: en/linkchecker.1:415 #, no-wrap msgid "" "5. It must not match the ignored URL list. This is controlled with\n" @@ -1532,7 +1458,7 @@ msgid "" msgstr "5. Die URL darf nicht in der Liste von ignorierten URLs sein. Die ignorierten URLs werden mit der Option B<--ignore-url> konfiguriert.\n" #. type: Plain text -#: en/linkchecker.1:439 +#: en/linkchecker.1:419 #, no-wrap msgid "" "6. The Robots Exclusion Protocol must allow links in the URL to be\n" @@ -1541,7 +1467,7 @@ msgid "" msgstr "6. Das Robots Exclusion Protocol muss es erlauben, dass Verknüpfungen in der URL rekursiv verfolgt werden können. Dies wird geprüft, indem in den HTML Kopfdaten nach der \"nofollow\"-Direktive gesucht wird.\n" #. type: Plain text -#: en/linkchecker.1:442 +#: en/linkchecker.1:422 msgid "" "Note that the directory recursion reads all files in that directory, not " "just a subset like B." @@ -1551,14 +1477,14 @@ msgstr "" # type: SH #. type: SH -#: en/linkchecker.1:443 +#: en/linkchecker.1:423 #, no-wrap msgid "NOTES" msgstr "BEMERKUNGEN" # type: Plain text #. type: Plain text -#: en/linkchecker.1:448 +#: en/linkchecker.1:428 msgid "" "URLs on the commandline starting with B are treated like B, URLs starting with B are treated like B. You can also " @@ -1570,7 +1496,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:453 +#: en/linkchecker.1:433 msgid "" "If you have your system configured to automatically establish a connection " "to the internet (e.g. with diald), it will connect when checking links not " @@ -1583,13 +1509,13 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:455 +#: en/linkchecker.1:435 msgid "Javascript links are not supported." 
msgstr "Javascript Links werden nicht unterstützt." # type: Plain text #. type: Plain text -#: en/linkchecker.1:458 +#: en/linkchecker.1:438 msgid "" "If your platform does not support threading, LinkChecker disables it " "automatically." @@ -1599,7 +1525,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:460 +#: en/linkchecker.1:440 msgid "You can supply multiple user/password pairs in a configuration file." msgstr "" "Sie können mehrere Benutzer/Passwort Paare in einer Konfigurationsdatei " @@ -1607,7 +1533,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:463 +#: en/linkchecker.1:443 msgid "" "When checking B links the given NNTP host doesn't need to be the same " "as the host of the user browsing your pages." @@ -1617,31 +1543,31 @@ msgstr "" # type: SH #. type: SH -#: en/linkchecker.1:464 +#: en/linkchecker.1:444 #, no-wrap msgid "ENVIRONMENT" msgstr "UMGEBUNG" # type: Plain text #. type: Plain text -#: en/linkchecker.1:466 +#: en/linkchecker.1:446 msgid "B - specifies default NNTP server" msgstr "B - gibt Standard NNTP Server an" # type: Plain text #. type: Plain text -#: en/linkchecker.1:468 +#: en/linkchecker.1:448 msgid "B - specifies default HTTP proxy server" msgstr "B - gibt Standard HTTP Proxy an" # type: Plain text #. type: Plain text -#: en/linkchecker.1:470 +#: en/linkchecker.1:450 msgid "B - specifies default FTP proxy server" msgstr "B - gibt Standard FTP Proxy an" #. type: Plain text -#: en/linkchecker.1:472 +#: en/linkchecker.1:452 msgid "" "B - comma-separated list of domains to not contact over a proxy " "server" @@ -1651,63 +1577,63 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:474 +#: en/linkchecker.1:454 msgid "B, B, B - specify output language" msgstr "B, B, B - gibt Ausgabesprache an" # type: SH #. type: SH -#: en/linkchecker.1:475 +#: en/linkchecker.1:455 #, no-wrap msgid "RETURN VALUE" msgstr "RÜCKGABEWERT" # type: Plain text #. 
type: Plain text -#: en/linkchecker.1:477 +#: en/linkchecker.1:457 msgid "The return value is 2 when" msgstr "Der Rückgabewert ist 2 falls" # type: Plain text #. type: Plain text -#: en/linkchecker.1:479 +#: en/linkchecker.1:459 msgid "a program error occurred." msgstr "ein Programmfehler aufgetreten ist." # type: Plain text #. type: Plain text -#: en/linkchecker.1:481 +#: en/linkchecker.1:461 msgid "The return value is 1 when" msgstr "Der Rückgabewert ist 1 falls" # type: Plain text #. type: Plain text -#: en/linkchecker.1:483 +#: en/linkchecker.1:463 msgid "invalid links were found or" msgstr "ungültige Verknüpfungen gefunden wurden oder" # type: Plain text #. type: Plain text -#: en/linkchecker.1:485 +#: en/linkchecker.1:465 msgid "link warnings were found and warnings are enabled" msgstr "Warnungen gefunden wurden und Warnungen aktiviert sind" # type: Plain text #. type: Plain text -#: en/linkchecker.1:487 +#: en/linkchecker.1:467 msgid "Else the return value is zero." msgstr "Sonst ist der Rückgabewert Null." # type: SH #. type: SH -#: en/linkchecker.1:488 +#: en/linkchecker.1:468 #, no-wrap msgid "LIMITATIONS" msgstr "LIMITIERUNGEN" # type: Plain text #. type: Plain text -#: en/linkchecker.1:492 +#: en/linkchecker.1:472 msgid "" "LinkChecker consumes memory for each queued URL to check. With thousands of " "queued URLs the amount of consumed memory can become quite large. This might " @@ -1720,33 +1646,33 @@ msgstr "" # type: SH #. type: SH -#: en/linkchecker.1:493 +#: en/linkchecker.1:473 #, no-wrap msgid "FILES" msgstr "DATEIEN" # type: Plain text #. type: Plain text -#: en/linkchecker.1:495 +#: en/linkchecker.1:475 msgid "B<~/.linkchecker/linkcheckerrc> - default configuration file" msgstr "B<~/.linkchecker/linkcheckerrc> - Standardkonfigurationsdatei" # type: Plain text #. 
type: Plain text -#: en/linkchecker.1:497 +#: en/linkchecker.1:477 msgid "B<~/.linkchecker/blacklist> - default blacklist logger output filename" msgstr "" "B<~/.linkchecker/blacklist> - Standard Dateiname der blacklist Logger Ausgabe" # type: Plain text #. type: Plain text -#: en/linkchecker.1:499 +#: en/linkchecker.1:479 msgid "BI - default logger file output name" msgstr "BI - Standard Dateiname der Logausgabe" # type: Plain text #. type: Plain text -#: en/linkchecker.1:501 +#: en/linkchecker.1:481 msgid "" "B - valid " "output encodings" @@ -1756,7 +1682,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkchecker.1:503 +#: en/linkchecker.1:483 msgid "" "B - regular expression documentation" msgstr "" @@ -1765,40 +1691,40 @@ msgstr "" # type: SH #. type: SH -#: en/linkchecker.1:504 en/linkcheckerrc.5:614 en/linkchecker-gui.1:16 +#: en/linkchecker.1:484 en/linkcheckerrc.5:538 en/linkchecker-gui.1:16 #, no-wrap msgid "SEE ALSO" msgstr "SIEHE AUCH" # type: TH #. type: Plain text -#: en/linkchecker.1:506 +#: en/linkchecker.1:486 msgid "B(5)" msgstr "B(5)" # type: SH #. type: SH -#: en/linkchecker.1:507 en/linkcheckerrc.5:617 en/linkchecker-gui.1:19 +#: en/linkchecker.1:487 en/linkcheckerrc.5:541 en/linkchecker-gui.1:19 #, no-wrap msgid "AUTHOR" msgstr "AUTHOR" # type: Plain text #. type: Plain text -#: en/linkchecker.1:509 en/linkcheckerrc.5:619 en/linkchecker-gui.1:21 +#: en/linkchecker.1:489 en/linkcheckerrc.5:543 en/linkchecker-gui.1:21 msgid "Bastian Kleineidam Ebastian.kleineidam@web.deE" msgstr "Bastian Kleineidam Ebastian.kleineidam@web.deE" # type: SH #. type: SH -#: en/linkchecker.1:510 en/linkcheckerrc.5:620 en/linkchecker-gui.1:22 +#: en/linkchecker.1:490 en/linkcheckerrc.5:544 en/linkchecker-gui.1:22 #, no-wrap msgid "COPYRIGHT" msgstr "COPYRIGHT" # type: Plain text #. 
type: Plain text -#: en/linkchecker.1:511 en/linkcheckerrc.5:621 +#: en/linkchecker.1:491 en/linkcheckerrc.5:545 msgid "Copyright \\(co 2000-2014 Bastian Kleineidam" msgstr "Copyright \\(co 2000-2014 Bastian Kleineidam" @@ -1850,160 +1776,52 @@ msgstr "EIGENSCHAFTEN" # type: SS #. type: SS -#: en/linkcheckerrc.5:13 +#: en/linkcheckerrc.5:12 #, no-wrap msgid "[checking]" msgstr "[checking]" # type: TP #. type: TP -#: en/linkcheckerrc.5:14 +#: en/linkcheckerrc.5:13 #, no-wrap -msgid "B[B<0>|B<1>]" -msgstr "B[B<0>|B<1>]" +msgid "BI" +msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:20 -msgid "Command line option: B<--anchors>" -msgstr "Kommandozeilenoption: B<--anchors>" - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:20 -#, no-wrap -msgid "B[B<0>|B<1>]" -msgstr "B[B<0>|B<1>]" +#: en/linkcheckerrc.5:17 +msgid "" +"Read a file with initial cookie data. The cookie data format is explained in " +"linkchecker(1)." +msgstr "" +"Lese eine Datei mit Cookie-Daten. Das Cookie Datenformat wird in linkchecker" +"(1) erklärt." # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:25 -msgid "Command line option: B<--check-css>" -msgstr "Kommandozeilenoption: B<--check-css>" +#: en/linkcheckerrc.5:19 +msgid "Command line option: B<--cookiefile>" +msgstr "Kommandozeilenoption: B<--cookiefile>" # type: TP #. type: TP -#: en/linkcheckerrc.5:25 +#: en/linkcheckerrc.5:19 #, no-wrap -msgid "B[B<0>|B<1>]" -msgstr "B[B<0>|B<1>]" +msgid "BI" +msgstr "BI" -# type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:30 -msgid "Command line option: B<--check-html>" -msgstr "Kommandozeilenoption: B<--check-html>" - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:30 -#, no-wrap -msgid "BI" -msgstr "BI" - -# type: Plain text -#. type: Plain text -#: en/linkcheckerrc.5:33 -msgid "Filename of B config file." -msgstr "Dateiname von B Konfigurationsdatei." - -# type: Plain text -#. 
type: Plain text -#: en/linkcheckerrc.5:35 en/linkcheckerrc.5:56 en/linkcheckerrc.5:66 -#: en/linkcheckerrc.5:134 en/linkcheckerrc.5:144 en/linkcheckerrc.5:153 -#: en/linkcheckerrc.5:161 en/linkcheckerrc.5:168 en/linkcheckerrc.5:175 -#: en/linkcheckerrc.5:182 en/linkcheckerrc.5:194 en/linkcheckerrc.5:200 -#: en/linkcheckerrc.5:313 en/linkcheckerrc.5:330 -msgid "Command line option: none" -msgstr "Kommandozeilenoption: keine" - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:35 -#, no-wrap -msgid "BI" -msgstr "BI" - -# type: Plain text -#. type: Plain text -#: en/linkcheckerrc.5:39 -msgid "" -"Read a file with initial cookie data. The cookie data format is explained in " -"linkchecker(1)." -msgstr "" -"Lese eine Datei mit Cookie-Daten. Das Cookie Datenformat wird in linkchecker" -"(1) erklärt." - -# type: Plain text -#. type: Plain text -#: en/linkcheckerrc.5:41 -msgid "Command line option: B<--cookiefile>" -msgstr "Kommandozeilenoption: B<--cookiefile>" - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:41 -#, no-wrap -msgid "B[B<0>|B<1>]" -msgstr "B[B<0>|B<1>]" - -#. type: Plain text -#: en/linkcheckerrc.5:44 -msgid "Accept and send HTTP cookies." -msgstr "Akzeptiere und sende HTTP cookies." - -# type: Plain text -#. type: Plain text -#: en/linkcheckerrc.5:46 -msgid "Command line option: B<--cookies>" -msgstr "Kommandozeilenoption: B<--cookies>" - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:46 -#, no-wrap -msgid "B[B<0>|B<1>]" -msgstr "B[B<0>|B<1>]" - -#. type: Plain text -#: en/linkcheckerrc.5:51 -msgid "" -"When checking finishes, write a memory dump to a temporary file. The memory " -"dump is written both when checking finishes normally and when checking gets " -"canceled." -msgstr "" -"Schreibe einen Speicherabzug in eine temporäre Datei wenn die Prüfung endet. " -"Der Speicherabzug wird sowohl beim normalen Beenden der Prüfung als auch " -"wenn die Prüfung abgebrochen wird geschrieben." - -#. 
type: Plain text -#: en/linkcheckerrc.5:54 -msgid "" -"The memory dump only works if the python-meliae package is installed. " -"Otherwise a warning is printed to install it." -msgstr "" -"Der Speicherabzug funktioniert nur falls das Paket python-meliae installiert " -"ist. Andernfalls wird eine Warnung angezeigt mit dem Hinweis dieses Paket zu " -"installieren." - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:56 -#, no-wrap -msgid "BI" -msgstr "BI" - -#. type: Plain text -#: en/linkcheckerrc.5:60 -msgid "" -"When checking absolute URLs inside local files, the given root directory is " -"used as base URL." -msgstr "" -"Beim Prüfen von absoluten URLs in lokalen Dateien wird das angegebene " -"Wurzelverzeichnis als Basis-URL benutzt." +#: en/linkcheckerrc.5:23 +msgid "" +"When checking absolute URLs inside local files, the given root directory is " +"used as base URL." +msgstr "" +"Beim Prüfen von absoluten URLs in lokalen Dateien wird das angegebene " +"Wurzelverzeichnis als Basis-URL benutzt." #. type: Plain text -#: en/linkcheckerrc.5:64 +#: en/linkcheckerrc.5:27 msgid "" "Note that the given directory must have URL syntax, so it must use a slash " "to join directories instead of a backslash. And the given directory must " @@ -2014,153 +1832,103 @@ msgstr "" "Aneinanderfügen von Verzeichnissen benutzen. Und das angegebene Verzeichnis " "muss mit einem Schrägstrich enden." +# type: Plain text +#. type: Plain text +#: en/linkcheckerrc.5:29 en/linkcheckerrc.5:77 en/linkcheckerrc.5:86 +#: en/linkcheckerrc.5:94 en/linkcheckerrc.5:112 en/linkcheckerrc.5:118 +#: en/linkcheckerrc.5:229 en/linkcheckerrc.5:246 +msgid "Command line option: none" +msgstr "Kommandozeilenoption: keine" + # type: TP #. type: TP -#: en/linkcheckerrc.5:66 +#: en/linkcheckerrc.5:29 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. 
type: Plain text -#: en/linkcheckerrc.5:73 +#: en/linkcheckerrc.5:36 msgid "Command line option: B<--nntp-server>" msgstr "Kommandozeilenoption: B<--nntp-server>" # type: TP #. type: TP -#: en/linkcheckerrc.5:73 -#, no-wrap -msgid "BI" -msgstr "BI" - -# type: Plain text -#. type: Plain text -#: en/linkcheckerrc.5:77 -msgid "" -"Pause the given number of seconds between two subsequent connection requests " -"to the same host." -msgstr "" -"Pausiere die angegebene Anzahl von Sekunden zwischen zwei aufeinander " -"folgenden Verbindungen zum demselben Rechner." - -# type: Plain text -#. type: Plain text -#: en/linkcheckerrc.5:79 -msgid "Command line option: B<--pause>" -msgstr "Kommandozeilenoption: B<--pause>" - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:79 +#: en/linkcheckerrc.5:36 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:86 +#: en/linkcheckerrc.5:43 msgid "Command line option: B<--recursion-level>" msgstr "Kommandozeilenoption: B<--recursion-level>" # type: TP #. type: TP -#: en/linkcheckerrc.5:86 -#, no-wrap -msgid "B[B<0>|B<1>]" -msgstr "B[B<0>|B<1>]" - -# type: Plain text -#. type: Plain text -#: en/linkcheckerrc.5:91 -msgid "Command line option: B<--scan-virus>" -msgstr "Kommandozeilenoption: B<--scan-virus>" - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:91 +#: en/linkcheckerrc.5:43 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:97 +#: en/linkcheckerrc.5:49 msgid "Command line option: B<--threads>" msgstr "Kommandozeilenoption: B<--threads>" # type: TP #. type: TP -#: en/linkcheckerrc.5:97 +#: en/linkcheckerrc.5:49 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:103 +#: en/linkcheckerrc.5:55 en/linkcheckerrc.5:62 msgid "Command line option: B<--timeout>" msgstr "Kommandozeilenoption: B<--timeout>" # type: TP #. 
type: TP -#: en/linkcheckerrc.5:103 -#, no-wrap -msgid "BI" -msgstr "BI" - -# type: Plain text -#. type: Plain text -#: en/linkcheckerrc.5:110 -msgid "Command line option: B<--user-agent>" -msgstr "Kommandozeilenoption: B<--user-agent>" - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:110 -#, no-wrap -msgid "B=I" -msgstr "B=I" +#: en/linkcheckerrc.5:55 +#, fuzzy, no-wrap +#| msgid "BI" +msgid "BI" +msgstr "BI" -# type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:118 +#: en/linkcheckerrc.5:60 msgid "" -"Use this to check for pages that contain some form of error, for example " -"\"This page has moved\" or \"Oracle Application Server error\"." +"Time to wait for checks to finish after the user aborts the first time (with " +"Ctrl-C or the abort button). The default abort timeout is 300 seconds." msgstr "" -"Benutzen Sie dies, um nach Seiten zu suchen, welche bestimmte Fehler " -"enthalten, zum Beispiel \"Diese Seite wurde entfernt\" oder \"Oracle " -"Applikationsfehler\"." - -# type: Plain text -#. type: Plain text -#: en/linkcheckerrc.5:120 -msgid "Command line option: B<--warning-regex>" -msgstr "Kommandozeilenoption: B<--warning-regex>" # type: TP #. type: TP -#: en/linkcheckerrc.5:120 +#: en/linkcheckerrc.5:62 #, no-wrap -msgid "BI" -msgstr "BI" +msgid "BI" +msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:126 -msgid "Command line option: B<--warning-size-bytes>" -msgstr "Kommandozeilenoption: B<--warning-size-bytes>" +#: en/linkcheckerrc.5:69 +msgid "Command line option: B<--user-agent>" +msgstr "Kommandozeilenoption: B<--user-agent>" # type: TP #. type: TP -#: en/linkcheckerrc.5:126 +#: en/linkcheckerrc.5:69 #, no-wrap msgid "B[B<0>|B<1>|I]" msgstr "B[B<0>|B<1>|I]" #. type: Plain text -#: en/linkcheckerrc.5:132 +#: en/linkcheckerrc.5:75 msgid "" "If set to zero disables SSL certificate checking. 
If set to one (the " "default) enables SSL certificate checking with the provided CA certificate " @@ -2173,36 +1941,13 @@ msgstr "" # type: TP #. type: TP -#: en/linkcheckerrc.5:134 -#, no-wrap -msgid "BI" -msgstr "BI" - -#. type: Plain text -#: en/linkcheckerrc.5:140 -msgid "" -"Check that SSL certificates are at least the given number of days valid. " -"The number must not be negative. If the number of days is zero a warning is " -"printed only for certificates that are already expired." -msgstr "" -"Prüfe ob SSL-Zertifikate mindestens die angegebene Anzahl an Tagen gültig " -"sind. Die Anzahl darf nicht negativ sein. Falls die Anzahl Null ist wird " -"eine Warnung nur für Zertifikate ausgegeben, die schon abgelaufen sind." - -#. type: Plain text -#: en/linkcheckerrc.5:142 -msgid "The default number of days is 14." -msgstr "The Standardanzahl an Tagen ist 14." - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:144 +#: en/linkcheckerrc.5:77 #, no-wrap msgid "BI" msgstr "BI" #. type: Plain text -#: en/linkcheckerrc.5:149 +#: en/linkcheckerrc.5:82 msgid "" "Stop checking new URLs after the given number of seconds. Same as if the " "user stops (by hitting Ctrl-C or clicking the abort buttin in the GUI) " @@ -2214,19 +1959,19 @@ msgstr "" "GUI)." #. type: Plain text -#: en/linkcheckerrc.5:151 +#: en/linkcheckerrc.5:84 msgid "The default is not to stop until all URLs are checked." msgstr "Standard ist nicht zu stoppen bis alle URLs geprüft sind." # type: TP #. type: TP -#: en/linkcheckerrc.5:153 +#: en/linkcheckerrc.5:86 #, no-wrap msgid "BI" msgstr "BI" #. type: Plain text -#: en/linkcheckerrc.5:157 +#: en/linkcheckerrc.5:90 msgid "" "Maximum number of URLs to check. New URLs will not be queued after the given " "number of URLs is checked." @@ -2235,73 +1980,53 @@ msgstr "" "angenommen nachdem die angegebene Anzahl von URLs geprüft wurde." #. type: Plain text -#: en/linkcheckerrc.5:159 +#: en/linkcheckerrc.5:92 msgid "The default is to queue and check all URLs." 
msgstr "Standard ist alle URLs anzunehmen und zu prüfen." # type: TP #. type: TP -#: en/linkcheckerrc.5:161 -#, no-wrap -msgid "BI" -msgstr "BI" - -#. type: Plain text -#: en/linkcheckerrc.5:164 -msgid "Maximum number of connections to HTTP servers." -msgstr "Maximale Anzahl an HTTP-Verbindungen." - -#. type: Plain text -#: en/linkcheckerrc.5:166 en/linkcheckerrc.5:173 -msgid "The default is 10." -msgstr "Der Standard ist 10." - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:168 -#, no-wrap -msgid "BI" -msgstr "BI" +#: en/linkcheckerrc.5:94 +#, fuzzy, no-wrap +#| msgid "BI" +msgid "BI" +msgstr "BI" #. type: Plain text -#: en/linkcheckerrc.5:171 -msgid "Maximum number of connections to HTTPS servers." -msgstr "Maximale Anzahl an HTTPS-Verbindungen." +#: en/linkcheckerrc.5:97 +msgid "Limit the maximum number of requests per second to one host." +msgstr "" # type: TP #. type: TP -#: en/linkcheckerrc.5:175 -#, no-wrap -msgid "BI" -msgstr "BI" - -#. type: Plain text -#: en/linkcheckerrc.5:178 -msgid "Maximum number of connections to FTP servers." -msgstr "Maximale Anzahl an FTP-Verbindungen." +#: en/linkcheckerrc.5:97 +#, fuzzy, no-wrap +#| msgid "BI[B<,>I...]" +msgid "BI[B<,>I...]" +msgstr "BI[B<,>I...]" #. type: Plain text -#: en/linkcheckerrc.5:180 -msgid "The default is 2." -msgstr "Der Standard ist 2." +#: en/linkcheckerrc.5:100 +msgid "Allowed URL schemes as comma-separated list." +msgstr "" # type: SS #. type: SS -#: en/linkcheckerrc.5:182 +#: en/linkcheckerrc.5:100 #, no-wrap msgid "[filtering]" msgstr "[filtering]" # type: TP #. type: TP -#: en/linkcheckerrc.5:183 +#: en/linkcheckerrc.5:101 #, no-wrap msgid "BI (MULTILINE)" msgstr "BI (MULTILINE)" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:186 +#: en/linkcheckerrc.5:104 msgid "Only check syntax of URLs matching the given regular expressions." msgstr "" "Prüfe lediglich die Syntax von URLs, welche dem angegebenen regulären " @@ -2309,20 +2034,20 @@ msgstr "" # type: Plain text #. 
type: Plain text -#: en/linkcheckerrc.5:188 +#: en/linkcheckerrc.5:106 msgid "Command line option: B<--ignore-url>" msgstr "Kommandozeilenoption: B<--ignore-url>" # type: TP #. type: TP -#: en/linkcheckerrc.5:188 +#: en/linkcheckerrc.5:106 #, no-wrap msgid "BI[B<,>I...]" msgstr "BI[B<,>I...]" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:192 +#: en/linkcheckerrc.5:110 msgid "" "Ignore the comma-separated list of warnings. See B for the list of " "supported warnings." @@ -2332,14 +2057,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkcheckerrc.5:194 +#: en/linkcheckerrc.5:112 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:198 +#: en/linkcheckerrc.5:116 msgid "" "Regular expression to add more URLs recognized as internal links. Default " "is that URLs given on the command line are internal." @@ -2349,14 +2074,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkcheckerrc.5:200 +#: en/linkcheckerrc.5:118 #, no-wrap msgid "BI (MULTILINE)" msgstr "BI (MULTILINE)" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:204 +#: en/linkcheckerrc.5:122 msgid "" "Check but do not recurse into URLs matching the given regular expressions." msgstr "" @@ -2365,27 +2090,48 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:206 +#: en/linkcheckerrc.5:124 msgid "Command line option: B<--no-follow-url>" msgstr "Kommandozeilenoption: B<--no-follow-url>" +# type: TP +#. type: TP +#: en/linkcheckerrc.5:124 +#, fuzzy, no-wrap +#| msgid "B[B<0>|B<1>]" +msgid "B[B<0>|B<1>]" +msgstr "B[B<0>|B<1>]" + +#. type: Plain text +#: en/linkcheckerrc.5:127 +msgid "Check external links. Default is to check internal links only." +msgstr "" + +# type: Plain text +#. type: Plain text +#: en/linkcheckerrc.5:129 +#, fuzzy +#| msgid "Command line option: B<--check-html>" +msgid "Command line option: B<--checkextern>" +msgstr "Kommandozeilenoption: B<--check-html>" + # type: SS #. 
type: SS -#: en/linkcheckerrc.5:206 +#: en/linkcheckerrc.5:129 #, no-wrap msgid "[authentication]" msgstr "[authentication]" # type: TP #. type: TP -#: en/linkcheckerrc.5:207 +#: en/linkcheckerrc.5:130 #, no-wrap msgid "BI I [I] (MULTILINE)" msgstr "BI I [I] (MULTILINE)" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:213 +#: en/linkcheckerrc.5:136 msgid "" "Provide different user/password pairs for different link types. Entries are " "a triple (URL regex, username, password) or a tuple (URL regex, username), " @@ -2397,7 +2143,7 @@ msgstr "" "Benutzername), wobei die Einträge durch Leerzeichen getrennt sind." #. type: Plain text -#: en/linkcheckerrc.5:216 +#: en/linkcheckerrc.5:139 msgid "" "The password is optional and if missing it has to be entered at the " "commandline." @@ -2407,7 +2153,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:222 +#: en/linkcheckerrc.5:145 msgid "" "If the regular expression matches the checked URL, the given user/password " "pair is used for authentication. The commandline options B<-u> and B<-p> " @@ -2423,23 +2169,27 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:224 +#: en/linkcheckerrc.5:147 msgid "Command line option: B<-u>, B<-p>" msgstr "Kommandozeilenoption: B<-u>, B<-p>" # type: TP #. type: TP -#: en/linkcheckerrc.5:224 +#: en/linkcheckerrc.5:147 #, no-wrap msgid "BI" msgstr "BI" #. type: Plain text -#: en/linkcheckerrc.5:229 +#: en/linkcheckerrc.5:151 +#, fuzzy +#| msgid "" +#| "A login URL to be visited before checking. Also needs authentication data " +#| "set for it, and implies using cookies because most logins use cookies " +#| "nowadays." msgid "" "A login URL to be visited before checking. Also needs authentication data " -"set for it, and implies using cookies because most logins use cookies " -"nowadays." +"set for it." msgstr "" "Eine Anmelde-URL, die vor der Prüfung besucht wird. 
Benötigt einen Eintrag " "zur Authentifizierung und impliziert die Benutzung von Cookies, weil die " @@ -2447,37 +2197,37 @@ msgstr "" # type: TP #. type: TP -#: en/linkcheckerrc.5:229 +#: en/linkcheckerrc.5:151 #, no-wrap msgid "BI" msgstr "BI" #. type: Plain text -#: en/linkcheckerrc.5:232 +#: en/linkcheckerrc.5:154 msgid "The name of the user CGI field. Default name is B." msgstr "Der Name für das Benutzer CGI-Feld. Der Standardname ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:232 +#: en/linkcheckerrc.5:154 #, no-wrap msgid "BI" msgstr "BI" #. type: Plain text -#: en/linkcheckerrc.5:235 +#: en/linkcheckerrc.5:157 msgid "The name of the password CGI field. Default name is B." msgstr "Der Name für das Passwort CGI-Feld. Der Standardname ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:235 +#: en/linkcheckerrc.5:157 #, no-wrap msgid "BIB<:>I (MULTILINE)" msgstr "BIB<:>I (MULTILINE)" #. type: Plain text -#: en/linkcheckerrc.5:239 +#: en/linkcheckerrc.5:161 msgid "" "Optionally any additional CGI name/value pairs. Note that the default values " "are submitted automatically." @@ -2487,44 +2237,21 @@ msgstr "" # type: SS #. type: SS -#: en/linkcheckerrc.5:239 +#: en/linkcheckerrc.5:161 #, no-wrap msgid "[output]" msgstr "[output]" # type: TP #. type: TP -#: en/linkcheckerrc.5:240 -#, no-wrap -msgid "B[B<0>|B<1>]" -msgstr "B[B<0>|B<1>]" - -# type: Plain text -#. type: Plain text -#: en/linkcheckerrc.5:244 -msgid "" -"If set log all checked URLs, even duplicates. Default is to log duplicate " -"URLs only once." -msgstr "" -"Falls gesetzt, gebe alle geprüften URLs aus, sogar Duplikate. Standard ist " -"es, URLs nur einmal auszugeben." - -# type: Plain text -#. type: Plain text -#: en/linkcheckerrc.5:246 -msgid "Command line option: B<--complete>" -msgstr "Kommandozeilenoption: B<--complete>" - -# type: TP -#. type: TP -#: en/linkcheckerrc.5:246 +#: en/linkcheckerrc.5:162 #, no-wrap msgid "BI[B<,>I...]" msgstr "BI[B<,>I...]" # type: Plain text #. 
type: Plain text -#: en/linkcheckerrc.5:252 +#: en/linkcheckerrc.5:168 msgid "" "Print debugging output for the given loggers. Available loggers are " "B, B, B, B, B, B and B. " @@ -2536,20 +2263,20 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:254 +#: en/linkcheckerrc.5:170 msgid "Command line option: B<--debug>" msgstr "Kommandozeilenoption: B<--debug>" # type: TP #. type: TP -#: en/linkcheckerrc.5:254 +#: en/linkcheckerrc.5:170 #, no-wrap msgid "BI[B<,>I...]" msgstr "BI[B<,>I...]" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:259 +#: en/linkcheckerrc.5:175 msgid "" "Output to a files BI, B<$HOME/.linkchecker/" "blacklist> for B output." @@ -2559,7 +2286,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:265 +#: en/linkcheckerrc.5:181 msgid "" "Valid file output types are B, B, B, B, B, " "B, B, B or B Default is no file output. The " @@ -2573,20 +2300,20 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:267 +#: en/linkcheckerrc.5:183 msgid "Command line option: B<--file-output>" msgstr "Kommandozeilenoption: B<--file-output>" # type: TP #. type: TP -#: en/linkcheckerrc.5:267 +#: en/linkcheckerrc.5:183 #, no-wrap msgid "BI[BI]" msgstr "BI[BI]" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:273 +#: en/linkcheckerrc.5:189 msgid "" "Specify output type as B, B, B, B, B, B, " "B, B or B. Default type is B. The various " @@ -2598,7 +2325,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:277 +#: en/linkcheckerrc.5:193 msgid "" "The I specifies the output encoding, the default is that of your " "locale. Valid encodings are listed at B" msgstr "Kommandozeilenoption: B<--output>" # type: TP #. type: TP -#: en/linkcheckerrc.5:279 +#: en/linkcheckerrc.5:195 #, no-wrap msgid "B[B<0>|B<1>]" msgstr "B[B<0>|B<1>]" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:283 +#: en/linkcheckerrc.5:199 msgid "" "If set, operate quiet. An alias for B. 
This is only useful with " "B." @@ -2633,39 +2360,39 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:285 en/linkcheckerrc.5:295 +#: en/linkcheckerrc.5:201 en/linkcheckerrc.5:211 msgid "Command line option: B<--verbose>" msgstr "Kommandozeilenoption: B<--verbose>" # type: TP #. type: TP -#: en/linkcheckerrc.5:285 +#: en/linkcheckerrc.5:201 #, no-wrap msgid "B[B<0>|B<1>]" msgstr "B[B<0>|B<1>]" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:288 +#: en/linkcheckerrc.5:204 msgid "Control printing check status messages. Default is 1." msgstr "Kontrolle der Statusmeldungen. Standard ist 1." # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:290 +#: en/linkcheckerrc.5:206 msgid "Command line option: B<--no-status>" msgstr "Kommandozeilenoption: B<--no-status>" # type: TP #. type: TP -#: en/linkcheckerrc.5:290 +#: en/linkcheckerrc.5:206 #, no-wrap msgid "B[B<0>|B<1>]" msgstr "B[B<0>|B<1>]" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:293 +#: en/linkcheckerrc.5:209 msgid "" "If set log all checked URLs once. Default is to log only errors and warnings." msgstr "" @@ -2674,14 +2401,14 @@ msgstr "" # type: TP #. type: TP -#: en/linkcheckerrc.5:295 +#: en/linkcheckerrc.5:211 #, no-wrap msgid "B[B<0>|B<1>]" msgstr "B[B<0>|B<1>]" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:298 +#: en/linkcheckerrc.5:214 msgid "If set log warnings. Default is to log warnings." msgstr "" "Falls gesetzt, gebe keine Warnungen aus. Standard ist die Ausgabe von " @@ -2689,30 +2416,30 @@ msgstr "" # type: TP #. type: Plain text -#: en/linkcheckerrc.5:300 +#: en/linkcheckerrc.5:216 msgid "Command line option: B<--no-warnings>" msgstr "Kommandozeilenoption: B<--no-warnings>" # type: TP #. type: SS -#: en/linkcheckerrc.5:300 +#: en/linkcheckerrc.5:216 #, no-wrap msgid "[text]" msgstr "[text]" # type: TP #. 
type: TP -#: en/linkcheckerrc.5:301 en/linkcheckerrc.5:364 en/linkcheckerrc.5:374 -#: en/linkcheckerrc.5:384 en/linkcheckerrc.5:400 en/linkcheckerrc.5:416 -#: en/linkcheckerrc.5:447 en/linkcheckerrc.5:454 en/linkcheckerrc.5:464 -#: en/linkcheckerrc.5:474 +#: en/linkcheckerrc.5:217 en/linkcheckerrc.5:280 en/linkcheckerrc.5:290 +#: en/linkcheckerrc.5:300 en/linkcheckerrc.5:316 en/linkcheckerrc.5:332 +#: en/linkcheckerrc.5:363 en/linkcheckerrc.5:370 en/linkcheckerrc.5:380 +#: en/linkcheckerrc.5:390 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:305 +#: en/linkcheckerrc.5:221 msgid "" "Specify output filename for text logging. Default filename is B." @@ -2722,22 +2449,22 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:307 +#: en/linkcheckerrc.5:223 msgid "Command line option: B<--file-output=>" msgstr "Kommandozeilenoption: B<--file-output=>" # type: TP #. type: TP -#: en/linkcheckerrc.5:307 en/linkcheckerrc.5:367 en/linkcheckerrc.5:377 -#: en/linkcheckerrc.5:387 en/linkcheckerrc.5:403 en/linkcheckerrc.5:419 -#: en/linkcheckerrc.5:457 en/linkcheckerrc.5:467 en/linkcheckerrc.5:477 +#: en/linkcheckerrc.5:223 en/linkcheckerrc.5:283 en/linkcheckerrc.5:293 +#: en/linkcheckerrc.5:303 en/linkcheckerrc.5:319 en/linkcheckerrc.5:335 +#: en/linkcheckerrc.5:373 en/linkcheckerrc.5:383 en/linkcheckerrc.5:393 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:311 +#: en/linkcheckerrc.5:227 msgid "" "Comma-separated list of parts that have to be logged. See B " "below." @@ -2747,17 +2474,17 @@ msgstr "" # type: TP #. 
type: TP -#: en/linkcheckerrc.5:313 en/linkcheckerrc.5:370 en/linkcheckerrc.5:380 -#: en/linkcheckerrc.5:390 en/linkcheckerrc.5:406 en/linkcheckerrc.5:422 -#: en/linkcheckerrc.5:450 en/linkcheckerrc.5:460 en/linkcheckerrc.5:470 -#: en/linkcheckerrc.5:480 +#: en/linkcheckerrc.5:229 en/linkcheckerrc.5:286 en/linkcheckerrc.5:296 +#: en/linkcheckerrc.5:306 en/linkcheckerrc.5:322 en/linkcheckerrc.5:338 +#: en/linkcheckerrc.5:366 en/linkcheckerrc.5:376 en/linkcheckerrc.5:386 +#: en/linkcheckerrc.5:396 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:317 +#: en/linkcheckerrc.5:233 msgid "" "Valid encodings are listed in B." @@ -2766,20 +2493,20 @@ msgstr "" "library/codecs.html#standard-encodings>." #. type: Plain text -#: en/linkcheckerrc.5:319 +#: en/linkcheckerrc.5:235 msgid "Default encoding is B." msgstr "Die Standardenkodierung ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:319 +#: en/linkcheckerrc.5:235 #, no-wrap msgid "I" msgstr "I" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:328 +#: en/linkcheckerrc.5:244 msgid "" "Color settings for the various log parts, syntax is I or IB<;" ">I. The I can be B, B, B, B. The " @@ -2795,369 +2522,369 @@ msgstr "" # type: TP #. type: TP -#: en/linkcheckerrc.5:330 +#: en/linkcheckerrc.5:246 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:333 +#: en/linkcheckerrc.5:249 msgid "Set parent color. Default is B." msgstr "Setze Farbe des Vaters. Standard ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:333 +#: en/linkcheckerrc.5:249 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:336 +#: en/linkcheckerrc.5:252 msgid "Set URL color. Default is B." msgstr "Setze URL Farbe. Standard ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:336 +#: en/linkcheckerrc.5:252 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. 
type: Plain text -#: en/linkcheckerrc.5:339 +#: en/linkcheckerrc.5:255 msgid "Set name color. Default is B." msgstr "Setze Namensfarbe. Standard ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:339 +#: en/linkcheckerrc.5:255 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:342 +#: en/linkcheckerrc.5:258 msgid "Set real URL color. Default is B." msgstr "Setze Farbe für tatsächliche URL. Default ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:342 +#: en/linkcheckerrc.5:258 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:345 +#: en/linkcheckerrc.5:261 msgid "Set base URL color. Default is B." msgstr "Setzt Basisurl Farbe. Standard ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:345 +#: en/linkcheckerrc.5:261 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:348 +#: en/linkcheckerrc.5:264 msgid "Set valid color. Default is B." msgstr "Setze gültige Farbe. Standard ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:348 +#: en/linkcheckerrc.5:264 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:351 +#: en/linkcheckerrc.5:267 msgid "Set invalid color. Default is B." msgstr "Setze ungültige Farbe. Standard ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:351 +#: en/linkcheckerrc.5:267 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:354 +#: en/linkcheckerrc.5:270 msgid "Set info color. Default is B." msgstr "Setzt Informationsfarbe. Standard ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:354 +#: en/linkcheckerrc.5:270 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:357 +#: en/linkcheckerrc.5:273 msgid "Set warning color. Default is B." msgstr "Setze Warnfarbe. Standard ist B." # type: TP #. 
type: TP -#: en/linkcheckerrc.5:357 +#: en/linkcheckerrc.5:273 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:360 +#: en/linkcheckerrc.5:276 msgid "Set download time color. Default is B." msgstr "Setze Downloadzeitfarbe. Standard ist B." # type: TP #. type: TP -#: en/linkcheckerrc.5:360 +#: en/linkcheckerrc.5:276 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:363 +#: en/linkcheckerrc.5:279 msgid "Set reset color. Default is B." msgstr "Setze Reset Farbe. Standard ist B." # type: SS #. type: SS -#: en/linkcheckerrc.5:363 +#: en/linkcheckerrc.5:279 #, no-wrap msgid "[gml]" msgstr "[gml]" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:367 en/linkcheckerrc.5:370 en/linkcheckerrc.5:373 -#: en/linkcheckerrc.5:377 en/linkcheckerrc.5:380 en/linkcheckerrc.5:383 -#: en/linkcheckerrc.5:387 en/linkcheckerrc.5:390 en/linkcheckerrc.5:393 -#: en/linkcheckerrc.5:403 en/linkcheckerrc.5:406 en/linkcheckerrc.5:409 -#: en/linkcheckerrc.5:419 en/linkcheckerrc.5:422 en/linkcheckerrc.5:425 -#: en/linkcheckerrc.5:450 en/linkcheckerrc.5:453 en/linkcheckerrc.5:457 -#: en/linkcheckerrc.5:460 en/linkcheckerrc.5:463 en/linkcheckerrc.5:467 -#: en/linkcheckerrc.5:470 en/linkcheckerrc.5:473 en/linkcheckerrc.5:477 -#: en/linkcheckerrc.5:480 en/linkcheckerrc.5:483 +#: en/linkcheckerrc.5:283 en/linkcheckerrc.5:286 en/linkcheckerrc.5:289 +#: en/linkcheckerrc.5:293 en/linkcheckerrc.5:296 en/linkcheckerrc.5:299 +#: en/linkcheckerrc.5:303 en/linkcheckerrc.5:306 en/linkcheckerrc.5:309 +#: en/linkcheckerrc.5:319 en/linkcheckerrc.5:322 en/linkcheckerrc.5:325 +#: en/linkcheckerrc.5:335 en/linkcheckerrc.5:338 en/linkcheckerrc.5:341 +#: en/linkcheckerrc.5:366 en/linkcheckerrc.5:369 en/linkcheckerrc.5:373 +#: en/linkcheckerrc.5:376 en/linkcheckerrc.5:379 en/linkcheckerrc.5:383 +#: en/linkcheckerrc.5:386 en/linkcheckerrc.5:389 en/linkcheckerrc.5:393 +#: en/linkcheckerrc.5:396 en/linkcheckerrc.5:399 
msgid "See [text] section above." msgstr "Siehe [text] Sektion weiter oben." # type: SS #. type: SS -#: en/linkcheckerrc.5:373 +#: en/linkcheckerrc.5:289 #, no-wrap msgid "[dot]" msgstr "[dot]" # type: SS #. type: SS -#: en/linkcheckerrc.5:383 +#: en/linkcheckerrc.5:299 #, no-wrap msgid "[csv]" msgstr "[csv]" # type: TP #. type: TP -#: en/linkcheckerrc.5:393 en/linkcheckerrc.5:412 +#: en/linkcheckerrc.5:309 en/linkcheckerrc.5:328 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:396 +#: en/linkcheckerrc.5:312 msgid "Set CSV separator. Default is a comma (B<,>)." msgstr "Das CSV Trennzeichen. Standard ist Komma (B<,>)." # type: TP #. type: TP -#: en/linkcheckerrc.5:396 +#: en/linkcheckerrc.5:312 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:399 +#: en/linkcheckerrc.5:315 msgid "Set CSV quote character. Default is a double quote (B<\">)." msgstr "" "Setze CSV Quotezeichen. Standard ist das doppelte Anführungszeichen (B<\">)." # type: SS #. type: SS -#: en/linkcheckerrc.5:399 +#: en/linkcheckerrc.5:315 #, no-wrap msgid "[sql]" msgstr "[sql]" # type: TP #. type: TP -#: en/linkcheckerrc.5:409 +#: en/linkcheckerrc.5:325 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:412 +#: en/linkcheckerrc.5:328 msgid "Set database name to store into. Default is B." msgstr "Setze Datenbankname zum Speichern. Standard ist B." # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:415 +#: en/linkcheckerrc.5:331 msgid "Set SQL command separator character. Default is a semicolor (B<;>)." msgstr "Setze SQL Kommandotrennzeichen. Standard ist ein Strichpunkt (B<;>)." # type: TP #. type: SS -#: en/linkcheckerrc.5:415 +#: en/linkcheckerrc.5:331 #, no-wrap msgid "[html]" msgstr "[html]" # type: TP #. type: TP -#: en/linkcheckerrc.5:425 +#: en/linkcheckerrc.5:341 #, no-wrap msgid "BI" msgstr "BI" # type: Plain text #. 
type: Plain text -#: en/linkcheckerrc.5:428 +#: en/linkcheckerrc.5:344 msgid "Set HTML background color. Default is B<#fff7e5>." msgstr "Setze HTML Hintergrundfarbe. Standard ist B<#fff7e5>." # type: TP #. type: TP -#: en/linkcheckerrc.5:428 +#: en/linkcheckerrc.5:344 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:431 +#: en/linkcheckerrc.5:347 msgid "Set HTML URL color. Default is B<#dcd5cf>." msgstr "Setze HTML URL Farbe. Standard ist B<#dcd5cf>." # type: TP #. type: TP -#: en/linkcheckerrc.5:431 +#: en/linkcheckerrc.5:347 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:434 +#: en/linkcheckerrc.5:350 msgid "Set HTML border color. Default is B<#000000>." msgstr "Setze HTML Rahmenfarbe. Standard ist B<#000000>." # type: TP #. type: TP -#: en/linkcheckerrc.5:434 +#: en/linkcheckerrc.5:350 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:437 +#: en/linkcheckerrc.5:353 msgid "Set HTML link color. Default is B<#191c83>." msgstr "Setze HTML Verknüpfungsfarbe. Standard ist B<#191c83>." # type: TP #. type: TP -#: en/linkcheckerrc.5:437 +#: en/linkcheckerrc.5:353 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:440 +#: en/linkcheckerrc.5:356 msgid "Set HTML warning color. Default is B<#e0954e>." msgstr "Setze HTML Warnfarbe. Standard ist B<#e0954e>." # type: TP #. type: TP -#: en/linkcheckerrc.5:440 +#: en/linkcheckerrc.5:356 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:443 +#: en/linkcheckerrc.5:359 msgid "Set HTML error color. Default is B<#db4930>." msgstr "Setze HTML Fehlerfarbe. Standard ist B<#db4930>." # type: TP #. type: TP -#: en/linkcheckerrc.5:443 +#: en/linkcheckerrc.5:359 #, no-wrap msgid "B" msgstr "B" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:446 +#: en/linkcheckerrc.5:362 msgid "Set HTML valid color. Default is B<#3ba557>." 
msgstr "Setze HTML Gültigkeitsfarbe. Standard ist B<#3ba557>." # type: TP #. type: SS -#: en/linkcheckerrc.5:446 +#: en/linkcheckerrc.5:362 #, no-wrap msgid "[blacklist]" msgstr "[blacklist]" # type: SS #. type: SS -#: en/linkcheckerrc.5:453 +#: en/linkcheckerrc.5:369 #, no-wrap msgid "[xml]" msgstr "[xml]" # type: TP #. type: SS -#: en/linkcheckerrc.5:463 +#: en/linkcheckerrc.5:379 #, no-wrap msgid "[gxml]" msgstr "[gxml]" #. type: SS -#: en/linkcheckerrc.5:473 +#: en/linkcheckerrc.5:389 #, no-wrap msgid "[sitemap]" msgstr "[sitemap]" # type: TP #. type: TP -#: en/linkcheckerrc.5:483 +#: en/linkcheckerrc.5:399 #, no-wrap msgid "BI" msgstr "BI" #. type: Plain text -#: en/linkcheckerrc.5:487 +#: en/linkcheckerrc.5:403 msgid "" "A number between 0.0 and 1.0 determining the priority. The default priority " "for the first URL is 1.0, for all child URLs 0.5." @@ -3166,26 +2893,26 @@ msgstr "" "Standardpriorität für die erste URL ist 1.0, für alle Kind-URLs ist sie 0.5." #. type: TP -#: en/linkcheckerrc.5:487 +#: en/linkcheckerrc.5:403 #, no-wrap msgid "B[B|B|B|B|B|B|B]" msgstr "B[B|B|B|B|B|B|B]" #. type: Plain text -#: en/linkcheckerrc.5:490 +#: en/linkcheckerrc.5:406 msgid "The frequence pages are changing with." msgstr "Die Häufigkeit mit der Seiten sich ändern." # type: SH #. type: SH -#: en/linkcheckerrc.5:491 +#: en/linkcheckerrc.5:407 #, no-wrap msgid "LOGGER PARTS" msgstr "AUSGABE PARTS" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:507 +#: en/linkcheckerrc.5:423 #, no-wrap msgid "" " B (for all parts)\n" @@ -3222,14 +2949,14 @@ msgstr "" # type: SH #. type: SH -#: en/linkcheckerrc.5:507 +#: en/linkcheckerrc.5:423 #, no-wrap msgid "MULTILINE" msgstr "MULTILINE" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:511 +#: en/linkcheckerrc.5:427 msgid "" "Some option values can span multiple lines. Each line has to be indented for " "that to work. 
Lines starting with a hash (B<#>) will be ignored, though they " @@ -3241,7 +2968,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:517 +#: en/linkcheckerrc.5:433 #, no-wrap msgid "" " ignore=\n" @@ -3257,14 +2984,14 @@ msgstr "" # type: SH #. type: SH -#: en/linkcheckerrc.5:518 +#: en/linkcheckerrc.5:434 #, no-wrap msgid "EXAMPLE" msgstr "BEISPIEL" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:521 +#: en/linkcheckerrc.5:437 #, no-wrap msgid "" " [output]\n" @@ -3275,7 +3002,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:524 +#: en/linkcheckerrc.5:440 #, no-wrap msgid "" " [checking]\n" @@ -3286,7 +3013,7 @@ msgstr "" # type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:527 +#: en/linkcheckerrc.5:443 #, no-wrap msgid "" " [filtering]\n" @@ -3295,327 +3022,333 @@ msgstr "" " [filtering]\n" " ignorewarnings=http-moved-permanent\n" -#. type: SH -#: en/linkcheckerrc.5:528 -#, no-wrap -msgid "WARNINGS" -msgstr "WARNUNGEN" - #. type: Plain text -#: en/linkcheckerrc.5:531 +#: en/linkcheckerrc.5:448 msgid "" -"The following warnings are recognized in the 'ignorewarnings' config file " -"entry:" +"All plugins have a separate section. If the section appears in the " +"configuration file the plugin is enabled. Some plugins read extra options " +"in their section." msgstr "" -"Die folgenden Warnungen werden vom Konfigurationseintrag 'ignorewarnings' " -"erkannt:" -#. type: TP -#: en/linkcheckerrc.5:532 -#, no-wrap -msgid "B" -msgstr "B" +# type: SS +#. type: SS +#: en/linkcheckerrc.5:449 +#, fuzzy, no-wrap +#| msgid "[checking]" +msgid "[AnchorCheck]" +msgstr "[checking]" #. type: Plain text -#: en/linkcheckerrc.5:535 -msgid "The file: URL is missing a trailing slash." -msgstr "Der file: URL fehlt ein abschließender Schrägstrich." +#: en/linkcheckerrc.5:451 +msgid "Checks validity of HTML anchors." +msgstr "" -#. type: TP -#: en/linkcheckerrc.5:535 +#. 
type: SS +#: en/linkcheckerrc.5:452 #, no-wrap -msgid "B" -msgstr "B" +msgid "[LocationInfo]" +msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:538 -msgid "The file: path is not the same as the system specific path." -msgstr "Der file: Pfad ist nicht derselbe wie der Systempfad." +#: en/linkcheckerrc.5:455 +msgid "" +"Adds the country and if possible city name of the URL host as info. Needs " +"GeoIP or pygeoip and a local country or city lookup DB installed." +msgstr "" -#. type: TP -#: en/linkcheckerrc.5:538 +#. type: SS +#: en/linkcheckerrc.5:456 #, no-wrap -msgid "B" -msgstr "B" +msgid "[RegexCheck]" +msgstr "" +# type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:541 -msgid "The ftp: URL is missing a trailing slash." -msgstr "Der ftp: URL fehlt ein abschließender Schrägstrich." +#: en/linkcheckerrc.5:460 +#, fuzzy +#| msgid "" +#| "Define a regular expression which prints a warning if it matches any " +#| "content of the checked link. This applies only to valid pages, so we can " +#| "get their content." +msgid "" +"Define a regular expression which prints a warning if it matches any content " +"of the checked link. This applies only to valid pages, so we can get their " +"content." +msgstr "" +"Definieren Sie einen regulären Ausdruck der eine Warnung ausgibt falls er " +"auf den Inhalt einer geprüften URL zutrifft. Dies gilt nur für gültige " +"Seiten deren Inhalt wir bekommen können." -#. type: TP -#: en/linkcheckerrc.5:541 +# type: Plain text +#. type: Plain text +#: en/linkcheckerrc.5:464 +#, fuzzy +#| msgid "" +#| "Use this to check for pages that contain some form of error, for example " +#| "\"This page has moved\" or \"Oracle Application error\"." +msgid "" +"Use this to check for pages that contain some form of error message, for " +"example 'This page has moved' or 'Oracle Application error'." 
+msgstr "" +"Benutzen Sie dies, um nach Seiten zu suchen, welche bestimmte Fehler " +"enthalten, zum Beispiel \"Diese Seite ist umgezogen\" oder \"Oracle " +"Applikationsfehler\"." + +#. type: SS +#: en/linkcheckerrc.5:468 #, no-wrap -msgid "B" -msgstr "B" +msgid "[SslCertificateCheck]" +msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:544 -msgid "Unsupported HTTP authentication method." -msgstr "Nicht unterstützte HTTP Authentifizierungsmethode." +#: en/linkcheckerrc.5:472 +msgid "" +"Check SSL certificate expiration date. Only internal https: links will be " +"checked. A domain will only be checked once to avoid duplicate warnings." +msgstr "" # type: TP #. type: TP -#: en/linkcheckerrc.5:544 +#: en/linkcheckerrc.5:472 +#, fuzzy, no-wrap +#| msgid "BI" +msgid "BI" +msgstr "BI" + +#. type: Plain text +#: en/linkcheckerrc.5:475 +msgid "Configures the expiration warning time in days." +msgstr "" + +#. type: SS +#: en/linkcheckerrc.5:476 #, no-wrap -msgid "B" -msgstr "B" +msgid "[HtmlSyntaxCheck]" +msgstr "" +# type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:547 -msgid "An error occurred while storing a cookie." -msgstr "Ein Fehler trat auf während des Speicherns eines Cookies." +#: en/linkcheckerrc.5:479 +#, fuzzy +#| msgid "Check syntax of HTML URLs with the W3C online validator." +msgid "" +"Check the syntax of HTML pages with the online W3C HTML validator. See " +"http://validator.w3.org/docs/api.html." +msgstr "Prüfe Syntax von HTML URLs mit dem W3C Online Validator." -#. type: TP -#: en/linkcheckerrc.5:547 +#. type: SS +#: en/linkcheckerrc.5:480 #, no-wrap -msgid "B" -msgstr "B" +msgid "[CssSyntaxCheck]" +msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:550 -msgid "An error occurred while decompressing the URL content." -msgstr "Ein Fehler trat beim Dekomprimieren des URL Inhalts auf." +#: en/linkcheckerrc.5:483 +msgid "" +"Check the syntax of HTML pages with the online W3C CSS validator. 
See " +"http://jigsaw.w3.org/css-validator/manual.html#expert." +msgstr "" -#. type: TP -#: en/linkcheckerrc.5:550 +#. type: SS +#: en/linkcheckerrc.5:484 #, no-wrap -msgid "B" -msgstr "B" +msgid "[VirusCheck]" +msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:553 -msgid "The URL had no content." -msgstr "Die URL besitzt keinen Inhalt." +#: en/linkcheckerrc.5:487 +msgid "" +"Checks the page content for virus infections with clamav. A local clamav " +"daemon must be installed." +msgstr "" +# type: TP #. type: TP -#: en/linkcheckerrc.5:553 +#: en/linkcheckerrc.5:487 #, no-wrap -msgid "B" -msgstr "B" +msgid "BI" +msgstr "BI" +# type: Plain text #. type: Plain text -#: en/linkcheckerrc.5:556 -msgid "The URL has moved permanently." -msgstr "Die URL wurde dauerhaft verschoben." +#: en/linkcheckerrc.5:490 +msgid "Filename of B config file." +msgstr "Dateiname von B Konfigurationsdatei." -#. type: TP -#: en/linkcheckerrc.5:556 +#. type: SH +#: en/linkcheckerrc.5:491 #, no-wrap -msgid "B" -msgstr "B" +msgid "WARNINGS" +msgstr "WARNUNGEN" #. type: Plain text -#: en/linkcheckerrc.5:559 -msgid "The http: URL checking has been denied." -msgstr "Die http: URL-Überprüfung wurde verweigert." +#: en/linkcheckerrc.5:494 +msgid "" +"The following warnings are recognized in the 'ignorewarnings' config file " +"entry:" +msgstr "" +"Die folgenden Warnungen werden vom Konfigurationseintrag 'ignorewarnings' " +"erkannt:" #. type: TP -#: en/linkcheckerrc.5:559 +#: en/linkcheckerrc.5:495 #, no-wrap -msgid "B" -msgstr "B" +msgid "B" +msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:562 -msgid "The URL content is encoded with an unknown encoding." -msgstr "Der URL-Inhalt ist in einer unbekannten Kodierung verfasst." +#: en/linkcheckerrc.5:498 +msgid "The file: URL is missing a trailing slash." +msgstr "Der file: URL fehlt ein abschließender Schrägstrich." #. type: TP -#: en/linkcheckerrc.5:562 +#: en/linkcheckerrc.5:498 #, no-wrap -msgid "B" -msgstr "B" +msgid "B" +msgstr "B" #. 
type: Plain text -#: en/linkcheckerrc.5:565 -msgid "The URL has been redirected to an URL of a different type." -msgstr "Die URL wurde zu einem anderen URL-Typ umgeleitet." +#: en/linkcheckerrc.5:501 +msgid "The file: path is not the same as the system specific path." +msgstr "Der file: Pfad ist nicht derselbe wie der Systempfad." -# type: TP #. type: TP -#: en/linkcheckerrc.5:565 +#: en/linkcheckerrc.5:501 #, no-wrap -msgid "B" -msgstr "B" +msgid "B" +msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:568 -msgid "The SSL certificate is invalid or expired." -msgstr "Das SSL-Zertifikat ist ungültig oder abgelaufen." +#: en/linkcheckerrc.5:504 +msgid "The ftp: URL is missing a trailing slash." +msgstr "Der ftp: URL fehlt ein abschließender Schrägstrich." # type: TP #. type: TP -#: en/linkcheckerrc.5:568 +#: en/linkcheckerrc.5:504 #, no-wrap -msgid "B" -msgstr "B" +msgid "B" +msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:571 -msgid "The URL has been ignored." -msgstr "Die URL wurde ignoriert." +#: en/linkcheckerrc.5:507 +msgid "An error occurred while storing a cookie." +msgstr "Ein Fehler trat auf während des Speicherns eines Cookies." #. type: TP -#: en/linkcheckerrc.5:571 +#: en/linkcheckerrc.5:507 #, no-wrap -msgid "B" -msgstr "B" +msgid "B" +msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:574 -msgid "No connection to a MX host could be established." -msgstr "Es konnte keine Verbindung zu einem MX-Rechner hergestellt werden." +#: en/linkcheckerrc.5:510 +msgid "The URL had no content." +msgstr "Die URL besitzt keinen Inhalt." #. type: TP -#: en/linkcheckerrc.5:574 +#: en/linkcheckerrc.5:510 #, no-wrap msgid "B" msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:577 +#: en/linkcheckerrc.5:513 msgid "The mail MX host could not be found." msgstr "Der MX Mail-Rechner konnte nicht gefunden werden." #. type: TP -#: en/linkcheckerrc.5:577 -#, no-wrap -msgid "B" -msgstr "B" - -#. 
type: Plain text -#: en/linkcheckerrc.5:580 -msgid "The mailto: address could not be verified." -msgstr "Die mailto: Addresse konnte nicht überprüft werden." - -#. type: TP -#: en/linkcheckerrc.5:580 +#: en/linkcheckerrc.5:513 #, no-wrap msgid "B" msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:583 +#: en/linkcheckerrc.5:516 msgid "The NNTP newsgroup could not be found." msgstr "Die NNTP Nachrichtengruppe konnte nicht gefunden werden." # type: TP #. type: TP -#: en/linkcheckerrc.5:583 +#: en/linkcheckerrc.5:516 #, no-wrap msgid "B" msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:586 +#: en/linkcheckerrc.5:519 msgid "No NNTP server was found." msgstr "Es wurde kein NNTP Server gefunden." #. type: TP -#: en/linkcheckerrc.5:586 -#, no-wrap -msgid "B" -msgstr "B" - -#. type: Plain text -#: en/linkcheckerrc.5:589 -msgid "URL anchor was not found." -msgstr "URL Anker wurde nicht gefunden." - -#. type: TP -#: en/linkcheckerrc.5:589 -#, no-wrap -msgid "B" -msgstr "B" - -#. type: Plain text -#: en/linkcheckerrc.5:592 -msgid "The URL content size and download size are unequal." -msgstr "" -"Der URL Inhaltsgrößenangabe und die Download-Größe sind unterschiedlich." - -#. type: TP -#: en/linkcheckerrc.5:592 +#: en/linkcheckerrc.5:519 #, no-wrap msgid "B" msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:595 +#: en/linkcheckerrc.5:522 msgid "The URL content size is zero." msgstr "Der URL Inhaltsgrößenangabe ist Null." #. type: TP -#: en/linkcheckerrc.5:595 +#: en/linkcheckerrc.5:522 #, no-wrap msgid "B" msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:598 +#: en/linkcheckerrc.5:525 msgid "The URL content size is too large." msgstr "Der URL Inhalt ist zu groß." #. type: TP -#: en/linkcheckerrc.5:598 +#: en/linkcheckerrc.5:525 #, no-wrap msgid "B" msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:601 +#: en/linkcheckerrc.5:528 msgid "The effective URL is different from the original." msgstr "Die effektive URL unterscheidet sich vom Original." #. 
type: TP -#: en/linkcheckerrc.5:601 +#: en/linkcheckerrc.5:528 #, no-wrap msgid "B" msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:604 +#: en/linkcheckerrc.5:531 msgid "Could not get the content of the URL." msgstr "Konnte den Inhalt der URL nicht bekommen." #. type: TP -#: en/linkcheckerrc.5:604 +#: en/linkcheckerrc.5:531 #, no-wrap msgid "B" msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:607 +#: en/linkcheckerrc.5:534 msgid "The IP is obfuscated." msgstr "Die IP-Adresse ist verschleiert." #. type: TP -#: en/linkcheckerrc.5:607 -#, no-wrap -msgid "B" -msgstr "B" - -#. type: Plain text -#: en/linkcheckerrc.5:610 -msgid "The warning regular expression was found in the URL contents." -msgstr "" -"Der reguläre Ausdruck für Warnungen wurde in den URL Inhalten gefunden." - -#. type: TP -#: en/linkcheckerrc.5:610 +#: en/linkcheckerrc.5:534 #, no-wrap msgid "B" msgstr "B" #. type: Plain text -#: en/linkcheckerrc.5:613 +#: en/linkcheckerrc.5:537 msgid "The URL contains leading or trailing whitespace." msgstr "Die URL %(url)s enthält Leerzeichen am Anfang oder Ende." # type: TH #. type: Plain text -#: en/linkcheckerrc.5:616 +#: en/linkcheckerrc.5:540 msgid "linkchecker(1)" msgstr "linkchecker(1)" @@ -3702,6 +3435,351 @@ msgstr "B(1)" msgid "Copyright \\(co 2009-2014 Bastian Kleineidam" msgstr "Copyright \\(co 2000-2014 Bastian Kleineidam" +# type: TP +#~ msgid "B<--check-css>" +#~ msgstr "B<--check-css>" + +# type: Plain text +#~ msgid "Check syntax of CSS URLs with the W3C online validator." +#~ msgstr "Prüfe Syntax von CSS URLs mit dem W3C Online Validator." + +# type: TP +#~ msgid "B<--complete>" +#~ msgstr "B<--complete>" + +# type: Plain text +#~ msgid "" +#~ "Log all URLs, including duplicates. Default is to log duplicate URLs only " +#~ "once." +#~ msgstr "" +#~ "Gebe alle geprüften URLs aus. Standard ist es, doppelte URLs nur einmal " +#~ "auszugeben." 
+ +# type: TP +#~ msgid "B<--scan-virus>" +#~ msgstr "B<--scan-virus>" + +# type: Plain text +#~ msgid "Scan content of URLs for viruses with ClamAV." +#~ msgstr "Prüfe Inhalt von URLs auf Viren mit ClamAV." + +# type: TP +#~ msgid "B<--trace>" +#~ msgstr "B<--trace>" + +# type: Plain text +#~ msgid "Print tracing information." +#~ msgstr "Trace-Information ausgeben." + +# type: TP +#~ msgid "B<--warning-size-bytes=>I" +#~ msgstr "B<--warning-size-bytes=>I" + +# type: Plain text +#~ msgid "" +#~ "Print a warning if content size info is available and exceeds the given " +#~ "number of I." +#~ msgstr "" +#~ "Gebe eine Warnung aus, wenn die Inhaltsgröße bekannt ist und die " +#~ "angegebene Anzahl von Bytes übersteigt." + +# type: TP +#~ msgid "B<-a>, B<--anchors>" +#~ msgstr "B<-a>, B<--anchors>" + +# type: Plain text +#~ msgid "" +#~ "Check HTTP anchor references. Default is not to check anchors. This " +#~ "option enables logging of the warning B." +#~ msgstr "" +#~ "Prüfe HTTP Ankerverweise. Standard ist, Ankerverweise nicht zu prüfen. " +#~ "Diese Option aktiviert die Ausgabe der Warnung B." + +# type: TP +#~ msgid "B<-C>, B<--cookies>" +#~ msgstr "B<-C>, B<--cookies>" + +# type: Plain text +#~ msgid "" +#~ "Accept and send HTTP cookies according to RFC 2109. Only cookies which " +#~ "are sent back to the originating server are accepted. Sent and accepted " +#~ "cookies are provided as additional logging information." +#~ msgstr "" +#~ "Akzeptiere und sende HTTP Cookies nach der RFC 2109. Lediglich Cookies, " +#~ "die zum ursprünglichen Server zurückgesendet werden, werden akzeptiert. " +#~ "Gesendete und akzeptierte Cookies werden als zusätzliche Loginformation " +#~ "aufgeführt." + +# type: TP +#~ msgid "B<-P>I, B<--pause=>I" +#~ msgstr "B<-P>I, B<--pause=>I" + +# type: Plain text +#~ msgid "" +#~ "Pause the given number of seconds between two subsequent connection " +#~ "requests to the same host. Default is no pause between requests." 
+#~ msgstr "" +#~ "Pausiere die angegebene Anzahl von Sekunden zwischen zwei aufeinander " +#~ "folgenden Verbindungen zum demselben Rechner. Standard ist keine Pause " +#~ "zwischen Verbindungen." + +# type: TP +#~ msgid "B (optional)" +#~ msgstr "B (optional)" + +# type: Plain text +#~ msgid "" +#~ "Sets the scheme the cookies are valid for; default scheme is B." +#~ msgstr "" +#~ "Setzt das Schema für das die Cookies gültig sind; Standardschema ist " +#~ "B." + +# type: TP +#~ msgid "B (optional)" +#~ msgstr "B (optional)" + +# type: TP +#~ msgid "B[B<0>|B<1>]" +#~ msgstr "B[B<0>|B<1>]" + +# type: Plain text +#~ msgid "Command line option: B<--anchors>" +#~ msgstr "Kommandozeilenoption: B<--anchors>" + +# type: TP +#~ msgid "B[B<0>|B<1>]" +#~ msgstr "B[B<0>|B<1>]" + +# type: Plain text +#~ msgid "Command line option: B<--check-css>" +#~ msgstr "Kommandozeilenoption: B<--check-css>" + +# type: TP +#~ msgid "B[B<0>|B<1>]" +#~ msgstr "B[B<0>|B<1>]" + +# type: Plain text +#~ msgid "Command line option: B<--check-html>" +#~ msgstr "Kommandozeilenoption: B<--check-html>" + +# type: TP +#~ msgid "B[B<0>|B<1>]" +#~ msgstr "B[B<0>|B<1>]" + +#~ msgid "Accept and send HTTP cookies." +#~ msgstr "Akzeptiere und sende HTTP cookies." + +# type: Plain text +#~ msgid "Command line option: B<--cookies>" +#~ msgstr "Kommandozeilenoption: B<--cookies>" + +# type: TP +#~ msgid "B[B<0>|B<1>]" +#~ msgstr "B[B<0>|B<1>]" + +#~ msgid "" +#~ "When checking finishes, write a memory dump to a temporary file. The " +#~ "memory dump is written both when checking finishes normally and when " +#~ "checking gets canceled." +#~ msgstr "" +#~ "Schreibe einen Speicherabzug in eine temporäre Datei wenn die Prüfung " +#~ "endet. Der Speicherabzug wird sowohl beim normalen Beenden der Prüfung " +#~ "als auch wenn die Prüfung abgebrochen wird geschrieben." + +#~ msgid "" +#~ "The memory dump only works if the python-meliae package is installed. " +#~ "Otherwise a warning is printed to install it." 
+#~ msgstr "" +#~ "Der Speicherabzug funktioniert nur falls das Paket python-meliae " +#~ "installiert ist. Andernfalls wird eine Warnung angezeigt mit dem Hinweis " +#~ "dieses Paket zu installieren." + +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "Unsupported HTTP authentication method." +#~ msgstr "Nicht unterstützte HTTP Authentifizierungsmethode." + +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "An error occurred while decompressing the URL content." +#~ msgstr "Ein Fehler trat beim Dekomprimieren des URL Inhalts auf." + +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "The URL has moved permanently." +#~ msgstr "Die URL wurde dauerhaft verschoben." + +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "The http: URL checking has been denied." +#~ msgstr "Die http: URL-Überprüfung wurde verweigert." + +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "The URL content is encoded with an unknown encoding." +#~ msgstr "Der URL-Inhalt ist in einer unbekannten Kodierung verfasst." + +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "The URL has been redirected to an URL of a different type." +#~ msgstr "Die URL wurde zu einem anderen URL-Typ umgeleitet." + +# type: TP +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "The SSL certificate is invalid or expired." +#~ msgstr "Das SSL-Zertifikat ist ungültig oder abgelaufen." + +# type: TP +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "The URL has been ignored." +#~ msgstr "Die URL wurde ignoriert." + +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "No connection to a MX host could be established." +#~ msgstr "Es konnte keine Verbindung zu einem MX-Rechner hergestellt werden." + +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "The mailto: address could not be verified." +#~ msgstr "Die mailto: Addresse konnte nicht überprüft werden." + +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "URL anchor was not found." +#~ msgstr "URL Anker wurde nicht gefunden." + +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "The URL content size and download size are unequal." 
+#~ msgstr "" +#~ "Der URL Inhaltsgrößenangabe und die Download-Größe sind unterschiedlich." + +#~ msgid "B" +#~ msgstr "B" + +#~ msgid "The warning regular expression was found in the URL contents." +#~ msgstr "" +#~ "Der reguläre Ausdruck für Warnungen wurde in den URL Inhalten gefunden." + +# type: TP +#~ msgid "BI" +#~ msgstr "BI" + +# type: Plain text +#~ msgid "" +#~ "Pause the given number of seconds between two subsequent connection " +#~ "requests to the same host." +#~ msgstr "" +#~ "Pausiere die angegebene Anzahl von Sekunden zwischen zwei aufeinander " +#~ "folgenden Verbindungen zum demselben Rechner." + +# type: Plain text +#~ msgid "Command line option: B<--pause>" +#~ msgstr "Kommandozeilenoption: B<--pause>" + +# type: TP +#~ msgid "B[B<0>|B<1>]" +#~ msgstr "B[B<0>|B<1>]" + +# type: Plain text +#~ msgid "Command line option: B<--scan-virus>" +#~ msgstr "Kommandozeilenoption: B<--scan-virus>" + +# type: TP +#~ msgid "B=I" +#~ msgstr "B=I" + +# type: Plain text +#~ msgid "" +#~ "Use this to check for pages that contain some form of error, for example " +#~ "\"This page has moved\" or \"Oracle Application Server error\"." +#~ msgstr "" +#~ "Benutzen Sie dies, um nach Seiten zu suchen, welche bestimmte Fehler " +#~ "enthalten, zum Beispiel \"Diese Seite wurde entfernt\" oder \"Oracle " +#~ "Applikationsfehler\"." + +# type: Plain text +#~ msgid "Command line option: B<--warning-regex>" +#~ msgstr "Kommandozeilenoption: B<--warning-regex>" + +# type: TP +#~ msgid "BI" +#~ msgstr "BI" + +# type: Plain text +#~ msgid "Command line option: B<--warning-size-bytes>" +#~ msgstr "Kommandozeilenoption: B<--warning-size-bytes>" + +#~ msgid "" +#~ "Check that SSL certificates are at least the given number of days valid. " +#~ "The number must not be negative. If the number of days is zero a warning " +#~ "is printed only for certificates that are already expired." 
+#~ msgstr "" +#~ "Prüfe ob SSL-Zertifikate mindestens die angegebene Anzahl an Tagen gültig " +#~ "sind. Die Anzahl darf nicht negativ sein. Falls die Anzahl Null ist wird " +#~ "eine Warnung nur für Zertifikate ausgegeben, die schon abgelaufen sind." + +#~ msgid "The default number of days is 14." +#~ msgstr "Die Standardanzahl an Tagen ist 14." + +# type: TP +#~ msgid "BI" +#~ msgstr "BI" + +#~ msgid "Maximum number of connections to HTTP servers." +#~ msgstr "Maximale Anzahl an HTTP-Verbindungen." + +#~ msgid "The default is 10." +#~ msgstr "Der Standard ist 10." + +# type: TP +#~ msgid "BI" +#~ msgstr "BI" + +#~ msgid "Maximum number of connections to HTTPS servers." +#~ msgstr "Maximale Anzahl an HTTPS-Verbindungen." + +# type: TP +#~ msgid "BI" +#~ msgstr "BI" + +#~ msgid "Maximum number of connections to FTP servers." +#~ msgstr "Maximale Anzahl an FTP-Verbindungen." + +#~ msgid "The default is 2." +#~ msgstr "Der Standard ist 2." + +# type: TP +#~ msgid "B[B<0>|B<1>]" +#~ msgstr "B[B<0>|B<1>]" + +# type: Plain text +#~ msgid "" +#~ "If set log all checked URLs, even duplicates. Default is to log duplicate " +#~ "URLs only once." +#~ msgstr "" +#~ "Falls gesetzt, gebe alle geprüften URLs aus, sogar Duplikate. Standard " +#~ "ist es, URLs nur einmal auszugeben." 
+ +# type: Plain text +#~ msgid "Command line option: B<--complete>" +#~ msgstr "Kommandozeilenoption: B<--complete>" + # type: Plain text #, fuzzy #~| msgid "Copyright \\(co 2009-2014 Bastian Kleineidam" diff --git a/doc/de/linkchecker.1 b/doc/de/linkchecker.1 index 7f2ec4656..a1a7ad59f 100644 --- a/doc/de/linkchecker.1 +++ b/doc/de/linkchecker.1 @@ -41,16 +41,15 @@ Antivirusprüfung .IP \(bu ein Kommandozeilenprogramm, GUI und web interface .SH BEISPIELE -Der häufigste Gebrauchsfall prüft die angegebene Domäne rekursiv, -inklusive aller einzelnen nach außen zeigenden Verknüpfungen: - \fBlinkchecker http://www.example.net/\fP +The most common use checks the given domain recursively: + \fBlinkchecker http://www.example.com/\fP .br Beachten Sie dass dies die komplette Domäne überprüft, welche aus mehreren tausend URLs bestehen kann. Benutzen Sie die Option \fB\-r\fP, um die Rekursionstiefe zu beschränken. .br -Prüfe keine \fBmailto:\fP URLs. Alle anderen Verknüpfungen werden wie üblich geprüft: - \fBlinkchecker \-\-ignore\-url=^mailto: mysite.example.org\fP +Don't check URLs with \fB/secret\fP in its name. All other links are checked as usual: + \fBlinkchecker \-\-ignore\-url=/secret mysite.example.com\fP .br Überprüfung einer lokalen HTML Datei unter Unix: \fBlinkchecker ../bla.html\fP @@ -61,8 +60,8 @@ Prüfe keine \fBmailto:\fP URLs. Alle anderen Verknüpfungen werden wie üblich Sie können den \fBhttp://\fP URL Anteil weglassen wenn die Domäne mit \fBwww.\fP beginnt: \fBlinkchecker www.example.com\fP .br -Sie können den \fBftp://\fP URL Anteil weglassen wenn die Domäne mit \fBftp.\fP beginnt: - \fBlinkchecker \-r0 ftp.example.org\fP +You can skip the \fBftp://\fP url part if the domain starts with \fBftp.\fP: + \fBlinkchecker \-r0 ftp.example.com\fP .br Erzeuge einen Sitemap Graphen und konvertiere ihn mit dem graphviz dot Programm: \fBlinkchecker \-odot \-v www.example.com | dot \-Tps > sitemap.ps\fP @@ -88,19 +87,12 @@ positive Nummer an. 
.TP \fB\-V\fP, \fB\-\-version\fP Gebe die Version aus und beende das Programm. +.TP +\fB\-\-list\-plugins\fP +Print available check plugins and exit. . .SS Ausgabeoptionen .TP -\fB\-\-check\-css\fP -Prüfe Syntax von CSS URLs mit dem W3C Online Validator. -.TP -\fB\-\-check\-html\fP -Prüfe Syntax von HTML URLs mit dem W3C Online Validator. -.TP -\fB\-\-complete\fP -Gebe alle geprüften URLs aus. Standard ist es, doppelte URLs nur einmal -auszugeben. -.TP \fB\-D\fP\fINAME\fP, \fB\-\-debug=\fP\fINAME\fP Gebe Testmeldungen aus für den angegebenen Logger. Verfügbare Logger sind \fBcmdline\fP, \fBchecking\fP,\fBcache\fP, \fBgui\fP, \fBdns\fP und \fBall\fP. Die Angabe @@ -144,12 +136,6 @@ lokalen Spracheinstellung. Gültige Enkodierungen sind aufgelistet unter Keine Ausgabe, ein Alias für \fB\-o none\fP. Dies ist nur in Verbindung mit \fB\-F\fP nützlich. .TP -\fB\-\-scan\-virus\fP -Prüfe Inhalt von URLs auf Viren mit ClamAV. -.TP -\fB\-\-trace\fP -Trace\-Information ausgeben. -.TP \fB\-v\fP, \fB\-\-verbose\fP Gebe alle geprüften URLs aus. Standard ist es, nur fehlerhafte URLs und Warnungen auszugeben. @@ -168,27 +154,15 @@ werden können, zum Beispiel "(Diese Seite ist umgezogen|Oracle Applikationsfehler)". .br Siehe Abschnitt \fBREGULAR EXPRESSIONS\fP für weitere Infos. -.TP -\fB\-\-warning\-size\-bytes=\fP\fINUMMER\fP -Gebe eine Warnung aus, wenn die Inhaltsgröße bekannt ist und die angegebene -Anzahl von Bytes übersteigt. -. .SS "Optionen zum Prüfen" .TP -\fB\-a\fP, \fB\-\-anchors\fP -Prüfe HTTP Ankerverweise. Standard ist, Ankerverweise nicht zu prüfen. Diese -Option aktiviert die Ausgabe der Warnung \fBurl\-anchor\-not\-found\fP. -.TP -\fB\-C\fP, \fB\-\-cookies\fP -Akzeptiere und sende HTTP Cookies nach der RFC 2109. Lediglich Cookies, die -zum ursprünglichen Server zurückgesendet werden, werden akzeptiert. -Gesendete und akzeptierte Cookies werden als zusätzlicheLoginformation -aufgeführt. -.TP \fB\-\-cookiefile=\fP\fIDATEINAME\fP Lese eine Datei mit Cookie\-Daten. 
Das Cookie Datenformat wird weiter unten erklärt. .TP +\fB\-\-check\-extern\fP +Check external URLs. +.TP \fB\-\-ignore\-url=\fP\fIREGEX\fP URLs welche dem angegebenen regulären Ausdruck entsprechen werden ignoriert und nicht geprüft. @@ -215,11 +189,6 @@ Liest ein Passwort von der Kommandozeile und verwende es für HTTP und FTP Autorisierung. Für FTP ist das Standardpasswort \fBanonymous@\fP. Für HTTP gibt es kein Standardpasswort. Siehe auch \fB\-u\fP. .TP -\fB\-P\fP\fINUMMER\fP, \fB\-\-pause=\fP\fINUMMER\fP -Pausiere die angegebene Anzahl von Sekunden zwischen zwei aufeinander -folgenden Verbindungen zum demselben Rechner. Standard ist keine Pause -zwischen Verbindungen. -.TP \fB\-r\fP\fINUMMER\fP, \fB\-\-recursion\-level=\fP\fINUMMER\fP Prüfe rekursiv alle URLs bis zu der angegebenen Tiefe. Eine negative Tiefe bewirkt unendliche Rekursion. Standard Tiefe ist unendlich. @@ -301,17 +270,13 @@ Eine Cookie\-Datei enthält Standard HTTP\-Header (RFC 2616) mit den folgenden möglichen Namen: . .TP -\fBScheme\fP (optional) -Setzt das Schema für das die Cookies gültig sind; Standardschema ist -\fBhttp\fP. -.TP \fBHost\fP (erforderlich) Setzt die Domäne für die die Cookies gültig sind. .TP \fBPath\fP (optional) Gibt den Pfad für den die Cookies gültig sind; Standardpfad ist \fB/\fP. .TP -\fBSet\-cookie\fP (optional) +\fBSet\-cookie\fP (required) Setzt den Cookie Name/Wert. Kann mehrmals angegeben werden. .PP Mehrere Einträge sind durch eine Leerzeile zu trennen. @@ -325,7 +290,6 @@ Das untige Beispiel sendet zwei Cookies zu allen URLs die mit Set\-cookie: ID="smee" Set\-cookie: spam="egg" - Scheme: https Host: example.org Set\-cookie: baggage="elitist"; comment="hologram" @@ -362,12 +326,10 @@ beschrieben. . .TP HTTP Verknüpfungen (\fBhttp:\fP, \fBhttps:\fP) -Nach Verbinden zu dem gegebenen HTTP\-Server wird der eingegebene Pfad oder -Query angefordert. 
Alle Umleitungen werden verfolgt, und falls ein -Benutzer/Passwort angegeben wurde werden diese falls notwendig als -Authorisierung benutzt. Permanent umgezogene Webseiten werden als Warnung -ausgegeben. Alle finalen HTTP Statuscodes, die nicht dem Muster 2xx -entsprechen, werden als Fehler ausgegeben. +After connecting to the given HTTP server the given path or query is +requested. All redirections are followed, and if user/password is given it +will be used as authorization when necessary. All final HTTP status codes +other than 2xx are errors. . Der Inhalt von HTML\-Seiten wird rekursiv geprüft. .TP @@ -418,6 +380,19 @@ Nicht unterstützte Links (\*(lqjavascript:\*(lq, etc.) Die komplette Liste von erkannten, aber nicht unterstützten Links ist in der Quelldatei \fBlinkcheck/checker/unknownurl.py\fP. Die bekanntesten davon dürften JavaScript\-Links sein. +.SH PLUGINS +There are two plugin types: connection and content plugins. +. +Connection plugins are run after a successful connection to the URL host. +. +Content plugins are run if the URL type has content (mailto: URLs have no +content for example) and if the check is not forbidden (ie. by HTTP +robots.txt). +. +See \fBlinkchecker \-\-list\-plugins\fP for a list of plugins and their +documentation. All plugins are enabled via the \fBlinkcheckerrc\fP(5) +configuration file. + .SH Rekursion Bevor eine URL rekursiv geprüft wird, hat diese mehrere Bedingungen zu erfüllen. Diese werden in folgender Reihenfolge geprüft: diff --git a/doc/de/linkcheckerrc.5 b/doc/de/linkcheckerrc.5 index f794aeb6a..77cdfb516 100644 --- a/doc/de/linkcheckerrc.5 +++ b/doc/de/linkcheckerrc.5 @@ -14,52 +14,14 @@ in einem INI\-Format geschrieben. Die Standarddatei ist \fB~/.linkchecker/linkcheckerrc\fP unter Unix\-, \fB%HOMEPATH%\e.linkchecker\elinkcheckerrc\fP unter Windows\-Systemen. .SH EIGENSCHAFTEN - .SS [checking] .TP -\fBanchors=\fP[\fB0\fP|\fB1\fP] -Prüfe HTTP Ankerverweise. Standard ist, Ankerverweise nicht zu prüfen. 
Diese -Option aktiviert die Ausgabe der Warnung \fBurl\-anchor\-not\-found\fP. -.br -Kommandozeilenoption: \fB\-\-anchors\fP -.TP -\fBcheckcss=\fP[\fB0\fP|\fB1\fP] -Prüfe Syntax von CSS URLs mit dem W3C Online Validator. -.br -Kommandozeilenoption: \fB\-\-check\-css\fP -.TP -\fBcheckhtml=\fP[\fB0\fP|\fB1\fP] -Prüfe Syntax von HTML URLs mit dem W3C Online Validator. -.br -Kommandozeilenoption: \fB\-\-check\-html\fP -.TP -\fBclamavconf=\fP\fIDateiname\fP -Dateiname von \fBclamd.conf\fP Konfigurationsdatei. -.br -Kommandozeilenoption: keine -.TP \fBcookiefile=\fP\fIDateiname\fP Lese eine Datei mit Cookie\-Daten. Das Cookie Datenformat wird in linkchecker(1) erklärt. .br Kommandozeilenoption: \fB\-\-cookiefile\fP .TP -\fBcookies=\fP[\fB0\fP|\fB1\fP] -Akzeptiere und sende HTTP cookies. -.br -Kommandozeilenoption: \fB\-\-cookies\fP -.TP -\fBdebugmemory=\fP[\fB0\fP|\fB1\fP] -Schreibe einen Speicherabzug in eine temporäre Datei wenn die Prüfung -endet. Der Speicherabzug wird sowohl beim normalen Beenden der Prüfung als -auch wenn die Prüfung abgebrochen wird geschrieben. -.br -Der Speicherabzug funktioniert nur falls das Paket python\-meliae installiert -ist. Andernfalls wird eine Warnung angezeigt mit dem Hinweis dieses Paket zu -installieren. -.br -Kommandozeilenoption: keine -.TP \fBlocalwebroot=\fP\fISTRING\fP Beim Prüfen von absoluten URLs in lokalen Dateien wird das angegebene Wurzelverzeichnis als Basis\-URL benutzt. @@ -78,23 +40,12 @@ korrekte Syntax des Links geprüft. .br Kommandozeilenoption: \fB\-\-nntp\-server\fP .TP -\fBpause=\fP\fINUMBER\fP -Pausiere die angegebene Anzahl von Sekunden zwischen zwei aufeinander -folgenden Verbindungen zum demselben Rechner. -.br -Kommandozeilenoption: \fB\-\-pause\fP -.TP \fBrecursionlevel=\fP\fINUMBER\fP Prüfe rekursiv alle URLs bis zu der angegebenen Tiefe. Eine negative Tiefe bewirkt unendliche Rekursion. Standard Tiefe ist unendlich. 
.br Kommandozeilenoption: \fB\-\-recursion\-level\fP .TP -\fBscanvirus=\fP[\fB0\fP|\fB1\fP] -Prüfe Inhalt von URLs auf Viren mit ClamAV. -.br -Kommandozeilenoption: \fB\-\-scan\-virus\fP -.TP \fBthreads=\fP\fINUMBER\fP Generiere nicht mehr als die angegebene Anzahl von Threads. Standard Anzahl von Threads ist 100. Um Threads zu deaktivieren, geben Sie eine nicht @@ -108,6 +59,12 @@ Setze den Timeout für TCP\-Verbindungen in Sekunden. Der Standard Timeout ist .br Kommandozeilenoption: \fB\-\-timeout\fP .TP +\fBaborttimeout=\fP\fINUMBER\fP +Time to wait for checks to finish after the user aborts the first time (with +Ctrl\-C or the abort button). The default abort timeout is 300 seconds. +.br +Kommandozeilenoption: \fB\-\-timeout\fP +.TP \fBuseragent=\fP\fISTRING\fP Gibt den User\-Agent an, der zu HTTP\-Servern geschickt wird, z.B. "Mozilla/4.0". Der Standard ist "LinkChecker/X.Y", wobei X.Y die @@ -115,23 +72,6 @@ aktuelle Version von LinkChecker ist. .br Kommandozeilenoption: \fB\-\-user\-agent\fP .TP -\fBwarningregex=\fP=\fIREGEX\fP -Definieren Sie einen regulären Ausdruck der eine Warnung ausgibt falls er -auf den Inhalt einer geprüften URL zutrifft. Dies gilt nur für gültige -Seiten deren Inhalt wir bekommen können. -.br -Benutzen Sie dies, um nach Seiten zu suchen, welche bestimmte Fehler -enthalten, zum Beispiel "Diese Seite wurde entfernt" oder "Oracle -Applikationsfehler". -.br -Kommandozeilenoption: \fB\-\-warning\-regex\fP -.TP -\fBwarnsizebytes=\fP\fINUMBER\fP -Gebe eine Warnung aus, wenn die Inhaltsgröße bekannt ist und die angegebene -Anzahl von Bytes übersteigt. -.br -Kommandozeilenoption: \fB\-\-warning\-size\-bytes\fP -.TP \fBsslverify=\fP[\fB0\fP|\fB1\fP|\fIdateiname\fP] Falls der Wert Null ist werden SSL Zertifikate nicht überprüft. Falls er auf Eins gesetzt wird (der Standard) werden SSL Zertifikate mit der gelieferten @@ -140,15 +80,6 @@ zur Prüfung verwendet. 
.br Kommandozeilenoption: keine .TP -\fBwarnsslcertdaysvalid=\fP\fINUMBER\fP -Prüfe ob SSL\-Zertifikate mindestens die angegebene Anzahl an Tagen gültig -sind. Die Anzahl darf nicht negativ sein. Falls die Anzahl Null ist wird -eine Warnung nur für Zertifikate ausgegeben, die schon abgelaufen sind. -.br -The Standardanzahl an Tagen ist 14. -.br -Kommandozeilenoption: keine -.TP \fBmaxrunseconds=\fP\fINUMBER\fP Hört nach der angegebenen Anzahl von Sekunden auf, neue URLs zu prüfen. Dies ist dasselbe als wenn der Benutzer nach der gegebenen Anzahl von Sekunden @@ -167,26 +98,11 @@ Standard ist alle URLs anzunehmen und zu prüfen. .br Kommandozeilenoption: keine .TP -\fBmaxconnectionshttp=\fP\fINUMBER\fP -Maximale Anzahl an HTTP\-Verbindungen. -.br -Der Standard ist 10. -.br -Kommandozeilenoption: keine +\fBmaxrequestspersecond=\fP\fINUMBER\fP +Limit the maximum number of requests per second to one host. .TP -\fBmaxconnectionshttps=\fP\fINUMBER\fP -Maximale Anzahl an HTTPS\-Verbindungen. -.br -Der Standard ist 10. -.br -Kommandozeilenoption: keine -.TP -\fBmaxconnectionsftp=\fP\fINUMBER\fP -Maximale Anzahl an FTP\-Verbindungen. -.br -Der Standard ist 2. -.br -Kommandozeilenoption: keine +\fBallowedschemes=\fP\fINAME\fP[\fB,\fP\fINAME\fP...] +Allowed URL schemes as comma\-separated list. .SS [filtering] .TP \fBignore=\fP\fIREGEX\fP (MULTILINE) @@ -212,6 +128,11 @@ Prüfe URLs die auf den regulären Ausdruck zutreffen, aber führe keine Rekursion durch. .br Kommandozeilenoption: \fB\-\-no\-follow\-url\fP +.TP +\fBcheckextern=\fP[\fB0\fP|\fB1\fP] +Check external links. Default is to check internal links only. +.br +Command line option: \fB\-\-checkextern\fP .SS [authentication] .TP \fBentry=\fP\fIREGEX\fP \fIBENUTZER\fP [\fIPASSWORT\fP] (MULTILINE) @@ -232,9 +153,8 @@ wird Authentifizierung für http[s] und ftp Verknüpfungen benutzt. Kommandozeilenoption: \fB\-u\fP, \fB\-p\fP .TP \fBloginurl=\fP\fIURL\fP -Eine Anmelde\-URL, die vor der Prüfung besucht wird. 
Benötigt einen Eintrag -zur Authentifizierung und impliziert die Benutzung von Cookies, weil die -meisten Anmeldungen heutzutage Cookies benutzen. +A login URL to be visited before checking. Also needs authentication data +set for it. .TP \fBloginuserfield=\fP\fINAME\fP Der Name für das Benutzer CGI\-Feld. Der Standardname ist \fBlogin\fP. @@ -247,12 +167,6 @@ Optional zusätzliche CGI Namen/Werte\-Paare. Die Default\-Werte werden automatisch übermittelt. .SS [output] .TP -\fBcomplete=\fP[\fB0\fP|\fB1\fP] -Falls gesetzt, gebe alle geprüften URLs aus, sogar Duplikate. Standard ist -es, URLs nur einmal auszugeben. -.br -Kommandozeilenoption: \fB\-\-complete\fP -.TP \fBdebug=\fP\fISTRING\fP[\fB,\fP\fISTRING\fP...] Gebe Testmeldungen aus für den angegebenen Logger. Verfügbare Logger sind \fBcmdline\fP, \fBchecking\fP,\fBcache\fP, \fBgui\fP, \fBdns\fP, \fBthread\fP und \fBall\fP. Die @@ -528,6 +442,52 @@ ignoriert, müssen aber eingerückt sein. [filtering] ignorewarnings=http\-moved\-permanent + +.SH PLUGINS +All plugins have a separate section. If the section appears in the +configuration file the plugin is enabled. Some plugins read extra options +in their section. + +.SS [AnchorCheck] +Checks validity of HTML anchors. + +.SS [LocationInfo] +Adds the country and if possible city name of the URL host as info. Needs +GeoIP or pygeoip and a local country or city lookup DB installed. + +.SS [RegexCheck] +Define a regular expression which prints a warning if it matches any content +of the checked link. This applies only to valid pages, so we can get their +content. + +Use this to check for pages that contain some form of error message, for +example 'This page has moved' or 'Oracle Application error'. + +Man beachte, dass mehrere Werte in dem regulären Ausdruck kombiniert +werden können, zum Beispiel "(Diese Seite ist umgezogen|Oracle +Applikationsfehler)". + +.SS [SslCertificateCheck] +Check SSL certificate expiration date. Only internal https: links will be +checked. 
A domain will only be checked once to avoid duplicate warnings. +.TP +\fBsslcertwarndays=\fP\fINUMBER\fP +Configures the expiration warning time in days. + +.SS [HtmlSyntaxCheck] +Check the syntax of HTML pages with the online W3C HTML validator. See +http://validator.w3.org/docs/api.html. + +.SS [CssSyntaxCheck] +Check the syntax of HTML pages with the online W3C CSS validator. See +http://jigsaw.w3.org/css\-validator/manual.html#expert. + +.SS [VirusCheck] +Checks the page content for virus infections with clamav. A local clamav +daemon must be installed. +.TP +\fBclamavconf=\fP\fIDateiname\fP +Dateiname von \fBclamd.conf\fP Konfigurationsdatei. . .SH WARNUNGEN Die folgenden Warnungen werden vom Konfigurationseintrag 'ignorewarnings' @@ -543,57 +503,21 @@ Der file: Pfad ist nicht derselbe wie der Systempfad. \fBftp\-missing\-slash\fP Der ftp: URL fehlt ein abschließender Schrägstrich. .TP -\fBhttp\-auth\-unknonwn\fP -Nicht unterstützte HTTP Authentifizierungsmethode. -.TP \fBhttp\-cookie\-store\-error\fP Ein Fehler trat auf während des Speicherns eines Cookies. .TP -\fBhttp\-decompress\-error\fP -Ein Fehler trat beim Dekomprimieren des URL Inhalts auf. -.TP \fBhttp\-empty\-content\fP Die URL besitzt keinen Inhalt. .TP -\fBhttp\-moved\-permanent\fP -Die URL wurde dauerhaft verschoben. -.TP -\fBhttp\-robots\-denied\fP -Die http: URL\-Überprüfung wurde verweigert. -.TP -\fBhttp\-unsupported\-encoding\fP -Der URL\-Inhalt ist in einer unbekannten Kodierung verfasst. -.TP -\fBhttp\-wrong\-redirect\fP -Die URL wurde zu einem anderen URL\-Typ umgeleitet. -.TP -\fBhttps\-certificate\-error\fP -Das SSL\-Zertifikat ist ungültig oder abgelaufen. -.TP -\fBignore\-url\fP -Die URL wurde ignoriert. -.TP -\fBmail\-no\-connection\fP -Es konnte keine Verbindung zu einem MX\-Rechner hergestellt werden. -.TP \fBmail\-no\-mx\-host\fP Der MX Mail\-Rechner konnte nicht gefunden werden. .TP -\fBmail\-unverified\-address\fP -Die mailto: Addresse konnte nicht überprüft werden. 
-.TP \fBnntp\-no\-newsgroup\fP Die NNTP Nachrichtengruppe konnte nicht gefunden werden. .TP \fBnntp\-no\-server\fP Es wurde kein NNTP Server gefunden. .TP -\fBurl\-anchor\-not\-found\fP -URL Anker wurde nicht gefunden. -.TP -\fBurl\-content\-size\-unequal\fP -Der URL Inhaltsgrößenangabe und die Download\-Größe sind unterschiedlich. -.TP \fBurl\-content\-size\-zero\fP Der URL Inhaltsgrößenangabe ist Null. .TP @@ -609,9 +533,6 @@ Konnte den Inhalt der URL nicht bekommen. \fBurl\-obfuscated\-ip\fP Die IP\-Adresse ist verschleiert. .TP -\fBurl\-warnregex\-found\fP -Der reguläre Ausdruck für Warnungen wurde in den URL Inhalten gefunden. -.TP \fBurl\-whitespace\fP Die URL %(url)s enthält Leerzeichen am Anfang oder Ende. diff --git a/doc/en/linkchecker.1 b/doc/en/linkchecker.1 index 7ee212829..ed91343ec 100644 --- a/doc/en/linkchecker.1 +++ b/doc/en/linkchecker.1 @@ -33,15 +33,14 @@ Antivirus check .IP \(bu a command line, GUI and web interface .SH EXAMPLES -The most common use checks the given domain recursively, plus any -URL pointing outside of the domain: - \fBlinkchecker http://www.example.net/\fP +The most common use checks the given domain recursively: + \fBlinkchecker http://www.example.com/\fP .br Beware that this checks the whole site which can have thousands of URLs. Use the \fB\-r\fP option to restrict the recursion depth. .br -Don't check \fBmailto:\fP URLs. All other links are checked as usual: - \fBlinkchecker \-\-ignore\-url=^mailto: mysite.example.org\fP +Don't check URLs with \fB/secret\fP in its name. 
All other links are checked as usual: + \fBlinkchecker \-\-ignore\-url=/secret mysite.example.com\fP .br Checking a local HTML file on Unix: \fBlinkchecker ../bla.html\fP @@ -53,7 +52,7 @@ You can skip the \fBhttp://\fP url part if the domain starts with \fBwww.\fP: \fBlinkchecker www.example.com\fP .br You can skip the \fBftp://\fP url part if the domain starts with \fBftp.\fP: - \fBlinkchecker \-r0 ftp.example.org\fP + \fBlinkchecker \-r0 ftp.example.com\fP .br Generate a sitemap graph and convert it with the graphviz dot utility: \fBlinkchecker \-odot \-v www.example.com | dot \-Tps > sitemap.ps\fP @@ -77,18 +76,12 @@ of threads is 100. To disable threading specify a non-positive number. .TP \fB\-V\fP, \fB\-\-version\fP Print version and exit. +.TP +\fB\-\-list\-plugins\fP +Print available check plugins and exit. . .SS Output options .TP -\fB\-\-check\-css\fP -Check syntax of CSS URLs with the W3C online validator. -.TP -\fB\-\-check\-html\fP -Check syntax of HTML URLs with the W3C online validator. -.TP -\fB\-\-complete\fP -Log all URLs, including duplicates. Default is to log duplicate URLs only once. -.TP \fB\-D\fP\fISTRING\fP, \fB\-\-debug=\fP\fISTRING\fP Print debugging output for the given logger. Available loggers are \fBcmdline\fP, \fBchecking\fP, @@ -139,12 +132,6 @@ that of your locale. Valid encodings are listed at Quiet operation, an alias for \fB\-o none\fP. This is only useful with \fB\-F\fP. .TP -\fB\-\-scan\-virus\fP -Scan content of URLs for viruses with ClamAV. -.TP -\fB\-\-trace\fP -Print tracing information. -.TP \fB\-v\fP, \fB\-\-verbose\fP Log all checked URLs. Default is to log only errors and warnings. .TP @@ -160,27 +147,15 @@ Note that multiple values can be combined in the regular expression, for example "(This page has moved|Oracle Application error)". .br See section \fBREGULAR EXPRESSIONS\fP for more info. 
-.TP -\fB\-\-warning\-size\-bytes=\fP\fINUMBER\fP -Print a warning if content size info is available and exceeds the given -number of \fIbytes\fP. -. .SS Checking options .TP -\fB\-a\fP, \fB\-\-anchors\fP -Check HTTP anchor references. Default is not to check anchors. -This option enables logging of the warning \fBurl\-anchor\-not\-found\fP. -.TP -\fB\-C\fP, \fB\-\-cookies\fP -Accept and send HTTP cookies according to RFC 2109. Only cookies -which are sent back to the originating server are accepted. -Sent and accepted cookies are provided as additional logging -information. -.TP \fB\-\-cookiefile=\fP\fIFILENAME\fP Read a file with initial cookie data. The cookie data format is explained below. .TP +\fB\-\-check\-extern +Check external URLs. +.TP \fB\-\-ignore\-url=\fP\fIREGEX\fP URLs matching the given regular expression will be ignored and not checked. .br @@ -206,10 +181,6 @@ Read a password from console and use it for HTTP and FTP authorization. For FTP the default password is \fBanonymous@\fP. For HTTP there is no default password. See also \fB\-u\fP. .TP -\fB\-P\fP\fINUMBER\fP, \fB\-\-pause=\fP\fINUMBER\fP -Pause the given number of seconds between two subsequent connection -requests to the same host. Default is no pause between requests. -.TP \fB\-r\fP\fINUMBER\fP, \fB\-\-recursion\-level=\fP\fINUMBER\fP Check recursively all links up to given depth. A negative depth will enable infinite recursion. @@ -291,16 +262,13 @@ A cookie file contains standard HTTP header (RFC 2616) data with the following possible names: . .TP -\fBScheme\fP (optional) -Sets the scheme the cookies are valid for; default scheme is \fBhttp\fP. -.TP \fBHost\fP (required) Sets the domain the cookies are valid for. .TP \fBPath\fP (optional) Gives the path the cookies are value for; default path is \fB/\fP. .TP -\fBSet-cookie\fP (optional) +\fBSet-cookie\fP (required) Set cookie name/value. Can be given more than once. .PP Multiple entries are separated by a blank line. 
@@ -314,7 +282,6 @@ with \fBhttps://example.org/\fP: Set-cookie: ID="smee" Set-cookie: spam="egg" - Scheme: https Host: example.org Set-cookie: baggage="elitist"; comment="hologram" @@ -353,7 +320,6 @@ After connecting to the given HTTP server the given path or query is requested. All redirections are followed, and if user/password is given it will be used as authorization when necessary. -Permanently moved pages issue a warning. All final HTTP status codes other than 2xx are errors. . HTML page contents are checked for recursion. @@ -412,6 +378,20 @@ Unsupported links (``javascript:``, etc.) in the \fBlinkcheck/checker/unknownurl.py\fP source file. The most prominent of them should be JavaScript links. +.SH PLUGINS +There are two plugin types: connection and content plugins. +. +Connection plugins are run after a successful connection to the +URL host. +. +Content plugins are run if the URL type has content +(mailto: URLs have no content for example) and if the check is not +forbidden (ie. by HTTP robots.txt). +. +See \fBlinkchecker \-\-list\-plugins\fP for a list of plugins and +their documentation. All plugins are enabled via the \fBlinkcheckerrc\fP(5) +configuration file. + .SH RECURSION Before descending recursively into a URL, it has to fulfill several conditions. They are checked in this order: diff --git a/doc/en/linkcheckerrc.5 b/doc/en/linkcheckerrc.5 index 664c3e867..d82231893 100644 --- a/doc/en/linkcheckerrc.5 +++ b/doc/en/linkcheckerrc.5 @@ -9,51 +9,14 @@ The file is written in an INI-style format. The default file location is \fB~/.linkchecker/linkcheckerrc\fP on Unix, \fB%HOMEPATH%\\.linkchecker\\linkcheckerrc\fP on Windows systems. .SH SETTINGS - .SS \fB[checking]\fP .TP -\fBanchors=\fP[\fB0\fP|\fB1\fP] -Check HTTP anchor references. Default is not to check anchors. -This option enables logging of the warning \fBurl\-anchor\-not\-found\fP. 
-.br -Command line option: \fB\-\-anchors\fP -.TP -\fBcheckcss=\fP[\fB0\fP|\fB1\fP] -Check syntax of CSS URLs with the W3C online validator. -.br -Command line option: \fB\-\-check\-css\fP -.TP -\fBcheckhtml=\fP[\fB0\fP|\fB1\fP] -Check syntax of HTML URLs with the W3C online validator. -.br -Command line option: \fB\-\-check\-html\fP -.TP -\fBclamavconf=\fP\fIfilename\fP -Filename of \fBclamd.conf\fP config file. -.br -Command line option: none -.TP \fBcookiefile=\fP\fIfilename\fP Read a file with initial cookie data. The cookie data format is explained in linkchecker(1). .br Command line option: \fB\-\-cookiefile\fP .TP -\fBcookies=\fP[\fB0\fP|\fB1\fP] -Accept and send HTTP cookies. -.br -Command line option: \fB\-\-cookies\fP -.TP -\fBdebugmemory=\fP[\fB0\fP|\fB1\fP] -When checking finishes, write a memory dump to a temporary file. -The memory dump is written both when checking finishes normally -and when checking gets canceled. -.br -The memory dump only works if the python-meliae package is installed. -Otherwise a warning is printed to install it. -.br -Command line option: none -.TP \fBlocalwebroot=\fP\fISTRING\fP When checking absolute URLs inside local files, the given root directory is used as base URL. @@ -71,12 +34,6 @@ only the syntax of the link is checked. .br Command line option: \fB\-\-nntp\-server\fP .TP -\fBpause=\fP\fINUMBER\fP -Pause the given number of seconds between two subsequent connection -requests to the same host. -.br -Command line option: \fB\-\-pause\fP -.TP \fBrecursionlevel=\fP\fINUMBER\fP Check recursively all links up to given depth. A negative depth will enable infinite recursion. @@ -84,11 +41,6 @@ Default depth is infinite. .br Command line option: \fB\-\-recursion\-level\fP .TP -\fBscanvirus=\fP[\fB0\fP|\fB1\fP] -Scan content of URLs for viruses with ClamAV. -.br -Command line option: \fB\-\-scan\-virus\fP -.TP \fBthreads=\fP\fINUMBER\fP Generate no more than the given number of threads. Default number of threads is 100. 
To disable threading specify a non-positive number.
@@ -101,6 +53,13 @@ is 60 seconds.
 .br
 Command line option: \fB\-\-timeout\fP
 .TP
+\fBaborttimeout=\fP\fINUMBER\fP
+Time to wait for checks to finish after the user aborts the first time
+(with Ctrl-C or the abort button).
+The default abort timeout is 300 seconds.
+.br
+Command line option: none
+.TP
 \fBuseragent=\fP\fISTRING\fP
 Specify the User-Agent string to send to the HTTP server, for example
 "Mozilla/4.0". The default is "LinkChecker/X.Y" where X.Y is the current
@@ -108,22 +67,6 @@ version of LinkChecker.
 .br
 Command line option: \fB\-\-user\-agent\fP
 .TP
-\fBwarningregex=\fP=\fIREGEX\fP
-Define a regular expression which prints a warning if it matches any
-content of the checked link.
-This applies only to valid pages, so we can get their content.
-.br
-Use this to check for pages that contain some form of error, for example
-"This page has moved" or "Oracle Application Server error".
-.br
-Command line option: \fB\-\-warning\-regex\fP
-.TP
-\fBwarnsizebytes=\fP\fINUMBER\fP
-Print a warning if content size info is available and exceeds the given
-number of \fIbytes\fP.
-.br
-Command line option: \fB\-\-warning\-size\-bytes\fP
-.TP
 \fBsslverify=\fP[\fB0\fP|\fB1\fP|\fIfilename\fP]
 If set to zero disables SSL certificate checking.
 If set to one (the default) enables SSL certificate checking with
@@ -132,16 +75,6 @@ will be used as the certificate file.
 .br
 Command line option: none
 .TP
-\fBwarnsslcertdaysvalid=\fP\fINUMBER\fP
-Check that SSL certificates are at least the given number of days valid.
-The number must not be negative.
-If the number of days is zero a warning is printed only for certificates
-that are already expired.
-.br
-The default number of days is 14.
-.br
-Command line option: none
-.TP
 \fBmaxrunseconds=\fP\fINUMBER\fP
 Stop checking new URLs after the given number of seconds.
Same as if the user stops (by hitting Ctrl-C or clicking the abort buttin in the GUI)
@@ -159,26 +92,11 @@ The default is to queue and check all URLs.
 .br
 Command line option: none
 .TP
-\fBmaxconnectionshttp=\fP\fINUMBER\fP
-Maximum number of connections to HTTP servers.
-.br
-The default is 10.
-.br
-Command line option: none
+\fBmaxrequestspersecond=\fP\fINUMBER\fP
+Limit the maximum number of requests per second to one host.
 .TP
-\fBmaxconnectionshttps=\fP\fINUMBER\fP
-Maximum number of connections to HTTPS servers.
-.br
-The default is 10.
-.br
-Command line option: none
-.TP
-\fBmaxconnectionsftp=\fP\fINUMBER\fP
-Maximum number of connections to FTP servers.
-.br
-The default is 2.
-.br
-Command line option: none
+\fBallowedschemes=\fP\fINAME\fP[\fB,\fP\fINAME\fP...]
+Allowed URL schemes as comma-separated list.
 .SS \fB[filtering]\fP
 .TP
 \fBignore=\fP\fIREGEX\fP (MULTILINE)
@@ -203,6 +121,11 @@ Check but do not recurse into URLs matching
 the given regular expressions.
 .br
 Command line option: \fB\-\-no\-follow\-url\fP
+.TP
+\fBcheckextern=\fP[\fB0\fP|\fB1\fP]
+Check external links. Default is to check internal links only.
+.br
+Command line option: \fB\-\-check\-extern\fP
 .SS \fB[authentication]\fP
 .TP
 \fBentry=\fP\fIREGEX\fP \fIUSER\fP [\fIPASS\fP] (MULTILINE)
@@ -224,8 +147,7 @@ Command line option: \fB\-u\fP, \fB\-p\fP
 .TP
 \fBloginurl=\fP\fIURL\fP
 A login URL to be visited before checking. Also needs authentication
-data set for it, and implies using cookies because most logins use
-cookies nowadays.
+data set for it.
 .TP
 \fBloginuserfield=\fP\fISTRING\fP
 The name of the user CGI field. Default name is \fBlogin\fP.
@@ -238,12 +160,6 @@ Optionally any additional CGI name/value pairs.
 Note that the default values are submitted automatically.
 .SS \fB[output]\fP
 .TP
-\fBcomplete=\fP[\fB0\fP|\fB1\fP]
-If set log all checked URLs, even duplicates. Default is to log
-duplicate URLs only once.
-.br
-Command line option: \fB\-\-complete\fP
-.TP
 \fBdebug=\fP\fISTRING\fP[\fB,\fP\fISTRING\fP...]
 Print debugging output for the given loggers.
 Available loggers are \fBcmdline\fP, \fBchecking\fP,
@@ -524,6 +440,53 @@ though they must still be indented.
 [filtering]
 ignorewarnings=http-moved-permanent
+
+.SH PLUGINS
+All plugins have a separate section. If the section
+appears in the configuration file the plugin is enabled.
+Some plugins read extra options in their section.
+
+.SS \fB[AnchorCheck]\fP
+Checks validity of HTML anchors.
+
+.SS \fB[LocationInfo]\fP
+Adds the country and if possible city name of the URL host as info.
+Needs GeoIP or pygeoip and a local country or city lookup DB installed.
+
+.SS \fB[RegexCheck]\fP
+Define a regular expression which prints a warning if it matches
+any content of the checked link. This applies only to valid pages,
+so we can get their content.
+
+Use this to check for pages that contain some form of error
+message, for example 'This page has moved' or 'Oracle
+Application error'.
+
+Note that multiple values can be combined in the regular expression,
+for example "(This page has moved|Oracle Application error)".
+
+.SS \fB[SslCertificateCheck]\fP
+Check SSL certificate expiration date. Only internal https: links
+will be checked. A domain will only be checked once to avoid duplicate
+warnings.
+.TP
+\fBsslcertwarndays=\fP\fINUMBER\fP
+Configures the expiration warning time in days.
+
+.SS \fB[HtmlSyntaxCheck]\fP
+Check the syntax of HTML pages with the online W3C HTML validator.
+See http://validator.w3.org/docs/api.html.
+
+.SS \fB[CssSyntaxCheck]\fP
+Check the syntax of CSS stylesheets with the online W3C CSS validator.
+See http://jigsaw.w3.org/css-validator/manual.html#expert.
+
+.SS \fB[VirusCheck]\fP
+Checks the page content for virus infections with clamav.
+A local clamav daemon must be installed.
+.TP
+\fBclamavconf=\fP\fIfilename\fP
+Filename of \fBclamd.conf\fP config file.
 .
.SH WARNINGS The following warnings are recognized in the 'ignorewarnings' config @@ -539,57 +502,21 @@ The file: path is not the same as the system specific path. \fBftp-missing-slash\fP The ftp: URL is missing a trailing slash. .TP -\fBhttp-auth-unknonwn\fP -Unsupported HTTP authentication method. -.TP \fBhttp-cookie-store-error\fP An error occurred while storing a cookie. .TP -\fBhttp-decompress-error\fP -An error occurred while decompressing the URL content. -.TP \fBhttp-empty-content\fP The URL had no content. .TP -\fBhttp-moved-permanent\fP -The URL has moved permanently. -.TP -\fBhttp-robots-denied\fP -The http: URL checking has been denied. -.TP -\fBhttp-unsupported-encoding\fP -The URL content is encoded with an unknown encoding. -.TP -\fBhttp-wrong-redirect\fP -The URL has been redirected to an URL of a different type. -.TP -\fBhttps-certificate-error\fP -The SSL certificate is invalid or expired. -.TP -\fBignore-url\fP -The URL has been ignored. -.TP -\fBmail-no-connection\fP -No connection to a MX host could be established. -.TP \fBmail-no-mx-host\fP The mail MX host could not be found. .TP -\fBmail-unverified-address\fP -The mailto: address could not be verified. -.TP \fBnntp-no-newsgroup\fP The NNTP newsgroup could not be found. .TP \fBnntp-no-server\fP No NNTP server was found. .TP -\fBurl-anchor-not-found\fP -URL anchor was not found. -.TP -\fBurl-content-size-unequal\fP -The URL content size and download size are unequal. -.TP \fBurl-content-size-zero\fP The URL content size is zero. .TP @@ -605,9 +532,6 @@ Could not get the content of the URL. \fBurl-obfuscated-ip\fP The IP is obfuscated. .TP -\fBurl-warnregex-found\fP -The warning regular expression was found in the URL contents. -.TP \fBurl-whitespace\fP The URL contains leading or trailing whitespace. diff --git a/doc/install.txt b/doc/install.txt index 063628fae..1bb089179 100644 --- a/doc/install.txt +++ b/doc/install.txt @@ -50,7 +50,9 @@ First, install the required software. 
On Debian or Ubuntu systems, install the package qt4-dev-tools. On Redhat systems, install the package qt-devel. -4. *Optional, for bash-completion:* +4. Python requests module from https://pypi.python.org/pypi/requests + +5. *Optional, for bash-completion:* argcomplete Python module from https://pypi.python.org/pypi/argcomplete 6. *Optional, for displaying country codes:* diff --git a/doc/linkchecker.doc.pot b/doc/linkchecker.doc.pot index 1ec4df1bf..19dfdd901 100644 --- a/doc/linkchecker.doc.pot +++ b/doc/linkchecker.doc.pot @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" -"POT-Creation-Date: 2014-01-08 22:34+0100\n" +"POT-Creation-Date: 2014-02-28 22:57+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -76,7 +76,7 @@ msgid "LinkChecker features" msgstr "" #. type: IP -#: en/linkchecker.1:11 en/linkchecker.1:13 en/linkchecker.1:15 en/linkchecker.1:17 en/linkchecker.1:19 en/linkchecker.1:21 en/linkchecker.1:23 en/linkchecker.1:25 en/linkchecker.1:27 en/linkchecker.1:29 en/linkchecker.1:31 en/linkchecker.1:33 en/linkchecker.1:477 en/linkchecker.1:481 en/linkchecker.1:483 +#: en/linkchecker.1:11 en/linkchecker.1:13 en/linkchecker.1:15 en/linkchecker.1:17 en/linkchecker.1:19 en/linkchecker.1:21 en/linkchecker.1:23 en/linkchecker.1:25 en/linkchecker.1:27 en/linkchecker.1:29 en/linkchecker.1:31 en/linkchecker.1:33 en/linkchecker.1:457 en/linkchecker.1:461 en/linkchecker.1:463 #, no-wrap msgid "\\(bu" msgstr "" @@ -152,31 +152,31 @@ msgid "EXAMPLES" msgstr "" #. type: Plain text -#: en/linkchecker.1:39 +#: en/linkchecker.1:38 #, no-wrap msgid "" -"The most common use checks the given domain recursively, plus any\n" -"URL pointing outside of the domain:\n" -" B\n" +"The most common use checks the given domain recursively:\n" +" B\n" msgstr "" #. 
type: Plain text -#: en/linkchecker.1:42 +#: en/linkchecker.1:41 msgid "" "Beware that this checks the whole site which can have thousands of URLs. " "Use the B<-r> option to restrict the recursion depth." msgstr "" #. type: Plain text -#: en/linkchecker.1:45 +#: en/linkchecker.1:44 #, no-wrap msgid "" -"Don't check B URLs. All other links are checked as usual:\n" -" B\n" +"Don't check URLs with B in its name. All other links are checked as " +"usual:\n" +" B\n" msgstr "" #. type: Plain text -#: en/linkchecker.1:48 +#: en/linkchecker.1:47 #, no-wrap msgid "" "Checking a local HTML file on Unix:\n" @@ -184,7 +184,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:51 +#: en/linkchecker.1:50 #, no-wrap msgid "" "Checking a local HTML file on Windows:\n" @@ -192,7 +192,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:54 +#: en/linkchecker.1:53 #, no-wrap msgid "" "You can skip the B url part if the domain starts with B:\n" @@ -200,15 +200,15 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:57 +#: en/linkchecker.1:56 #, no-wrap msgid "" "You can skip the B url part if the domain starts with B:\n" -" B\n" +" B\n" msgstr "" #. type: Plain text -#: en/linkchecker.1:60 +#: en/linkchecker.1:59 #, no-wrap msgid "" "Generate a sitemap graph and convert it with the graphviz dot utility:\n" @@ -216,125 +216,101 @@ msgid "" msgstr "" #. type: SH -#: en/linkchecker.1:61 +#: en/linkchecker.1:60 #, no-wrap msgid "OPTIONS" msgstr "" #. type: SS -#: en/linkchecker.1:62 +#: en/linkchecker.1:61 #, no-wrap msgid "General options" msgstr "" #. type: TP -#: en/linkchecker.1:63 +#: en/linkchecker.1:62 #, no-wrap msgid "B<-f>I, B<--config=>I" msgstr "" #. type: Plain text -#: en/linkchecker.1:67 +#: en/linkchecker.1:66 msgid "" "Use I as configuration file. As default LinkChecker uses " "B<~/.linkchecker/linkcheckerrc>." msgstr "" #. type: TP -#: en/linkchecker.1:67 +#: en/linkchecker.1:66 #, no-wrap msgid "B<-h>, B<--help>" msgstr "" #. 
type: Plain text -#: en/linkchecker.1:70 +#: en/linkchecker.1:69 msgid "Help me! Print usage information for this program." msgstr "" #. type: TP -#: en/linkchecker.1:70 +#: en/linkchecker.1:69 #, no-wrap msgid "B<--stdin>" msgstr "" #. type: Plain text -#: en/linkchecker.1:73 +#: en/linkchecker.1:72 msgid "Read list of white-space separated URLs to check from stdin." msgstr "" #. type: TP -#: en/linkchecker.1:73 +#: en/linkchecker.1:72 #, no-wrap msgid "B<-t>I, B<--threads=>I" msgstr "" #. type: Plain text -#: en/linkchecker.1:77 en/linkcheckerrc.5:95 +#: en/linkchecker.1:76 en/linkcheckerrc.5:47 msgid "" "Generate no more than the given number of threads. Default number of threads " "is 100. To disable threading specify a non-positive number." msgstr "" #. type: TP -#: en/linkchecker.1:77 +#: en/linkchecker.1:76 #, no-wrap msgid "B<-V>, B<--version>" msgstr "" #. type: Plain text -#: en/linkchecker.1:80 +#: en/linkchecker.1:79 msgid "Print version and exit." msgstr "" -#. type: SS -#: en/linkchecker.1:81 -#, no-wrap -msgid "Output options" -msgstr "" - -#. type: TP -#: en/linkchecker.1:82 -#, no-wrap -msgid "B<--check-css>" -msgstr "" - -#. type: Plain text -#: en/linkchecker.1:85 en/linkcheckerrc.5:23 -msgid "Check syntax of CSS URLs with the W3C online validator." -msgstr "" - #. type: TP -#: en/linkchecker.1:85 +#: en/linkchecker.1:79 #, no-wrap -msgid "B<--check-html>" +msgid "B<--list-plugins>" msgstr "" #. type: Plain text -#: en/linkchecker.1:88 en/linkcheckerrc.5:28 -msgid "Check syntax of HTML URLs with the W3C online validator." +#: en/linkchecker.1:82 +msgid "Print available check plugins and exit." msgstr "" -#. type: TP -#: en/linkchecker.1:88 +#. type: SS +#: en/linkchecker.1:83 #, no-wrap -msgid "B<--complete>" -msgstr "" - -#. type: Plain text -#: en/linkchecker.1:91 -msgid "" -"Log all URLs, including duplicates. Default is to log duplicate URLs only " -"once." +msgid "Output options" msgstr "" #. 
type: TP -#: en/linkchecker.1:91 +#: en/linkchecker.1:84 #, no-wrap msgid "B<-D>I, B<--debug=>I" msgstr "" #. type: Plain text -#: en/linkchecker.1:101 +#: en/linkchecker.1:94 msgid "" "Print debugging output for the given logger. Available loggers are " "B, B, B, B, B and B. Specifying " @@ -344,7 +320,7 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:101 +#: en/linkchecker.1:94 #, no-wrap msgid "" "B<-F>I[BI][BI], " @@ -352,7 +328,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:110 +#: en/linkchecker.1:103 msgid "" "Output to a file BI, B<$HOME/.linkchecker/blacklist> " "for B output, or I if specified. The I " @@ -362,7 +338,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:120 +#: en/linkchecker.1:113 msgid "" "The I and I parts of the B output type will be " "ignored, else if the file already exists, it will be overwritten. You can " @@ -374,35 +350,35 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:120 +#: en/linkchecker.1:113 #, no-wrap msgid "B<--no-status>" msgstr "" #. type: Plain text -#: en/linkchecker.1:123 +#: en/linkchecker.1:116 msgid "Do not print check status messages." msgstr "" #. type: TP -#: en/linkchecker.1:123 +#: en/linkchecker.1:116 #, no-wrap msgid "B<--no-warnings>" msgstr "" #. type: Plain text -#: en/linkchecker.1:126 +#: en/linkchecker.1:119 msgid "Don't log warnings. Default is to log warnings." msgstr "" #. type: TP -#: en/linkchecker.1:126 +#: en/linkchecker.1:119 #, no-wrap msgid "B<-o>I[BI], B<--output=>I[BI]" msgstr "" #. type: Plain text -#: en/linkchecker.1:133 +#: en/linkchecker.1:126 msgid "" "Specify output type as B, B, B, B, B, B, " "B, B, B or B. Default type is B. The " @@ -410,7 +386,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:137 +#: en/linkchecker.1:130 msgid "" "The I specifies the output encoding, the default is that of your " "locale. Valid encodings are listed at " @@ -418,57 +394,35 @@ msgid "" msgstr "" #. 
type: TP -#: en/linkchecker.1:137 +#: en/linkchecker.1:130 #, no-wrap msgid "B<-q>, B<--quiet>" msgstr "" #. type: Plain text -#: en/linkchecker.1:141 +#: en/linkchecker.1:134 msgid "Quiet operation, an alias for B<-o none>. This is only useful with B<-F>." msgstr "" #. type: TP -#: en/linkchecker.1:141 -#, no-wrap -msgid "B<--scan-virus>" -msgstr "" - -#. type: Plain text -#: en/linkchecker.1:144 en/linkcheckerrc.5:89 -msgid "Scan content of URLs for viruses with ClamAV." -msgstr "" - -#. type: TP -#: en/linkchecker.1:144 -#, no-wrap -msgid "B<--trace>" -msgstr "" - -#. type: Plain text -#: en/linkchecker.1:147 -msgid "Print tracing information." -msgstr "" - -#. type: TP -#: en/linkchecker.1:147 +#: en/linkchecker.1:134 #, no-wrap msgid "B<-v>, B<--verbose>" msgstr "" #. type: Plain text -#: en/linkchecker.1:150 +#: en/linkchecker.1:137 msgid "Log all checked URLs. Default is to log only errors and warnings." msgstr "" #. type: TP -#: en/linkchecker.1:150 +#: en/linkchecker.1:137 #, no-wrap msgid "B<-W>I, B<--warning-regex=>I" msgstr "" #. type: Plain text -#: en/linkchecker.1:155 en/linkcheckerrc.5:115 +#: en/linkchecker.1:142 msgid "" "Define a regular expression which prints a warning if it matches any content " "of the checked link. This applies only to valid pages, so we can get their " @@ -476,107 +430,78 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:158 +#: en/linkchecker.1:145 msgid "" "Use this to check for pages that contain some form of error, for example " "\"This page has moved\" or \"Oracle Application error\"." msgstr "" #. type: Plain text -#: en/linkchecker.1:161 +#: en/linkchecker.1:148 en/linkcheckerrc.5:467 msgid "" "Note that multiple values can be combined in the regular expression, for " "example \"(This page has moved|Oracle Application error)\"." msgstr "" #. 
type: Plain text -#: en/linkchecker.1:163 en/linkchecker.1:190 en/linkchecker.1:203 +#: en/linkchecker.1:150 en/linkchecker.1:165 en/linkchecker.1:178 msgid "See section B for more info." msgstr "" -#. type: TP -#: en/linkchecker.1:163 -#, no-wrap -msgid "B<--warning-size-bytes=>I" -msgstr "" - -#. type: Plain text -#: en/linkchecker.1:167 en/linkcheckerrc.5:124 -msgid "" -"Print a warning if content size info is available and exceeds the given " -"number of I." -msgstr "" - #. type: SS -#: en/linkchecker.1:168 +#: en/linkchecker.1:150 #, no-wrap msgid "Checking options" msgstr "" #. type: TP -#: en/linkchecker.1:169 -#, no-wrap -msgid "B<-a>, B<--anchors>" -msgstr "" - -#. type: Plain text -#: en/linkchecker.1:173 en/linkcheckerrc.5:18 -msgid "" -"Check HTTP anchor references. Default is not to check anchors. This option " -"enables logging of the warning B." -msgstr "" - -#. type: TP -#: en/linkchecker.1:173 +#: en/linkchecker.1:151 #, no-wrap -msgid "B<-C>, B<--cookies>" +msgid "B<--cookiefile=>I" msgstr "" #. type: Plain text -#: en/linkchecker.1:179 +#: en/linkchecker.1:155 msgid "" -"Accept and send HTTP cookies according to RFC 2109. Only cookies which are " -"sent back to the originating server are accepted. Sent and accepted cookies " -"are provided as additional logging information." +"Read a file with initial cookie data. The cookie data format is explained " +"below." msgstr "" #. type: TP -#: en/linkchecker.1:179 +#: en/linkchecker.1:155 #, no-wrap -msgid "B<--cookiefile=>I" +msgid "B<--check-extern>" msgstr "" #. type: Plain text -#: en/linkchecker.1:183 -msgid "" -"Read a file with initial cookie data. The cookie data format is explained " -"below." +#: en/linkchecker.1:158 +msgid "Check external URLs." msgstr "" #. type: TP -#: en/linkchecker.1:183 +#: en/linkchecker.1:158 #, no-wrap msgid "B<--ignore-url=>I" msgstr "" #. 
type: Plain text -#: en/linkchecker.1:186 +#: en/linkchecker.1:161 msgid "URLs matching the given regular expression will be ignored and not checked." msgstr "" #. type: Plain text -#: en/linkchecker.1:188 en/linkchecker.1:201 +#: en/linkchecker.1:163 en/linkchecker.1:176 msgid "This option can be given multiple times." msgstr "" #. type: TP -#: en/linkchecker.1:190 +#: en/linkchecker.1:165 #, no-wrap msgid "B<-N>I, B<--nntp-server=>I" msgstr "" #. type: Plain text -#: en/linkchecker.1:195 en/linkcheckerrc.5:71 +#: en/linkchecker.1:170 en/linkcheckerrc.5:34 msgid "" "Specify an NNTP server for B links. Default is the environment " "variable B. If no host is given, only the syntax of the link is " @@ -584,24 +509,24 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:195 +#: en/linkchecker.1:170 #, no-wrap msgid "B<--no-follow-url=>I" msgstr "" #. type: Plain text -#: en/linkchecker.1:199 +#: en/linkchecker.1:174 msgid "Check but do not recurse into URLs matching the given regular expression." msgstr "" #. type: TP -#: en/linkchecker.1:203 +#: en/linkchecker.1:178 #, no-wrap msgid "B<-p>, B<--password>" msgstr "" #. type: Plain text -#: en/linkchecker.1:208 +#: en/linkchecker.1:183 msgid "" "Read a password from console and use it for HTTP and FTP authorization. For " "FTP the default password is B. For HTTP there is no default " @@ -609,52 +534,39 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:208 -#, no-wrap -msgid "B<-P>I, B<--pause=>I" -msgstr "" - -#. type: Plain text -#: en/linkchecker.1:212 -msgid "" -"Pause the given number of seconds between two subsequent connection requests " -"to the same host. Default is no pause between requests." -msgstr "" - -#. type: TP -#: en/linkchecker.1:212 +#: en/linkchecker.1:183 #, no-wrap msgid "B<-r>I, B<--recursion-level=>I" msgstr "" #. type: Plain text -#: en/linkchecker.1:217 en/linkcheckerrc.5:84 +#: en/linkchecker.1:188 en/linkcheckerrc.5:41 msgid "" "Check recursively all links up to given depth. 
A negative depth will enable " "infinite recursion. Default depth is infinite." msgstr "" #. type: TP -#: en/linkchecker.1:217 +#: en/linkchecker.1:188 #, no-wrap msgid "B<--timeout=>I" msgstr "" #. type: Plain text -#: en/linkchecker.1:221 en/linkcheckerrc.5:101 +#: en/linkchecker.1:192 en/linkcheckerrc.5:53 msgid "" "Set the timeout for connection attempts in seconds. The default timeout is " "60 seconds." msgstr "" #. type: TP -#: en/linkchecker.1:221 +#: en/linkchecker.1:192 #, no-wrap msgid "B<-u>I, B<--user=>I" msgstr "" #. type: Plain text -#: en/linkchecker.1:226 +#: en/linkchecker.1:197 msgid "" "Try the given username for HTTP and FTP authorization. For FTP the default " "username is B. For HTTP there is no default username. See also " @@ -662,13 +574,13 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:226 +#: en/linkchecker.1:197 #, no-wrap msgid "B<--user-agent=>I" msgstr "" #. type: Plain text -#: en/linkchecker.1:231 en/linkcheckerrc.5:108 +#: en/linkchecker.1:202 en/linkcheckerrc.5:67 msgid "" "Specify the User-Agent string to send to the HTTP server, for example " "\"Mozilla/4.0\". The default is \"LinkChecker/X.Y\" where X.Y is the current " @@ -676,13 +588,13 @@ msgid "" msgstr "" #. type: SH -#: en/linkchecker.1:232 +#: en/linkchecker.1:203 #, no-wrap msgid "CONFIGURATION FILES" msgstr "" #. type: Plain text -#: en/linkchecker.1:236 +#: en/linkchecker.1:207 msgid "" "Configuration files can specify all options above. They can also specify " "some options that cannot be set on the command line. See " @@ -690,13 +602,13 @@ msgid "" msgstr "" #. type: SH -#: en/linkchecker.1:237 +#: en/linkchecker.1:208 #, no-wrap msgid "OUTPUT TYPES" msgstr "" #. type: Plain text -#: en/linkchecker.1:241 +#: en/linkchecker.1:212 msgid "" "Note that by default only errors and warnings are logged. You should use " "the B<--verbose> option to get the complete URL list, especially when " @@ -704,24 +616,24 @@ msgid "" msgstr "" #. 
type: TP -#: en/linkchecker.1:242 +#: en/linkchecker.1:213 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkchecker.1:245 +#: en/linkchecker.1:216 msgid "Standard text logger, logging URLs in keyword: argument fashion." msgstr "" #. type: TP -#: en/linkchecker.1:245 +#: en/linkchecker.1:216 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkchecker.1:250 +#: en/linkchecker.1:221 msgid "" "Log URLs in keyword: argument fashion, formatted as HTML. Additionally has " "links to the referenced pages. Invalid URLs have HTML and CSS syntax check " @@ -729,94 +641,94 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:250 +#: en/linkchecker.1:221 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkchecker.1:253 +#: en/linkchecker.1:224 msgid "Log check result in CSV format with one URL per line." msgstr "" #. type: TP -#: en/linkchecker.1:253 +#: en/linkchecker.1:224 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkchecker.1:256 +#: en/linkchecker.1:227 msgid "Log parent-child relations between linked URLs as a GML sitemap graph." msgstr "" #. type: TP -#: en/linkchecker.1:256 +#: en/linkchecker.1:227 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkchecker.1:259 +#: en/linkchecker.1:230 msgid "Log parent-child relations between linked URLs as a DOT sitemap graph." msgstr "" #. type: TP -#: en/linkchecker.1:259 +#: en/linkchecker.1:230 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkchecker.1:262 +#: en/linkchecker.1:233 msgid "Log check result as a GraphXML sitemap graph." msgstr "" #. type: TP -#: en/linkchecker.1:262 +#: en/linkchecker.1:233 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkchecker.1:265 +#: en/linkchecker.1:236 msgid "Log check result as machine-readable XML." msgstr "" #. type: TP -#: en/linkchecker.1:265 +#: en/linkchecker.1:236 #, no-wrap msgid "B" msgstr "" #. 
type: Plain text -#: en/linkchecker.1:269 +#: en/linkchecker.1:240 msgid "" "Log check result as an XML sitemap whose protocol is documented at " "B." msgstr "" #. type: TP -#: en/linkchecker.1:269 +#: en/linkchecker.1:240 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkchecker.1:273 +#: en/linkchecker.1:244 msgid "" "Log check result as SQL script with INSERT commands. An example script to " "create the initial SQL table is included as create.sql." msgstr "" #. type: TP -#: en/linkchecker.1:273 +#: en/linkchecker.1:244 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkchecker.1:278 +#: en/linkchecker.1:249 msgid "" "Suitable for cron jobs. Logs the check result into a file " "B<~/.linkchecker/blacklist> which only contains entries with invalid URLs " @@ -824,100 +736,89 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:278 +#: en/linkchecker.1:249 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkchecker.1:281 +#: en/linkchecker.1:252 msgid "Logs nothing. Suitable for debugging or checking the exit code." msgstr "" #. type: SH -#: en/linkchecker.1:282 +#: en/linkchecker.1:253 #, no-wrap msgid "REGULAR EXPRESSIONS" msgstr "" #. type: Plain text -#: en/linkchecker.1:285 +#: en/linkchecker.1:256 msgid "" "LinkChecker accepts Python regular expressions. See " "B for an introduction." msgstr "" #. type: Plain text -#: en/linkchecker.1:288 +#: en/linkchecker.1:259 msgid "" "An addition is that a leading exclamation mark negates the regular " "expression." msgstr "" #. type: SH -#: en/linkchecker.1:289 +#: en/linkchecker.1:260 #, no-wrap msgid "COOKIE FILES" msgstr "" #. type: Plain text -#: en/linkchecker.1:292 +#: en/linkchecker.1:263 msgid "" "A cookie file contains standard HTTP header (RFC 2616) data with the " "following possible names:" msgstr "" #. type: TP -#: en/linkchecker.1:293 -#, no-wrap -msgid "B (optional)" -msgstr "" - -#. 
type: Plain text -#: en/linkchecker.1:296 -msgid "Sets the scheme the cookies are valid for; default scheme is B." -msgstr "" - -#. type: TP -#: en/linkchecker.1:296 +#: en/linkchecker.1:264 #, no-wrap msgid "B (required)" msgstr "" #. type: Plain text -#: en/linkchecker.1:299 +#: en/linkchecker.1:267 msgid "Sets the domain the cookies are valid for." msgstr "" #. type: TP -#: en/linkchecker.1:299 +#: en/linkchecker.1:267 #, no-wrap msgid "B (optional)" msgstr "" #. type: Plain text -#: en/linkchecker.1:302 +#: en/linkchecker.1:270 msgid "Gives the path the cookies are value for; default path is B." msgstr "" #. type: TP -#: en/linkchecker.1:302 +#: en/linkchecker.1:270 #, no-wrap -msgid "B (optional)" +msgid "B (required)" msgstr "" #. type: Plain text -#: en/linkchecker.1:305 +#: en/linkchecker.1:273 msgid "Set cookie name/value. Can be given more than once." msgstr "" #. type: Plain text -#: en/linkchecker.1:307 +#: en/linkchecker.1:275 msgid "Multiple entries are separated by a blank line." msgstr "" #. type: Plain text -#: en/linkchecker.1:311 +#: en/linkchecker.1:279 msgid "" "The example below will send two cookies to all URLs starting with " "B and one to all URLs starting with " @@ -925,7 +826,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:316 +#: en/linkchecker.1:284 #, no-wrap msgid "" " Host: example.com\n" @@ -935,22 +836,21 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:320 +#: en/linkchecker.1:287 #, no-wrap msgid "" -" Scheme: https\n" " Host: example.org\n" " Set-cookie: baggage=\"elitist\"; comment=\"hologram\"\n" msgstr "" #. type: SH -#: en/linkchecker.1:321 +#: en/linkchecker.1:288 #, no-wrap msgid "PROXY SUPPORT" msgstr "" #. type: Plain text -#: en/linkchecker.1:327 +#: en/linkchecker.1:294 msgid "" "To use a proxy on Unix or Windows set the $http_proxy, $https_proxy or " "$ftp_proxy environment variables to the proxy URL. The URL should be of the " @@ -960,53 +860,53 @@ msgid "" msgstr "" #. 
type: Plain text -#: en/linkchecker.1:330 +#: en/linkchecker.1:297 msgid "" "You can also set a comma-separated domain list in the $no_proxy environment " "variables to ignore any proxy settings for these domains." msgstr "" #. type: Plain text -#: en/linkchecker.1:332 +#: en/linkchecker.1:299 msgid "Setting a HTTP proxy on Unix for example looks like this:" msgstr "" #. type: Plain text -#: en/linkchecker.1:334 +#: en/linkchecker.1:301 #, no-wrap msgid " export http_proxy=\"http://proxy.example.com:8080\"\n" msgstr "" #. type: Plain text -#: en/linkchecker.1:336 +#: en/linkchecker.1:303 msgid "Proxy authentication is also supported:" msgstr "" #. type: Plain text -#: en/linkchecker.1:338 +#: en/linkchecker.1:305 #, no-wrap msgid " export http_proxy=\"http://user1:mypass@proxy.example.org:8081\"\n" msgstr "" #. type: Plain text -#: en/linkchecker.1:340 +#: en/linkchecker.1:307 msgid "Setting a proxy on the Windows command prompt:" msgstr "" #. type: Plain text -#: en/linkchecker.1:342 +#: en/linkchecker.1:309 #, no-wrap msgid " set http_proxy=http://proxy.example.com:8080\n" msgstr "" #. type: SH -#: en/linkchecker.1:343 +#: en/linkchecker.1:310 #, no-wrap msgid "PERFORMED CHECKS" msgstr "" #. type: Plain text -#: en/linkchecker.1:349 +#: en/linkchecker.1:316 msgid "" "All URLs have to pass a preliminary syntax test. Minor quoting mistakes will " "issue a warning, all other invalid syntax issues are errors. After the " @@ -1015,33 +915,33 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:350 +#: en/linkchecker.1:317 #, no-wrap msgid "HTTP links (B, B)" msgstr "" #. type: Plain text -#: en/linkchecker.1:358 +#: en/linkchecker.1:324 msgid "" "After connecting to the given HTTP server the given path or query is " "requested. All redirections are followed, and if user/password is given it " -"will be used as authorization when necessary. Permanently moved pages issue " -"a warning. All final HTTP status codes other than 2xx are errors." 
+"will be used as authorization when necessary. All final HTTP status codes " +"other than 2xx are errors." msgstr "" #. type: Plain text -#: en/linkchecker.1:360 +#: en/linkchecker.1:326 msgid "HTML page contents are checked for recursion." msgstr "" #. type: TP -#: en/linkchecker.1:360 +#: en/linkchecker.1:326 #, no-wrap msgid "Local files (B)" msgstr "" #. type: Plain text -#: en/linkchecker.1:365 +#: en/linkchecker.1:331 msgid "" "A regular, readable file that can be opened is valid. A readable directory " "is also valid. All other files, for example device files, unreadable or " @@ -1049,18 +949,18 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:367 +#: en/linkchecker.1:333 msgid "HTML or other parseable file contents are checked for recursion." msgstr "" #. type: TP -#: en/linkchecker.1:367 +#: en/linkchecker.1:333 #, no-wrap msgid "Mail links (B)" msgstr "" #. type: Plain text -#: en/linkchecker.1:372 +#: en/linkchecker.1:338 msgid "" "A mailto: link eventually resolves to a list of email addresses. If one " "address fails, the whole list will fail. For each mail address we check the " @@ -1068,7 +968,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:382 +#: en/linkchecker.1:348 #, no-wrap msgid "" " 1) Check the adress syntax, both of the part before and after\n" @@ -1083,19 +983,19 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:382 +#: en/linkchecker.1:348 #, no-wrap msgid "FTP links (B)" msgstr "" #. type: Plain text -#: en/linkchecker.1:386 +#: en/linkchecker.1:352 #, no-wrap msgid " For FTP links we do:\n" msgstr "" #. type: Plain text -#: en/linkchecker.1:392 +#: en/linkchecker.1:358 #, no-wrap msgid "" " 1) connect to the specified host\n" @@ -1106,13 +1006,13 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:393 +#: en/linkchecker.1:359 #, no-wrap msgid "Telnet links (``telnet:``)" msgstr "" #. 
type: Plain text -#: en/linkchecker.1:398 +#: en/linkchecker.1:364 #, no-wrap msgid "" " We try to connect and if user/password are given, login to the\n" @@ -1120,13 +1020,13 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:399 +#: en/linkchecker.1:365 #, no-wrap msgid "NNTP links (``news:``, ``snews:``, ``nntp``)" msgstr "" #. type: Plain text -#: en/linkchecker.1:404 +#: en/linkchecker.1:370 #, no-wrap msgid "" " We try to connect to the given NNTP server. If a news group or\n" @@ -1134,13 +1034,13 @@ msgid "" msgstr "" #. type: TP -#: en/linkchecker.1:405 +#: en/linkchecker.1:371 #, no-wrap msgid "Unsupported links (``javascript:``, etc.)" msgstr "" #. type: Plain text -#: en/linkchecker.1:410 +#: en/linkchecker.1:376 #, no-wrap msgid "" " An unsupported link will only print a warning. No further checking\n" @@ -1148,7 +1048,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:414 +#: en/linkchecker.1:380 #, no-wrap msgid "" " The complete list of recognized, but unsupported links can be found\n" @@ -1157,25 +1057,57 @@ msgid "" msgstr "" #. type: SH -#: en/linkchecker.1:415 +#: en/linkchecker.1:381 en/linkcheckerrc.5:444 +#, no-wrap +msgid "PLUGINS" +msgstr "" + +#. type: Plain text +#: en/linkchecker.1:383 +msgid "There are two plugin types: connection and content plugins." +msgstr "" + +#. type: Plain text +#: en/linkchecker.1:386 +msgid "Connection plugins are run after a successful connection to the URL host." +msgstr "" + +#. type: Plain text +#: en/linkchecker.1:390 +msgid "" +"Content plugins are run if the URL type has content (mailto: URLs have no " +"content for example) and if the check is not forbidden (ie. by HTTP " +"robots.txt)." +msgstr "" + +#. type: Plain text +#: en/linkchecker.1:394 +msgid "" +"See B for a list of plugins and their " +"documentation. All plugins are enabled via the B(5) " +"configuration file." +msgstr "" + +#. type: SH +#: en/linkchecker.1:395 #, no-wrap msgid "RECURSION" msgstr "" #. 
type: Plain text -#: en/linkchecker.1:418 +#: en/linkchecker.1:398 msgid "" "Before descending recursively into a URL, it has to fulfill several " "conditions. They are checked in this order:" msgstr "" #. type: Plain text -#: en/linkchecker.1:420 +#: en/linkchecker.1:400 msgid "1. A URL must be valid." msgstr "" #. type: Plain text -#: en/linkchecker.1:426 +#: en/linkchecker.1:406 #, no-wrap msgid "" "2. A URL must be parseable. This currently includes HTML files,\n" @@ -1186,7 +1118,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:429 +#: en/linkchecker.1:409 #, no-wrap msgid "" "3. The URL content must be retrievable. This is usually the case\n" @@ -1194,7 +1126,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:432 +#: en/linkchecker.1:412 #, no-wrap msgid "" "4. The maximum recursion level must not be exceeded. It is configured\n" @@ -1202,7 +1134,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:435 +#: en/linkchecker.1:415 #, no-wrap msgid "" "5. It must not match the ignored URL list. This is controlled with\n" @@ -1210,7 +1142,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:439 +#: en/linkchecker.1:419 #, no-wrap msgid "" "6. The Robots Exclusion Protocol must allow links in the URL to be\n" @@ -1219,20 +1151,20 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:442 +#: en/linkchecker.1:422 msgid "" "Note that the directory recursion reads all files in that directory, not " "just a subset like B." msgstr "" #. type: SH -#: en/linkchecker.1:443 +#: en/linkchecker.1:423 #, no-wrap msgid "NOTES" msgstr "" #. type: Plain text -#: en/linkchecker.1:448 +#: en/linkchecker.1:428 msgid "" "URLs on the commandline starting with B are treated like " "B, URLs starting with B are treated like B. " @@ -1240,7 +1172,7 @@ msgid "" msgstr "" #. 
type: Plain text -#: en/linkchecker.1:453 +#: en/linkchecker.1:433 msgid "" "If you have your system configured to automatically establish a connection " "to the internet (e.g. with diald), it will connect when checking links not " @@ -1249,106 +1181,106 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkchecker.1:455 +#: en/linkchecker.1:435 msgid "Javascript links are not supported." msgstr "" #. type: Plain text -#: en/linkchecker.1:458 +#: en/linkchecker.1:438 msgid "" "If your platform does not support threading, LinkChecker disables it " "automatically." msgstr "" #. type: Plain text -#: en/linkchecker.1:460 +#: en/linkchecker.1:440 msgid "You can supply multiple user/password pairs in a configuration file." msgstr "" #. type: Plain text -#: en/linkchecker.1:463 +#: en/linkchecker.1:443 msgid "" "When checking B links the given NNTP host doesn't need to be the same " "as the host of the user browsing your pages." msgstr "" #. type: SH -#: en/linkchecker.1:464 +#: en/linkchecker.1:444 #, no-wrap msgid "ENVIRONMENT" msgstr "" #. type: Plain text -#: en/linkchecker.1:466 +#: en/linkchecker.1:446 msgid "B - specifies default NNTP server" msgstr "" #. type: Plain text -#: en/linkchecker.1:468 +#: en/linkchecker.1:448 msgid "B - specifies default HTTP proxy server" msgstr "" #. type: Plain text -#: en/linkchecker.1:470 +#: en/linkchecker.1:450 msgid "B - specifies default FTP proxy server" msgstr "" #. type: Plain text -#: en/linkchecker.1:472 +#: en/linkchecker.1:452 msgid "" "B - comma-separated list of domains to not contact over a proxy " "server" msgstr "" #. type: Plain text -#: en/linkchecker.1:474 +#: en/linkchecker.1:454 msgid "B, B, B - specify output language" msgstr "" #. type: SH -#: en/linkchecker.1:475 +#: en/linkchecker.1:455 #, no-wrap msgid "RETURN VALUE" msgstr "" #. type: Plain text -#: en/linkchecker.1:477 +#: en/linkchecker.1:457 msgid "The return value is 2 when" msgstr "" #. 
type: Plain text -#: en/linkchecker.1:479 +#: en/linkchecker.1:459 msgid "a program error occurred." msgstr "" #. type: Plain text -#: en/linkchecker.1:481 +#: en/linkchecker.1:461 msgid "The return value is 1 when" msgstr "" #. type: Plain text -#: en/linkchecker.1:483 +#: en/linkchecker.1:463 msgid "invalid links were found or" msgstr "" #. type: Plain text -#: en/linkchecker.1:485 +#: en/linkchecker.1:465 msgid "link warnings were found and warnings are enabled" msgstr "" #. type: Plain text -#: en/linkchecker.1:487 +#: en/linkchecker.1:467 msgid "Else the return value is zero." msgstr "" #. type: SH -#: en/linkchecker.1:488 +#: en/linkchecker.1:468 #, no-wrap msgid "LIMITATIONS" msgstr "" #. type: Plain text -#: en/linkchecker.1:492 +#: en/linkchecker.1:472 msgid "" "LinkChecker consumes memory for each queued URL to check. With thousands of " "queued URLs the amount of consumed memory can become quite large. This might " @@ -1356,70 +1288,70 @@ msgid "" msgstr "" #. type: SH -#: en/linkchecker.1:493 +#: en/linkchecker.1:473 #, no-wrap msgid "FILES" msgstr "" #. type: Plain text -#: en/linkchecker.1:495 +#: en/linkchecker.1:475 msgid "B<~/.linkchecker/linkcheckerrc> - default configuration file" msgstr "" #. type: Plain text -#: en/linkchecker.1:497 +#: en/linkchecker.1:477 msgid "B<~/.linkchecker/blacklist> - default blacklist logger output filename" msgstr "" #. type: Plain text -#: en/linkchecker.1:499 +#: en/linkchecker.1:479 msgid "BI - default logger file output name" msgstr "" #. type: Plain text -#: en/linkchecker.1:501 +#: en/linkchecker.1:481 msgid "" "B - valid " "output encodings" msgstr "" #. type: Plain text -#: en/linkchecker.1:503 +#: en/linkchecker.1:483 msgid "" "B - regular expression " "documentation" msgstr "" #. type: SH -#: en/linkchecker.1:504 en/linkcheckerrc.5:614 en/linkchecker-gui.1:16 +#: en/linkchecker.1:484 en/linkcheckerrc.5:538 en/linkchecker-gui.1:16 #, no-wrap msgid "SEE ALSO" msgstr "" #. 
type: Plain text -#: en/linkchecker.1:506 +#: en/linkchecker.1:486 msgid "B(5)" msgstr "" #. type: SH -#: en/linkchecker.1:507 en/linkcheckerrc.5:617 en/linkchecker-gui.1:19 +#: en/linkchecker.1:487 en/linkcheckerrc.5:541 en/linkchecker-gui.1:19 #, no-wrap msgid "AUTHOR" msgstr "" #. type: Plain text -#: en/linkchecker.1:509 en/linkcheckerrc.5:619 en/linkchecker-gui.1:21 +#: en/linkchecker.1:489 en/linkcheckerrc.5:543 en/linkchecker-gui.1:21 msgid "Bastian Kleineidam Ebastian.kleineidam@web.deE" msgstr "" #. type: SH -#: en/linkchecker.1:510 en/linkcheckerrc.5:620 en/linkchecker-gui.1:22 +#: en/linkchecker.1:490 en/linkcheckerrc.5:544 en/linkchecker-gui.1:22 #, no-wrap msgid "COPYRIGHT" msgstr "" #. type: Plain text -#: en/linkchecker.1:511 en/linkcheckerrc.5:621 +#: en/linkchecker.1:491 en/linkcheckerrc.5:545 msgid "Copyright \\(co 2000-2014 Bastian Kleineidam" msgstr "" @@ -1461,257 +1393,131 @@ msgid "SETTINGS" msgstr "" #. type: SS -#: en/linkcheckerrc.5:13 +#: en/linkcheckerrc.5:12 #, no-wrap msgid "[checking]" msgstr "" #. type: TP -#: en/linkcheckerrc.5:14 +#: en/linkcheckerrc.5:13 #, no-wrap -msgid "B[B<0>|B<1>]" +msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:20 -msgid "Command line option: B<--anchors>" -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:20 -#, no-wrap -msgid "B[B<0>|B<1>]" +#: en/linkcheckerrc.5:17 +msgid "" +"Read a file with initial cookie data. The cookie data format is explained in " +"linkchecker(1)." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:25 -msgid "Command line option: B<--check-css>" +#: en/linkcheckerrc.5:19 +msgid "Command line option: B<--cookiefile>" msgstr "" #. type: TP -#: en/linkcheckerrc.5:25 +#: en/linkcheckerrc.5:19 #, no-wrap -msgid "B[B<0>|B<1>]" +msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:30 -msgid "Command line option: B<--check-html>" -msgstr "" - -#. 
type: TP -#: en/linkcheckerrc.5:30 -#, no-wrap -msgid "BI" +#: en/linkcheckerrc.5:23 +msgid "" +"When checking absolute URLs inside local files, the given root directory is " +"used as base URL." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:33 -msgid "Filename of B config file." +#: en/linkcheckerrc.5:27 +msgid "" +"Note that the given directory must have URL syntax, so it must use a slash " +"to join directories instead of a backslash. And the given directory must " +"end with a slash." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:35 en/linkcheckerrc.5:56 en/linkcheckerrc.5:66 en/linkcheckerrc.5:134 en/linkcheckerrc.5:144 en/linkcheckerrc.5:153 en/linkcheckerrc.5:161 en/linkcheckerrc.5:168 en/linkcheckerrc.5:175 en/linkcheckerrc.5:182 en/linkcheckerrc.5:194 en/linkcheckerrc.5:200 en/linkcheckerrc.5:313 en/linkcheckerrc.5:330 +#: en/linkcheckerrc.5:29 en/linkcheckerrc.5:77 en/linkcheckerrc.5:86 en/linkcheckerrc.5:94 en/linkcheckerrc.5:112 en/linkcheckerrc.5:118 en/linkcheckerrc.5:229 en/linkcheckerrc.5:246 msgid "Command line option: none" msgstr "" #. type: TP -#: en/linkcheckerrc.5:35 +#: en/linkcheckerrc.5:29 #, no-wrap -msgid "BI" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:39 -msgid "" -"Read a file with initial cookie data. The cookie data format is explained in " -"linkchecker(1)." +msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:41 -msgid "Command line option: B<--cookiefile>" +#: en/linkcheckerrc.5:36 +msgid "Command line option: B<--nntp-server>" msgstr "" #. type: TP -#: en/linkcheckerrc.5:41 +#: en/linkcheckerrc.5:36 #, no-wrap -msgid "B[B<0>|B<1>]" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:44 -msgid "Accept and send HTTP cookies." +msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:46 -msgid "Command line option: B<--cookies>" +#: en/linkcheckerrc.5:43 +msgid "Command line option: B<--recursion-level>" msgstr "" #. 
type: TP -#: en/linkcheckerrc.5:46 +#: en/linkcheckerrc.5:43 #, no-wrap -msgid "B[B<0>|B<1>]" +msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:51 -msgid "" -"When checking finishes, write a memory dump to a temporary file. The memory " -"dump is written both when checking finishes normally and when checking gets " -"canceled." +#: en/linkcheckerrc.5:49 +msgid "Command line option: B<--threads>" +msgstr "" + +#. type: TP +#: en/linkcheckerrc.5:49 +#, no-wrap +msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:54 -msgid "" -"The memory dump only works if the python-meliae package is installed. " -"Otherwise a warning is printed to install it." +#: en/linkcheckerrc.5:55 en/linkcheckerrc.5:62 +msgid "Command line option: B<--timeout>" msgstr "" #. type: TP -#: en/linkcheckerrc.5:56 +#: en/linkcheckerrc.5:55 #, no-wrap -msgid "BI" +msgid "BI" msgstr "" #. type: Plain text #: en/linkcheckerrc.5:60 msgid "" -"When checking absolute URLs inside local files, the given root directory is " -"used as base URL." -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:64 -msgid "" -"Note that the given directory must have URL syntax, so it must use a slash " -"to join directories instead of a backslash. And the given directory must " -"end with a slash." +"Time to wait for checks to finish after the user aborts the first time (with " +"Ctrl-C or the abort button). The default abort timeout is 300 seconds." msgstr "" #. type: TP -#: en/linkcheckerrc.5:66 +#: en/linkcheckerrc.5:62 #, no-wrap -msgid "BI" +msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:73 -msgid "Command line option: B<--nntp-server>" +#: en/linkcheckerrc.5:69 +msgid "Command line option: B<--user-agent>" msgstr "" #. type: TP -#: en/linkcheckerrc.5:73 +#: en/linkcheckerrc.5:69 #, no-wrap -msgid "BI" +msgid "B[B<0>|B<1>|I]" msgstr "" #. 
type: Plain text -#: en/linkcheckerrc.5:77 -msgid "" -"Pause the given number of seconds between two subsequent connection requests " -"to the same host." -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:79 -msgid "Command line option: B<--pause>" -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:79 -#, no-wrap -msgid "BI" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:86 -msgid "Command line option: B<--recursion-level>" -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:86 -#, no-wrap -msgid "B[B<0>|B<1>]" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:91 -msgid "Command line option: B<--scan-virus>" -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:91 -#, no-wrap -msgid "BI" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:97 -msgid "Command line option: B<--threads>" -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:97 -#, no-wrap -msgid "BI" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:103 -msgid "Command line option: B<--timeout>" -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:103 -#, no-wrap -msgid "BI" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:110 -msgid "Command line option: B<--user-agent>" -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:110 -#, no-wrap -msgid "B=I" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:118 -msgid "" -"Use this to check for pages that contain some form of error, for example " -"\"This page has moved\" or \"Oracle Application Server error\"." -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:120 -msgid "Command line option: B<--warning-regex>" -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:120 -#, no-wrap -msgid "BI" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:126 -msgid "Command line option: B<--warning-size-bytes>" -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:126 -#, no-wrap -msgid "B[B<0>|B<1>|I]" -msgstr "" - -#. 
type: Plain text -#: en/linkcheckerrc.5:132 +#: en/linkcheckerrc.5:75 msgid "" "If set to zero disables SSL certificate checking. If set to one (the " "default) enables SSL certificate checking with the provided CA certificate " @@ -1719,32 +1525,13 @@ msgid "" msgstr "" #. type: TP -#: en/linkcheckerrc.5:134 -#, no-wrap -msgid "BI" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:140 -msgid "" -"Check that SSL certificates are at least the given number of days valid. " -"The number must not be negative. If the number of days is zero a warning is " -"printed only for certificates that are already expired." -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:142 -msgid "The default number of days is 14." -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:144 +#: en/linkcheckerrc.5:77 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:149 +#: en/linkcheckerrc.5:82 msgid "" "Stop checking new URLs after the given number of seconds. Same as if the " "user stops (by hitting Ctrl-C or clicking the abort buttin in the GUI) " @@ -1752,149 +1539,144 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:151 +#: en/linkcheckerrc.5:84 msgid "The default is not to stop until all URLs are checked." msgstr "" #. type: TP -#: en/linkcheckerrc.5:153 +#: en/linkcheckerrc.5:86 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:157 +#: en/linkcheckerrc.5:90 msgid "" "Maximum number of URLs to check. New URLs will not be queued after the given " "number of URLs is checked." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:159 +#: en/linkcheckerrc.5:92 msgid "The default is to queue and check all URLs." msgstr "" #. type: TP -#: en/linkcheckerrc.5:161 +#: en/linkcheckerrc.5:94 #, no-wrap -msgid "BI" +msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:164 -msgid "Maximum number of connections to HTTP servers." -msgstr "" - -#. 
type: Plain text -#: en/linkcheckerrc.5:166 en/linkcheckerrc.5:173 -msgid "The default is 10." -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:168 -#, no-wrap -msgid "BI" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:171 -msgid "Maximum number of connections to HTTPS servers." +#: en/linkcheckerrc.5:97 +msgid "Limit the maximum number of requests per second to one host." msgstr "" #. type: TP -#: en/linkcheckerrc.5:175 +#: en/linkcheckerrc.5:97 #, no-wrap -msgid "BI" +msgid "BI[B<,>I...]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:178 -msgid "Maximum number of connections to FTP servers." -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:180 -msgid "The default is 2." +#: en/linkcheckerrc.5:100 +msgid "Allowed URL schemes as comma-separated list." msgstr "" #. type: SS -#: en/linkcheckerrc.5:182 +#: en/linkcheckerrc.5:100 #, no-wrap msgid "[filtering]" msgstr "" #. type: TP -#: en/linkcheckerrc.5:183 +#: en/linkcheckerrc.5:101 #, no-wrap msgid "BI (MULTILINE)" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:186 +#: en/linkcheckerrc.5:104 msgid "Only check syntax of URLs matching the given regular expressions." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:188 +#: en/linkcheckerrc.5:106 msgid "Command line option: B<--ignore-url>" msgstr "" #. type: TP -#: en/linkcheckerrc.5:188 +#: en/linkcheckerrc.5:106 #, no-wrap msgid "BI[B<,>I...]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:192 +#: en/linkcheckerrc.5:110 msgid "" "Ignore the comma-separated list of warnings. See B for the list of " "supported warnings." msgstr "" #. type: TP -#: en/linkcheckerrc.5:194 +#: en/linkcheckerrc.5:112 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:198 +#: en/linkcheckerrc.5:116 msgid "" "Regular expression to add more URLs recognized as internal links. Default " "is that URLs given on the command line are internal." msgstr "" #. 
type: TP -#: en/linkcheckerrc.5:200 +#: en/linkcheckerrc.5:118 #, no-wrap msgid "BI (MULTILINE)" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:204 +#: en/linkcheckerrc.5:122 msgid "Check but do not recurse into URLs matching the given regular expressions." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:206 +#: en/linkcheckerrc.5:124 msgid "Command line option: B<--no-follow-url>" msgstr "" +#. type: TP +#: en/linkcheckerrc.5:124 +#, no-wrap +msgid "B[B<0>|B<1>]" +msgstr "" + +#. type: Plain text +#: en/linkcheckerrc.5:127 +msgid "Check external links. Default is to check internal links only." +msgstr "" + +#. type: Plain text +#: en/linkcheckerrc.5:129 +msgid "Command line option: B<--checkextern>" +msgstr "" + #. type: SS -#: en/linkcheckerrc.5:206 +#: en/linkcheckerrc.5:129 #, no-wrap msgid "[authentication]" msgstr "" #. type: TP -#: en/linkcheckerrc.5:207 +#: en/linkcheckerrc.5:130 #, no-wrap msgid "BI I [I] (MULTILINE)" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:213 +#: en/linkcheckerrc.5:136 msgid "" "Provide different user/password pairs for different link types. Entries are " "a triple (URL regex, username, password) or a tuple (URL regex, username), " @@ -1902,14 +1684,14 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:216 +#: en/linkcheckerrc.5:139 msgid "" "The password is optional and if missing it has to be entered at the " "commandline." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:222 +#: en/linkcheckerrc.5:145 msgid "" "If the regular expression matches the checked URL, the given user/password " "pair is used for authentication. The commandline options B<-u> and B<-p> " @@ -1919,91 +1701,72 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:224 +#: en/linkcheckerrc.5:147 msgid "Command line option: B<-u>, B<-p>" msgstr "" #. type: TP -#: en/linkcheckerrc.5:224 +#: en/linkcheckerrc.5:147 #, no-wrap msgid "BI" msgstr "" #. 
type: Plain text -#: en/linkcheckerrc.5:229 +#: en/linkcheckerrc.5:151 msgid "" "A login URL to be visited before checking. Also needs authentication data " -"set for it, and implies using cookies because most logins use cookies " -"nowadays." +"set for it." msgstr "" #. type: TP -#: en/linkcheckerrc.5:229 +#: en/linkcheckerrc.5:151 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:232 +#: en/linkcheckerrc.5:154 msgid "The name of the user CGI field. Default name is B." msgstr "" #. type: TP -#: en/linkcheckerrc.5:232 +#: en/linkcheckerrc.5:154 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:235 +#: en/linkcheckerrc.5:157 msgid "The name of the password CGI field. Default name is B." msgstr "" #. type: TP -#: en/linkcheckerrc.5:235 +#: en/linkcheckerrc.5:157 #, no-wrap msgid "BIB<:>I (MULTILINE)" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:239 +#: en/linkcheckerrc.5:161 msgid "" "Optionally any additional CGI name/value pairs. Note that the default values " "are submitted automatically." msgstr "" #. type: SS -#: en/linkcheckerrc.5:239 +#: en/linkcheckerrc.5:161 #, no-wrap msgid "[output]" msgstr "" #. type: TP -#: en/linkcheckerrc.5:240 -#, no-wrap -msgid "B[B<0>|B<1>]" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:244 -msgid "" -"If set log all checked URLs, even duplicates. Default is to log duplicate " -"URLs only once." -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:246 -msgid "Command line option: B<--complete>" -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:246 +#: en/linkcheckerrc.5:162 #, no-wrap msgid "BI[B<,>I...]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:252 +#: en/linkcheckerrc.5:168 msgid "" "Print debugging output for the given loggers. Available loggers are " "B, B, B, B, B, B and B. " @@ -2011,25 +1774,25 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:254 +#: en/linkcheckerrc.5:170 msgid "Command line option: B<--debug>" msgstr "" #. 
type: TP -#: en/linkcheckerrc.5:254 +#: en/linkcheckerrc.5:170 #, no-wrap msgid "BI[B<,>I...]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:259 +#: en/linkcheckerrc.5:175 msgid "" "Output to a files BI, " "B<$HOME/.linkchecker/blacklist> for B output." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:265 +#: en/linkcheckerrc.5:181 msgid "" "Valid file output types are B, B, B, B, B, " "B, B, B or B Default is no file output. The " @@ -2038,18 +1801,18 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:267 +#: en/linkcheckerrc.5:183 msgid "Command line option: B<--file-output>" msgstr "" #. type: TP -#: en/linkcheckerrc.5:267 +#: en/linkcheckerrc.5:183 #, no-wrap msgid "BI[BI]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:273 +#: en/linkcheckerrc.5:189 msgid "" "Specify output type as B, B, B, B, B, B, " "B, B or B. Default type is B. The various " @@ -2057,7 +1820,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:277 +#: en/linkcheckerrc.5:193 msgid "" "The I specifies the output encoding, the default is that of your " "locale. Valid encodings are listed at " @@ -2065,136 +1828,136 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:279 +#: en/linkcheckerrc.5:195 msgid "Command line option: B<--output>" msgstr "" #. type: TP -#: en/linkcheckerrc.5:279 +#: en/linkcheckerrc.5:195 #, no-wrap msgid "B[B<0>|B<1>]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:283 +#: en/linkcheckerrc.5:199 msgid "" "If set, operate quiet. An alias for B. This is only useful with " "B." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:285 en/linkcheckerrc.5:295 +#: en/linkcheckerrc.5:201 en/linkcheckerrc.5:211 msgid "Command line option: B<--verbose>" msgstr "" #. type: TP -#: en/linkcheckerrc.5:285 +#: en/linkcheckerrc.5:201 #, no-wrap msgid "B[B<0>|B<1>]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:288 +#: en/linkcheckerrc.5:204 msgid "Control printing check status messages. Default is 1." msgstr "" #. 
type: Plain text -#: en/linkcheckerrc.5:290 +#: en/linkcheckerrc.5:206 msgid "Command line option: B<--no-status>" msgstr "" #. type: TP -#: en/linkcheckerrc.5:290 +#: en/linkcheckerrc.5:206 #, no-wrap msgid "B[B<0>|B<1>]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:293 +#: en/linkcheckerrc.5:209 msgid "" "If set log all checked URLs once. Default is to log only errors and " "warnings." msgstr "" #. type: TP -#: en/linkcheckerrc.5:295 +#: en/linkcheckerrc.5:211 #, no-wrap msgid "B[B<0>|B<1>]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:298 +#: en/linkcheckerrc.5:214 msgid "If set log warnings. Default is to log warnings." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:300 +#: en/linkcheckerrc.5:216 msgid "Command line option: B<--no-warnings>" msgstr "" #. type: SS -#: en/linkcheckerrc.5:300 +#: en/linkcheckerrc.5:216 #, no-wrap msgid "[text]" msgstr "" #. type: TP -#: en/linkcheckerrc.5:301 en/linkcheckerrc.5:364 en/linkcheckerrc.5:374 en/linkcheckerrc.5:384 en/linkcheckerrc.5:400 en/linkcheckerrc.5:416 en/linkcheckerrc.5:447 en/linkcheckerrc.5:454 en/linkcheckerrc.5:464 en/linkcheckerrc.5:474 +#: en/linkcheckerrc.5:217 en/linkcheckerrc.5:280 en/linkcheckerrc.5:290 en/linkcheckerrc.5:300 en/linkcheckerrc.5:316 en/linkcheckerrc.5:332 en/linkcheckerrc.5:363 en/linkcheckerrc.5:370 en/linkcheckerrc.5:380 en/linkcheckerrc.5:390 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:305 +#: en/linkcheckerrc.5:221 msgid "" "Specify output filename for text logging. Default filename is " "B." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:307 +#: en/linkcheckerrc.5:223 msgid "Command line option: B<--file-output=>" msgstr "" #. 
type: TP -#: en/linkcheckerrc.5:307 en/linkcheckerrc.5:367 en/linkcheckerrc.5:377 en/linkcheckerrc.5:387 en/linkcheckerrc.5:403 en/linkcheckerrc.5:419 en/linkcheckerrc.5:457 en/linkcheckerrc.5:467 en/linkcheckerrc.5:477 +#: en/linkcheckerrc.5:223 en/linkcheckerrc.5:283 en/linkcheckerrc.5:293 en/linkcheckerrc.5:303 en/linkcheckerrc.5:319 en/linkcheckerrc.5:335 en/linkcheckerrc.5:373 en/linkcheckerrc.5:383 en/linkcheckerrc.5:393 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:311 +#: en/linkcheckerrc.5:227 msgid "" "Comma-separated list of parts that have to be logged. See B " "below." msgstr "" #. type: TP -#: en/linkcheckerrc.5:313 en/linkcheckerrc.5:370 en/linkcheckerrc.5:380 en/linkcheckerrc.5:390 en/linkcheckerrc.5:406 en/linkcheckerrc.5:422 en/linkcheckerrc.5:450 en/linkcheckerrc.5:460 en/linkcheckerrc.5:470 en/linkcheckerrc.5:480 +#: en/linkcheckerrc.5:229 en/linkcheckerrc.5:286 en/linkcheckerrc.5:296 en/linkcheckerrc.5:306 en/linkcheckerrc.5:322 en/linkcheckerrc.5:338 en/linkcheckerrc.5:366 en/linkcheckerrc.5:376 en/linkcheckerrc.5:386 en/linkcheckerrc.5:396 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:317 +#: en/linkcheckerrc.5:233 msgid "" "Valid encodings are listed in " "B." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:319 +#: en/linkcheckerrc.5:235 msgid "Default encoding is B." msgstr "" #. type: TP -#: en/linkcheckerrc.5:319 +#: en/linkcheckerrc.5:235 #, no-wrap msgid "I" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:328 +#: en/linkcheckerrc.5:244 msgid "" "Color settings for the various log parts, syntax is I or " "IB<;>I. The I can be B, B, B, " @@ -2204,332 +1967,332 @@ msgid "" msgstr "" #. type: TP -#: en/linkcheckerrc.5:330 +#: en/linkcheckerrc.5:246 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:333 +#: en/linkcheckerrc.5:249 msgid "Set parent color. Default is B." msgstr "" #. 
type: TP -#: en/linkcheckerrc.5:333 +#: en/linkcheckerrc.5:249 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:336 +#: en/linkcheckerrc.5:252 msgid "Set URL color. Default is B." msgstr "" #. type: TP -#: en/linkcheckerrc.5:336 +#: en/linkcheckerrc.5:252 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:339 +#: en/linkcheckerrc.5:255 msgid "Set name color. Default is B." msgstr "" #. type: TP -#: en/linkcheckerrc.5:339 +#: en/linkcheckerrc.5:255 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:342 +#: en/linkcheckerrc.5:258 msgid "Set real URL color. Default is B." msgstr "" #. type: TP -#: en/linkcheckerrc.5:342 +#: en/linkcheckerrc.5:258 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:345 +#: en/linkcheckerrc.5:261 msgid "Set base URL color. Default is B." msgstr "" #. type: TP -#: en/linkcheckerrc.5:345 +#: en/linkcheckerrc.5:261 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:348 +#: en/linkcheckerrc.5:264 msgid "Set valid color. Default is B." msgstr "" #. type: TP -#: en/linkcheckerrc.5:348 +#: en/linkcheckerrc.5:264 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:351 +#: en/linkcheckerrc.5:267 msgid "Set invalid color. Default is B." msgstr "" #. type: TP -#: en/linkcheckerrc.5:351 +#: en/linkcheckerrc.5:267 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:354 +#: en/linkcheckerrc.5:270 msgid "Set info color. Default is B." msgstr "" #. type: TP -#: en/linkcheckerrc.5:354 +#: en/linkcheckerrc.5:270 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:357 +#: en/linkcheckerrc.5:273 msgid "Set warning color. Default is B." msgstr "" #. type: TP -#: en/linkcheckerrc.5:357 +#: en/linkcheckerrc.5:273 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:360 +#: en/linkcheckerrc.5:276 msgid "Set download time color. Default is B." 
msgstr "" #. type: TP -#: en/linkcheckerrc.5:360 +#: en/linkcheckerrc.5:276 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:363 +#: en/linkcheckerrc.5:279 msgid "Set reset color. Default is B." msgstr "" #. type: SS -#: en/linkcheckerrc.5:363 +#: en/linkcheckerrc.5:279 #, no-wrap msgid "[gml]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:367 en/linkcheckerrc.5:370 en/linkcheckerrc.5:373 en/linkcheckerrc.5:377 en/linkcheckerrc.5:380 en/linkcheckerrc.5:383 en/linkcheckerrc.5:387 en/linkcheckerrc.5:390 en/linkcheckerrc.5:393 en/linkcheckerrc.5:403 en/linkcheckerrc.5:406 en/linkcheckerrc.5:409 en/linkcheckerrc.5:419 en/linkcheckerrc.5:422 en/linkcheckerrc.5:425 en/linkcheckerrc.5:450 en/linkcheckerrc.5:453 en/linkcheckerrc.5:457 en/linkcheckerrc.5:460 en/linkcheckerrc.5:463 en/linkcheckerrc.5:467 en/linkcheckerrc.5:470 en/linkcheckerrc.5:473 en/linkcheckerrc.5:477 en/linkcheckerrc.5:480 en/linkcheckerrc.5:483 +#: en/linkcheckerrc.5:283 en/linkcheckerrc.5:286 en/linkcheckerrc.5:289 en/linkcheckerrc.5:293 en/linkcheckerrc.5:296 en/linkcheckerrc.5:299 en/linkcheckerrc.5:303 en/linkcheckerrc.5:306 en/linkcheckerrc.5:309 en/linkcheckerrc.5:319 en/linkcheckerrc.5:322 en/linkcheckerrc.5:325 en/linkcheckerrc.5:335 en/linkcheckerrc.5:338 en/linkcheckerrc.5:341 en/linkcheckerrc.5:366 en/linkcheckerrc.5:369 en/linkcheckerrc.5:373 en/linkcheckerrc.5:376 en/linkcheckerrc.5:379 en/linkcheckerrc.5:383 en/linkcheckerrc.5:386 en/linkcheckerrc.5:389 en/linkcheckerrc.5:393 en/linkcheckerrc.5:396 en/linkcheckerrc.5:399 msgid "See [text] section above." msgstr "" #. type: SS -#: en/linkcheckerrc.5:373 +#: en/linkcheckerrc.5:289 #, no-wrap msgid "[dot]" msgstr "" #. type: SS -#: en/linkcheckerrc.5:383 +#: en/linkcheckerrc.5:299 #, no-wrap msgid "[csv]" msgstr "" #. type: TP -#: en/linkcheckerrc.5:393 en/linkcheckerrc.5:412 +#: en/linkcheckerrc.5:309 en/linkcheckerrc.5:328 #, no-wrap msgid "BI" msgstr "" #. 
type: Plain text -#: en/linkcheckerrc.5:396 +#: en/linkcheckerrc.5:312 msgid "Set CSV separator. Default is a comma (B<,>)." msgstr "" #. type: TP -#: en/linkcheckerrc.5:396 +#: en/linkcheckerrc.5:312 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:399 +#: en/linkcheckerrc.5:315 msgid "Set CSV quote character. Default is a double quote (B<\">)." msgstr "" #. type: SS -#: en/linkcheckerrc.5:399 +#: en/linkcheckerrc.5:315 #, no-wrap msgid "[sql]" msgstr "" #. type: TP -#: en/linkcheckerrc.5:409 +#: en/linkcheckerrc.5:325 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:412 +#: en/linkcheckerrc.5:328 msgid "Set database name to store into. Default is B." msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:415 +#: en/linkcheckerrc.5:331 msgid "Set SQL command separator character. Default is a semicolor (B<;>)." msgstr "" #. type: SS -#: en/linkcheckerrc.5:415 +#: en/linkcheckerrc.5:331 #, no-wrap msgid "[html]" msgstr "" #. type: TP -#: en/linkcheckerrc.5:425 +#: en/linkcheckerrc.5:341 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:428 +#: en/linkcheckerrc.5:344 msgid "Set HTML background color. Default is B<#fff7e5>." msgstr "" #. type: TP -#: en/linkcheckerrc.5:428 +#: en/linkcheckerrc.5:344 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:431 +#: en/linkcheckerrc.5:347 msgid "Set HTML URL color. Default is B<#dcd5cf>." msgstr "" #. type: TP -#: en/linkcheckerrc.5:431 +#: en/linkcheckerrc.5:347 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:434 +#: en/linkcheckerrc.5:350 msgid "Set HTML border color. Default is B<#000000>." msgstr "" #. type: TP -#: en/linkcheckerrc.5:434 +#: en/linkcheckerrc.5:350 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:437 +#: en/linkcheckerrc.5:353 msgid "Set HTML link color. Default is B<#191c83>." msgstr "" #. 
type: TP -#: en/linkcheckerrc.5:437 +#: en/linkcheckerrc.5:353 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:440 +#: en/linkcheckerrc.5:356 msgid "Set HTML warning color. Default is B<#e0954e>." msgstr "" #. type: TP -#: en/linkcheckerrc.5:440 +#: en/linkcheckerrc.5:356 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:443 +#: en/linkcheckerrc.5:359 msgid "Set HTML error color. Default is B<#db4930>." msgstr "" #. type: TP -#: en/linkcheckerrc.5:443 +#: en/linkcheckerrc.5:359 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:446 +#: en/linkcheckerrc.5:362 msgid "Set HTML valid color. Default is B<#3ba557>." msgstr "" #. type: SS -#: en/linkcheckerrc.5:446 +#: en/linkcheckerrc.5:362 #, no-wrap msgid "[blacklist]" msgstr "" #. type: SS -#: en/linkcheckerrc.5:453 +#: en/linkcheckerrc.5:369 #, no-wrap msgid "[xml]" msgstr "" #. type: SS -#: en/linkcheckerrc.5:463 +#: en/linkcheckerrc.5:379 #, no-wrap msgid "[gxml]" msgstr "" #. type: SS -#: en/linkcheckerrc.5:473 +#: en/linkcheckerrc.5:389 #, no-wrap msgid "[sitemap]" msgstr "" #. type: TP -#: en/linkcheckerrc.5:483 +#: en/linkcheckerrc.5:399 #, no-wrap msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:487 +#: en/linkcheckerrc.5:403 msgid "" "A number between 0.0 and 1.0 determining the priority. The default priority " "for the first URL is 1.0, for all child URLs 0.5." msgstr "" #. type: TP -#: en/linkcheckerrc.5:487 +#: en/linkcheckerrc.5:403 #, no-wrap msgid "B[B|B|B|B|B|B|B]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:490 +#: en/linkcheckerrc.5:406 msgid "The frequence pages are changing with." msgstr "" #. type: SH -#: en/linkcheckerrc.5:491 +#: en/linkcheckerrc.5:407 #, no-wrap msgid "LOGGER PARTS" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:507 +#: en/linkcheckerrc.5:423 #, no-wrap msgid "" " B (for all parts)\n" @@ -2551,13 +2314,13 @@ msgid "" msgstr "" #. 
type: SH -#: en/linkcheckerrc.5:507 +#: en/linkcheckerrc.5:423 #, no-wrap msgid "MULTILINE" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:511 +#: en/linkcheckerrc.5:427 msgid "" "Some option values can span multiple lines. Each line has to be indented for " "that to work. Lines starting with a hash (B<#>) will be ignored, though they " @@ -2565,7 +2328,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:517 +#: en/linkcheckerrc.5:433 #, no-wrap msgid "" " ignore=\n" @@ -2576,13 +2339,13 @@ msgid "" msgstr "" #. type: SH -#: en/linkcheckerrc.5:518 +#: en/linkcheckerrc.5:434 #, no-wrap msgid "EXAMPLE" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:521 +#: en/linkcheckerrc.5:437 #, no-wrap msgid "" " [output]\n" @@ -2590,7 +2353,7 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:524 +#: en/linkcheckerrc.5:440 #, no-wrap msgid "" " [checking]\n" @@ -2598,325 +2361,309 @@ msgid "" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:527 +#: en/linkcheckerrc.5:443 #, no-wrap msgid "" " [filtering]\n" " ignorewarnings=http-moved-permanent\n" msgstr "" -#. type: SH -#: en/linkcheckerrc.5:528 -#, no-wrap -msgid "WARNINGS" -msgstr "" - #. type: Plain text -#: en/linkcheckerrc.5:531 +#: en/linkcheckerrc.5:448 msgid "" -"The following warnings are recognized in the 'ignorewarnings' config file " -"entry:" +"All plugins have a separate section. If the section appears in the " +"configuration file the plugin is enabled. Some plugins read extra options " +"in their section." msgstr "" -#. type: TP -#: en/linkcheckerrc.5:532 +#. type: SS +#: en/linkcheckerrc.5:449 #, no-wrap -msgid "B" +msgid "[AnchorCheck]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:535 -msgid "The file: URL is missing a trailing slash." +#: en/linkcheckerrc.5:451 +msgid "Checks validity of HTML anchors." msgstr "" -#. type: TP -#: en/linkcheckerrc.5:535 +#. type: SS +#: en/linkcheckerrc.5:452 #, no-wrap -msgid "B" +msgid "[LocationInfo]" msgstr "" #. 
type: Plain text -#: en/linkcheckerrc.5:538 -msgid "The file: path is not the same as the system specific path." +#: en/linkcheckerrc.5:455 +msgid "" +"Adds the country and if possible city name of the URL host as info. Needs " +"GeoIP or pygeoip and a local country or city lookup DB installed." msgstr "" -#. type: TP -#: en/linkcheckerrc.5:538 +#. type: SS +#: en/linkcheckerrc.5:456 #, no-wrap -msgid "B" +msgid "[RegexCheck]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:541 -msgid "The ftp: URL is missing a trailing slash." +#: en/linkcheckerrc.5:460 +msgid "" +"Define a regular expression which prints a warning if it matches any content " +"of the checked link. This applies only to valid pages, so we can get their " +"content." msgstr "" -#. type: TP -#: en/linkcheckerrc.5:541 +#. type: Plain text +#: en/linkcheckerrc.5:464 +msgid "" +"Use this to check for pages that contain some form of error message, for " +"example 'This page has moved' or 'Oracle Application error'." +msgstr "" + +#. type: SS +#: en/linkcheckerrc.5:468 #, no-wrap -msgid "B" +msgid "[SslCertificateCheck]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:544 -msgid "Unsupported HTTP authentication method." +#: en/linkcheckerrc.5:472 +msgid "" +"Check SSL certificate expiration date. Only internal https: links will be " +"checked. A domain will only be checked once to avoid duplicate warnings." msgstr "" #. type: TP -#: en/linkcheckerrc.5:544 +#: en/linkcheckerrc.5:472 #, no-wrap -msgid "B" +msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:547 -msgid "An error occurred while storing a cookie." +#: en/linkcheckerrc.5:475 +msgid "Configures the expiration warning time in days." msgstr "" -#. type: TP -#: en/linkcheckerrc.5:547 +#. type: SS +#: en/linkcheckerrc.5:476 #, no-wrap -msgid "B" +msgid "[HtmlSyntaxCheck]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:550 -msgid "An error occurred while decompressing the URL content." 
+#: en/linkcheckerrc.5:479 +msgid "" +"Check the syntax of HTML pages with the online W3C HTML validator. See " +"http://validator.w3.org/docs/api.html." msgstr "" -#. type: TP -#: en/linkcheckerrc.5:550 +#. type: SS +#: en/linkcheckerrc.5:480 #, no-wrap -msgid "B" +msgid "[CssSyntaxCheck]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:553 -msgid "The URL had no content." +#: en/linkcheckerrc.5:483 +msgid "" +"Check the syntax of HTML pages with the online W3C CSS validator. See " +"http://jigsaw.w3.org/css-validator/manual.html#expert." msgstr "" -#. type: TP -#: en/linkcheckerrc.5:553 +#. type: SS +#: en/linkcheckerrc.5:484 #, no-wrap -msgid "B" +msgid "[VirusCheck]" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:556 -msgid "The URL has moved permanently." +#: en/linkcheckerrc.5:487 +msgid "" +"Checks the page content for virus infections with clamav. A local clamav " +"daemon must be installed." msgstr "" #. type: TP -#: en/linkcheckerrc.5:556 +#: en/linkcheckerrc.5:487 #, no-wrap -msgid "B" +msgid "BI" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:559 -msgid "The http: URL checking has been denied." +#: en/linkcheckerrc.5:490 +msgid "Filename of B config file." msgstr "" -#. type: TP -#: en/linkcheckerrc.5:559 +#. type: SH +#: en/linkcheckerrc.5:491 #, no-wrap -msgid "B" +msgid "WARNINGS" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:562 -msgid "The URL content is encoded with an unknown encoding." +#: en/linkcheckerrc.5:494 +msgid "" +"The following warnings are recognized in the 'ignorewarnings' config file " +"entry:" msgstr "" #. type: TP -#: en/linkcheckerrc.5:562 +#: en/linkcheckerrc.5:495 #, no-wrap -msgid "B" +msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:565 -msgid "The URL has been redirected to an URL of a different type." +#: en/linkcheckerrc.5:498 +msgid "The file: URL is missing a trailing slash." msgstr "" #. 
type: TP -#: en/linkcheckerrc.5:565 +#: en/linkcheckerrc.5:498 #, no-wrap -msgid "B" +msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:568 -msgid "The SSL certificate is invalid or expired." +#: en/linkcheckerrc.5:501 +msgid "The file: path is not the same as the system specific path." msgstr "" #. type: TP -#: en/linkcheckerrc.5:568 +#: en/linkcheckerrc.5:501 #, no-wrap -msgid "B" +msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:571 -msgid "The URL has been ignored." +#: en/linkcheckerrc.5:504 +msgid "The ftp: URL is missing a trailing slash." msgstr "" #. type: TP -#: en/linkcheckerrc.5:571 +#: en/linkcheckerrc.5:504 #, no-wrap -msgid "B" +msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:574 -msgid "No connection to a MX host could be established." +#: en/linkcheckerrc.5:507 +msgid "An error occurred while storing a cookie." msgstr "" #. type: TP -#: en/linkcheckerrc.5:574 +#: en/linkcheckerrc.5:507 #, no-wrap -msgid "B" +msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:577 -msgid "The mail MX host could not be found." +#: en/linkcheckerrc.5:510 +msgid "The URL had no content." msgstr "" #. type: TP -#: en/linkcheckerrc.5:577 +#: en/linkcheckerrc.5:510 #, no-wrap -msgid "B" +msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:580 -msgid "The mailto: address could not be verified." +#: en/linkcheckerrc.5:513 +msgid "The mail MX host could not be found." msgstr "" #. type: TP -#: en/linkcheckerrc.5:580 +#: en/linkcheckerrc.5:513 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:583 +#: en/linkcheckerrc.5:516 msgid "The NNTP newsgroup could not be found." msgstr "" #. type: TP -#: en/linkcheckerrc.5:583 +#: en/linkcheckerrc.5:516 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:586 +#: en/linkcheckerrc.5:519 msgid "No NNTP server was found." msgstr "" #. type: TP -#: en/linkcheckerrc.5:586 -#, no-wrap -msgid "B" -msgstr "" - -#. 
type: Plain text -#: en/linkcheckerrc.5:589 -msgid "URL anchor was not found." -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:589 -#, no-wrap -msgid "B" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:592 -msgid "The URL content size and download size are unequal." -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:592 +#: en/linkcheckerrc.5:519 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:595 +#: en/linkcheckerrc.5:522 msgid "The URL content size is zero." msgstr "" #. type: TP -#: en/linkcheckerrc.5:595 +#: en/linkcheckerrc.5:522 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:598 +#: en/linkcheckerrc.5:525 msgid "The URL content size is too large." msgstr "" #. type: TP -#: en/linkcheckerrc.5:598 +#: en/linkcheckerrc.5:525 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:601 +#: en/linkcheckerrc.5:528 msgid "The effective URL is different from the original." msgstr "" #. type: TP -#: en/linkcheckerrc.5:601 +#: en/linkcheckerrc.5:528 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:604 +#: en/linkcheckerrc.5:531 msgid "Could not get the content of the URL." msgstr "" #. type: TP -#: en/linkcheckerrc.5:604 +#: en/linkcheckerrc.5:531 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:607 +#: en/linkcheckerrc.5:534 msgid "The IP is obfuscated." msgstr "" #. type: TP -#: en/linkcheckerrc.5:607 -#, no-wrap -msgid "B" -msgstr "" - -#. type: Plain text -#: en/linkcheckerrc.5:610 -msgid "The warning regular expression was found in the URL contents." -msgstr "" - -#. type: TP -#: en/linkcheckerrc.5:610 +#: en/linkcheckerrc.5:534 #, no-wrap msgid "B" msgstr "" #. type: Plain text -#: en/linkcheckerrc.5:613 +#: en/linkcheckerrc.5:537 msgid "The URL contains leading or trailing whitespace." msgstr "" #. 
type: Plain text -#: en/linkcheckerrc.5:616 +#: en/linkcheckerrc.5:540 msgid "linkchecker(1)" msgstr "" diff --git a/doc/upgrading.txt b/doc/upgrading.txt index 52f1a9855..89e71e831 100644 --- a/doc/upgrading.txt +++ b/doc/upgrading.txt @@ -1,5 +1,43 @@ Upgrading ========= +Migrating from 8.x to 9.0 +------------------------- +The Python requests module is now required. + +Several checks have been moved to plugins (see below). +Plugins have to be enabled in the configuration file. + +The following commandline and configuration options have been deprecated +and do not have any effect: +--anchors, anchors: moved to plugin AnchorCheck +--check-css, checkcss: moved to plugin CssSyntaxCheck +--check-html, checkhtml: moved to plugin HtmlSyntaxCheck +--complete: feature removed +--cookies, sendcookies, storecookies: cookies are sent/stored per default +--pause, wait: replaced with numrequestspersecond +--scan-virus, scanvirus: moved to plugin VirusCheck +--warning-regex: moved to plugin RegexCheck +--warning-size-bytes, warnsizebytes: feature removed +warnsslcertdaysvalid: moved to plugin SslCertificateCheck + +The "html" logger generates HTML5 documents now. + +The following warnings have been removed: +- http-auth-unauthorized: removed +- http-auth-unknonwn: removed +- http-decompress-error: removed +- http-robots-denied: downgraded to info +- http-moved-permanent: downgraded to info +- http-unsupported-encoding: removed +- https-certificate-error: is an error now +- mail-unverified-address: removed +- mail-no-connection: removed +- syntax-css: moved to plugin +- syntax-html: moved to plugin +- url-anchor-not-found: moved to plugin +- url-content-size-unequal: removed +- url-warnregex-found: moved to plugin + Migrating from 8.4 to 8.5 -------------------------- Custom output loggers have been changed. 
diff --git a/doc/web/content/index.md b/doc/web/content/index.md index adfa80aad..e46dec817 100644 --- a/doc/web/content/index.md +++ b/doc/web/content/index.md @@ -21,8 +21,9 @@ Features - honors robots.txt exclusion protocol - Cookie support - HTML5 support -- HTML and CSS syntax check -- Antivirus check +- [Plugin support](plugins.html) + allowing custom page checks. Currently available are + HTML and CSS syntax checks, Antivirus checks, and more. - Different interfaces: command line, GUI and web interface - ... and a lot more check options documented in the [manual page](man1/linkchecker.1.html). diff --git a/doc/web/content/plugins.md b/doc/web/content/plugins.md new file mode 100644 index 000000000..f7de29c71 --- /dev/null +++ b/doc/web/content/plugins.md @@ -0,0 +1,11 @@ +title: Plugin support +--- + +Plugin documentation +===================== + +Standard plugins +================= + +Custom plugins +=============== diff --git a/linkcheck/HtmlParser/__init__.py b/linkcheck/HtmlParser/__init__.py index 8648050f6..82ffea331 100644 --- a/linkcheck/HtmlParser/__init__.py +++ b/linkcheck/HtmlParser/__init__.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/HtmlParser/htmllex.c b/linkcheck/HtmlParser/htmllex.c index 82c091db8..9f8f5e383 100644 --- a/linkcheck/HtmlParser/htmllex.c +++ b/linkcheck/HtmlParser/htmllex.c @@ -2612,7 +2612,7 @@ static yyconst flex_int32_t yy_rule_linenum[131] = #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET #line 1 "htmllex.l" -/* Copyright (C) 2000-2012 Bastian Kleineidam +/* Copyright (C) 2000-2014 Bastian Kleineidam This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2951,6 +2951,10 
@@ int yyget_lineno (yyscan_t yyscanner ); void yyset_lineno (int line_number ,yyscan_t yyscanner ); +int yyget_column (yyscan_t yyscanner ); + +void yyset_column (int column_no ,yyscan_t yyscanner ); + /* %if-bison-bridge */ YYSTYPE * yyget_lval (yyscan_t yyscanner ); @@ -3132,7 +3136,7 @@ YY_DECL /*********************** EOF ************************/ -#line 3135 "htmllex.c" +#line 3139 "htmllex.c" yylval = yylval_param; @@ -4683,7 +4687,7 @@ YY_RULE_SETUP #line 1091 "htmllex.l" ECHO; YY_BREAK -#line 4686 "htmllex.c" +#line 4690 "htmllex.c" case YY_END_OF_BUFFER: { diff --git a/linkcheck/HtmlParser/htmllex.l b/linkcheck/HtmlParser/htmllex.l index 7e2c6b180..c3e2a1392 100644 --- a/linkcheck/HtmlParser/htmllex.l +++ b/linkcheck/HtmlParser/htmllex.l @@ -1,4 +1,4 @@ -/* Copyright (C) 2000-2012 Bastian Kleineidam +/* Copyright (C) 2000-2014 Bastian Kleineidam This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/linkcheck/HtmlParser/htmllib.py b/linkcheck/HtmlParser/htmllib.py index 80d7f20e2..8b53759e0 100644 --- a/linkcheck/HtmlParser/htmllib.py +++ b/linkcheck/HtmlParser/htmllib.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2009 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/HtmlParser/htmlparse.c b/linkcheck/HtmlParser/htmlparse.c index 59b80e7ea..b1de1b70e 100644 --- a/linkcheck/HtmlParser/htmlparse.c +++ b/linkcheck/HtmlParser/htmlparse.c @@ -68,7 +68,7 @@ /* Line 268 of yacc.c */ #line 1 "htmlparse.y" -/* Copyright (C) 2000-2011 Bastian Kleineidam +/* Copyright (C) 2000-2014 Bastian Kleineidam This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git 
a/linkcheck/HtmlParser/htmlparse.y b/linkcheck/HtmlParser/htmlparse.y index 5e8ab7eb8..088694646 100644 --- a/linkcheck/HtmlParser/htmlparse.y +++ b/linkcheck/HtmlParser/htmlparse.y @@ -1,5 +1,5 @@ %{ -/* Copyright (C) 2000-2011 Bastian Kleineidam +/* Copyright (C) 2000-2014 Bastian Kleineidam This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/linkcheck/HtmlParser/htmlsax.h b/linkcheck/HtmlParser/htmlsax.h index 9fde95433..c5812c5f8 100644 --- a/linkcheck/HtmlParser/htmlsax.h +++ b/linkcheck/HtmlParser/htmlsax.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2000-2010 Bastian Kleineidam +/* Copyright (C) 2000-2014 Bastian Kleineidam This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index cebdbb89a..ca3e8090f 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -68,12 +68,14 @@ def get_config_dir (): LOG_CACHE = "linkcheck.cache" LOG_GUI = "linkcheck.gui" LOG_THREAD = "linkcheck.thread" +LOG_PLUGIN = "linkcheck.plugin" lognames = { "cmdline": LOG_CMDLINE, "checking": LOG_CHECK, "cache": LOG_CACHE, "gui": LOG_GUI, "thread": LOG_THREAD, + "plugin": LOG_PLUGIN, "all": LOG_ROOT, } lognamelist = ", ".join(repr(name) for name in lognames) diff --git a/linkcheck/ansicolor.py b/linkcheck/ansicolor.py index 9ec804622..558070f42 100644 --- a/linkcheck/ansicolor.py +++ b/linkcheck/ansicolor.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/bookmarks/__init__.py b/linkcheck/bookmarks/__init__.py index 067bb316e..a5bad89a8 100644 --- a/linkcheck/bookmarks/__init__.py +++ 
b/linkcheck/bookmarks/__init__.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011 Bastian Kleineidam +# Copyright (C) 2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/bookmarks/chrome.py b/linkcheck/bookmarks/chrome.py index 9b1b239db..39885d09f 100644 --- a/linkcheck/bookmarks/chrome.py +++ b/linkcheck/bookmarks/chrome.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011-2012 Bastian Kleineidam +# Copyright (C) 2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/bookmarks/chromium.py b/linkcheck/bookmarks/chromium.py index 6666b6440..31fb8b8bd 100644 --- a/linkcheck/bookmarks/chromium.py +++ b/linkcheck/bookmarks/chromium.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011-2012 Bastian Kleineidam +# Copyright (C) 2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/bookmarks/firefox.py b/linkcheck/bookmarks/firefox.py index fa4262005..2e95a1fbf 100644 --- a/linkcheck/bookmarks/firefox.py +++ b/linkcheck/bookmarks/firefox.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2010-2012 Bastian Kleineidam +# Copyright (C) 2010-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/bookmarks/opera.py b/linkcheck/bookmarks/opera.py index e543d893b..80dc0d404 100644 --- a/linkcheck/bookmarks/opera.py +++ b/linkcheck/bookmarks/opera.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011-2012 Bastian Kleineidam +# Copyright (C) 
2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/bookmarks/safari.py b/linkcheck/bookmarks/safari.py index 73cc6601a..37eafd7fd 100644 --- a/linkcheck/bookmarks/safari.py +++ b/linkcheck/bookmarks/safari.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011-2012 Bastian Kleineidam +# Copyright (C) 2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/cache/__init__.py b/linkcheck/cache/__init__.py index cd9775f8a..29e47758d 100644 --- a/linkcheck/cache/__init__.py +++ b/linkcheck/cache/__init__.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2006-2009 Bastian Kleineidam +# Copyright (C) 2006-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/cache/connection.py b/linkcheck/cache/connection.py deleted file mode 100644 index 4aeee3f2d..000000000 --- a/linkcheck/cache/connection.py +++ /dev/null @@ -1,223 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2005-2014 Bastian Kleineidam -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -""" -Store and retrieve open connections. -""" - -import time -from .. import log, LOG_CACHE -from ..decorators import synchronized -from ..lock import get_lock, get_semaphore -from ..containers import enum - -_lock = get_lock("connection") -_wait_lock = get_lock("connwait") - -ConnectionTypes = ("ftp", "http", "https") -ConnectionState = enum("available", "busy") - - -def get_connection_id(connection): - """Return unique id for connection object.""" - return id(connection) - - -def is_expired(curtime, conn_data): - """Test if connection is expired.""" - return (curtime+5.0) >= conn_data[2] - - -class ConnectionPool (object): - """Thread-safe cache, storing a set of connections for URL retrieval.""" - - def __init__ (self, limits, wait=0): - """ - Initialize an empty connection dictionary which will have the form: - {(type, host, port) -> (lock, {id -> [connection, state, expiration time]})} - - Connection can be any open connection object (HTTP, FTP, ...). - State is of type ConnectionState (either 'available' or 'busy'). - Expiration time is the point of time in seconds when this - connection will be timed out. - - The type is the connection type and an either 'ftp' or 'http'. - The host is the hostname as string, port the port number as an integer. - - For each type, the maximum number of connections to one single host is defined - in limits. 
- """ - # open connections - self.connections = {} - # {host -> due time} - self.times = {} - # {host -> wait} - self.host_waits = {} - if wait < 0: - raise ValueError("negative wait value %d" % wait) - self.wait = wait - # {connection type -> max number of connections to one host} - self.limits = limits - - @synchronized(_wait_lock) - def host_wait (self, host, wait): - """Set a host specific time to wait between requests.""" - if wait < 0: - raise ValueError("negative wait value %d" % wait) - self.host_waits[host] = wait - - @synchronized(_wait_lock) - def wait_for_host (self, host): - """Honor wait time for given host.""" - t = time.time() - if host in self.times: - due_time = self.times[host] - if due_time > t: - wait = due_time - t - log.debug(LOG_CACHE, - "waiting for %.01f seconds on connection to %s", wait, host) - time.sleep(wait) - t = time.time() - self.times[host] = t + self.host_waits.get(host, self.wait) - - def _add (self, type, host, port, create_connection): - """Add connection to the pool with given parameters. - - @param type: the connection scheme (eg. 
http) - @ptype type: string - @param host: the hostname - @ptype host: string - @param port: the port number - @ptype port: int - @param create_connection: function to create a new connection object - @ptype create_connection: callable - @return: newly created connection - @rtype: HTTP(S)Connection or FTPConnection - """ - self.wait_for_host(host) - connection = create_connection(type, host, port) - cid = get_connection_id(connection) - expiration = None - conn_data = [connection, 'busy', expiration] - key = (type, host, port) - if key in self.connections: - lock, entries = self.connections[key] - entries[cid] = conn_data - else: - lock = get_semaphore("%s:%d" % (host, port), self.limits[type]) - lock.acquire() - log.debug(LOG_CACHE, "Acquired lock for %s://%s:%d" % key) - entries = {cid: conn_data} - self.connections[key] = (lock, entries) - return connection - - @synchronized(_lock) - def get (self, type, host, port, create_connection): - """Get open connection if available or create a new one. - - @param type: connection type - @ptype type: ConnectionType - @param host: hostname - @ptype host: string - @param port: port number - @ptype port: int - @return: Open connection object or None if none is available. 
- @rtype None or FTPConnection or HTTP(S)Connection - """ - assert type in ConnectionTypes, 'invalid type %r' % type - # 65536 == 2**16 - assert 0 < port < 65536, 'invalid port number %r' % port - key = (type, host, port) - if key not in self.connections: - return self._add(type, host, port, create_connection) - lock, entries = self.connections[key] - if not lock.acquire(False): - log.debug(LOG_CACHE, "wait for %s connection to %s:%d", - type, host, port) - return lock - log.debug(LOG_CACHE, "Acquired lock for %s://%s:%d" % key) - # either a connection is available or a new one can be created - t = time.time() - delete_entries = [] - try: - for id, conn_data in entries.items(): - if conn_data[1] == ConnectionState.available: - if is_expired(t, conn_data): - delete_entries.append(id) - else: - conn_data[1] = ConnectionState.busy - log.debug(LOG_CACHE, - "reusing connection %s timing out in %.01f seconds", - key, (conn_data[2] - t)) - return conn_data[0] - finally: - for id in delete_entries: - del entries[id] - # make a new connection - return self._add(type, host, port, create_connection) - - @synchronized(_lock) - def release (self, type, host, port, connection, expiration=None): - """Release a used connection.""" - key = (type, host, port) - if key in self.connections: - lock, entries = self.connections[key] - id = get_connection_id(connection) - if id in entries: - log.debug(LOG_CACHE, "Release lock for %s://%s:%d and expiration %s", type, host, port, expiration) - # if the connection is reusable, set it to available, else delete it - if expiration is None: - del entries[id] - else: - entries[id][1] = ConnectionState.available - entries[id][2] = expiration - lock.release() - else: - log.warn(LOG_CACHE, "Release unknown connection %s://%s:%d from entries %s", type, host, port, entries.keys()) - else: - log.warn(LOG_CACHE, "Release unknown connection %s://%s:%d", type, host, port) - - @synchronized(_lock) - def remove_expired (self): - """Remove expired or soon to 
be expired connections from this pool.""" - t = time.time() - for lock, entries in self.connections.values(): - delete_entries = [] - for id, conn_data in entries.items(): - if conn_data[1] == 'available' and (t+5.0) >= conn_data[2]: - try_close(conn_data[0]) - delete_entries.add(id) - for id in delete_entries: - del entries[id] - lock.release() - log.debug(LOG_CACHE, "released lock for id %s", id) - - @synchronized(_lock) - def clear (self): - """Remove all connections from this cache, even if busy.""" - for lock, entries in self.connections.values(): - for conn_data in entries.values(): - try_close(conn_data[0]) - self.connections.clear() - - -def try_close (connection): - """Close and remove a connection (not thread-safe, internal use only).""" - try: - connection.close() - except Exception: - # ignore close errors - pass diff --git a/linkcheck/cache/cookie.py b/linkcheck/cache/cookie.py deleted file mode 100644 index c960a418a..000000000 --- a/linkcheck/cache/cookie.py +++ /dev/null @@ -1,83 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2006-2014 Bastian Kleineidam -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -""" -Store and retrieve cookies. -""" -from .. 
import log, LOG_CACHE, cookies -from ..decorators import synchronized -from ..lock import get_lock - - -_lock = get_lock("cookie") - -class CookieJar (object): - """Cookie storage, implementing the cookie handling policy.""" - - def __init__ (self): - """Initialize empty cookie cache.""" - # Store all cookies in a set. - self.cache = set() - - @synchronized(_lock) - def add (self, headers, scheme, host, path): - """Parse cookie values, add to cache.""" - errors = [] - for h in headers.getallmatchingheaders("Set-Cookie"): - # RFC 2109 (Netscape) cookie type - name, value = h.split(':', 1) - try: - cookie = cookies.NetscapeCookie(value, scheme, host, path) - if cookie in self.cache: - self.cache.remove(cookie) - if not cookie.is_expired(): - self.cache.add(cookie) - except cookies.CookieError as msg: - errmsg = "Invalid cookie %r for %s:%s%s: %s" % ( - h, scheme, host, path, msg) - errors.append(errmsg) - for h in headers.getallmatchingheaders("Set-Cookie2"): - # RFC 2965 cookie type - name, value = h.split(':', 1) - try: - cookie = cookies.Rfc2965Cookie(value, scheme, host, path) - if cookie in self.cache: - self.cache.remove(cookie) - if not cookie.is_expired(): - self.cache.add(cookie) - except cookies.CookieError as msg: - errmsg = "Invalid cookie2 %r for %s:%s%s: %s" % ( - h, scheme, host, path, msg) - errors.append(errmsg) - return errors - - @synchronized(_lock) - def get (self, scheme, host, port, path): - """Cookie cache getter function. Return ordered list of cookies - which match the given host, port and path. - Cookies with more specific paths are listed first.""" - cookies = [x for x in self.cache if x.check_expired() and \ - x.is_valid_for(scheme, host, port, path)] - # order cookies with more specific (ie. 
longer) paths first - cookies.sort(key=lambda c: len(c.attributes['path']), reverse=True) - log.debug(LOG_CACHE, "Found %d cookies for host %r path %r", - len(cookies), host, path) - return cookies - - @synchronized(_lock) - def __str__ (self): - """Return stored cookies as string.""" - return "" % self.cache diff --git a/linkcheck/cache/robots_txt.py b/linkcheck/cache/robots_txt.py index 113d4e9e2..2b0ec4a4d 100644 --- a/linkcheck/cache/robots_txt.py +++ b/linkcheck/cache/robots_txt.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2006-2012 Bastian Kleineidam +# Copyright (C) 2006-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/cache/urlqueue.py b/linkcheck/cache/urlqueue.py index 8daee3e99..fe3f26a56 100644 --- a/linkcheck/cache/urlqueue.py +++ b/linkcheck/cache/urlqueue.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -23,9 +23,6 @@ from .. import log, LOG_CACHE -LARGE_QUEUE_THRESHOLD = 1000 -FRONT_CHUNK_SIZE = 100 - class Timeout (StandardError): """Raised by join()""" pass @@ -55,8 +52,8 @@ def __init__ (self, max_allowed_puts=None): self.all_tasks_done = threading.Condition(self.mutex) self.unfinished_tasks = 0 self.finished_tasks = 0 - self.in_progress = {} - self.seen = {} + self.in_progress = 0 + self.seen = set() self.shutdown = False # Each put() decreases the number of allowed puts. # This way we can restrict the number of URLs that are checked. @@ -103,24 +100,29 @@ def _get (self, timeout): if remaining <= 0.0: raise Empty() self.not_empty.wait(remaining) - url_data = self.queue.popleft() - if url_data.has_result: - # Already checked and copied from cache. 
- pass - else: - key = url_data.cache_url_key - assert key is not None - self.in_progress[key] = url_data - return url_data + self.in_progress += 1 + return self.queue.popleft() def put (self, item): """Put an item into the queue. Block if necessary until a free slot is available. """ + if self.put_denied(item): + return with self.mutex: self._put(item) self.not_empty.notify() + def put_denied(self, url_data): + """Determine if put() will not append the item on the queue. + @return True (reliable) or False (unreliable) + """ + if self.shutdown or self.allowed_puts == 0: + return True + if url_data.cache_url_key is not None and url_data.cache_url_key in self.seen: + return True + return False + def _put (self, url_data): """Put URL in queue, increase number of unfished tasks.""" if self.shutdown: @@ -133,17 +135,16 @@ def _put (self, url_data): self.allowed_puts -= 1 log.debug(LOG_CACHE, "queueing %s", url_data) key = url_data.cache_url_key - # cache key is None for URLs with invalid syntax - assert key is not None or url_data.has_result, "invalid cache key in %s" % url_data - if key in self.seen: - self.seen[key] += 1 - if key is not None: - # do not check duplicate URLs + if key is not None: + if key in self.seen: + # don't check duplicate URLs return - else: - self.seen[key] = 0 - self.queue.append(url_data) + self.seen.add(key) self.unfinished_tasks += 1 + if url_data.has_result: + self.queue.appendleft(url_data) + else: + self.queue.append(url_data) def task_done (self, url_data): """ @@ -163,17 +164,11 @@ def task_done (self, url_data): with self.all_tasks_done: log.debug(LOG_CACHE, "task_done %s", url_data) # check for aliases (eg. 
through HTTP redirections) - if hasattr(url_data, "aliases"): - for key in url_data.aliases: - if key in self.seen: - self.seen[key] += 1 - else: - self.seen[key] = 0 - key = url_data.cache_url_key - if key in self.in_progress: - del self.in_progress[key] + if hasattr(url_data, "aliases") and url_data.aliases: + self.seen.update(url_data.aliases) self.finished_tasks += 1 self.unfinished_tasks -= 1 + self.in_progress -= 1 if self.unfinished_tasks <= 0: if self.unfinished_tasks < 0: raise ValueError('task_done() called too many times') @@ -216,7 +211,5 @@ def do_shutdown (self): def status (self): """Get tuple (finished tasks, in progress, queue size).""" - with self.mutex: - return (self.finished_tasks, - len(self.in_progress), len(self.queue)) - + # no need to acquire self.mutex since the numbers are unreliable anyways. + return (self.finished_tasks, self.in_progress, len(self.queue)) diff --git a/linkcheck/checker/__init__.py b/linkcheck/checker/__init__.py index 31e66e912..a859162c9 100644 --- a/linkcheck/checker/__init__.py +++ b/linkcheck/checker/__init__.py @@ -101,43 +101,46 @@ def get_url_from (base_url, recursion_level, aggregate, base_ref = strformat.unicode_safe(base_ref) name = strformat.unicode_safe(name) url = absolute_url(base_url_stripped, base_ref, parent_url).lower() + scheme = None if not (url or name): # use filename as base url, with slash as path seperator name = base_url.replace("\\", "/") - if parent_content_type == 'application/x-httpd-php' and \ - '' in base_url and url.startswith('file:'): - # ignore but warn about URLs from local PHP files with execution directives + elif ":" in url: + scheme = url.split(":", 1)[0].lower() + allowed_schemes = aggregate.config["allowedschemes"] + # ignore local PHP files with execution directives + local_php = (parent_content_type == 'application/x-httpd-php' and + '' in base_url and scheme == 'file') + if local_php or (allowed_schemes and scheme not in allowed_schemes): klass = ignoreurl.IgnoreUrl else: - 
assume_local_file = recursion_level == 0 - klass = get_urlclass_from(url, assume_local_file=assume_local_file) + assume_local_file = (recursion_level == 0) + klass = get_urlclass_from(scheme, assume_local_file=assume_local_file) log.debug(LOG_CHECK, "%s handles url %s", klass.__name__, base_url) return klass(base_url, recursion_level, aggregate, parent_url=parent_url, base_ref=base_ref, line=line, column=column, name=name, extern=extern) -def get_urlclass_from (url, assume_local_file=False): - """Return checker class for given URL. If URL does not start - with a URL scheme and assume_local_file is True, assume that - the given URL is a local file.""" - if url.startswith("http:"): +def get_urlclass_from (scheme, assume_local_file=False): + """Return checker class for given URL scheme. If the scheme + cannot be matched and assume_local_file is True, assume a local file. + """ + if scheme in ("http", "https"): klass = httpurl.HttpUrl - elif url.startswith("ftp:"): + elif scheme == "ftp": klass = ftpurl.FtpUrl - elif url.startswith("file:"): + elif scheme == "file": klass = fileurl.FileUrl - elif url.startswith("telnet:"): + elif scheme == "telnet": klass = telneturl.TelnetUrl - elif url.startswith("mailto:"): + elif scheme == "mailto": klass = mailtourl.MailtoUrl - elif url.startswith("https:"): - klass = httpsurl.HttpsUrl - elif url.startswith(("nntp:", "news:", "snews:")): + elif scheme in ("nntp", "news", "snews"): klass = nntpurl.NntpUrl - elif url.startswith('dns:'): + elif scheme == "dns": klass = dnsurl.DnsUrl - elif unknownurl.is_unknown_url(url): + elif scheme and unknownurl.is_unknown_scheme(scheme): klass = unknownurl.UnknownUrl elif assume_local_file: klass = fileurl.FileUrl @@ -168,4 +171,4 @@ def get_index_html (urls): # all the URL classes from . 
import (fileurl, unknownurl, ftpurl, httpurl, dnsurl, - httpsurl, mailtourl, telneturl, nntpurl, ignoreurl) + mailtourl, telneturl, nntpurl, ignoreurl) diff --git a/linkcheck/checker/const.py b/linkcheck/checker/const.py index 1bc841aa2..9b98def22 100644 --- a/linkcheck/checker/const.py +++ b/linkcheck/checker/const.py @@ -21,8 +21,8 @@ import select import nntplib import ftplib -import httplib as orighttplib -from .. import LinkCheckerError, httplib2 as httplib +import requests +from .. import LinkCheckerError from dns.exception import DNSException # Catch these exception on syntax checks. @@ -45,9 +45,8 @@ nntplib.error_perm, nntplib.error_proto, EOFError, - # http error - httplib.error, - orighttplib.error, + # http errors + requests.exceptions.RequestException, # ftp errors ftplib.error_reply, ftplib.error_temp, @@ -75,39 +74,25 @@ # some constants URL_MAX_LENGTH = 2000 -URL_WARN_LENGTH = 255 +URL_WARN_LENGTH = 1024 # the warnings WARN_URL_EFFECTIVE_URL = "url-effective-url" WARN_URL_ERROR_GETTING_CONTENT = "url-error-getting-content" -WARN_URL_ANCHOR_NOT_FOUND = "url-anchor-not-found" -WARN_URL_WARNREGEX_FOUND = "url-warnregex-found" WARN_URL_CONTENT_SIZE_TOO_LARGE = "url-content-too-large" WARN_URL_CONTENT_SIZE_ZERO = "url-content-size-zero" -WARN_URL_CONTENT_SIZE_UNEQUAL = "url-content-size-unequal" WARN_URL_OBFUSCATED_IP = "url-obfuscated-ip" WARN_URL_TOO_LONG = "url-too-long" WARN_URL_WHITESPACE = "url-whitespace" WARN_FILE_MISSING_SLASH = "file-missing-slash" WARN_FILE_SYSTEM_PATH = "file-system-path" WARN_FTP_MISSING_SLASH = "ftp-missing-slash" -WARN_HTTP_ROBOTS_DENIED = "http-robots-denied" -WARN_HTTP_MOVED_PERMANENT = "http-moved-permanent" WARN_HTTP_EMPTY_CONTENT = "http-empty-content" WARN_HTTP_COOKIE_STORE_ERROR = "http-cookie-store-error" -WARN_HTTP_DECOMPRESS_ERROR = "http-decompress-error" -WARN_HTTP_UNSUPPORTED_ENCODING = "http-unsupported-encoding" -WARN_HTTP_AUTH_UNKNOWN = "http-auth-unknonwn" -WARN_HTTP_AUTH_UNAUTHORIZED = 
"http-auth-unauthorized" -WARN_HTTPS_CERTIFICATE = "https-certificate-error" WARN_IGNORE_URL = "ignore-url" WARN_MAIL_NO_MX_HOST = "mail-no-mx-host" -WARN_MAIL_UNVERIFIED_ADDRESS = "mail-unverified-address" -WARN_MAIL_NO_CONNECTION = "mail-no-connection" WARN_NNTP_NO_SERVER = "nntp-no-server" WARN_NNTP_NO_NEWSGROUP = "nntp-no-newsgroup" -WARN_SYNTAX_HTML = "syntax-html" -WARN_SYNTAX_CSS = "syntax-css" # registered warnings Warnings = { @@ -115,41 +100,20 @@ _("The effective URL is different from the original."), WARN_URL_ERROR_GETTING_CONTENT: _("Could not get the content of the URL."), - WARN_URL_ANCHOR_NOT_FOUND: _("URL anchor was not found."), - WARN_URL_WARNREGEX_FOUND: - _("The warning regular expression was found in the URL contents."), WARN_URL_CONTENT_SIZE_TOO_LARGE: _("The URL content size is too large."), WARN_URL_CONTENT_SIZE_ZERO: _("The URL content size is zero."), - WARN_URL_CONTENT_SIZE_UNEQUAL: _("The URL content size and download size are unequal."), WARN_URL_TOO_LONG: _("The URL is longer than the recommended size."), WARN_URL_WHITESPACE: _("The URL contains leading or trailing whitespace."), WARN_FILE_MISSING_SLASH: _("The file: URL is missing a trailing slash."), WARN_FILE_SYSTEM_PATH: _("The file: path is not the same as the system specific path."), WARN_FTP_MISSING_SLASH: _("The ftp: URL is missing a trailing slash."), - WARN_HTTP_ROBOTS_DENIED: _("The http: URL checking has been denied."), - WARN_HTTP_MOVED_PERMANENT: _("The URL has moved permanently."), WARN_HTTP_EMPTY_CONTENT: _("The URL had no content."), WARN_HTTP_COOKIE_STORE_ERROR: _("An error occurred while storing a cookie."), - WARN_HTTP_DECOMPRESS_ERROR: - _("An error occurred while decompressing the URL content."), - WARN_HTTP_UNSUPPORTED_ENCODING: - _("The URL content is encoded with an unknown encoding."), - WARN_HTTP_AUTH_UNKNOWN: - _("Unsupported HTTP authentication method."), - WARN_HTTP_AUTH_UNAUTHORIZED: - _("Unauthorized access without HTTP authentication."), - 
WARN_HTTPS_CERTIFICATE: _("The SSL certificate is invalid or expired."), WARN_IGNORE_URL: _("The URL has been ignored."), WARN_MAIL_NO_MX_HOST: _("The mail MX host could not be found."), - WARN_MAIL_UNVERIFIED_ADDRESS: - _("The mailto: address could not be verified."), - WARN_MAIL_NO_CONNECTION: - _("No connection to a MX host could be established."), WARN_NNTP_NO_SERVER: _("No NNTP server was found."), WARN_NNTP_NO_NEWSGROUP: _("The NNTP newsgroup could not be found."), WARN_URL_OBFUSCATED_IP: _("The IP is obfuscated."), - WARN_SYNTAX_HTML: _("HTML syntax error."), - WARN_SYNTAX_CSS: _("CSS syntax error."), } diff --git a/linkcheck/checker/fileurl.py b/linkcheck/checker/fileurl.py index bbce0edd7..af7129f65 100644 --- a/linkcheck/checker/fileurl.py +++ b/linkcheck/checker/fileurl.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ import urllib2 from datetime import datetime -from . import urlbase, get_index_html, get_url_from +from . import urlbase, get_index_html from .. 
import log, LOG_CHECK, fileutil, LinkCheckerError, url as urlutil from ..bookmarks import firefox from .const import WARN_FILE_MISSING_SLASH, WARN_FILE_SYSTEM_PATH @@ -163,8 +163,6 @@ def add_size_info (self): return filename = self.get_os_filename() self.size = fileutil.get_size(filename) - if self.dlsize == -1: - self.dlsize = self.size self.modified = datetime.utcfromtimestamp(fileutil.get_mtime(filename)) def check_connection (self): @@ -203,16 +201,13 @@ def check_case_sensitivity (self): def read_content (self): """Return file content, or in case of directories a dummy HTML file with links to the files.""" - if self.size > self.MaxFilesizeBytes: - raise LinkCheckerError(_("File size too large")) if self.is_directory(): data = get_index_html(get_files(self.get_os_filename())) if isinstance(data, unicode): data = data.encode("iso8859-1", "ignore") - size = len(data) else: - data, size = super(FileUrl, self).read_content() - return data, size + data = super(FileUrl, self).read_content() + return data def is_html (self): """Check if file is a HTML file.""" @@ -272,27 +267,6 @@ def is_parseable (self): log.debug(LOG_CHECK, "File with content type %r is not parseable.", ctype) return False - def parse_url (self): - """Parse file contents for new links to check.""" - if self.is_directory(): - self.parse_html() - elif firefox.has_sqlite and firefox.extension.search(self.url): - self.parse_firefox() - else: - mime = self.get_content_type() - key = self.ContentMimetypes[mime] - getattr(self, "parse_"+key)() - self.add_num_url_info() - - def parse_firefox (self): - """Parse a Firefox3 bookmark file.""" - log.debug(LOG_CHECK, "Parsing Firefox bookmarks %s", self) - filename = self.get_os_filename() - for url, name in firefox.parse_bookmark_file(filename): - url_data = get_url_from(url, self.recursion_level+1, - self.aggregate, parent_url=self.url, name=name) - self.aggregate.urlqueue.put(url_data) - def get_content_type (self): """Return URL content type, or an empty 
string if content type could not be found.""" @@ -326,6 +300,5 @@ def add_url (self, url, line=0, column=0, name=u"", base=None): webroot = self.aggregate.config["localwebroot"] if webroot and url and url.startswith(u"/"): url = webroot + url[1:] - log.debug(LOG_CHECK, "Applied local webroot `%s' to `%s'.", - webroot, url) + log.debug(LOG_CHECK, "Applied local webroot `%s' to `%s'.", webroot, url) super(FileUrl, self).add_url(url, line=line, column=column, name=name, base=base) diff --git a/linkcheck/checker/ftpurl.py b/linkcheck/checker/ftpurl.py index 6572ee73d..db0eb4315 100644 --- a/linkcheck/checker/ftpurl.py +++ b/linkcheck/checker/ftpurl.py @@ -22,11 +22,11 @@ from cStringIO import StringIO from .. import log, LOG_CHECK, LinkCheckerError, fileutil -from . import proxysupport, httpurl, internpaturl, get_index_html, pooledconnection +from . import proxysupport, httpurl, internpaturl, get_index_html from .const import WARN_FTP_MISSING_SLASH -class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport, pooledconnection.PooledConnection): +class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): """ Url link with ftp scheme. 
""" @@ -70,14 +70,9 @@ def check_connection (self): def login (self): """Log into ftp server and check the welcome message.""" - def create_connection(scheme, host, port): - """Create a new ftp connection.""" - connection = ftplib.FTP(timeout=self.aggregate.config["timeout"]) - if log.is_debug(LOG_CHECK): - connection.set_debuglevel(1) - return connection - scheme, host, port = self.get_netloc() - self.get_pooled_connection(scheme, host, port, create_connection) + self.url_connection = ftplib.FTP(timeout=self.aggregate.config["timeout"]) + if log.is_debug(LOG_CHECK): + self.url_connection.set_debuglevel(1) try: self.url_connection.connect(self.host, self.port) _user, _password = self.get_user_password() @@ -92,6 +87,7 @@ def create_connection(scheme, host, port): # note that the info may change every time a user logs in, # so don't add it to the url_data info. log.debug(LOG_CHECK, "FTP info %s", info) + pass else: raise LinkCheckerError(_("Got no answer from FTP server")) except EOFError as msg: @@ -105,6 +101,7 @@ def negotiate_encoding (self): features = self.url_connection.sendcmd("FEAT") except ftplib.error_perm as msg: log.debug(LOG_CHECK, "Ignoring error when getting FTP features: %s" % msg) + pass else: log.debug(LOG_CHECK, "FTP features %s", features) if " UTF-8" in features.splitlines(): @@ -176,7 +173,7 @@ def is_parseable (self): """See if URL target is parseable for recursion.""" if self.is_directory(): return True - ctype = self.get_content_type(self.get_content) + ctype = self.get_content_type() if ctype in self.ContentMimetypes: return True log.debug(LOG_CHECK, "URL with content type %r is not parseable.", ctype) @@ -188,20 +185,11 @@ def is_directory (self): path = self.urlparts[2] return (not path) or path.endswith('/') - def parse_url (self): - """Parse URL target for links.""" - if self.is_directory(): - self.parse_html() - return - key = self.ContentMimetypes[self.get_content_type(self.get_content)] - getattr(self, "parse_"+key)() - 
self.add_num_url_info() - - def get_content_type (self, read=None): + def get_content_type (self): """Return URL content type, or an empty string if content type could not be found.""" if self.content_type is None: - self.content_type = fileutil.guess_mimetype(self.url, read=read) + self.content_type = fileutil.guess_mimetype(self.url, read=self.get_content) return self.content_type def read_content (self): @@ -210,6 +198,7 @@ def read_content (self): if self.is_directory(): self.url_connection.cwd(self.filename) self.files = self.get_files() + # XXX limit number of files? data = get_index_html(self.files) else: # download file in BINARY mode @@ -217,20 +206,20 @@ def read_content (self): buf = StringIO() def stor_data (s): """Helper method storing given data""" - self.aggregate.add_download_data(self.cache_content_key, s) # limit the download size - if (buf.tell() + len(s)) > self.MaxFilesizeBytes: + if (buf.tell() + len(s)) > self.max_size: raise LinkCheckerError(_("FTP file size too large")) buf.write(s) self.url_connection.retrbinary(ftpcmd, stor_data) data = buf.getvalue() buf.close() - return data, len(data) + return data def close_connection (self): """Release the open connection from the connection pool.""" - if self.url_connection is None: - return - scheme, host, port = self.get_netloc() - self.aggregate.connections.release(scheme, host, port, self.url_connection) - self.url_connection = None + if self.url_connection is not None: + try: + self.url_connection.quit() + except Exception: + pass + self.url_connection = None diff --git a/linkcheck/checker/httpheaders.py b/linkcheck/checker/httpheaders.py index a943d02c1..3c9d88226 100644 --- a/linkcheck/checker/httpheaders.py +++ b/linkcheck/checker/httpheaders.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2005-2012 Bastian Kleineidam +# Copyright (C) 2005-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General 
Public License as published by diff --git a/linkcheck/checker/httpsurl.py b/linkcheck/checker/httpsurl.py deleted file mode 100644 index 1636cefc5..000000000 --- a/linkcheck/checker/httpsurl.py +++ /dev/null @@ -1,179 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2014 Bastian Kleineidam -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -""" -Handle https links. -""" -import time -from . import httpurl -from .const import WARN_HTTPS_CERTIFICATE -from .. import log, LOG_CHECK, strformat - - -class HttpsUrl (httpurl.HttpUrl): - """ - Url link with https scheme. - """ - - def local_check (self): - """ - Check connection if SSL is supported, else ignore. - """ - if httpurl.supportHttps: - super(HttpsUrl, self).local_check() - else: - self.add_info(_("%s URL ignored.") % self.scheme.capitalize()) - - def get_http_object (self, scheme, host, port): - """Open a HTTP connection and check the SSL certificate.""" - super(HttpsUrl, self).get_http_object(scheme, host, port) - self.check_ssl_certificate(self.url_connection.sock, host) - - def check_ssl_certificate(self, ssl_sock, host): - """Run all SSL certificate checks that have not yet been done. - OpenSSL already checked the SSL notBefore and notAfter dates. 
- """ - if not hasattr(ssl_sock, "getpeercert"): - # the URL was a HTTPS -> HTTP redirect - return - cert = ssl_sock.getpeercert() - log.debug(LOG_CHECK, "Got SSL certificate %s", cert) - if not cert: - return - if 'subject' in cert: - self.check_ssl_hostname(ssl_sock, cert, host) - else: - msg = _('certificate did not include "subject" information') - self.add_ssl_warning(ssl_sock, msg) - if 'notAfter' in cert: - self.check_ssl_valid_date(ssl_sock, cert) - else: - msg = _('certificate did not include "notAfter" information') - self.add_ssl_warning(ssl_sock, msg) - - def check_ssl_hostname(self, ssl_sock, cert, host): - """Check the hostname against the certificate according to - RFC2818. - """ - try: - match_hostname(cert, host) - except CertificateError as msg: - self.add_ssl_warning(ssl_sock, msg) - - def check_ssl_valid_date(self, ssl_sock, cert): - """Check if the certificate is still valid, or if configured check - if it's at least a number of days valid. - """ - import ssl - checkDaysValid = self.aggregate.config["warnsslcertdaysvalid"] - try: - notAfter = ssl.cert_time_to_seconds(cert['notAfter']) - except ValueError as msg: - msg = _('invalid certficate "notAfter" value %r') % cert['notAfter'] - self.add_ssl_warning(ssl_sock, msg) - return - curTime = time.time() - # Calculate seconds until certifcate expires. Can be negative if - # the certificate is already expired. 
- secondsValid = notAfter - curTime - if secondsValid < 0: - msg = _('certficate is expired on %s') % cert['notAfter'] - self.add_ssl_warning(ssl_sock, msg) - elif checkDaysValid > 0 and \ - secondsValid < (checkDaysValid * strformat.SECONDS_PER_DAY): - strSecondsValid = strformat.strduration_long(secondsValid) - msg = _('certificate is only %s valid') % strSecondsValid - self.add_ssl_warning(ssl_sock, msg) - - def add_ssl_warning(self, ssl_sock, msg): - """Add a warning message about an SSL certificate error.""" - cipher_name, ssl_protocol, secret_bits = ssl_sock.cipher() - err = _(u"SSL warning: %(msg)s. Cipher %(cipher)s, %(protocol)s.") - attrs = dict(msg=msg, cipher=cipher_name, protocol=ssl_protocol) - self.add_warning(err % attrs, tag=WARN_HTTPS_CERTIFICATE) - - -# Copied from ssl.py in Python 3: -# Wrapper module for _ssl, providing some additional facilities -# implemented in Python. Written by Bill Janssen. -import re - -class CertificateError(ValueError): - """Raised on certificate errors.""" - pass - - -def _dnsname_to_pat(dn, max_wildcards=1): - """Convert a DNS certificate name to a hostname matcher.""" - pats = [] - for frag in dn.split(r'.'): - if frag.count('*') > max_wildcards: - # Issue #17980: avoid denials of service by refusing more - # than one wildcard per fragment. A survery of established - # policy among SSL implementations showed it to be a - # reasonable choice. - raise CertificateError( - "too many wildcards in certificate DNS name: " + repr(dn)) - if frag == '*': - # When '*' is a fragment by itself, it matches a non-empty dotless - # fragment. - pats.append('[^.]+') - else: - # Otherwise, '*' matches any dotless fragment. - frag = re.escape(frag) - pats.append(frag.replace(r'\*', '[^.]*')) - return re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE) - - -def match_hostname(cert, hostname): - """Verify that *cert* (in decoded format as returned by - SSLSocket.getpeercert()) matches the *hostname*. 
RFC 2818 rules - are mostly followed, but IP addresses are not accepted for *hostname*. - - CertificateError is raised on failure. On success, the function - returns nothing. - """ - if not cert: - raise ValueError("empty or no certificate") - dnsnames = [] - san = cert.get('subjectAltName', ()) - for key, value in san: - if key == 'DNS': - if _dnsname_to_pat(value).match(hostname): - return - dnsnames.append(value) - if not dnsnames: - # The subject is only checked when there is no dNSName entry - # in subjectAltName - for sub in cert.get('subject', ()): - for key, value in sub: - # XXX according to RFC 2818, the most specific Common Name - # must be used. - if key == 'commonName': - if _dnsname_to_pat(value).match(hostname): - return - dnsnames.append(value) - if len(dnsnames) > 1: - raise CertificateError("hostname %r " - "doesn't match either of %s" - % (hostname, ', '.join(map(repr, dnsnames)))) - elif len(dnsnames) == 1: - raise CertificateError("hostname %r " - "doesn't match %r" - % (hostname, dnsnames[0])) - else: - raise CertificateError("no appropriate commonName or " - "subjectAltName fields were found") diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index 06b3a8a76..526229d56 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -18,26 +18,14 @@ Handle http links. """ -import urlparse -import os -import errno -import zlib -import socket -import rfc822 -import time +import requests from cStringIO import StringIO -from datetime import datetime -from .. import (log, LOG_CHECK, gzip2 as gzip, strformat, url as urlutil, - httplib2 as httplib, LinkCheckerError, httputil, configuration) -from . import (internpaturl, proxysupport, httpheaders as headers, urlbase, - get_url_from, pooledconnection) +from .. import (log, LOG_CHECK, strformat, + url as urlutil, LinkCheckerError) +from . 
import (internpaturl, proxysupport, httpheaders as headers) # import warnings -from .const import WARN_HTTP_ROBOTS_DENIED, \ - WARN_HTTP_MOVED_PERMANENT, \ - WARN_HTTP_EMPTY_CONTENT, WARN_HTTP_COOKIE_STORE_ERROR, \ - WARN_HTTP_DECOMPRESS_ERROR, WARN_HTTP_UNSUPPORTED_ENCODING, \ - WARN_HTTP_AUTH_UNKNOWN, WARN_HTTP_AUTH_UNAUTHORIZED +from .const import WARN_HTTP_EMPTY_CONTENT # assumed HTTP header encoding HEADER_ENCODING = "iso-8859-1" @@ -46,18 +34,7 @@ # helper alias unicode_safe = strformat.unicode_safe -supportHttps = hasattr(httplib, "HTTPSConnection") - -SUPPORTED_ENCODINGS = ('x-gzip', 'gzip', 'deflate') -# Accept-Encoding header value -ACCEPT_ENCODING = ",".join(SUPPORTED_ENCODINGS) -# Accept-Charset header value -ACCEPT_CHARSET = "utf-8,ISO-8859-1;q=0.7,*;q=0.3" -# Accept mime type header value -ACCEPT = "Accept:text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" - - -class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport, pooledconnection.PooledConnection): +class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): """ Url link with http scheme. """ @@ -67,28 +44,16 @@ def reset (self): Initialize HTTP specific variables. """ super(HttpUrl, self).reset() - self.max_redirects = 5 - self.has301status = False - # flag if connection is persistent - self.persistent = False - # URLs seen through 301/302 redirections + # URLs seen through redirections self.aliases = [] # initialize check data - self.headers = None + self.headers = {} self.auth = None - self.cookies = [] - # temporary data filled when reading redirections - self._data = None - # flag telling if GET method is allowed; determined by robots.txt - self.method_get_allowed = True - # HttpResponse object - self.response = None def allows_robots (self, url): """ Fetch and parse the robots.txt of given url. Checks if LinkChecker - can get the requested resource content. HEAD requests however are - still allowed. + can get the requested resource content. 
@param url: the url to be requested @type url: string @@ -98,9 +63,8 @@ def allows_robots (self, url): roboturl = self.get_robots_txt_url() user, password = self.get_user_password() rb = self.aggregate.robots_txt - callback = self.aggregate.connections.host_wait - return rb.allows_url(roboturl, url, self.proxy, user, password, - callback=callback) + #callback = self.aggregate.connections.host_wait + return rb.allows_url(roboturl, self.url, self.proxy, user, password) def add_size_info (self): """Get size of URL content from HTTP header.""" @@ -110,8 +74,6 @@ def add_size_info (self): # the content data is always decoded. try: self.size = int(self.getheader("Content-Length")) - if self.dlsize == -1: - self.dlsize = self.size except (ValueError, OverflowError): pass else: @@ -134,164 +96,56 @@ def check_connection (self): - 5xx: Server Error - The server failed to fulfill an apparently valid request """ + self.session = self.aggregate.get_request_session() # set the proxy, so a 407 status after this is an error self.set_proxy(self.aggregate.config["proxy"].get(self.scheme)) self.construct_auth() # check robots.txt if not self.allows_robots(self.url): - # remove all previously stored results - self.add_warning( - _("Access denied by robots.txt, skipping content checks."), - tag=WARN_HTTP_ROBOTS_DENIED) - self.method_get_allowed = False - # first try with HEAD - self.method = "HEAD" + self.add_info(_("Access denied by robots.txt, checked only syntax.")) + self.set_result(_("syntax OK")) + self.do_check_content = False + return # check the http connection - self.check_http_connection() - # redirections might have changed the URL - self.url = urlutil.urlunsplit(self.urlparts) - # check response - if self.response is not None: - self.check_response() - self.close_response() - - def check_http_connection (self): - """ - Check HTTP connection and return get response and a flag - if the check algorithm had to fall back to the GET method. 
- - @return: response or None if url is already handled - @rtype: HttpResponse or None - """ - while True: - # XXX refactor this - self.close_response() - try: - self._try_http_response() - except httplib.BadStatusLine as msg: - # some servers send empty HEAD replies - if self.method == "HEAD" and self.method_get_allowed: - log.debug(LOG_CHECK, "Bad status line %r: falling back to GET", msg) - self.fallback_to_get() - continue - raise - except socket.error as msg: - # some servers reset the connection on HEAD requests - if self.method == "HEAD" and self.method_get_allowed and \ - msg[0] == errno.ECONNRESET: - self.fallback_to_get() - continue - raise - - uheaders = unicode_safe(self.headers, encoding=HEADER_ENCODING) - log.debug(LOG_CHECK, "Headers: %s", uheaders) - # proxy enforcement (overrides standard proxy) - if self.response.status == 305 and self.headers: - oldproxy = (self.proxy, self.proxyauth) - newproxy = self.getheader("Location") - if newproxy: - self.add_info(_("Enforced proxy `%(name)s'.") % - {"name": newproxy}) - self.set_proxy(newproxy) - self.close_response() - if self.proxy is None: - self.set_result( - _("Missing 'Location' header with enforced proxy status 305, aborting."), - valid=False) - return - elif not self.proxy: - self.set_result( - _("Empty 'Location' header value with enforced proxy status 305, aborting."), - valid=False) - return - self._try_http_response() - # restore old proxy settings - self.proxy, self.proxyauth = oldproxy - try: - tries = self.follow_redirections() - except httplib.BadStatusLine as msg: - # some servers send empty HEAD replies - if self.method == "HEAD" and self.method_get_allowed: - log.debug(LOG_CHECK, "Bad status line %r: falling back to GET", msg) - self.fallback_to_get() - continue - raise - if tries == -1: - log.debug(LOG_CHECK, "already handled") - self.close_response() - self.do_check_content = False - return - if tries >= self.max_redirects: - if self.method == "HEAD" and self.method_get_allowed: - # 
Microsoft servers tend to recurse HEAD requests - self.fallback_to_get() - continue - self.set_result(_("more than %d redirections, aborting") % - self.max_redirects, valid=False) - self.close_response() - self.do_check_content = False - return - if self.do_fallback(self.response.status): - self.fallback_to_get() - continue - # user authentication - if self.response.status == 401: - authenticate = self.getheader('WWW-Authenticate') - if authenticate is None: - # Either the server intentionally blocked this request, - # or there is a form on this page which requires - # manual user/password input. - # Either way, this is a warning. - self.add_warning(_("Unauthorized access without HTTP authentication."), - tag=WARN_HTTP_AUTH_UNAUTHORIZED) - return - if not authenticate.startswith("Basic"): - # LinkChecker only supports Basic authorization - args = {"auth": authenticate} - self.add_warning( - _("Unsupported HTTP authentication `%(auth)s', " \ - "only `Basic' authentication is supported.") % args, - tag=WARN_HTTP_AUTH_UNKNOWN) - return - if not self.auth: - self.construct_auth() - if self.auth: - continue - break - - def do_fallback(self, status): - """Check for fallback according to response status. - @param status: The HTTP response status - @ptype status: int - @return: True if checker should use GET, else False - @rtype: bool - """ - if self.method == "HEAD": - # Some sites do not support HEAD requests, for example - # youtube sends a 404 with HEAD, 200 with GET. Doh. - # A 405 "Method not allowed" status should also use GET. - if status >= 400: - log.debug(LOG_CHECK, "Method HEAD error %d, falling back to GET", status) - return True - # Other sites send 200 with HEAD, but 404 with GET. Bummer. - poweredby = self.getheader('X-Powered-By', u'') - server = self.getheader('Server', u'') - # Some servers (Zope, Apache Coyote/Tomcat, IIS have wrong - # content type with HEAD. This seems to be a common problem. 
- if (poweredby.startswith('Zope') or server.startswith('Zope') - or server.startswith('Apache-Coyote') - or ('ASP.NET' in poweredby and 'Microsoft-IIS' in server)): - return True - return False - - def fallback_to_get(self): - """Set method to GET and clear aliases.""" - self.close_response() - self.close_connection() - self.method = "GET" - self.aliases = [] - self.urlparts = strformat.url_unicode_split(self.url) - self.build_url_parts() + request = self.build_request() + self.send_request(request) + self.follow_redirections(request) + self.check_response() + + def build_request(self): + """Build a prepared request object.""" + clientheaders = { + "User-Agent": self.aggregate.config["useragent"], + "DNT": "1", + } + if (self.parent_url and + self.parent_url.lower().startswith(HTTP_SCHEMAS)): + clientheaders["Referer"] = self.parent_url + kwargs = dict( + method='GET', + url=self.url, + headers=clientheaders, + ) + if self.auth: + kwargs['auth'] = self.auth + log.debug(LOG_CHECK, "Prepare request with %s", kwargs) + request = requests.Request(**kwargs) + return self.session.prepare_request(request) + + def send_request(self, request): + """Send request and store response in self.url_connection.""" + # throttle the number of requests to each host + self.aggregate.wait_for_host(self.urlparts[1]) + kwargs = dict( + stream=True, + timeout=self.aggregate.config["timeout"], + allow_redirects=False, + ) + if self.scheme == "https" and self.aggregate.config["sslverify"]: + kwargs["verify"] = self.aggregate.config["sslverify"] + log.debug(LOG_CHECK, "Send request with %s", kwargs) + self.url_connection = self.session.send(request, **kwargs) + self.headers = self.url_connection.headers def construct_auth (self): """Construct HTTP Basic authentication credentials if there @@ -301,162 +155,34 @@ def construct_auth (self): return _user, _password = self.get_user_password() if _user is not None and _password is not None: - credentials = httputil.encode_base64("%s:%s" % (_user, 
_password)) - self.auth = "Basic " + credentials - log.debug(LOG_CHECK, "Using basic authentication") + self.auth = (_user, _password) def get_content_type (self): """Return content MIME type or empty string.""" - if self.content_type is None: - if self.headers: - self.content_type = headers.get_content_type(self.headers) - else: - self.content_type = u"" + if not self.content_type: + self.content_type = headers.get_content_type(self.headers) return self.content_type - def follow_redirections (self, set_result=True): + def follow_redirections(self, request): """Follow all redirections of http response.""" log.debug(LOG_CHECK, "follow all redirections") - redirected = self.url - tries = 0 - while self.response.status in [301, 302] and self.headers and \ - tries < self.max_redirects: - num = self.follow_redirection(set_result, redirected) - if num == -1: - return num - redirected = urlutil.urlunsplit(self.urlparts) - tries += num - return tries - - def follow_redirection (self, set_result, redirected): - """Follow one redirection of http response.""" - newurl = self.getheader("Location", - self.getheader("Uri", u"")) - # make new url absolute and unicode - newurl = urlparse.urljoin(redirected, unicode_safe(newurl)) - log.debug(LOG_CHECK, "Redirected to %r", newurl) - self.add_info(_("Redirected to `%(url)s'.") % {'url': newurl}) - # norm base url - can raise UnicodeError from url.idna_encode() - redirected, is_idn = urlbase.url_norm(newurl) - log.debug(LOG_CHECK, "Norm redirected to %r", redirected) - urlparts = strformat.url_unicode_split(redirected) - if not self.check_redirection_scheme(redirected, urlparts, set_result): - return -1 - if not self.check_redirection_newscheme(redirected, urlparts, set_result): - return -1 - if not self.check_redirection_domain(redirected, urlparts, - set_result): - return -1 - if not self.check_redirection_robots(redirected, set_result): - return -1 - num = self.check_redirection_recursion(redirected, set_result) - if num != 0: - 
return num - if set_result: - self.check301status() - self.close_response() - self.close_connection() - # remember redirected url as alias - self.aliases.append(redirected) - if self.anchor: - urlparts[4] = self.anchor - # note: urlparts has to be a list - self.urlparts = urlparts - self.build_url_parts() - # store cookies from redirect response - self.store_cookies() - # new response data - self._try_http_response() - return 1 - - def check_redirection_scheme (self, redirected, urlparts, set_result): - """Return True if redirection scheme is ok, else False.""" - if urlparts[0] in ('ftp', 'http', 'https'): - return True - # For security reasons do not allow redirects to protocols - # other than HTTP, HTTPS or FTP. - if set_result: - self.add_warning( - _("Redirection to url `%(newurl)s' is not allowed.") % - {'newurl': redirected}) - self.set_result(_("syntax OK")) - return False - - def check_redirection_domain (self, redirected, urlparts, set_result): - """Return True if redirection domain is ok, else False.""" - # XXX does not support user:pass@netloc format - if urlparts[1] != self.urlparts[1]: - # URL domain changed - if self.recursion_level == 0 and urlparts[0] in ('http', 'https'): - # Add intern patterns for redirection of URLs given by the - # user for HTTP schemes. - self.add_intern_pattern(url=redirected) - return True - # check extern filter again - self.extern = None - self.set_extern(redirected) - if self.extern[0] and self.extern[1]: - if set_result: - self.check301status() - self.add_info(_("The redirected URL is outside of the domain " - "filter, checked only syntax.")) - self.set_result(_("filtered")) - return False - return True - - def check_redirection_robots (self, redirected, set_result): - """Check robots.txt allowance for redirections. 
Return True if - allowed, else False.""" - if self.allows_robots(redirected): - return True - if set_result: - self.add_warning( - _("Access to redirected URL denied by robots.txt, " - "checked only syntax."), tag=WARN_HTTP_ROBOTS_DENIED) - self.set_result(_("syntax OK")) - return False - - def check_redirection_recursion (self, redirected, set_result): - """Check for recursive redirect. Return zero if no recursion - detected, max_redirects for recursion with HEAD request, - -1 otherwise.""" - all_seen = [self.cache_url_key] + self.aliases - if redirected not in all_seen: - return 0 - if self.method == "HEAD" and self.method_get_allowed: - # Microsoft servers tend to recurse HEAD requests - # fall back to the original url and use GET - return self.max_redirects - if set_result: - urls = "\n => ".join(all_seen + [redirected]) - self.set_result(_("recursive redirection encountered:\n %(urls)s") % - {"urls": urls}, valid=False) - return -1 - - def check_redirection_newscheme (self, redirected, urlparts, set_result): - """Check for HTTP(S)/FTP redirection. 
Return True for - redirection with same scheme, else False.""" - if urlparts[0] != self.urlparts[0]: - # changed scheme - newobj = get_url_from( - redirected, self.recursion_level, self.aggregate, - parent_url=self.parent_url, base_ref=self.base_ref, - line=self.line, column=self.column, name=self.name) - if set_result: - self.set_result(_("syntax OK")) - # append new object to queue - self.aggregate.urlqueue.put(newobj) - return False - raise LinkCheckerError(_('Cannot redirect to different scheme without result')) - return True - - def check301status (self): - """If response page has been permanently moved add a warning.""" - if self.response.status == 301 and not self.has301status: - self.add_warning(_("HTTP 301 (moved permanent) encountered: you" - " should update this link."), - tag=WARN_HTTP_MOVED_PERMANENT) - self.has301status = True + kwargs = dict( + stream=True, + ) + response = None + for response in self.session.resolve_redirects(self.url_connection, request, **kwargs): + newurl = response.url + log.debug(LOG_CHECK, "Redirected to %r", newurl) + self.aliases.append(newurl) + self.add_info(_("Redirected to `%(url)s'.") % {'url': newurl}) + urlparts = strformat.url_unicode_split(newurl) + if response is not None: + self.urlparts = urlparts + self.build_url_parts() + self.url_connection = response + self.headers = response.headers + self.url = urlutil.urlunsplit(urlparts) + self.scheme = urlparts[0].lower() def getheader (self, name, default=None): """Get decoded header value. 
@@ -471,271 +197,29 @@ def getheader (self, name, default=None): def check_response (self): """Check final result and log it.""" - if self.response.status >= 400: - self.set_result(u"%r %s" % (self.response.status, self.response.reason), + if self.url_connection.status_code >= 400: + self.set_result(u"%d %s" % (self.url_connection.status_code, self.url_connection.reason), valid=False) else: - if self.response.status == 204: + if self.url_connection.status_code == 204: # no content - self.add_warning(self.response.reason, + self.add_warning(self.url_connection.reason, tag=WARN_HTTP_EMPTY_CONTENT) - # store cookies for valid links - self.store_cookies() - if self.response.status >= 200: - self.set_result(u"%r %s" % (self.response.status, self.response.reason)) + if self.url_connection.status_code >= 200: + self.set_result(u"%r %s" % (self.url_connection.status_code, self.url_connection.reason)) else: self.set_result(_("OK")) - modified = rfc822.parsedate(self.getheader('Last-Modified', u'')) - if modified: - self.modified = datetime.utcfromtimestamp(time.mktime(modified)) - - def _try_http_response (self): - """Try to get a HTTP response object. For persistent - connections that the server closed unexpected, a new connection - will be opened. 
- """ - try: - self._get_http_response() - except socket.error as msg: - if msg.args[0] == 32 and self.persistent: - # server closed persistent connection - retry - log.debug(LOG_CHECK, "Server closed connection: retry") - self.persistent = False - self._get_http_response() - else: - raise - except httplib.BadStatusLine as msg: - if self.persistent: - # server closed connection - retry - log.debug(LOG_CHECK, "Empty status line: retry") - self.persistent = False - self._get_http_response() - else: - raise - - def _get_http_response (self): - """Send HTTP request and get response object.""" - scheme, host, port = self.get_netloc() - log.debug(LOG_CHECK, "Connecting to %r", host) - self.get_http_object(scheme, host, port) - self.add_connection_request() - self.add_connection_headers() - self.response = self.url_connection.getresponse(buffering=True) - self.headers = self.response.msg - self.content_type = None - self.persistent = not self.response.will_close - if self.persistent and self.method == "HEAD": - # Some servers send page content after a HEAD request, - # but only after making the *next* request. This breaks - # protocol synchronisation. Workaround here is to close - # the connection after HEAD. - # Example: http://www.empleo.gob.mx (Apache/1.3.33 (Unix) mod_jk) - self.persistent = False - # Note that for POST method the connection should also be closed, - # but this method is never used. 
- # If possible, use official W3C HTTP response name - if self.response.status in httplib.responses: - self.response.reason = httplib.responses[self.response.status] - if self.response.reason: - self.response.reason = unicode_safe(self.response.reason) - log.debug(LOG_CHECK, "Response: %s %s", self.response.status, self.response.reason) - - def add_connection_request(self): - """Add connection request.""" - # the anchor fragment is not part of a HTTP URL, see - # http://tools.ietf.org/html/rfc2616#section-3.2.2 - anchor = '' - if self.proxy: - path = urlutil.urlunsplit((self.urlparts[0], self.urlparts[1], - self.urlparts[2], self.urlparts[3], anchor)) - else: - path = urlutil.urlunsplit(('', '', self.urlparts[2], - self.urlparts[3], anchor)) - self.url_connection.putrequest(self.method, path, skip_host=True, - skip_accept_encoding=True) - - def add_connection_headers(self): - """Add connection header.""" - # be sure to use the original host as header even for proxies - self.url_connection.putheader("Host", self.urlparts[1]) - if self.auth: - # HTTP authorization - self.url_connection.putheader("Authorization", self.auth) - if self.proxyauth: - self.url_connection.putheader("Proxy-Authorization", - self.proxyauth) - if (self.parent_url and - self.parent_url.lower().startswith(HTTP_SCHEMAS)): - self.url_connection.putheader("Referer", self.parent_url) - self.url_connection.putheader("User-Agent", - self.aggregate.config["useragent"]) - # prefer compressed content - self.url_connection.putheader("Accept-Encoding", ACCEPT_ENCODING) - # prefer UTF-8 encoding - self.url_connection.putheader("Accept-Charset", ACCEPT_CHARSET) - # prefer parseable mime types - self.url_connection.putheader("Accept", ACCEPT) - # send do-not-track header - self.url_connection.putheader("DNT", "1") - if self.aggregate.config['sendcookies']: - self.send_cookies() - self.url_connection.endheaders() - def store_cookies (self): - """Save cookies from response headers.""" - if 
self.aggregate.config['storecookies']: - for c in self.cookies: - self.add_info(_("Sent Cookie: %(cookie)s.") % - {"cookie": c.client_header_value()}) - errors = self.aggregate.cookies.add(self.headers, - self.urlparts[0], self.urlparts[1], self.urlparts[2]) - if errors: - self.add_warning( - _("Could not store cookies from headers: %(error)s.") % - {'error': "\n".join(errors)}, - tag=WARN_HTTP_COOKIE_STORE_ERROR) - - def send_cookies (self): - """Add cookie headers to request.""" - scheme = self.urlparts[0] - host = self.urlparts[1] - port = urlutil.default_ports.get(scheme, 80) - host, port = urlutil.splitport(host, port) - path = self.urlparts[2] or u"/" - self.cookies = self.aggregate.cookies.get(scheme, host, port, path) - if not self.cookies: - return - # add one cookie header with all cookie data - # this is limited by maximum header length - headername = "Cookie" - headervalue = "" - max_value_len = headers.MAX_HEADER_BYTES - len(headername) - 2 - for c in self.cookies: - cookievalue = c.client_header_value() - if "version" in c.attributes: - # add separate header for explicit versioned cookie - if headervalue: - self.url_connection.putheader(headername, headervalue) - self.url_connection.putheader(headername, cookievalue) - headervalue = "" - continue - if headervalue: - cookievalue = "; " + cookievalue - if (len(headervalue) + len(cookievalue)) < max_value_len: - headervalue += cookievalue - else: - log.debug(LOG_CHECK, "Discard too-long cookie %r", cookievalue) - if headervalue: - log.debug(LOG_CHECK, "Sending cookie header %s:%s", headername, headervalue) - self.url_connection.putheader(headername, headervalue) - - def get_http_object (self, scheme, host, port): - """ - Open a HTTP connection. 
- - @param host: the host to connect to - @ptype host: string of the form [:] - @param scheme: 'http' or 'https' - @ptype scheme: string - @return: None - """ - self.close_connection() - def create_connection(scheme, host, port): - """Create a new http or https connection.""" - kwargs = dict(port=port, strict=True, timeout=self.aggregate.config["timeout"]) - if scheme == "http": - h = httplib.HTTPConnection(host, **kwargs) - elif scheme == "https" and supportHttps: - devel_dir = os.path.join(configuration.configdata.install_data, "config") - sslverify = self.aggregate.config["sslverify"] - if sslverify: - if sslverify is not True: - kwargs["ca_certs"] = sslverify - else: - kwargs["ca_certs"] = configuration.get_share_file(devel_dir, 'ca-certificates.crt') - h = httplib.HTTPSConnection(host, **kwargs) - else: - msg = _("Unsupported HTTP url scheme `%(scheme)s'") % {"scheme": scheme} - raise LinkCheckerError(msg) - if log.is_debug(LOG_CHECK): - h.set_debuglevel(1) - return h - self.get_pooled_connection(scheme, host, port, create_connection) - self.url_connection.connect() - - def read_content (self): - """Get content of the URL target. The content data is cached after - the first call to this method. - - @return: URL content, decompressed and decoded - @rtype: string - """ - assert self.method_get_allowed, 'unallowed content read' - if self.method != "GET" or self.response is None: - self.method = "GET" - self._try_http_response() - num = self.follow_redirections(set_result=False) - if not (0 <= num <= self.max_redirects): - raise LinkCheckerError(_("Redirection error")) - # Re-read size info, since the GET request result could be different - # than a former HEAD request. 
- self.add_size_info() - if self.size > self.MaxFilesizeBytes: - raise LinkCheckerError(_("File size too large")) - self.charset = headers.get_charset(self.headers) - return self._read_content() - - def _read_content (self): - """Read URL contents.""" - data = self.response.read(self.MaxFilesizeBytes+1) - if len(data) > self.MaxFilesizeBytes: - raise LinkCheckerError(_("File size too large")) - dlsize = len(data) - self.aggregate.add_download_data(self.cache_content_key, data) - encoding = headers.get_content_encoding(self.headers) - if encoding in SUPPORTED_ENCODINGS: - try: - if encoding == 'deflate': - f = StringIO(zlib.decompress(data)) - else: - f = gzip.GzipFile('', 'rb', 9, StringIO(data)) - except zlib.error as msg: - log.debug(LOG_CHECK, "Error %s data of len %d", encoding, len(data)) - self.add_warning(_("Decompress error %(err)s") % - {"err": str(msg)}, - tag=WARN_HTTP_DECOMPRESS_ERROR) - f = StringIO(data) - try: - data = f.read() - finally: - f.close() - return data, dlsize - - def encoding_supported (self): - """Check if page encoding is supported.""" - encoding = headers.get_content_encoding(self.headers) - if encoding and encoding not in SUPPORTED_ENCODINGS and \ - encoding != 'identity': - self.add_warning(_("Unsupported content encoding `%(encoding)s'.") % - {"encoding": encoding}, - tag=WARN_HTTP_UNSUPPORTED_ENCODING) - return False - return True - - def can_get_content(self): - """Check if it's allowed to read content.""" - return self.method_get_allowed - - def content_allows_robots (self): - """Check if it's allowed to read content before execution.""" - if not self.method_get_allowed: - return False - return super(HttpUrl, self).content_allows_robots() - - def check_warningregex (self): - """Check if it's allowed to read content before execution.""" - if self.method_get_allowed: - super(HttpUrl, self).check_warningregex() + def read_content(self): + """Return data and data size for this URL. 
+ Can be overridden in subclasses.""" + maxbytes = self.aggregate.config["maxfilesizedownload"] + buf = StringIO() + for data in self.url_connection.iter_content(chunk_size=self.ReadChunkBytes): + if buf.tell() + len(data) > maxbytes: + raise LinkCheckerError(_("File size too large")) + buf.write(data) + return buf.getvalue() def is_html (self): """ @@ -748,22 +232,14 @@ def is_html (self): if not self.valid: return False mime = self.get_content_type() - if self.ContentMimetypes.get(mime) != "html": - return False - if self.headers: - return self.encoding_supported() - return True + return self.ContentMimetypes.get(mime) == "html" def is_css (self): """Return True iff content of this url is CSS stylesheet.""" if not self.valid: return False mime = self.get_content_type() - if self.ContentMimetypes.get(mime) != "css": - return False - if self.headers: - return self.encoding_supported() - return True + return self.ContentMimetypes.get(mime) == "css" def is_http (self): """ @@ -781,30 +257,13 @@ def is_parseable (self): @return: True if content is parseable @rtype: bool """ - if not (self.valid and self.headers): + if not self.valid: return False ctype = self.get_content_type() if ctype not in self.ContentMimetypes: log.debug(LOG_CHECK, "URL with content type %r is not parseable", ctype) return False - return self.encoding_supported() - - def parse_url (self): - """ - Parse file contents for new links to check. 
- """ - ctype = self.get_content_type() - if self.is_html(): - self.parse_html() - elif self.is_css(): - self.parse_css() - elif ctype == "application/x-shockwave-flash": - self.parse_swf() - elif ctype == "application/msword": - self.parse_word() - elif ctype == "text/vnd.wap.wml": - self.parse_wml() - self.add_num_url_info() + return True def get_robots_txt_url (self): """ @@ -814,28 +273,3 @@ def get_robots_txt_url (self): @rtype: string """ return "%s://%s/robots.txt" % tuple(self.urlparts[0:2]) - - def close_response(self): - """Close the HTTP response object.""" - if self.response is None: - return - self.response.close() - self.response = None - - def close_connection (self): - """Release the connection from the connection pool. Persistent - connections will not be closed. - """ - log.debug(LOG_CHECK, "Closing %s", self.url_connection) - if self.url_connection is None: - # no connection is open - return - # add to cached connections - scheme, host, port = self.get_netloc() - if self.persistent and self.url_connection.is_idle(): - expiration = time.time() + headers.http_keepalive(self.headers) - else: - self.close_response() - expiration = None - self.aggregate.connections.release(scheme, host, port, self.url_connection, expiration=expiration) - self.url_connection = None diff --git a/linkcheck/checker/ignoreurl.py b/linkcheck/checker/ignoreurl.py index 4a345dd24..6ac2630b1 100644 --- a/linkcheck/checker/ignoreurl.py +++ b/linkcheck/checker/ignoreurl.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2012 Bastian Kleineidam +# Copyright (C) 2012-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/checker/internpaturl.py b/linkcheck/checker/internpaturl.py index 504ea872e..ab4a6ae0c 100644 --- a/linkcheck/checker/internpaturl.py +++ b/linkcheck/checker/internpaturl.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 
-*- -# Copyright (C) 2005-2012 Bastian Kleineidam +# Copyright (C) 2005-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/checker/mailtourl.py b/linkcheck/checker/mailtourl.py index ea070e016..5abaa22c2 100644 --- a/linkcheck/checker/mailtourl.py +++ b/linkcheck/checker/mailtourl.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -21,16 +21,13 @@ import re import urllib import urlparse -import smtplib -import socket from email._parseaddr import AddressList from . import urlbase from .. import log, LOG_CHECK, strformat, url as urlutil from dns import resolver from ..network import iputil -from .const import WARN_MAIL_NO_MX_HOST, \ - WARN_MAIL_UNVERIFIED_ADDRESS, WARN_MAIL_NO_CONNECTION +from .const import WARN_MAIL_NO_MX_HOST def getaddresses (addr): @@ -287,78 +284,9 @@ def check_smtp_domain (self, mail): # debug output log.debug(LOG_CHECK, "found %d MX mailhosts:", len(answers)) for preference, host in mxdata: - log.debug(LOG_CHECK, - "MX host %r, preference %d", host, preference) - # connect - self.check_smtp_connect(mxdata, username, domain) - - def check_smtp_connect (self, mxdata, username, domain): - """ - Connect to SMTP servers and check emails. 
- - @param mxdata: list of (preference, host) tuples to check for - @type mxdata: list - @param username: the username to verify - @type username: string - """ - smtpconnect = 0 - for preference, host in mxdata: - try: - log.debug(LOG_CHECK, - "SMTP check for %r (preference %d)", host, preference) - self.url_connection = smtplib.SMTP(timeout=self.aggregate.config["timeout"]) - if log.is_debug(LOG_CHECK): - self.url_connection.set_debuglevel(1) - self.url_connection.connect(host) - log.debug(LOG_CHECK, "SMTP connected!") - smtpconnect = 1 - self.url_connection.helo() - mailaddress = "%s@%s" % (username, domain) - status, info = self.url_connection.verify(mailaddress) - log.debug(LOG_CHECK, "SMTP info %d %r", status, info) - d = { - 'info': "%d %s" % (status, str(info)), - 'mail': mailaddress, - } - if status == 250: - self.add_info(_("Verified address %(mail)s: %(info)s.") % d) - # check for 25x status code which means that the address - # could not be verified, but is sent anyway - elif 250 < status < 260: - self.add_info(_("Unverified but presumably valid" - " address %(mail)s: %(info)s.") % d) - else: - self.add_warning(_("Unverified address: %(info)s.") % d, - tag=WARN_MAIL_UNVERIFIED_ADDRESS) - except smtplib.SMTPException as msg: - self.add_warning( - _("MX mail host %(host)s did not accept connections: " - "%(error)s.") % {'host': host, 'error': str(msg)}, - tag=WARN_MAIL_NO_CONNECTION) - if smtpconnect: - break - if not smtpconnect: - self.set_result(_("Could not connect, but syntax is correct"), - overwrite=True) - else: - self.set_result(_("Found MX mail host %(host)s") % {'host': host}, - overwrite=True) - - def close_connection (self): - """ - Close a possibly opened SMTP connection. 
- """ - if self.url_connection is None: - # no connection is open - return - connection = self.url_connection - self.url_connection = None - try: - connection.quit() - except (smtplib.SMTPException, socket.error): - # ignore close errors - # socket.error is raised for example on timeouts + log.debug(LOG_CHECK, "MX host %r, preference %d", host, preference) pass + self.set_result(_("Valid mail address syntax")) def set_cache_keys (self): """ diff --git a/linkcheck/checker/nntpurl.py b/linkcheck/checker/nntpurl.py index c415dbfa3..f1d64dcd6 100644 --- a/linkcheck/checker/nntpurl.py +++ b/linkcheck/checker/nntpurl.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/checker/pooledconnection.py b/linkcheck/checker/pooledconnection.py deleted file mode 100644 index 21ad1a0ff..000000000 --- a/linkcheck/checker/pooledconnection.py +++ /dev/null @@ -1,40 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2012 Bastian Kleineidam -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -""" -Mixin class for URLs that pool connections. 
-""" - - -class PooledConnection (object): - """Support for connection pooling.""" - - def get_pooled_connection(self, scheme, host, port, create_connection): - """Get a connection from the connection pool.""" - get_connection = self.aggregate.connections.get - while True: - connection = get_connection(scheme, host, port, create_connection) - if hasattr(connection, 'acquire'): - # It's a connection lock object. - # This little trick avoids polling: wait for another - # connection to be released by acquiring the lock. - connection.acquire() - # The lock is immediately released since the calling - # connections.get() acquires it again. - connection.release() - else: - self.url_connection = connection - break diff --git a/linkcheck/checker/proxysupport.py b/linkcheck/checker/proxysupport.py index 80cefe035..b210d0f45 100644 --- a/linkcheck/checker/proxysupport.py +++ b/linkcheck/checker/proxysupport.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/checker/telneturl.py b/linkcheck/checker/telneturl.py index 0c6a004f4..b7ce7483f 100644 --- a/linkcheck/checker/telneturl.py +++ b/linkcheck/checker/telneturl.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/checker/unknownurl.py b/linkcheck/checker/unknownurl.py index a4c2a71c1..f3a36f737 100644 --- a/linkcheck/checker/unknownurl.py +++ b/linkcheck/checker/unknownurl.py @@ -20,7 +20,6 @@ import re from . 
import urlbase -from .const import WARN_IGNORE_URL # from http://www.iana.org/assignments/uri-schemes.html ignored_schemes_permanent = r""" @@ -124,7 +123,7 @@ """ -ignored_schemes = "^(%s%s%s%s):" % ( +ignored_schemes = "^(%s%s%s%s)$" % ( ignored_schemes_permanent, ignored_schemes_provisional, ignored_schemes_historical, @@ -132,7 +131,7 @@ ) ignored_schemes_re = re.compile(ignored_schemes, re.VERBOSE) -is_unknown_url = ignored_schemes_re.search +is_unknown_scheme = ignored_schemes_re.match class UnknownUrl (urlbase.UrlBase): @@ -140,19 +139,16 @@ class UnknownUrl (urlbase.UrlBase): def local_check (self): """Only logs that this URL is unknown.""" - if self.extern[0] and self.extern[1]: - self.add_info(_("Outside of domain filter, checked only syntax.")) - elif self.ignored(): - self.add_warning(_("%(scheme)s URL ignored.") % - {"scheme": self.scheme.capitalize()}, - tag=WARN_IGNORE_URL) + if self.ignored(): + self.add_info(_("%(scheme)s URL ignored.") % + {"scheme": self.scheme.capitalize()}) else: self.set_result(_("URL is unrecognized or has invalid syntax"), valid=False) def ignored (self): """Return True if this URL scheme is ignored.""" - return ignored_schemes_re.search(self.url) + return is_unknown_scheme(self.scheme) def can_get_content (self): """Unknown URLs have no content. diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index 76edb027c..b48168917 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -26,21 +26,19 @@ import errno import socket import select +from cStringIO import StringIO from . import absolute_url, get_url_from -from .. import (log, LOG_CHECK, LOG_CACHE, httputil, httplib2 as httplib, - strformat, LinkCheckerError, url as urlutil, trace, clamav, winutil, geoip, - fileutil, get_link_pat) +from .. 
import (log, LOG_CHECK, LOG_CACHE, + strformat, LinkCheckerError, url as urlutil, trace, get_link_pat, parser) from ..HtmlParser import htmlsax from ..htmlutil import linkparse from ..network import iputil from .const import (WARN_URL_EFFECTIVE_URL, WARN_URL_ERROR_GETTING_CONTENT, WARN_URL_OBFUSCATED_IP, - WARN_URL_ANCHOR_NOT_FOUND, WARN_URL_WARNREGEX_FOUND, - WARN_URL_CONTENT_SIZE_TOO_LARGE, WARN_URL_CONTENT_SIZE_ZERO, - WARN_URL_CONTENT_SIZE_UNEQUAL, WARN_URL_WHITESPACE, + WARN_URL_CONTENT_SIZE_ZERO, WARN_URL_CONTENT_SIZE_TOO_LARGE, + WARN_URL_WHITESPACE, WARN_URL_TOO_LONG, URL_MAX_LENGTH, URL_WARN_LENGTH, - WARN_SYNTAX_HTML, WARN_SYNTAX_CSS, ExcList, ExcSyntaxList, ExcNoCacheList) # helper alias @@ -71,17 +69,6 @@ def url_norm (url, encoding=None): raise LinkCheckerError(msg) -def getXmlText (parent, tag): - """Return XML content of given tag in parent element.""" - elem = parent.getElementsByTagName(tag)[0] - # Yes, the DOM standard is awful. - rc = [] - for node in elem.childNodes: - if node.nodeType == node.TEXT_NODE: - rc.append(node.data) - return ''.join(rc) - - class UrlBase (object): """An URL with additional information like validity etc.""" @@ -103,8 +90,8 @@ class UrlBase (object): "text/vnd.wap.wml": "wml", } - # Set maximum file size for downloaded files in bytes. 
- MaxFilesizeBytes = 1024*1024*5 + # Read in 16kb chunks + ReadChunkBytes = 1024*16 def __init__ (self, base_url, recursion_level, aggregate, parent_url=None, base_ref=None, line=-1, column=-1, @@ -173,8 +160,6 @@ def reset (self): self.urlparts = None # the scheme, host, port and anchor part of url self.scheme = self.host = self.port = self.anchor = None - # list of parsed anchors - self.anchors = [] # the result message string and flag self.result = u"" self.has_result = False @@ -190,8 +175,6 @@ def reset (self): self.modified = None # download time self.dltime = -1 - # download size - self.dlsize = -1 # check time self.checktime = 0 # connection object @@ -211,8 +194,6 @@ def reset (self): self.do_check_content = True # MIME content type self.content_type = None - # number of URLs in page content - self.num_urls = 0 def set_result (self, msg, valid=True, overwrite=False): """ @@ -229,6 +210,8 @@ def set_result (self, msg, valid=True, overwrite=False): log.warn(LOG_CHECK, "Empty result for %s", self) self.result = msg self.valid = valid + # free content data + self.data = None def get_title (self): """Return title of page the URL refers to. @@ -246,30 +229,6 @@ def get_title (self): self.title = title return self.title - def set_title_from_content (self): - """Set title of page the URL refers to.from page content.""" - if not self.valid: - return - try: - handler = linkparse.TitleFinder() - except tuple(ExcList): - return - parser = htmlsax.parser(handler) - handler.parser = parser - if self.charset: - parser.encoding = self.charset - # parse - try: - parser.feed(self.get_content()) - parser.flush() - except linkparse.StopParse as msg: - log.debug(LOG_CHECK, "Stopped parsing: %s", msg) - # break cyclic dependencies - handler.parser = None - parser.handler = None - if handler.title: - self.title = handler.title - def is_parseable (self): """ Return True iff content of this url is parseable. 
@@ -287,15 +246,15 @@ def is_css (self): return False def is_http (self): - """ - Return True for http:// URLs. - """ + """Return True for http:// URLs.""" return False def is_file (self): - """ - Return True for file:// URLs. - """ + """Return True for file:// URLs.""" + return False + + def is_directory(self): + """Return True if current URL represents a directory.""" return False def is_local(self): @@ -318,45 +277,6 @@ def add_info (self, s): if s not in self.info: self.info.append(s) - def copy_from_cache (self, cache_data): - """ - Fill attributes from cache data. - """ - self.url = cache_data["url"] - self.result = cache_data["result"] - self.has_result = True - anchor_changed = (self.anchor != cache_data["anchor"]) - for tag, msg in cache_data["warnings"]: - # do not copy anchor warnings, since the current anchor - # might have changed - if anchor_changed and tag == WARN_URL_ANCHOR_NOT_FOUND: - continue - self.add_warning(msg, tag=tag) - for info in cache_data["info"]: - self.add_info(info) - self.valid = cache_data["valid"] - self.dltime = cache_data["dltime"] - self.dlsize = cache_data["dlsize"] - self.anchors = cache_data["anchors"] - self.content_type = cache_data["content_type"] - if anchor_changed and self.valid: - # recheck anchor - self.check_anchor() - - def get_cache_data (self): - """Return all data values that should be put in the cache.""" - return {"url": self.url, - "result": self.result, - "warnings": self.warnings, - "info": self.info, - "valid": self.valid, - "dltime": self.dltime, - "dlsize": self.dlsize, - "anchors": self.anchors, - "anchor": self.anchor, - "content_type": self.get_content_type(), - } - def set_cache_keys (self): """ Set keys for URL checking and content recursion. 
@@ -367,11 +287,7 @@ def set_cache_keys (self): assert isinstance(self.cache_content_key, unicode), self log.debug(LOG_CACHE, "Content cache key %r", self.cache_content_key) # construct cache key - if self.aggregate.config["anchors"]: - # add anchor to cache key - self.cache_url_key = urlutil.urlunsplit(self.urlparts[:4]+[self.anchor or u""]) - else: - self.cache_url_key = self.cache_content_key + self.cache_url_key = self.cache_content_key assert isinstance(self.cache_url_key, unicode), self log.debug(LOG_CACHE, "URL cache key %r", self.cache_url_key) @@ -442,9 +358,9 @@ def build_url (self): self.url = urlutil.urlunsplit(urlparts) # split into (modifiable) list self.urlparts = strformat.url_unicode_split(self.url) + self.build_url_parts() # and unsplit again self.url = urlutil.urlunsplit(self.urlparts) - self.build_url_parts() def build_url_parts (self): """Set userinfo, host, port and anchor from self.urlparts. @@ -452,22 +368,28 @@ def build_url_parts (self): """ # check userinfo@host:port syntax self.userinfo, host = urllib.splituser(self.urlparts[1]) - # set host lowercase - if self.userinfo: - self.urlparts[1] = "%s@%s" % (self.userinfo, host.lower()) - else: - self.urlparts[1] = host.lower() - # safe anchor for later checking - self.anchor = self.urlparts[4] port = urlutil.default_ports.get(self.scheme, 0) - self.host, self.port = urlutil.splitport(host, port=port) - if self.port is None: + host, port = urlutil.splitport(host, port=port) + if port is None: raise LinkCheckerError(_("URL host %(host)r has invalid port") % {"host": host}) + self.port = port + # set host lowercase + self.host = host.lower() if self.scheme in scheme_requires_host: if not self.host: raise LinkCheckerError(_("URL has empty hostname")) self.check_obfuscated_ip() + if not self.port or self.port == urlutil.default_ports.get(self.scheme): + host = self.host + else: + host = "%s:%d" % (self.host, self.port) + if self.userinfo: + self.urlparts[1] = "%s@%s" % (self.userinfo, host) + 
else: + self.urlparts[1] = host + # safe anchor for later checking + self.anchor = self.urlparts[4] def check_obfuscated_ip (self): """Warn if host of this URL is obfuscated IP address.""" @@ -476,9 +398,10 @@ def check_obfuscated_ip (self): if iputil.is_obfuscated_ip(self.host): ips = iputil.resolve_host(self.host) if ips: + self.host = ips[0] self.add_warning( _("URL %(url)s has obfuscated IP address %(ip)s") % \ - {"url": self.base_url, "ip": ips.pop()}, + {"url": self.base_url, "ip": ips[0]}, tag=WARN_URL_OBFUSCATED_IP) def check (self): @@ -499,19 +422,6 @@ def check (self): # close/release possible open connection self.close_connection() - def add_country_info (self): - """Try to ask GeoIP database for country info.""" - if self.host: - country = geoip.get_country(self.host) - if country: - self.add_info(_("URL is located in %(country)s.") % - {"country": _(country)}) - - def add_size_info (self): - """Store size of URL content from meta info into self.size. - Must be implemented in subclasses.""" - pass - def local_check (self): """Local check function can be overridden in subclasses.""" log.debug(LOG_CHECK, "Checking %s", self) @@ -524,35 +434,28 @@ def local_check (self): try: self.check_connection() self.add_size_info() - self.add_country_info() + self.aggregate.plugin_manager.run_connection_plugins(self) except tuple(ExcList) as exc: value = self.handle_exception() # make nicer error msg for unknown hosts if isinstance(exc, socket.error) and exc.args[0] == -2: value = _('Hostname not found') - # make nicer error msg for bad status line - elif isinstance(exc, httplib.BadStatusLine): - value = _('Bad HTTP response %(line)r') % {"line": str(value)} elif isinstance(exc, UnicodeError): # idna.encode(host) failed value = _('Bad hostname %(host)r: %(msg)s') % {'host': self.host, 'msg': str(value)} self.set_result(unicode_safe(value), valid=False) - self.checktime = time.time() - check_start if self.do_check_content: # check content and recursion try: - 
self.check_content() + if self.valid and self.can_get_content(): + self.aggregate.plugin_manager.run_content_plugins(self) if self.allows_recursion(): - self.parse_url() - # check content size - self.check_size() + parser.parse_url(self) except tuple(ExcList): value = self.handle_exception() - # make nicer error msg for bad status line - if isinstance(value, httplib.BadStatusLine): - value = _('Bad HTTP response %(line)r') % {"line": str(value)} self.add_warning(_("could not get content: %(msg)s") % {"msg": str(value)}, tag=WARN_URL_ERROR_GETTING_CONTENT) + self.checktime = time.time() - check_start def close_connection (self): """ @@ -595,6 +498,17 @@ def check_connection (self): """ self.url_connection = urllib2.urlopen(self.url) + def add_size_info (self): + """Set size of URL content (if any).. + Should be overridden in subclasses.""" + maxbytes = self.aggregate.config["maxfilesizedownload"] + if self.size > maxbytes: + self.add_warning( + _("Content size %(size)s is larger than %(maxbytes)s.") % + dict(size=strformat.strsize(self.size), + maxbytes=strformat.strsize(maxbytes)), + tag=WARN_URL_CONTENT_SIZE_TOO_LARGE) + def allows_recursion (self): """ Return True iff we can recurse into the url's content. @@ -617,6 +531,9 @@ def allows_recursion (self): if self.extern[0]: log.debug(LOG_CHECK, "... no, extern.") return False + if self.size > self.aggregate.config["maxfilesizeparse"]: + log.debug(LOG_CHECK, "... no, maximum parse size.") + return False if not self.content_allows_robots(): log.debug(LOG_CHECK, "... no, robots.") return False @@ -628,6 +545,7 @@ def content_allows_robots (self): Return False if the content of this URL forbids robots to search for recursive links. 
""" + # XXX cleanup if not self.is_html(): return True if not (self.is_http() or self.is_file()): @@ -644,63 +562,12 @@ def content_allows_robots (self): parser.flush() except linkparse.StopParse as msg: log.debug(LOG_CHECK, "Stopped parsing: %s", msg) + pass # break cyclic dependencies handler.parser = None parser.handler = None return handler.follow - def get_anchors (self): - """Store anchors for this URL. Precondition: this URL is - an HTML resource.""" - log.debug(LOG_CHECK, "Getting HTML anchors %s", self) - self.find_links(self.add_anchor, tags=linkparse.AnchorTags) - - def find_links (self, callback, tags=None): - """Parse into content and search for URLs to check. - Found URLs are added to the URL queue. - """ - # construct parser object - handler = linkparse.LinkFinder(callback, tags=tags) - parser = htmlsax.parser(handler) - if self.charset: - parser.encoding = self.charset - handler.parser = parser - # parse - try: - parser.feed(self.get_content()) - parser.flush() - except linkparse.StopParse as msg: - log.debug(LOG_CHECK, "Stopped parsing: %s", msg) - # break cyclic dependencies - handler.parser = None - parser.handler = None - - def add_anchor (self, url, line, column, name, base): - """Add anchor URL.""" - self.anchors.append((url, line, column, name, base)) - - def check_anchor (self): - """If URL is valid, parseable and has an anchor, check it. - A warning is logged and True is returned if the anchor is not found. 
- """ - if not (self.anchor and self.aggregate.config["anchors"] and - self.valid and self.is_html()): - return - log.debug(LOG_CHECK, "checking anchor %r in %s", self.anchor, self.anchors) - enc = lambda anchor: urlutil.url_quote_part(anchor, encoding=self.encoding) - if any(x for x in self.anchors if enc(x[0]) == self.anchor): - return - if self.anchors: - anchornames = sorted(set(u"`%s'" % x[0] for x in self.anchors)) - anchors = u", ".join(anchornames) - else: - anchors = u"-" - args = {"name": self.anchor, "anchors": anchors} - msg = u"%s %s" % (_("Anchor `%(name)s' not found.") % args, - _("Available anchors: %(anchors)s.") % args) - self.add_warning(msg, tag=WARN_URL_ANCHOR_NOT_FOUND) - return True - def set_extern (self, url): """ Match URL against extern and intern link patterns. If no pattern @@ -728,9 +595,15 @@ def set_extern (self, url): log.debug(LOG_CHECK, "Intern URL %r", url) self.extern = (0, 0) return - log.debug(LOG_CHECK, "Explicit extern URL %r", url) - self.extern = (1, 0) - return + if self.aggregate.config['checkextern']: + self.extern = (1, 0) + else: + self.extern = (1, 1) + if self.extern[0] and self.extern[1]: + self.add_info(_("The URL is outside of the domain " + "filter, checked only syntax.")) + if not self.has_result: + self.set_result(_("filtered")) def get_content_type (self): """Return content MIME type or empty string. @@ -741,188 +614,35 @@ def get_content_type (self): def can_get_content (self): """Indicate wether url get_content() can be called.""" - return True + return self.size <= self.aggregate.config["maxfilesizedownload"] def get_content (self): """Precondition: url_connection is an opened URL.""" if self.data is None: log.debug(LOG_CHECK, "Get content of %r", self.url) t = time.time() - self.data, self.dlsize = self.read_content() + self.data = self.read_content() + self.size = len(self.data) self.dltime = time.time() - t - return self.data - - def read_content (self): - """Return data and data size for this URL. 
- Can be overridden in subclasses.""" - if self.size > self.MaxFilesizeBytes: - raise LinkCheckerError(_("File size too large")) - data = self.url_connection.read(self.MaxFilesizeBytes+1) - if len(data) > self.MaxFilesizeBytes: - raise LinkCheckerError(_("File size too large")) - if not self.is_local(): - self.aggregate.add_download_data(self.cache_content_key, data) - return data, len(data) - - def check_content (self): - """Check content data for warnings, syntax errors, viruses etc.""" - if not (self.valid and self.can_get_content()): - return - if self.is_html(): - self.set_title_from_content() - if self.aggregate.config["anchors"]: - self.get_anchors() - self.check_anchor() - self.check_warningregex() - # is it an intern URL? - if not self.extern[0]: - # check HTML/CSS syntax - if self.aggregate.config["checkhtml"] and self.is_html(): - self.check_html() - if self.aggregate.config["checkcss"] and self.is_css(): - self.check_css() - # check with clamav - if self.aggregate.config["scanvirus"]: - self.scan_virus() - - def check_warningregex (self): - """Check if content matches a given regular expression.""" - config = self.aggregate.config - warningregex = config["warningregex"] - if not (warningregex and self.valid and self.is_parseable()): - return - log.debug(LOG_CHECK, "checking content for warning regex") - try: - content = self.get_content() - curpos = 0 - curline = 1 - # add warnings for found matches, up to the maximum allowed number - for num, match in enumerate(warningregex.finditer(content)): - # calculate line number for match - curline += content.count('\n', curpos, match.start()) - curpos = match.start() - # add a warning message - msg = _("Found %(match)r at line %(line)d in link contents.") - self.add_warning(msg % - {"match": match.group(), "line": curline}, - tag=WARN_URL_WARNREGEX_FOUND) - # check for maximum number of warnings - if num >= config["warningregex_max"]: - break - except tuple(ExcList): - value = self.handle_exception() - 
self.set_result(unicode_safe(value), valid=False) - - def check_size (self): - """Check content size if it is zero or larger than a given - maximum size. - """ - if self.dlsize == 0: - self.add_warning(_("Content size is zero."), + if self.size == 0: + self.add_warning(_("Content size is zero."), tag=WARN_URL_CONTENT_SIZE_ZERO) - else: - maxbytes = self.aggregate.config["warnsizebytes"] - if maxbytes is not None and self.dlsize >= maxbytes: - self.add_warning( - _("Content size %(dlsize)s is larger than %(maxbytes)s.") % - {"dlsize": strformat.strsize(self.dlsize), - "maxbytes": strformat.strsize(maxbytes)}, - tag=WARN_URL_CONTENT_SIZE_TOO_LARGE) - if self.size != -1 and self.dlsize != -1 and self.dlsize != self.size: - self.add_warning(_("Download size (%(dlsize)d Byte) " - "does not equal content size (%(size)d Byte).") % - {"dlsize": self.dlsize, - "size": self.size}, - tag=WARN_URL_CONTENT_SIZE_UNEQUAL) - - def check_w3_errors (self, xml, w3type): - """Add warnings for W3C HTML or CSS errors in xml format. 
- w3type is either "W3C HTML" or "W3C CSS".""" - from xml.dom.minidom import parseString - dom = parseString(xml) - for error in dom.getElementsByTagName('m:error'): - warnmsg = _("%(w3type)s validation error at line %(line)s col %(column)s: %(msg)s") - attrs = { - "w3type": w3type, - "line": getXmlText(error, "m:line"), - "column": getXmlText(error, "m:col"), - "msg": getXmlText(error, "m:message"), - } - tag = WARN_SYNTAX_HTML if w3type == "W3C HTML" else WARN_SYNTAX_CSS - self.add_warning(warnmsg % attrs, tag=tag) - - def check_html (self): - """Check HTML syntax of this page (which is supposed to be HTML) - with the online W3C HTML validator documented at - http://validator.w3.org/docs/api.html - """ - self.aggregate.check_w3_time() - try: - body = {'fragment': self.get_content(), 'output': 'soap12'} - data = urllib.urlencode(body) - u = urllib2.urlopen('http://validator.w3.org/check', data) - if u.headers.get('x-w3c-validator-status', 'Invalid') == 'Valid': - self.add_info(u"W3C Validator: %s" % _("valid HTML syntax")) - return - self.check_w3_errors(u.read(), "W3C HTML") - except Exception: - # catch _all_ exceptions since we dont want third party module - # errors to propagate into this library - err = str(sys.exc_info()[1]) - log.warn(LOG_CHECK, - _("HTML W3C validation caused error: %(msg)s ") % - {"msg": err}) + return self.data - def check_css (self): - """Check CSS syntax of this page (which is supposed to be CSS) - with the online W3C CSS validator documented at - http://jigsaw.w3.org/css-validator/manual.html#expert - """ - self.aggregate.check_w3_time() - try: - host = 'jigsaw.w3.org' - path = '/css-validator/validator' - params = { - 'text': "div {}", - 'warning': '2', - 'output': 'soap12', - } - fields = params.items() - content_type, body = httputil.encode_multipart_formdata(fields) - h = httplib.HTTPConnection(host) - h.putrequest('POST', path) - h.putheader('Content-Type', content_type) - h.putheader('Content-Length', str(len(body))) - 
h.endheaders() - h.send(body) - r = h.getresponse(True) - if r.getheader('X-W3C-Validator-Status', 'Invalid') == 'Valid': - self.add_info(u"W3C Validator: %s" % _("valid CSS syntax")) - return - self.check_w3_errors(r.read(), "W3C HTML") - except Exception: - # catch _all_ exceptions since we dont want third party module - # errors to propagate into this library - err = str(sys.exc_info()[1]) - log.warn(LOG_CHECK, - _("CSS W3C validation caused error: %(msg)s ") % - {"msg": err}) - - def scan_virus (self): - """Scan content for viruses.""" - infected, errors = clamav.scan(self.get_content()) - for msg in infected: - self.add_warning(u"Virus scan infection: %s" % msg) - for msg in errors: - self.add_warning(u"Virus scan error: %s" % msg) - - def parse_url (self): - """ - Parse url content and search for recursive links. - Default parse type is html. - """ - self.parse_html() - self.add_num_url_info() + def read_content(self): + """Return data for this URL. Can be overridden in subclasses.""" + buf = StringIO() + data = self.read_content_chunk() + while data: + if buf.tell() + len(data) > self.aggregate.config["maxfilesizedownload"]: + raise LinkCheckerError(_("File size too large")) + buf.write(data) + data = self.read_content_chunk() + return buf.getvalue() + + def read_content_chunk(self): + """Read one chunk of content from this URL.""" + return self.url_connection.read(self.ReadChunkBytes) def get_user_password (self): """Get tuple (user, password) from configured authentication. @@ -933,16 +653,8 @@ def get_user_password (self): return urllib.splitpasswd(self.userinfo) return self.aggregate.config.get_user_password(self.url) - def parse_html (self): - """Parse into HTML content and search for URLs to check. - Found URLs are added to the URL queue. 
- """ - log.debug(LOG_CHECK, "Parsing HTML %s", self) - self.find_links(self.add_url) - def add_url (self, url, line=0, column=0, name=u"", base=None): """Queue URL data for checking.""" - self.num_urls += 1 if base: base_ref = urlutil.url_norm(base)[0] else: @@ -954,108 +666,6 @@ def add_url (self, url, line=0, column=0, name=u"", base=None): # Only queue URLs which have a result or are not strict extern. self.aggregate.urlqueue.put(url_data) - def add_num_url_info(self): - """Add number of URLs parsed to info.""" - if self.num_urls > 0: - attrs = {"num": self.num_urls} - msg = _n("%(num)d URL parsed.", "%(num)d URLs parsed.", self.num_urls) - self.add_info(msg % attrs) - - def parse_opera (self): - """Parse an opera bookmark file.""" - log.debug(LOG_CHECK, "Parsing Opera bookmarks %s", self) - from ..bookmarks.opera import parse_bookmark_data - for url, name, lineno in parse_bookmark_data(self.get_content()): - self.add_url(url, line=lineno, name=name) - - def parse_chromium (self): - """Parse a Chromium or Google Chrome bookmark file.""" - log.debug(LOG_CHECK, "Parsing Chromium bookmarks %s", self) - from ..bookmarks.chromium import parse_bookmark_data - for url, name in parse_bookmark_data(self.get_content()): - self.add_url(url, name=name) - - def parse_safari (self): - """Parse a Safari bookmark file.""" - log.debug(LOG_CHECK, "Parsing Safari bookmarks %s", self) - from ..bookmarks.safari import parse_bookmark_data - for url, name in parse_bookmark_data(self.get_content()): - self.add_url(url, name=name) - - def parse_text (self): - """Parse a text file with one url per line; comment and blank - lines are ignored.""" - log.debug(LOG_CHECK, "Parsing text %s", self) - lineno = 0 - for line in self.get_content().splitlines(): - lineno += 1 - line = line.strip() - if not line or line.startswith('#'): - continue - self.add_url(line, line=lineno) - - def parse_css (self): - """ - Parse a CSS file for url() patterns. 
- """ - log.debug(LOG_CHECK, "Parsing CSS %s", self) - lineno = 0 - linkfinder = linkparse.css_url_re.finditer - strip_comments = linkparse.strip_c_comments - for line in strip_comments(self.get_content()).splitlines(): - lineno += 1 - for mo in linkfinder(line): - column = mo.start("url") - url = strformat.unquote(mo.group("url").strip()) - self.add_url(url, line=lineno, column=column) - - def parse_swf (self): - """Parse a SWF file for URLs.""" - linkfinder = linkparse.swf_url_re.finditer - for mo in linkfinder(self.get_content()): - url = mo.group() - self.add_url(url) - - def parse_word (self): - """Parse a word file for hyperlinks.""" - if not winutil.has_word(): - return - filename = self.get_temp_filename() - # open word file and parse hyperlinks - try: - app = winutil.get_word_app() - try: - doc = winutil.open_wordfile(app, filename) - if doc is None: - raise winutil.Error("could not open word file %r" % filename) - try: - for link in doc.Hyperlinks: - self.add_url(link.Address, name=link.TextToDisplay) - finally: - winutil.close_wordfile(doc) - finally: - winutil.close_word_app(app) - except winutil.Error, msg: - log.warn(LOG_CHECK, "Error parsing word file: %s", msg) - - def parse_wml (self): - """Parse into WML content and search for URLs to check. - Found URLs are added to the URL queue. - """ - log.debug(LOG_CHECK, "Parsing WML %s", self) - self.find_links(self.add_url, tags=linkparse.WmlTags) - - def get_temp_filename (self): - """Get temporary filename for content to parse.""" - # store content in temporary file - fd, filename = fileutil.get_temp_file(mode='wb', suffix='.doc', - prefix='lc_') - try: - fd.write(self.get_content()) - finally: - fd.close() - return filename - def serialized (self, sep=os.linesep): """ Return serialized url check data as unicode string. 
@@ -1103,7 +713,7 @@ def add_intern_pattern(self, url=None): if pat: log.debug(LOG_CHECK, "Add intern pattern %r", pat) self.aggregate.config['internlinks'].append(get_link_pat(pat)) - except UnicodeError, msg: + except UnicodeError as msg: res = _("URL has unparsable domain name: %(domain)s") % \ {"domain": msg} self.set_result(res, valid=False) @@ -1151,7 +761,7 @@ def to_wire_dict (self): Number of seconds needed to check this link, default: zero. - url_data.dltime: int Number of seconds needed to download URL content, default: -1 - - url_data.dlsize: int + - url_data.size: int Size of downloaded URL content, default: -1 - url_data.info: list of unicode Additional information about this URL. @@ -1181,7 +791,7 @@ def to_wire_dict (self): domain=(self.urlparts[1] if self.urlparts else u""), checktime=self.checktime, dltime=self.dltime, - dlsize=self.dlsize, + size=self.size, info=self.info, line=self.line, column=self.column, @@ -1211,7 +821,7 @@ def to_wire (self): 'domain', 'checktime', 'dltime', - 'dlsize', + 'size', 'info', 'modified', 'line', diff --git a/linkcheck/cmdline.py b/linkcheck/cmdline.py index d2f2d748c..f1a7056d4 100644 --- a/linkcheck/cmdline.py +++ b/linkcheck/cmdline.py @@ -20,7 +20,7 @@ from __future__ import print_function import sys import argparse -from . import checker, fileutil, strformat +from . 
import checker, fileutil, strformat, plugins from .director import console @@ -42,6 +42,19 @@ def print_version(exit_code=0): sys.exit(exit_code) +def print_plugins(folders, exit_code=0): + """Print available plugins and exit.""" + modules = plugins.get_plugin_modules(folders) + pluginclasses = sorted(plugins.get_plugin_classes(modules), key=lambda x: x.__name__) + + for pluginclass in pluginclasses: + print(pluginclass.__name__) + doc = strformat.wrap(pluginclass.__doc__, 80) + print(strformat.indent(doc)) + print() + sys.exit(exit_code) + + def print_usage (msg, exit_code=2): """Print a program msg text to stderr and exit.""" program = sys.argv[0] diff --git a/linkcheck/configuration/__init__.py b/linkcheck/configuration/__init__.py index 720ed5838..9167fd6bd 100644 --- a/linkcheck/configuration/__init__.py +++ b/linkcheck/configuration/__init__.py @@ -27,7 +27,7 @@ import shutil import socket import _LinkChecker_configdata as configdata -from .. import (log, LOG_CHECK, LOG_ROOT, ansicolor, lognames, clamav, +from .. import (log, LOG_CHECK, LOG_ROOT, ansicolor, lognames, get_config_dir, fileutil, configdict) from . import confparse from ..decorators import memoized @@ -75,6 +75,9 @@ def normpath (path): def get_modules_info (): """Return list of unicode strings with detected module info.""" lines = [] + # requests + import requests + lines.append(u"Requests: %s" % requests.__version__) # PyQt try: from PyQt4 import QtCore @@ -129,53 +132,48 @@ def __init__ (self): Initialize the default options. 
""" super(Configuration, self).__init__() - self['trace'] = False - self["verbose"] = False - self["complete"] = False - self["warnings"] = True - self["ignorewarnings"] = [] - self['quiet'] = False - self["anchors"] = False - self["externlinks"] = [] - self["internlinks"] = [] - # on ftp, password is set by Pythons ftplib + ## checking options + self["allowedschemes"] = [] + self['cookiefile'] = None + self["debugmemory"] = False + self["localwebroot"] = None + self["maxfilesizeparse"] = 1*1024*1024 + self["maxfilesizedownload"] = 5*1024*1024 + self["maxnumurls"] = None + self["maxrunseconds"] = None + self["maxrequestspersecond"] = 10 + self["nntpserver"] = os.environ.get("NNTP_SERVER", None) + self["proxy"] = urllib.getproxies() + self["sslverify"] = True + self["threads"] = 100 + self["timeout"] = 60 + self["aborttimeout"] = 300 + self["recursionlevel"] = -1 + self["useragent"] = UserAgent + ## authentication self["authentication"] = [] self["loginurl"] = None self["loginuserfield"] = "login" self["loginpasswordfield"] = "password" self["loginextrafields"] = {} - self["proxy"] = urllib.getproxies() - self["recursionlevel"] = -1 - self["wait"] = 0 - self['sendcookies'] = False - self['storecookies'] = False - self['cookiefile'] = None - self["status"] = False - self["status_wait_seconds"] = 5 + ## filtering + self["externlinks"] = [] + self["ignorewarnings"] = [] + self["internlinks"] = [] + self["checkextern"] = False + ## plugins + self["pluginfolders"] = get_plugin_folders() + self["enabledplugins"] = [] + ## output + self['trace'] = False + self['quiet'] = False + self["verbose"] = False + self["warnings"] = True self["fileoutput"] = [] self['output'] = 'text' + self["status"] = False + self["status_wait_seconds"] = 5 self['logger'] = None - self["warningregex"] = None - self["warningregex_max"] = 5 - self["warnsizebytes"] = None - self["nntpserver"] = os.environ.get("NNTP_SERVER", None) - self["threads"] = 100 - # socket timeout in seconds - self["timeout"] 
= 60 - self["checkhtml"] = False - self["checkcss"] = False - self["scanvirus"] = False - self["clamavconf"] = clamav.canonical_clamav_conf() - self["useragent"] = UserAgent - self["debugmemory"] = False - self["localwebroot"] = None - self["sslverify"] = True - self["warnsslcertdaysvalid"] = 14 - self["maxrunseconds"] = None - self["maxnumurls"] = None - self["maxconnectionshttp"] = 10 - self["maxconnectionshttps"] = 10 - self["maxconnectionsftp"] = 2 self.loggers = {} from ..logger import LoggerClasses for c in LoggerClasses: @@ -302,29 +300,15 @@ def get_connectionlimits(self): def sanitize (self): "Make sure the configuration is consistent." - if self["anchors"]: - self.sanitize_anchors() if self['logger'] is None: self.sanitize_logger() - if self['scanvirus']: - self.sanitize_scanvirus() - if self['storecookies'] or self['cookiefile']: - self.sanitize_cookies() if self['loginurl']: self.sanitize_loginurl() self.sanitize_proxies() + self.sanitize_plugins() # set default socket timeout socket.setdefaulttimeout(self['timeout']) - def sanitize_anchors (self): - """Make anchor configuration consistent.""" - if not self["warnings"]: - self["warnings"] = True - from ..checker.const import Warnings - self["ignorewarnings"] = Warnings.keys() - if 'url-anchor-not-found' in self["ignorewarnings"]: - self["ignorewarnings"].remove('url-anchor-not-found') - def sanitize_logger (self): """Make logger configuration consistent.""" if not self['output']: @@ -332,24 +316,6 @@ def sanitize_logger (self): self['output'] = 'text' self['logger'] = self.logger_new(self['output']) - def sanitize_scanvirus (self): - """Ensure clamav is installed for virus checking.""" - try: - clamav.init_clamav_conf(self['clamavconf']) - except clamav.ClamavError: - log.warn(LOG_CHECK, - _("Clamav could not be initialized")) - self['scanvirus'] = False - - def sanitize_cookies (self): - """Make cookie configuration consistent.""" - if not self['sendcookies']: - log.warn(LOG_CHECK, _("activating 
sendcookies.")) - self['sendcookies'] = True - if not self['storecookies']: - log.warn(LOG_CHECK, _("activating storecookies.")) - self['storecookies'] = True - def sanitize_loginurl (self): """Make login configuration consistent.""" url = self["loginurl"] @@ -377,9 +343,6 @@ def sanitize_loginurl (self): log.warn(LOG_CHECK, _("disabling login URL %(url)s.") % {"url": url}) self["loginurl"] = None - elif not self['storecookies']: - # login URL implies storing and sending cookies - self['storecookies'] = self['sendcookies'] = True def sanitize_proxies (self): """Try to read additional proxy settings which urllib does not @@ -395,6 +358,39 @@ def sanitize_proxies (self): if ftp_proxy: self["proxy"]["ftp"] = ftp_proxy + def sanitize_plugins(self): + """Ensure each plugin is configurable.""" + for plugin in self["enabledplugins"]: + if plugin not in self: + self[plugin] = {} + + +def get_plugin_folders(): + """Get linkchecker plugin folders. Default is ~/.linkchecker/plugins/.""" + folders = [] + defaultfolder = normpath("~/.linkchecker/plugins") + if not os.path.exists(defaultfolder) and not Portable: + try: + make_userdir(defaultfolder) + except StandardError as errmsg: + msg = _("could not create plugin directory %(dirname)r: %(errmsg)r") + args = dict(dirname=defaultfolder, errmsg=errmsg) + log.warn(LOG_CHECK, msg % args) + if os.path.exists(defaultfolder): + folders.append(defaultfolder) + return folders + + +def make_userdir(child): + """Create a child directory.""" + userdir = os.path.dirname(child) + if not os.path.isdir(userdir): + if os.name == 'nt': + # Windows forbids filenames with leading dot unless + # a trailing dot is added. + userdir += "." + os.mkdir(userdir, 0700) + def get_user_config(): """Get the user configuration filename. 
@@ -413,13 +409,7 @@ def get_user_config(): not Portable: # copy the initial configuration to the user configuration try: - userdir = os.path.dirname(userconf) - if not os.path.isdir(userdir): - if os.name == 'nt': - # Windows forbids filenames with leading dot unless - # a trailing dot is added. - userdir += "." - os.mkdir(userdir, 0700) + make_userdir(userconf) shutil.copy(initialconf, userconf) except StandardError as errmsg: msg = _("could not copy initial configuration file %(src)r to %(dst)r: %(errmsg)r") @@ -445,6 +435,7 @@ def get_gconf_http_proxy (): return "%s:%d" % (host, port) except StandardError as msg: log.debug(LOG_CHECK, "error getting HTTP proxy from gconf: %s", msg) + pass return None @@ -464,6 +455,7 @@ def get_gconf_ftp_proxy (): return "%s:%d" % (host, port) except StandardError as msg: log.debug(LOG_CHECK, "error getting FTP proxy from gconf: %s", msg) + pass return None @@ -478,6 +470,7 @@ def get_kde_http_proxy (): return data.get("http_proxy") except StandardError as msg: log.debug(LOG_CHECK, "error getting HTTP proxy from KDE: %s", msg) + pass def get_kde_ftp_proxy (): @@ -491,6 +484,7 @@ def get_kde_ftp_proxy (): return data.get("ftp_proxy") except StandardError as msg: log.debug(LOG_CHECK, "error getting FTP proxy from KDE: %s", msg) + pass # The following KDE functions are largely ported and ajusted from # Google Chromium: diff --git a/linkcheck/configuration/confparse.py b/linkcheck/configuration/confparse.py index 7ba08bc8d..70ad3f740 100644 --- a/linkcheck/configuration/confparse.py +++ b/linkcheck/configuration/confparse.py @@ -17,9 +17,8 @@ """Parse configuration files""" import ConfigParser -import re import os -from .. import LinkCheckerError, get_link_pat, LOG_CHECK, log, fileutil +from .. 
import LinkCheckerError, get_link_pat, LOG_CHECK, log, fileutil, plugins def read_multiline (value): @@ -53,16 +52,17 @@ def read (self, files): failed_files = set(files) - set(self.read_ok) log.warn(LOG_CHECK, "Could not read configuration files %s.", failed_files) # Read all the configuration parameters from the given files. - self.read_output_config() self.read_checking_config() self.read_authentication_config() self.read_filtering_config() + self.read_output_config() + self.read_plugin_config() except Exception as msg: raise LinkCheckerError( _("Error parsing configuration: %s") % unicode(msg)) def read_string_option (self, section, option, allowempty=False): - """Read a sring option.""" + """Read a string option.""" if self.has_option(section, option): value = self.get(section, option) if not allowempty and not value: @@ -106,11 +106,6 @@ def read_output_config (self): if self.getboolean(section, "verbose"): self.config["verbose"] = True self.config["warnings"] = True - if self.has_option(section, "complete"): - if self.getboolean(section, "complete"): - self.config["complete"] = True - self.config["verbose"] = True - self.config["warnings"] = True if self.has_option(section, "quiet"): if self.getboolean(section, "quiet"): self.config['output'] = 'none' @@ -141,37 +136,24 @@ def read_checking_config (self): self.read_int_option(section, "threads", min=-1) self.config['threads'] = max(0, self.config['threads']) self.read_int_option(section, "timeout", min=1) - self.read_boolean_option(section, "anchors") + self.read_int_option(section, "aborttimeout", min=1) self.read_int_option(section, "recursionlevel", min=-1) - if self.has_option(section, "warningregex"): - val = self.get(section, "warningregex") - if val: - self.config["warningregex"] = re.compile(val) - self.read_int_option(section, "warnsizebytes", min=1) self.read_string_option(section, "nntpserver") self.read_string_option(section, "useragent") - self.read_int_option(section, "pause", key="wait", 
min=0) - for name in ("http", "https", "ftp"): - self.read_int_option(section, "maxconnections%s" % name, min=1) - self.read_check_options(section) - - def read_check_options (self, section): - """Read check* options.""" - self.read_boolean_option(section, "checkhtml") - self.read_boolean_option(section, "checkcss") - self.read_boolean_option(section, "scanvirus") - self.read_boolean_option(section, "clamavconf") + self.read_int_option(section, "maxrequestspersecond", min=1) + self.read_int_option(section, "maxnumurls", min=0) + self.read_int_option(section, "maxfilesizeparse", min=1) + self.read_int_option(section, "maxfilesizedownload", min=1) + if self.has_option(section, "allowedschemes"): + self.config['allowedschemes'] = [x.strip().lower() for x in \ + self.get(section, 'allowedschemes').split(',')] self.read_boolean_option(section, "debugmemory") - if self.has_option(section, "cookies"): - self.config["sendcookies"] = self.config["storecookies"] = \ - self.getboolean(section, "cookies") self.read_string_option(section, "cookiefile") self.read_string_option(section, "localwebroot") try: self.read_boolean_option(section, "sslverify") except ValueError: self.read_string_option(section, "sslverify") - self.read_int_option(section, "warnsslcertdaysvalid", min=1) self.read_int_option(section, "maxrunseconds", min=0) def read_authentication_config (self): @@ -198,7 +180,6 @@ def read_authentication_config (self): raise LinkCheckerError(_("invalid login URL `%s'. 
Only " \ "HTTP and HTTPS URLs are supported.") % val) self.config["loginurl"] = val - self.config["storecookies"] = self.config["sendcookies"] = True self.read_string_option(section, "loginuserfield") self.read_string_option(section, "loginpasswordfield") # read login extra fields @@ -231,7 +212,7 @@ def read_filtering_config (self): """ section = "filtering" if self.has_option(section, "ignorewarnings"): - self.config['ignorewarnings'] = [f.strip() for f in \ + self.config['ignorewarnings'] = [f.strip().lower() for f in \ self.get(section, 'ignorewarnings').split(',')] if self.has_option(section, "ignore"): for line in read_multiline(self.get(section, "ignore")): @@ -244,3 +225,14 @@ def read_filtering_config (self): if self.has_option(section, "internlinks"): pat = get_link_pat(self.get(section, "internlinks")) self.config["internlinks"].append(pat) + self.read_boolean_option(section, "checkextern") + + def read_plugin_config(self): + """Read plugin-specific configuration values.""" + folders = self.config["pluginfolders"] + modules = plugins.get_plugin_modules(folders) + for pluginclass in plugins.get_plugin_classes(modules): + section = pluginclass.__name__ + if self.has_section(section): + self.config["enabledplugins"].append(section) + self.config[section] = pluginclass.read_config(self) diff --git a/linkcheck/containers.py b/linkcheck/containers.py index 289c109a3..2037d50cc 100644 --- a/linkcheck/containers.py +++ b/linkcheck/containers.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2012 Bastian Kleineidam +# Copyright (C) 2004-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/cookies.py b/linkcheck/cookies.py index e98de2535..9fab34a6b 100644 --- a/linkcheck/cookies.py +++ b/linkcheck/cookies.py @@ -15,510 +15,13 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """ -Parsing and storing of cookies. See [1]RFC 2965 and [2]RFC 2109. -The reason for this module is that neither the cookielib nor the Cookie -modules included in the Python standard library provide a usable interface -for programmable cookie handling. -This module provides parsing of cookies for all formats specified by -the above RFCs, plus smart methods handling data conversion and formatting. -And a cookie storage class is provided. - -[1] http://www.faqs.org/rfcs/rfc2965.html -[2] http://www.faqs.org/rfcs/rfc2109.html +Parsing of cookies. """ -import time -import string -import re import cookielib import httplib +import requests from cStringIO import StringIO -from . import strformat - - -_nulljoin = ''.join -_semispacejoin = '; '.join -_spacejoin = ' '.join - -class CookieError (StandardError): - """Thrown for invalid cookie syntax or conflicting/impossible values.""" - pass - -_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:" -_Translator = { - '\000' : '\\000', '\001' : '\\001', '\002' : '\\002', - '\003' : '\\003', '\004' : '\\004', '\005' : '\\005', - '\006' : '\\006', '\007' : '\\007', '\010' : '\\010', - '\011' : '\\011', '\012' : '\\012', '\013' : '\\013', - '\014' : '\\014', '\015' : '\\015', '\016' : '\\016', - '\017' : '\\017', '\020' : '\\020', '\021' : '\\021', - '\022' : '\\022', '\023' : '\\023', '\024' : '\\024', - '\025' : '\\025', '\026' : '\\026', '\027' : '\\027', - '\030' : '\\030', '\031' : '\\031', '\032' : '\\032', - '\033' : '\\033', '\034' : '\\034', '\035' : '\\035', - '\036' : '\\036', '\037' : '\\037', - - # Because of the way browsers really handle cookies (as opposed - # to what the RFC says) we also encode , and ; - - ',' : '\\054', ';' : '\\073', - - '"' : '\\"', '\\' : '\\\\', - - '\177' : '\\177', '\200' : '\\200', '\201' : '\\201', - '\202' : '\\202', '\203' : '\\203', '\204' : '\\204', - '\205' : '\\205', '\206' : '\\206', '\207' : '\\207', - 
'\210' : '\\210', '\211' : '\\211', '\212' : '\\212', - '\213' : '\\213', '\214' : '\\214', '\215' : '\\215', - '\216' : '\\216', '\217' : '\\217', '\220' : '\\220', - '\221' : '\\221', '\222' : '\\222', '\223' : '\\223', - '\224' : '\\224', '\225' : '\\225', '\226' : '\\226', - '\227' : '\\227', '\230' : '\\230', '\231' : '\\231', - '\232' : '\\232', '\233' : '\\233', '\234' : '\\234', - '\235' : '\\235', '\236' : '\\236', '\237' : '\\237', - '\240' : '\\240', '\241' : '\\241', '\242' : '\\242', - '\243' : '\\243', '\244' : '\\244', '\245' : '\\245', - '\246' : '\\246', '\247' : '\\247', '\250' : '\\250', - '\251' : '\\251', '\252' : '\\252', '\253' : '\\253', - '\254' : '\\254', '\255' : '\\255', '\256' : '\\256', - '\257' : '\\257', '\260' : '\\260', '\261' : '\\261', - '\262' : '\\262', '\263' : '\\263', '\264' : '\\264', - '\265' : '\\265', '\266' : '\\266', '\267' : '\\267', - '\270' : '\\270', '\271' : '\\271', '\272' : '\\272', - '\273' : '\\273', '\274' : '\\274', '\275' : '\\275', - '\276' : '\\276', '\277' : '\\277', '\300' : '\\300', - '\301' : '\\301', '\302' : '\\302', '\303' : '\\303', - '\304' : '\\304', '\305' : '\\305', '\306' : '\\306', - '\307' : '\\307', '\310' : '\\310', '\311' : '\\311', - '\312' : '\\312', '\313' : '\\313', '\314' : '\\314', - '\315' : '\\315', '\316' : '\\316', '\317' : '\\317', - '\320' : '\\320', '\321' : '\\321', '\322' : '\\322', - '\323' : '\\323', '\324' : '\\324', '\325' : '\\325', - '\326' : '\\326', '\327' : '\\327', '\330' : '\\330', - '\331' : '\\331', '\332' : '\\332', '\333' : '\\333', - '\334' : '\\334', '\335' : '\\335', '\336' : '\\336', - '\337' : '\\337', '\340' : '\\340', '\341' : '\\341', - '\342' : '\\342', '\343' : '\\343', '\344' : '\\344', - '\345' : '\\345', '\346' : '\\346', '\347' : '\\347', - '\350' : '\\350', '\351' : '\\351', '\352' : '\\352', - '\353' : '\\353', '\354' : '\\354', '\355' : '\\355', - '\356' : '\\356', '\357' : '\\357', '\360' : '\\360', - '\361' : '\\361', '\362' : '\\362', 
'\363' : '\\363', - '\364' : '\\364', '\365' : '\\365', '\366' : '\\366', - '\367' : '\\367', '\370' : '\\370', '\371' : '\\371', - '\372' : '\\372', '\373' : '\\373', '\374' : '\\374', - '\375' : '\\375', '\376' : '\\376', '\377' : '\\377' - } - -def quote(str, LegalChars=_LegalChars): - r"""Quote a string for use in a cookie header. - - If the string does not need to be double-quoted, then just return the - string. Otherwise, surround the string in doublequotes and quote - (with a \) special characters. - """ - if all(c in LegalChars for c in str): - return str - else: - return '"' + _nulljoin(_Translator.get(s, s) for s in str) + '"' - - -_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") -_QuotePatt = re.compile(r"[\\].") - -def unquote(str): - """Remove string quoting.""" - # If there aren't any doublequotes, - # then there can't be any special characters. See RFC 2109. - if len(str) < 2: - return str - if str[0] != '"' or str[-1] != '"': - return str - - # We have to assume that we must decode this string. - # Down to work. - - # Remove the "s - str = str[1:-1] - - # Check for special sequences. Examples: - # \012 --> \n - # \" --> " - # - i = 0 - n = len(str) - res = [] - while 0 <= i < n: - o_match = _OctalPatt.search(str, i) - q_match = _QuotePatt.search(str, i) - if not o_match and not q_match: # Neither matched - res.append(str[i:]) - break - # else: - j = k = -1 - if o_match: - j = o_match.start(0) - if q_match: - k = q_match.start(0) - if q_match and (not o_match or k < j): # QuotePatt matched - res.append(str[i:k]) - res.append(str[k+1]) - i = k + 2 - else: # OctalPatt matched - res.append(str[i:j]) - res.append(chr(int(str[j+1:j+4], 8))) - i = j + 4 - return _nulljoin(res) - - -has_embedded_dot = re.compile(r"[a-zA-Z0-9]\.[a-zA-Z]").search - - -# Pattern for finding cookie snatched from Pythons Cookie.py -# Modification: allow whitespace in values. 
-_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]" -_CookiePattern = re.compile(r""" - (?x) # This is a verbose pattern - (?P # Start of group 'key' - """ + _LegalCharsPatt + r"""+? # Any word of at least one letter - ) # End of group 'key' - ( # Optional group: there may not be a value. - \s*=\s* # Equal Sign - (?P # Start of group 'val' - "(?:[^\\"]|\\.)*" # Any doublequoted string - | # or - \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr - | # or - """ + _LegalCharsPatt + r"""* # Any word or empty string - ) # End of group 'val' - )? # End of optional value group - \s* # Any number of spaces. - (\s+|;|$) # Ending either at space, semicolon, or EOS. - """) - -class HttpCookie (object): - """A cookie consists of one name-value pair with attributes. - Each attribute consists of a predefined name (see attribute_names) - and a value (which is optional for some attributes).""" - - # A mapping from the lowercase variant on the left to the - # appropriate traditional formatting on the right. - attribute_names = { - # Old Netscape attribute - "expires": "expires", - # Defined by RFC 2109 - "path": "Path", - "comment": "Comment", - "domain": "Domain", - "max-age": "Max-Age", - "secure": "secure", - "version": "Version", - # Additional attributes defined by RFC 2965 - "commenturl": "CommentURL", - "discard": "Discard", - "port": "Port", - # httponly to protect against XSS attacks - "httponly": "httponly", - } - - def __init__ (self, name, value, attributes=None): - """Store name, value and attributes. Also calculates expiration - if given in attributes.""" - self.name = name - self.value = value - if attributes is None: - self.attributes = {} - else: - self.attributes = attributes - self.calculate_expiration() - - def calculate_expiration (self): - """If "max-age" or "expires" attributes are given, calculate - the time when this cookie expires. - Stores the time value in self.expires, or None if this cookie - does not expire. 
- """ - # default: do not expire - self.expire = None - if "max-age" in self.attributes: - now = time.time() - try: - maxage = int(self.attributes["max-age"]) - if maxage == 0: - # Expire immediately: subtract 1 to be sure since - # some clocks have only full second precision. - self.expire = now - 1 - else: - self.expire = now + maxage - except (ValueError, OverflowError): - # note: even self.now + maxage can overflow - pass - elif "expires" in self.attributes: - expiration_date = self.attributes["expires"] - try: - self.expire = cookielib.http2time(expiration_date) - except ValueError: - # see http://bugs.python.org/issue16181 - raise CookieError("Invalid expiration date in %r" % expiration_date) - - def is_expired (self, now=None): - """Return True if this cookie is expired, else False.""" - if self.expire is None: - # Does not expire. - return False - if now is None: - now = time.time() - return now > self.expire - - def __repr__ (self): - """Return cookie name, value and attributes as string.""" - attrs = "; ".join("%s=%r"%(k, v) for k, v in self.attributes.items()) - return "<%s %s=%r; %s>" % (self.__class__.__name__, - self.name, self.value, attrs) - - def is_valid_for (self, scheme, host, port, path): - """Check validity of this cookie against the desired scheme, - host and path.""" - if self.check_expired() and \ - self.check_domain(host) and \ - self.check_port(port) and \ - self.check_path(path) and \ - self.check_secure(scheme): - return True - return False - - def check_expired (self): - """Return False if cookie is expired, else True.""" - return not self.is_expired() - - def check_domain (self, domain): - """Return True if given domain matches this cookie, else False.""" - if "domain" not in self.attributes: - return False - cdomain = self.attributes["domain"] - if domain == cdomain: - # equality matches - return True - if "." 
not in domain and domain == cdomain[1:]: - # "localhost" and ".localhost" match - return True - if not domain.endswith(cdomain): - # any suffix matches - return False - if "." in domain[:-(len(cdomain)+1)]: - # prefix must be dot-free - return False - return True - - def check_port (self, port): - """Return True if given port matches this cookie, else False. - For now, this returns always True.""" - return True - - def check_path (self, path): - """Return True if given path matches this cookie, else False.""" - if "path" not in self.attributes: - return False - return path.startswith(self.attributes["path"]) - - def check_secure (self, scheme): - """Return True if given Scheme is allowed for this cookie, else - False.""" - if "secure" in self.attributes: - return scheme == "https" - return True - - def set_attribute (self, key, value): - """Helper method to set attribute values. Called when parsing - cookie data. - The attribute key and value are checked, and CookieError is - raised in these cases.""" - if self.attributes is None: - raise CookieError("no NAME=VALUE before attributes found") - key = key.lower() - if key not in self.attribute_names: - raise CookieError("invalid attribute %r" % key) - if value: - value = unquote(value) - else: - value = "" - if key == "domain": - value = value.lower() - if not value.startswith(".") and not has_embedded_dot(value): - if "." 
in value: - raise CookieError("invalid dot in domain %r" % value) - # supply a leading dot - value = "."+value - if key == "max-age": - try: - if int(value) < 0: - raise ValueError("Negative Max-Age") - except (OverflowError, ValueError): - raise CookieError("invalid Max-Age number: %r" % value) - if key == "port": - ports = value.split(",") - for port in ports: - try: - if not (0 <= int(port) <= 65535): - raise ValueError("Invalid port number") - except (OverflowError, ValueError): - raise CookieError("invalid port number: %r" % port) - self.attributes[key] = value - - def parse (self, text, patt=_CookiePattern): - """Parse cookie data.""" - text = strformat.ascii_safe(text.rstrip('\r\n')) - # reset values - self.name = None - self.value = None - self.attributes = None - # Our starting point - i = 0 - # Length of string - n = len(text) - - while 0 <= i < n: - # Start looking for a key-value pair. - match = patt.search(text, i) - if not match: - # No more key-value pairs. - break - key, value = match.group("key"), match.group("val") - if value is None: - value = "" - i = match.end() - # Parse the key, value in case it's metainfo. - if self.name is None: - # Set name and value. 
- self.name = key - self.value = unquote(value) - self.attributes = {} - else: - if key.startswith("$"): - key = key[1:] - self.set_attribute(key, value) - if self.name is None: - raise CookieError("missing cookie name in %r" % text) - self.calculate_expiration() - - def set_default_attributes (self, scheme, host, path): - """Set domain and path attributes for given scheme, host and - path.""" - scheme = strformat.ascii_safe(scheme) - host = strformat.ascii_safe(host) - path = strformat.ascii_safe(path) - if "domain" not in self.attributes: - self.attributes["domain"] = host.lower() - if "path" not in self.attributes: - i = path.rfind("/") - if i == -1: - path = "/" - else: - path = path[:i] - if not path: - path = "/" - self.attributes["path"] = path - if not self.check_domain(host): - cdomain = self.attributes["domain"] - raise CookieError("domain %r not for cookie %r" % (cdomain, host)) - if not self.check_path(path): - cpath = self.attributes["path"] - raise CookieError("domain %r not for cookie %r" % (cpath, path)) - if not self.check_secure(scheme): - raise CookieError("no secure scheme %r" % scheme) - - def quote (self, key, value): - """Quote value for given key.""" - return quote(value) - - def server_header_value (self): - """Return HTTP header value to send to server.""" - parts = ["%s=%s" % (self.name, quote(self.value))] - parts.extend(["%s=%s"% (self.attribute_names[k], self.quote(k, v)) \ - for k, v in self.attributes.items()]) - return "; ".join(parts) - - def client_header_value (self): - """Return HTTP header value to send to client.""" - parts = [] - if "version" in self.attributes: - parts.append("$Version=%s" % quote(self.attributes["version"])) - parts.append("%s=%s" % (self.name, quote(self.value))) - parts.extend(["$%s=%s"% (self.attribute_names[k], self.quote(k, v)) \ - for k, v in self.attributes.items() if k != "version"]) - return "; ".join(parts) - -class NetscapeCookie (HttpCookie): - """Parses RFC 2109 (Netscape) cookies.""" - - def 
__init__ (self, text, scheme, host, path): - """Parse given cookie data.""" - self.parse(text) - self.set_default_attributes(scheme, host, path) - - def server_header_name (self): - """Return "Set-Cookie" as server header name.""" - return "Set-Cookie" - - def __eq__ (self, other): - """Compare equality of cookie.""" - return (isinstance(other, NetscapeCookie) and - self.name.lower() == other.name.lower() and - self.attributes['domain'] == other.attributes['domain'] and - self.attributes['path'] == other.attributes['path']) - - def __hash__ (self): - """Cookie hash value""" - data = ( - self.name.lower(), - self.attributes['domain'], - self.attributes['path'], - ) - return hash(data) - - - -class Rfc2965Cookie (HttpCookie): - """Parses RFC 2965 cookies.""" - - def __init__ (self, text, scheme, host, path): - """Parse given cookie data.""" - self.parse(text) - self.set_default_attributes(scheme, host, path) - - def check_port (self, port): - """Return True if given port matches this cookie, else False.""" - if "port" not in self.attributes: - return True - cport = self.attributes["port"] - return port in [int(x) for x in cport.split(",")] - - def server_header_name (self): - """Return "Set-Cookie2" as server header name.""" - return "Set-Cookie2" - - def quote (self, key, value): - """Quote value for given key.""" - if key == "port": - return quote(value, LegalChars="") - return quote(value) - - def __eq__ (self, other): - """Compare equality of cookie.""" - return (isinstance(other, Rfc2965Cookie) and - self.name.lower() == other.name.lower() and - self.attributes['domain'].lower() == - other.attributes['domain'].lower() and - self.attributes['path'] == other.attributes['path']) - - def __hash__ (self): - """Cookie hash value""" - data = ( - self.name.lower(), - self.attributes['domain'].lower(), - self.attributes['path'], - ) - return hash(data) def from_file (filename): @@ -545,92 +48,21 @@ def from_file (filename): def from_headers (strheader): """Parse cookie 
data from a string in HTTP header (RFC 2616) format. - @return: tuple (headers, scheme, host, path) + @return: list of cookies @raises: ValueError for incomplete or invalid data """ + res = [] fp = StringIO(strheader) headers = httplib.HTTPMessage(fp, seekable=True) if "Host" not in headers: raise ValueError("Required header 'Host:' missing") host = headers["Host"] - scheme = headers.get("Scheme", "http") path= headers.get("Path", "/") - return (headers, scheme, host, path) - - -## Taken and adpated from the _mechanize package included in Twill. - -def cookie_str(cookie): - """Return string representation of Cookie.""" - h = [(cookie.name, unquote(cookie.value)), - ("path", cookie.path), - ("domain", cookie.domain)] - if cookie.port is not None: h.append(("port", cookie.port)) - #if cookie.path_specified: h.append(("path_spec", None)) - #if cookie.port_specified: h.append(("port_spec", None)) - #if cookie.domain_initial_dot: h.append(("domain_dot", None)) - if cookie.secure: h.append(("secure", None)) - if cookie.httponly: h.append(("httponly", None)) - if cookie.expires: h.append(("expires", - time2isoz(float(cookie.expires)))) - if cookie.discard: h.append(("discard", None)) - if cookie.comment: h.append(("comment", cookie.comment)) - if cookie.comment_url: h.append(("commenturl", cookie.comment_url)) - #if cookie.rfc2109: h.append(("rfc2109", None)) - - keys = cookie.nonstandard_attr_keys() - keys.sort() - for k in keys: - h.append((k, str(cookie.get_nonstandard_attr(k)))) - - h.append(("version", str(cookie.version))) - - return join_header_words([h]) - - -def time2isoz(t=None): - """Return a string representing time in seconds since epoch, t. - - If the function is called without an argument, it will use the current - time. - - The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ", - representing Universal Time (UTC, aka GMT). 
An example of this format is: - - 1994-11-24 08:49:37Z - - """ - if t is None: t = time.time() - year, mon, mday, hour, min, sec = time.gmtime(t)[:6] - return "%04d-%02d-%02d %02d:%02d:%02dZ" % ( - year, mon, mday, hour, min, sec) - - -join_escape_re = re.compile(r"([\"\\])") -def join_header_words(lists): - """Do the inverse of the conversion done by split_header_words. - - Takes a list of lists of (key, value) pairs and produces a single header - value. Attribute values are quoted if needed. - - >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]]) - 'text/plain; charset="iso-8859/1"' - >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]]) - 'text/plain, charset="iso-8859/1"' - - """ - headers = [] - for pairs in lists: - attr = [] - for k, v in pairs: - if v is not None: - if not re.search(r"^\w+$", v): - v = join_escape_re.sub(r"\\\1", v) # escape " and \ - v = '"%s"' % v - if k is None: # Netscape cookies may have no name - k = v - else: - k = "%s=%s" % (k, v) - attr.append(k) - if attr: headers.append("; ".join(attr)) - return ", ".join(headers) + for header in headers.getallmatchingheaders("Set-Cookie"): + headervalue = header.split(':', 1)[1] + for pairs in cookielib.split_header_words([headervalue]): + for name, value in pairs: + cookie = requests.cookies.create_cookie(name, value, + domain=host, path=path) + res.append(cookie) + return res diff --git a/linkcheck/decorators.py b/linkcheck/decorators.py index fcf04506b..1831537f5 100644 --- a/linkcheck/decorators.py +++ b/linkcheck/decorators.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2005-2012 Bastian Kleineidam +# Copyright (C) 2005-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/director/__init__.py b/linkcheck/director/__init__.py index 304ab2c35..2282c6c96 100644 --- 
a/linkcheck/director/__init__.py +++ b/linkcheck/director/__init__.py @@ -19,13 +19,11 @@ """ import os import thread -import urlparse -from cStringIO import StringIO -from .. import log, LOG_CHECK, LinkCheckerInterrupt, cookies, dummy, \ - fileutil, strformat -from ..cache import urlqueue, robots_txt, cookie, connection +import time +from .. import log, LOG_CHECK, LinkCheckerInterrupt, dummy, \ + fileutil, strformat, plugins +from ..cache import urlqueue, robots_txt from . import aggregator, console -from ..httplib2 import HTTPMessage def visit_loginurl (aggregate): @@ -53,7 +51,7 @@ def visit_loginurl (aggregate): log.warn(LOG_CHECK, _("Error posting form at login URL %(url)s.") % \ {"url": url}) return - store_cookies(tc.get_browser().cj, aggregate.cookies, url) + #XXX store_cookies(tc.get_browser().cj, aggregate.cookies, url) resulturl = tc.get_browser().get_url() log.debug(LOG_CHECK, u"URL after POST is %s" % resulturl) # add result URL to check list @@ -107,18 +105,6 @@ def search_formname (fieldnames, tc): return None -def store_cookies (cookiejar, cookiecache, url): - """Store cookies in cookiejar into the cookiecache.""" - cookielst = [] - for c in cookiejar: - cookielst.append("Set-Cookie2: %s" % cookies.cookie_str(c)) - log.debug(LOG_CHECK, "Store cookies %s", cookielst) - headers = HTTPMessage(StringIO("\r\n".join(cookielst))) - urlparts = urlparse.urlsplit(url) - scheme, host, path = urlparts[0:3] - cookiecache.add(headers, scheme, host, path) - - def check_urls (aggregate): """Main check function; checks all configured URLs until interrupted with Ctrl-C. 
@@ -194,14 +180,17 @@ def abort (aggregate): break except KeyboardInterrupt: log.warn(LOG_CHECK, _("user abort; force shutdown")) + aggregate.logger.end_log_output() abort_now() def abort_now (): """Force exit of current process without cleanup.""" if os.name == 'posix': - # Unix systems can use sigkill + # Unix systems can use signals import signal + os.kill(os.getpid(), signal.SIGTERM) + time.sleep(1) os.kill(os.getpid(), signal.SIGKILL) elif os.name == 'nt': # NT has os.abort() @@ -214,8 +203,6 @@ def abort_now (): def get_aggregate (config): """Get an aggregator instance with given configuration.""" _urlqueue = urlqueue.UrlQueue(max_allowed_puts=config["maxnumurls"]) - connections = connection.ConnectionPool(config.get_connectionlimits(), wait=config["wait"]) - cookies = cookie.CookieJar() _robots_txt = robots_txt.RobotsTxt() - return aggregator.Aggregate(config, _urlqueue, connections, - cookies, _robots_txt) + plugin_manager = plugins.PluginManager(config) + return aggregator.Aggregate(config, _urlqueue, _robots_txt, plugin_manager) diff --git a/linkcheck/director/aggregator.py b/linkcheck/director/aggregator.py index f75ae751d..4f373d0ec 100644 --- a/linkcheck/director/aggregator.py +++ b/linkcheck/director/aggregator.py @@ -17,54 +17,93 @@ """ Aggregate needed object instances for checker threads. """ -import time import threading -from .. import log, LOG_CHECK, strformat +import thread +import requests +import time +import random +from .. import log, LOG_CHECK, strformat, cookies from ..decorators import synchronized from ..cache import urlqueue -from . import logger, status, checker, cleanup +from . 
import logger, status, checker, interrupt -_w3_time_lock = threading.Lock() _threads_lock = threading.RLock() -_download_lock = threading.Lock() +_hosts_lock = threading.RLock() + +def new_request_session(config): + """Create a new request session.""" + session = requests.Session() + # XXX proxies + if config["cookiefile"]: + for cookie in cookies.from_file(config["cookiefile"]): + session.cookies = requests.cookies.merge_cookies(session.cookies, cookie) + return session + class Aggregate (object): """Store thread-safe data collections for checker threads.""" - def __init__ (self, config, urlqueue, connections, cookies, robots_txt): + def __init__ (self, config, urlqueue, robots_txt, plugin_manager): """Store given link checking objects.""" self.config = config self.urlqueue = urlqueue - self.connections = connections - self.cookies = cookies - self.robots_txt = robots_txt self.logger = logger.Logger(config) self.threads = [] - self.last_w3_call = 0 - self.downloaded_bytes = 0 + self.request_sessions = {} + self.robots_txt = robots_txt + self.plugin_manager = plugin_manager + self.times = {} + requests_per_second = config["maxrequestspersecond"] + self.wait_time_min = 1.0 / requests_per_second + self.wait_time_max = max(self.wait_time_min + 0.5, 0.5) @synchronized(_threads_lock) def start_threads (self): """Spawn threads for URL checking and status printing.""" if self.config["status"]: t = status.Status(self.urlqueue, self.config.status_logger, - self.config["status_wait_seconds"], - self.config["maxrunseconds"]) + self.config["status_wait_seconds"]) + t.start() + self.threads.append(t) + if self.config["maxrunseconds"]: + t = interrupt.Interrupt(self.config["maxrunseconds"]) t.start() self.threads.append(t) - t = cleanup.Cleanup(self.connections) - t.start() - self.threads.append(t) num = self.config["threads"] if num > 0: for dummy in range(num): - t = checker.Checker(self.urlqueue, self.logger) - t.start() + t = checker.Checker(self.urlqueue, self.logger, 
self.add_request_session) self.threads.append(t) + t.start() else: + self.request_sessions[thread.get_ident()] = new_request_session(self.config) checker.check_url(self.urlqueue, self.logger) + @synchronized(_threads_lock) + def add_request_session(self): + """Add a request session for current thread.""" + session = new_request_session(self.config) + self.request_sessions[thread.get_ident()] = session + + @synchronized(_threads_lock) + def get_request_session(self): + """Get the request session for current thread.""" + return self.request_sessions[thread.get_ident()] + + @synchronized(_hosts_lock) + def wait_for_host(self, host): + """Throttle requests to one host.""" + t = time.time() + if host in self.times: + due_time = self.times[host] + if due_time > t: + wait = due_time - t + time.sleep(wait) + t = time.time() + wait_time = random.uniform(self.wait_time_min, self.wait_time_max) + self.times[host] = t + wait_time + @synchronized(_threads_lock) def print_active_threads (self): """Log all currently active threads.""" @@ -77,8 +116,8 @@ def print_active_threads (self): first = False log.info(LOG_CHECK, name[12:]) args = dict( - num=len(self.threads), - timeout=strformat.strduration_long(self.config["timeout"]), + num=len([x for x in self.threads if x.getName().startswith("CheckThread-")]), + timeout=strformat.strduration_long(self.config["aborttimeout"]), ) log.info(LOG_CHECK, _("%(num)d URLs are still active. 
After a timeout of %(timeout)s the active URLs will stop.") % args) @@ -98,7 +137,7 @@ def abort (self): """Print still-active URLs and empty the URL queue.""" self.print_active_threads() self.cancel() - timeout = self.config["timeout"] + timeout = self.config["aborttimeout"] try: self.urlqueue.join(timeout=timeout) except urlqueue.Timeout: @@ -118,36 +157,9 @@ def finish (self): self.cancel() for t in self.threads: t.stop() - self.connections.clear() - self.gather_statistics() @synchronized(_threads_lock) def is_finished (self): """Determine if checking is finished.""" self.remove_stopped_threads() return self.urlqueue.empty() and not self.threads - - @synchronized(_w3_time_lock) - def check_w3_time (self): - """Make sure the W3C validators are at most called once a second.""" - if time.time() - self.last_w3_call < 1: - time.sleep(1) - self.last_w3_call = time.time() - - @synchronized(_download_lock) - def add_download_data(self, url, data): - """Add given downloaded data. - @param url: URL which data belongs to - @ptype url: unicode - @param data: downloaded data - @ptype data: string - """ - self.downloaded_bytes += len(data) - - def gather_statistics(self): - """Gather download and cache statistics and send them to the - logger. 
- """ - robots_txt_stats = self.robots_txt.hits, self.robots_txt.misses - download_stats = self.downloaded_bytes - self.logger.add_statistics(robots_txt_stats, download_stats) diff --git a/linkcheck/director/checker.py b/linkcheck/director/checker.py index 5942cde0a..b90a48909 100644 --- a/linkcheck/director/checker.py +++ b/linkcheck/director/checker.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2006-2011 Bastian Kleineidam +# Copyright (C) 2006-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -36,14 +36,17 @@ def check_url (urlqueue, logger): class Checker (task.LoggedCheckedTask): """URL check thread.""" - def __init__ (self, urlqueue, logger): + def __init__ (self, urlqueue, logger, add_request_session): """Store URL queue and logger.""" super(Checker, self).__init__(logger) self.urlqueue = urlqueue self.origname = self.getName() + self.add_request_session = add_request_session def run_checked (self): """Check URLs in the queue.""" + # construct per-thread HTTP/S requests session + self.add_request_session() while not self.stopped(0): self.check_url() diff --git a/linkcheck/director/cleanup.py b/linkcheck/director/cleanup.py deleted file mode 100644 index 2281a2f1d..000000000 --- a/linkcheck/director/cleanup.py +++ /dev/null @@ -1,40 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2007-2011 Bastian Kleineidam -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -"""Cleanup task.""" -import time -from . import task, console - - -class Cleanup (task.CheckedTask): - """Cleanup task performing periodic cleanup of cached connections.""" - - def __init__ (self, connections): - """Store urlqueue object.""" - super(Cleanup, self).__init__() - self.connections = connections - - def run_checked (self): - """Print periodic status messages.""" - self.start_time = time.time() - self.setName("Cleanup") - # clean every 15 seconds - while not self.stopped(15): - self.connections.remove_expired() - - def internal_error (self): - """Print internal error to console.""" - console.internal_error() diff --git a/linkcheck/director/console.py b/linkcheck/director/console.py index 8633935e0..fd129c2d3 100644 --- a/linkcheck/director/console.py +++ b/linkcheck/director/console.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2006-2013 Bastian Kleineidam +# Copyright (C) 2006-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/director/interrupt.py b/linkcheck/director/interrupt.py new file mode 100644 index 000000000..a26f61705 --- /dev/null +++ b/linkcheck/director/interrupt.py @@ -0,0 +1,46 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2006-2014 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +"""Status message handling""" +import time +from . import task +from .. import log, LOG_CHECK, strformat + + +class Interrupt (task.CheckedTask): + """Thread that raises KeyboardInterrupt after a specified duration. + This gives us a portable SIGALRM implementation. + The duration is checked every 5 seconds. + """ + WaitSeconds = 5 + + def __init__ (self, duration): + """Initialize the task. + @param duration: raise KeyboardInterrupt after given number of seconds + @ptype duration: int + """ + super(Interrupt, self).__init__() + self.duration = duration + + def run_checked (self): + """Wait and raise KeyboardInterrupt after.""" + self.start_time = time.time() + self.setName("Interrupt") + while not self.stopped(self.WaitSeconds): + duration = time.time() - self.start_time + if duration > self.duration: + log.warn(LOG_CHECK, "Interrupt after %s" % strformat.strduration_long(duration)) + raise KeyboardInterrupt() diff --git a/linkcheck/director/logger.py b/linkcheck/director/logger.py index 467a5a568..5f0a96096 100644 --- a/linkcheck/director/logger.py +++ b/linkcheck/director/logger.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2006-2012 Bastian Kleineidam +# Copyright (C) 2006-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -29,7 +29,6 @@ def __init__ (self, config): self.loggers = [config['logger']] self.loggers.extend(config['fileoutput']) self.verbose = 
config["verbose"] - self.complete = config["complete"] self.warnings = config["warnings"] def start_log_output (self): @@ -46,15 +45,8 @@ def end_log_output (self): for logger in self.loggers: logger.end_output() - def add_statistics(self, robots_txt_stats, download_stats): - """Add statistics to logger.""" - for logger in self.loggers: - logger.add_statistics(robots_txt_stats, download_stats) - def do_print (self, url_data): """Determine if URL entry should be logged or not.""" - if self.complete: - return True if self.verbose: return True if self.warnings and url_data.warnings: diff --git a/linkcheck/director/status.py b/linkcheck/director/status.py index 59676d124..1837387c7 100644 --- a/linkcheck/director/status.py +++ b/linkcheck/director/status.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2006-2012 Bastian Kleineidam +# Copyright (C) 2006-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ class Status (task.LoggedCheckedTask): """Thread that gathers and logs the status periodically.""" - def __init__ (self, urlqueue, logger, wait_seconds, max_duration): + def __init__ (self, urlqueue, logger, wait_seconds): """Initialize the status logger task. 
@param urlqueue: the URL queue @ptype urlqueue: Urlqueue @@ -30,33 +30,27 @@ def __init__ (self, urlqueue, logger, wait_seconds, max_duration): @ptype logger: console.StatusLogger @param wait_seconds: interval in seconds to report status @ptype wait_seconds: int - @param max_duration: abort checking after given number of seconds - @ptype max_duration: int or None """ super(Status, self).__init__(logger) self.urlqueue = urlqueue self.wait_seconds = wait_seconds assert self.wait_seconds >= 1 - self.first_wait = True - self.max_duration = max_duration def run_checked (self): """Print periodic status messages.""" self.start_time = time.time() self.setName("Status") - if not self.first_wait: - wait_seconds = self.wait_seconds - else: - # the first status should be after a second - self.first_wait = False - wait_seconds = 1 + # the first status should be after a second + wait_seconds = 1 + first_wait = True while not self.stopped(wait_seconds): self.log_status() + if first_wait: + wait_seconds = self.wait_seconds + first_wait = False def log_status (self): """Log a status message.""" duration = time.time() - self.start_time - if self.max_duration is not None and duration > self.max_duration: - raise KeyboardInterrupt() checked, in_progress, queue = self.urlqueue.status() self.logger.log_status(checked, in_progress, queue, duration) diff --git a/linkcheck/director/task.py b/linkcheck/director/task.py index 2c7735044..9abeab939 100644 --- a/linkcheck/director/task.py +++ b/linkcheck/director/task.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2006-2011 Bastian Kleineidam +# Copyright (C) 2006-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -16,7 +16,7 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. import thread from ..decorators import notimplemented -from .. import log, LOG_CHECK, threader +from .. 
import threader from . import console @@ -28,7 +28,6 @@ def run (self): try: self.run_checked() except KeyboardInterrupt: - log.warn(LOG_CHECK, "interrupt did not reach the main thread") thread.interrupt_main() except Exception: self.internal_error() diff --git a/linkcheck/dummy.py b/linkcheck/dummy.py index b2855b3c6..73ac2a7c9 100644 --- a/linkcheck/dummy.py +++ b/linkcheck/dummy.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2005-2011 Bastian Kleineidam +# Copyright (C) 2005-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/fileutil.py b/linkcheck/fileutil.py index 0e751d379..6e796d298 100644 --- a/linkcheck/fileutil.py +++ b/linkcheck/fileutil.py @@ -275,6 +275,12 @@ def is_accessable_by_others(filename): return mode & (stat.S_IRWXG | stat.S_IRWXO) +def is_writable_by_others(filename): + """Check if file or directory is world writable.""" + mode = os.stat(filename)[stat.ST_MODE] + return mode & stat.S_IWOTH + + @memoized def is_writable(filename): """Check if diff --git a/linkcheck/ftpparse.py b/linkcheck/ftpparse.py index 760b83aa0..fc2449fa8 100644 --- a/linkcheck/ftpparse.py +++ b/linkcheck/ftpparse.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2009-2010 Bastian Kleineidam +# Copyright (C) 2009-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/__init__.py b/linkcheck/gui/__init__.py index bcf6a902e..a9ca2679e 100644 --- a/linkcheck/gui/__init__.py +++ b/linkcheck/gui/__init__.py @@ -23,7 +23,7 @@ from .linkchecker_ui_main import Ui_MainWindow from .properties import set_properties, clear_properties from .statistics import set_statistics, clear_statistics -from .debug import LinkCheckerDebug, LinkCheckerDebugMemory +from .debug import 
LinkCheckerDebug from .logger import SignalLogger, GuiLogHandler, StatusLogger from .help import HelpWindow from .options import LinkCheckerOptions @@ -37,7 +37,7 @@ from .recentdocs import RecentDocumentModel from .projects import openproject, saveproject, loadproject, ProjectExt from .. import configuration, checker, director, get_link_pat, \ - strformat, fileutil, LinkCheckerError, memoryutil + strformat, fileutil, LinkCheckerError from ..containers import enum from .. import url as urlutil from ..checker import httpheaders @@ -99,7 +99,6 @@ def __init__(self, parent=None, url=None, project=None): # init subdialogs self.options = LinkCheckerOptions(parent=self) self.debug = LinkCheckerDebug(parent=self) - self.debugmemory = LinkCheckerDebugMemory(parent=self) self.checker = CheckerThread(parent=self) self.contextmenu = ContextMenu(parent=self) self.editor = EditorWindow(parent=self) @@ -175,8 +174,6 @@ def connect_widgets (self): def set_idle (): """Set application status to idle.""" self.status = Status.idle - if self.config["debugmemory"]: - self.dump_memory() self.set_statusmsg(_("Check finished.")) self.controlButton.clicked.disconnect(self.checker.cancel) self.checker.finished.connect(set_idle) @@ -250,7 +247,6 @@ def set_config (self): self.config["threads"] = 1 else: self.config.reset_loglevel() - self.config["debugmemory"] = data["debugmemory"] if data["warninglines"]: lines = data["warninglines"].splitlines() ro = re.compile(warninglines2regex(lines)) @@ -313,7 +309,6 @@ def set_status (self, status): elif status == Status.checking: self.treeView.setSortingEnabled(False) self.debug.reset() - self.debugmemory.reset() self.set_statusmsg(u"Checking site...") # disable commands self.menubar.setEnabled(False) @@ -423,7 +418,7 @@ def start (self): def cancel (self): """Note that checking is canceled.""" self.controlButton.setEnabled(False) - duration = strformat.strduration_long(self.config["timeout"]) + duration = 
strformat.strduration_long(self.config["aborttimeout"]) self.set_statusmsg(_(u"Closing active URLs with timeout %s...") % duration) @QtCore.pyqtSlot() @@ -436,16 +431,6 @@ def on_controlButton_clicked (self): else: raise ValueError("Invalid application status %r" % self.status) - def dump_memory (self): - """Dump memory to temporary file and inform user with a modal - dialog where the file is.""" - self.set_statusmsg(_(u"Dumping memory statistics...")) - filename = memoryutil.write_memory_dump() - title = _(u"LinkChecker memory dump written") - message = _(u"The memory dump has been written to `%(filename)s'.") - attrs = dict(filename=filename) - QtGui.QMessageBox.information(self, title, message % attrs) - def get_url (self): """Return URL to check from the urlinput widget.""" url = strformat.stripurl(unicode(self.urlinput.text())) @@ -524,9 +509,10 @@ def view_source (self, url, line, col): """View URL source in editor window.""" self.editor.setWindowTitle(u"View %s" % url) self.editor.setUrl(url) - info, data = urlutil.get_content(url, proxy=self.config["proxy"]) - if (info, data) == (None, None): - self.editor.setText(u"An error occurred retreiving URL `%s'." % url) + data, info = urlutil.get_content(url, proxy=self.config["proxy"]) + if data is None: + msg = u"An error occurred retreiving URL `%s': %s." 
% (url, info) + self.editor.setText(msg) else: content_type = httpheaders.get_content_type(info) if not content_type: diff --git a/linkcheck/gui/checker.py b/linkcheck/gui/checker.py index 33e50071b..0ade8766f 100644 --- a/linkcheck/gui/checker.py +++ b/linkcheck/gui/checker.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2008-2011 Bastian Kleineidam +# Copyright (C) 2008-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/contextmenu.py b/linkcheck/gui/contextmenu.py index 9fc6330f3..259fb474c 100644 --- a/linkcheck/gui/contextmenu.py +++ b/linkcheck/gui/contextmenu.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2009-2011 Bastian Kleineidam +# Copyright (C) 2009-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/debug.py b/linkcheck/gui/debug.py index 304dcbceb..e2a697e26 100644 --- a/linkcheck/gui/debug.py +++ b/linkcheck/gui/debug.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2009-2012 Bastian Kleineidam +# Copyright (C) 2009-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -41,23 +41,3 @@ def reset (self): def getText (self): """Get debug info as string.""" return self.textEdit.toPlainText() - - -class LinkCheckerDebugMemory (QtGui.QDialog, Ui_DebugDialog): - """Show memory debugging output.""" - - def __init__ (self, parent=None): - """Setup the debug memory dialog.""" - super(LinkCheckerDebugMemory, self).__init__(parent) - self.setupUi(self) - font = QtGui.QFont("Consolas", 11) - font.setFixedPitch(True) - self.textEdit.document().setDefaultFont(font) - - def reset (self): - """Clear memory info.""" - 
self.textEdit.clear() - - def setText (self, text): - """Set memory debug info.""" - return self.textEdit.setPlainText(text) diff --git a/linkcheck/gui/editor.py b/linkcheck/gui/editor.py index d5b080191..d4b55126d 100644 --- a/linkcheck/gui/editor.py +++ b/linkcheck/gui/editor.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2010-2012 Bastian Kleineidam +# Copyright (C) 2010-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/editor_qsci.py b/linkcheck/gui/editor_qsci.py index 936e8b0dc..bf02b9244 100644 --- a/linkcheck/gui/editor_qsci.py +++ b/linkcheck/gui/editor_qsci.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011 Bastian Kleineidam +# Copyright (C) 2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/editor_qt.py b/linkcheck/gui/editor_qt.py index 86db8ddab..01788c21f 100644 --- a/linkcheck/gui/editor_qt.py +++ b/linkcheck/gui/editor_qt.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011 Bastian Kleineidam +# Copyright (C) 2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/help.py b/linkcheck/gui/help.py index 8cdbe5d33..2e4638fc5 100644 --- a/linkcheck/gui/help.py +++ b/linkcheck/gui/help.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2009-2011 Bastian Kleineidam +# Copyright (C) 2009-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/lineedit.py b/linkcheck/gui/lineedit.py index 0d54d49d1..719090c58 100644 --- 
a/linkcheck/gui/lineedit.py +++ b/linkcheck/gui/lineedit.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2010-2012 Bastian Kleineidam +# Copyright (C) 2010-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/linkchecker_ui_debug.py b/linkcheck/gui/linkchecker_ui_debug.py index 68a9352a2..5f697c677 100644 --- a/linkcheck/gui/linkchecker_ui_debug.py +++ b/linkcheck/gui/linkchecker_ui_debug.py @@ -2,8 +2,8 @@ # Form implementation generated from reading ui file 'ui/debug.ui' # -# Created: Mon Dec 12 19:00:37 2011 -# by: PyQt4 UI code generator 4.8.6 +# Created: Fri Feb 28 21:24:59 2014 +# by: PyQt4 UI code generator 4.9.3 # # WARNING! All changes made in this file will be lost! @@ -19,7 +19,6 @@ def setupUi(self, DebugDialog): DebugDialog.setObjectName(_fromUtf8("DebugDialog")) DebugDialog.setWindowModality(QtCore.Qt.ApplicationModal) DebugDialog.resize(564, 547) - DebugDialog.setWindowTitle(_("LinkChecker debug log")) self.verticalLayout = QtGui.QVBoxLayout(DebugDialog) self.verticalLayout.setObjectName(_fromUtf8("verticalLayout")) self.frame = QtGui.QFrame(DebugDialog) @@ -40,5 +39,5 @@ def setupUi(self, DebugDialog): QtCore.QMetaObject.connectSlotsByName(DebugDialog) def retranslateUi(self, DebugDialog): - pass + DebugDialog.setWindowTitle(_("LinkChecker debug log")) diff --git a/linkcheck/gui/linkchecker_ui_main.py b/linkcheck/gui/linkchecker_ui_main.py index adfa6bbbd..2e88e5f73 100644 --- a/linkcheck/gui/linkchecker_ui_main.py +++ b/linkcheck/gui/linkchecker_ui_main.py @@ -2,7 +2,7 @@ # Form implementation generated from reading ui file 'ui/main.ui' # -# Created: Tue Nov 6 21:47:39 2012 +# Created: Fri Feb 28 21:24:58 2014 # by: PyQt4 UI code generator 4.9.3 # # WARNING! All changes made in this file will be lost! 
@@ -679,29 +679,6 @@ def setupUi(self, MainWindow): self.stats_url_maxlen.setOpenExternalLinks(True) self.stats_url_maxlen.setObjectName(_fromUtf8("stats_url_maxlen")) self.gridLayout_3.addWidget(self.stats_url_maxlen, 1, 1, 1, 1) - self.label_14 = QtGui.QLabel(self.groupBox_2) - sizePolicy = QtGui.QSizePolicy(QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Preferred) - sizePolicy.setHorizontalStretch(0) - sizePolicy.setVerticalStretch(0) - sizePolicy.setHeightForWidth(self.label_14.sizePolicy().hasHeightForWidth()) - self.label_14.setSizePolicy(sizePolicy) - self.label_14.setAlignment(QtCore.Qt.AlignRight|QtCore.Qt.AlignTrailing|QtCore.Qt.AlignVCenter) - self.label_14.setObjectName(_fromUtf8("label_14")) - self.gridLayout_3.addWidget(self.label_14, 1, 2, 1, 1) - self.stats_domains = QtGui.QLabel(self.groupBox_2) - sizePolicy = QtGui.QSizePolicy(QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Preferred) - sizePolicy.setHorizontalStretch(0) - sizePolicy.setVerticalStretch(0) - sizePolicy.setHeightForWidth(self.stats_domains.sizePolicy().hasHeightForWidth()) - self.stats_domains.setSizePolicy(sizePolicy) - self.stats_domains.setMinimumSize(QtCore.QSize(30, 0)) - self.stats_domains.setFrameShape(QtGui.QFrame.StyledPanel) - self.stats_domains.setFrameShadow(QtGui.QFrame.Sunken) - self.stats_domains.setText(_fromUtf8("")) - self.stats_domains.setTextFormat(QtCore.Qt.RichText) - self.stats_domains.setOpenExternalLinks(True) - self.stats_domains.setObjectName(_fromUtf8("stats_domains")) - self.gridLayout_3.addWidget(self.stats_domains, 1, 3, 1, 1) self.verticalLayout_2.addWidget(self.groupBox_2) self.horizontalLayout.addWidget(self.statistics) self.verticalLayout.addLayout(self.horizontalLayout) @@ -831,7 +808,6 @@ def retranslateUi(self, MainWindow): self.label_18.setText(_("Min. length")) self.label_20.setText(_("Avg. length")) self.label_19.setText(_("Max. 
length")) - self.label_14.setText(_("Domains")) self.menuEdit.setTitle(_("&Edit")) self.menuFile.setTitle(_("&File")) self.menuHelp.setTitle(_("&Help")) diff --git a/linkcheck/gui/linkchecker_ui_options.py b/linkcheck/gui/linkchecker_ui_options.py index b440dc7b2..f239126e5 100644 --- a/linkcheck/gui/linkchecker_ui_options.py +++ b/linkcheck/gui/linkchecker_ui_options.py @@ -2,8 +2,8 @@ # Form implementation generated from reading ui file 'ui/options.ui' # -# Created: Sun Jun 10 11:51:42 2012 -# by: PyQt4 UI code generator 4.9.1 +# Created: Fri Feb 28 21:24:59 2014 +# by: PyQt4 UI code generator 4.9.3 # # WARNING! All changes made in this file will be lost! @@ -28,6 +28,7 @@ def setupUi(self, Options): self.widget = QtGui.QWidget(self.groupBox_2) self.widget.setObjectName(_fromUtf8("widget")) self.formLayout = QtGui.QFormLayout(self.widget) + self.formLayout.setFieldGrowthPolicy(QtGui.QFormLayout.ExpandingFieldsGrow) self.formLayout.setMargin(0) self.formLayout.setObjectName(_fromUtf8("formLayout")) self.label = QtGui.QLabel(self.widget) @@ -60,14 +61,6 @@ def setupUi(self, Options): self.debug.setText(_fromUtf8("")) self.debug.setObjectName(_fromUtf8("debug")) self.formLayout.setWidget(2, QtGui.QFormLayout.FieldRole, self.debug) - self.label_7 = QtGui.QLabel(self.widget) - self.label_7.setToolTip(_fromUtf8("")) - self.label_7.setObjectName(_fromUtf8("label_7")) - self.formLayout.setWidget(3, QtGui.QFormLayout.LabelRole, self.label_7) - self.debugmemory = QtGui.QCheckBox(self.widget) - self.debugmemory.setText(_fromUtf8("")) - self.debugmemory.setObjectName(_fromUtf8("debugmemory")) - self.formLayout.setWidget(3, QtGui.QFormLayout.FieldRole, self.debugmemory) self.verticalLayout.addWidget(self.widget) spacerItem = QtGui.QSpacerItem(20, 10, QtGui.QSizePolicy.Minimum, QtGui.QSizePolicy.Expanding) self.verticalLayout.addItem(spacerItem) @@ -143,7 +136,6 @@ def retranslateUi(self, Options): self.label_2.setText(_("Verbose output")) self.verbose.setToolTip(_("Log all 
checked URLs once. Default is to log only errors and warnings.")) self.label_4.setText(_("Debug")) - self.label_7.setText(_("Debug memory usage")) self.label_5.setText(_("Warn when one of these strings are found (one per line):")) self.label_6.setText(_("Ignore URLs matching one of these patterns (one per line):")) self.groupBox.setTitle(_("Configuration file")) diff --git a/linkcheck/gui/options.py b/linkcheck/gui/options.py index 0cee6b9ed..cf859b172 100644 --- a/linkcheck/gui/options.py +++ b/linkcheck/gui/options.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2009-2012 Bastian Kleineidam +# Copyright (C) 2009-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ from PyQt4 import QtGui from .linkchecker_ui_options import Ui_Options from .editor import EditorWindow -from ..fileutil import is_writable, has_module +from ..fileutil import is_writable from .. 
import configuration @@ -46,11 +46,6 @@ def reset_gui_options (self): self.recursionlevel.setValue(-1) self.verbose.setChecked(False) self.debug.setChecked(False) - self.debugmemory.setChecked(False) - if not has_module("meliae"): - self.debugmemory.setEnabled(False) - from ..memoryutil import MemoryDebugMsg - self.debugmemory.setToolTip(MemoryDebugMsg) self.warninglines.setPlainText(u"") self.ignorelines.setPlainText(u"") @@ -69,7 +64,6 @@ def get_options (self): """Return option data as dictionary.""" return dict( debug=self.debug.isChecked(), - debugmemory=self.debugmemory.isChecked(), verbose=self.verbose.isChecked(), recursionlevel=self.recursionlevel.value(), warninglines=unicode(self.warninglines.toPlainText()), @@ -80,8 +74,6 @@ def set_options (self, data): """Set GUI options from given data.""" if data.get("debug") is not None: self.debug.setChecked(data["debug"]) - if data.get("debugmemory") is not None: - self.debugmemory.setChecked(data["debugmemory"]) if data.get("verbose") is not None: self.verbose.setChecked(data["verbose"]) if data.get("recursionlevel") is not None: diff --git a/linkcheck/gui/projects.py b/linkcheck/gui/projects.py index 840cb6eaa..d9174e7e8 100644 --- a/linkcheck/gui/projects.py +++ b/linkcheck/gui/projects.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2012 Bastian Kleineidam +# Copyright (C) 2012-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -57,9 +57,6 @@ def read_gui_config(self): return data = {} option = "debug" - if self.has_option(section, option): - data[option] = self.getboolean(section, option) - option = "debugmemory" if self.has_option(section, option): data[option] = self.getboolean(section, option) option = "verbose" diff --git a/linkcheck/gui/properties.py b/linkcheck/gui/properties.py index 7f48fe961..964827649 100644 --- a/linkcheck/gui/properties.py +++ 
b/linkcheck/gui/properties.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2010-2012 Bastian Kleineidam +# Copyright (C) 2010-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -38,8 +38,8 @@ def set_properties (widget, data): widget.prop_dltime.setText(_("%.3f seconds") % data.dltime) else: widget.prop_dltime.setText(u"") - if data.dlsize >= 0: - widget.prop_size.setText(strformat.strsize(data.dlsize)) + if data.size >= 0: + widget.prop_size.setText(strformat.strsize(data.size)) else: widget.prop_size.setText(u"") if data.modified: diff --git a/linkcheck/gui/recentdocs.py b/linkcheck/gui/recentdocs.py index ab7cf2f8b..4d190f24d 100644 --- a/linkcheck/gui/recentdocs.py +++ b/linkcheck/gui/recentdocs.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011 Bastian Kleineidam +# Copyright (C) 2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/settings.py b/linkcheck/gui/settings.py index c1c18235b..da3261a01 100644 --- a/linkcheck/gui/settings.py +++ b/linkcheck/gui/settings.py @@ -85,10 +85,10 @@ def save_treeviewcols (self, data): def read_options (self): """Return stored GUI options.""" - data = dict(debug=None, debugmemory=None, verbose=None, + data = dict(debug=None, verbose=None, recursionlevel=None, warninglines=None, ignorelines=None) self.settings.beginGroup('output') - for key in ("debug", "debugmemory", "verbose"): + for key in ("debug", "verbose"): if self.settings.contains(key): data[key] = self.settings.value(key).toBool() self.settings.endGroup() @@ -116,7 +116,7 @@ def read_options (self): def save_options (self, data): """Save GUI options.""" self.settings.beginGroup('output') - for key in ("debug", "debugmemory", "verbose"): + for key in ("debug", 
"verbose"): self.settings.setValue(key, QtCore.QVariant(data[key])) self.settings.endGroup() self.settings.beginGroup('checking') diff --git a/linkcheck/gui/statistics.py b/linkcheck/gui/statistics.py index 3591a7028..d9da2c100 100644 --- a/linkcheck/gui/statistics.py +++ b/linkcheck/gui/statistics.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2010-2011 Bastian Kleineidam +# Copyright (C) 2010-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,7 +19,6 @@ def set_statistics (widget, statistics): """Set statistic information in given widget.""" - widget.stats_domains.setText(u"%d" % len(statistics.domains)) widget.stats_url_minlen.setText(u"%d" % statistics.min_url_length) widget.stats_url_maxlen.setText(u"%d" % statistics.max_url_length) widget.stats_url_avglen.setText(u"%d" % statistics.avg_url_length) @@ -38,7 +37,6 @@ def set_statistics (widget, statistics): def clear_statistics (widget): """Reset statistic information in given widget.""" - widget.stats_domains.setText(u"") widget.stats_url_minlen.setText(u"") widget.stats_url_maxlen.setText(u"") widget.stats_url_avglen.setText(u"") diff --git a/linkcheck/gui/syntax.py b/linkcheck/gui/syntax.py index cac087680..b9e05d6f7 100644 --- a/linkcheck/gui/syntax.py +++ b/linkcheck/gui/syntax.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011 Bastian Kleineidam +# Copyright (C) 2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/ui/main.ui b/linkcheck/gui/ui/main.ui index a2b0f9e7c..66faea615 100644 --- a/linkcheck/gui/ui/main.ui +++ b/linkcheck/gui/ui/main.ui @@ -1402,53 +1402,6 @@ - - - - - 0 - 0 - - - - Domains - - - Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter - - - - - - - - 0 - 0 - - - - - 30 - 0 - - - - 
QFrame::StyledPanel - - - QFrame::Sunken - - - - - - Qt::RichText - - - true - - - diff --git a/linkcheck/gui/ui/options.ui b/linkcheck/gui/ui/options.ui index 8eb337e8f..686ca740c 100644 --- a/linkcheck/gui/ui/options.ui +++ b/linkcheck/gui/ui/options.ui @@ -29,6 +29,9 @@ + + QFormLayout::ExpandingFieldsGrow + @@ -104,23 +107,6 @@ - - - - - - - Debug memory usage - - - - - - - - - - diff --git a/linkcheck/gui/updater.py b/linkcheck/gui/updater.py index 4b82de9a8..fecdff836 100644 --- a/linkcheck/gui/updater.py +++ b/linkcheck/gui/updater.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011 Bastian Kleineidam +# Copyright (C) 2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/urlmodel.py b/linkcheck/gui/urlmodel.py index 8408fe8de..60e6de6f5 100644 --- a/linkcheck/gui/urlmodel.py +++ b/linkcheck/gui/urlmodel.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2010-2011 Bastian Kleineidam +# Copyright (C) 2010-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/gui/validator.py b/linkcheck/gui/validator.py index 304e144c1..8262e6bf1 100644 --- a/linkcheck/gui/validator.py +++ b/linkcheck/gui/validator.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011 Bastian Kleineidam +# Copyright (C) 2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/htmlutil/__init__.py b/linkcheck/htmlutil/__init__.py index 31ce70de0..23f2260b6 100644 --- a/linkcheck/htmlutil/__init__.py +++ b/linkcheck/htmlutil/__init__.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2008-2009 Bastian Kleineidam +# 
Copyright (C) 2008-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/htmlutil/linkname.py b/linkcheck/htmlutil/linkname.py index cc22d3a62..9cb65dfa2 100644 --- a/linkcheck/htmlutil/linkname.py +++ b/linkcheck/htmlutil/linkname.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2001-2010 Bastian Kleineidam +# Copyright (C) 2001-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/htmlutil/linkparse.py b/linkcheck/htmlutil/linkparse.py index 81f1f9010..3196d0a57 100644 --- a/linkcheck/htmlutil/linkparse.py +++ b/linkcheck/htmlutil/linkparse.py @@ -201,9 +201,7 @@ def __init__ (self, callback, tags=None): def start_element (self, tag, attrs): """Search for links and store found URLs in a list.""" log.debug(LOG_CHECK, "LinkFinder tag %s attrs %s", tag, attrs) - log.debug(LOG_CHECK, "line %d col %d old line %d old col %d", - self.parser.lineno(), self.parser.column(), - self.parser.last_lineno(), self.parser.last_column()) + log.debug(LOG_CHECK, "line %d col %d old line %d old col %d", self.parser.lineno(), self.parser.column(), self.parser.last_lineno(), self.parser.last_column()) if tag == "base" and not self.base_ref: self.base_ref = unquote(attrs.get_true("href", u'')) tagattrs = self.tags.get(tag, []) @@ -282,7 +280,6 @@ def parse_tag (self, tag, attr, url, name, base): return for u in urls: assert isinstance(u, unicode) or u is None, repr(u) - log.debug(LOG_CHECK, - u"LinkParser found link %r %r %r %r %r", tag, attr, u, name, base) + log.debug(LOG_CHECK, u"LinkParser found link %r %r %r %r %r", tag, attr, u, name, base) self.callback(u, self.parser.last_lineno(), self.parser.last_column(), name, base) diff --git a/linkcheck/httplib2.py b/linkcheck/httplib2.py deleted file 
mode 100644 index 29eb5fea8..000000000 --- a/linkcheck/httplib2.py +++ /dev/null @@ -1,1383 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# Copied from Python source; License: Python License -# Copyright Guido van Rossum and others -r"""HTTP/1.1 client library - - - - -HTTPConnection goes through a number of "states", which define when a client -may legally make another request or fetch the response for a particular -request. This diagram details these state transitions: - - (null) - | - | HTTPConnection() - v - Idle - | - | putrequest() - v - Request-started - | - | ( putheader() )* endheaders() - v - Request-sent - | - | response = getresponse() - v - Unread-response [Response-headers-read] - |\____________________ - | | - | response.read() | putrequest() - v v - Idle Req-started-unread-response - ______/| - / | - response.read() | | ( putheader() )* endheaders() - v v - Request-started Req-sent-unread-response - | - | response.read() - v - Request-sent - -This diagram presents the following rules: - -- a second request may not be started until {response-headers-read} - -- a response [object] cannot be retrieved until {request-sent} - -- there is no differentiation between an unread response body and a - partially read response body - -Note: this enforcement is applied by the HTTPConnection class. The - HTTPResponse class does not enforce this state machine, which - implies sophisticated clients may accelerate the request/response - pipeline. Caution should be taken, though: accelerating the states - beyond the above pattern may imply knowledge of the server's - connection-close behavior for certain requests. For example, it - is impossible to tell whether the server will close the connection - UNTIL the response headers have been read; this means that further - requests cannot be placed into the pipeline until it is known that - the server will NOT be closing the connection. 
- -Logical State __state __response -------------- ------- ---------- -Idle _CS_IDLE None -Request-started _CS_REQ_STARTED None -Request-sent _CS_REQ_SENT None -Unread-response _CS_IDLE -Req-started-unread-response _CS_REQ_STARTED -Req-sent-unread-response _CS_REQ_SENT -""" - -import errno -import sys -import mimetools -from array import array -import os -import socket -from urlparse import urlsplit -import warnings - -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -__all__ = ["HTTP", "HTTPResponse", "HTTPConnection", - "HTTPException", "NotConnected", "UnknownProtocol", - "UnknownTransferEncoding", "UnimplementedFileMode", - "IncompleteRead", "InvalidURL", "ImproperConnectionState", - "CannotSendRequest", "CannotSendHeader", "ResponseNotReady", - "BadStatusLine", "error", "responses"] - -HTTP_PORT = 80 -HTTPS_PORT = 443 - -_UNKNOWN = 'UNKNOWN' - -# connection states -_CS_IDLE = 'Idle' -_CS_REQ_STARTED = 'Request-started' -_CS_REQ_SENT = 'Request-sent' - -# status codes -# informational -CONTINUE = 100 -SWITCHING_PROTOCOLS = 101 -PROCESSING = 102 - -# successful -OK = 200 -CREATED = 201 -ACCEPTED = 202 -NON_AUTHORITATIVE_INFORMATION = 203 -NO_CONTENT = 204 -RESET_CONTENT = 205 -PARTIAL_CONTENT = 206 -MULTI_STATUS = 207 -IM_USED = 226 - -# redirection -MULTIPLE_CHOICES = 300 -MOVED_PERMANENTLY = 301 -FOUND = 302 -SEE_OTHER = 303 -NOT_MODIFIED = 304 -USE_PROXY = 305 -TEMPORARY_REDIRECT = 307 - -# client error -BAD_REQUEST = 400 -UNAUTHORIZED = 401 -PAYMENT_REQUIRED = 402 -FORBIDDEN = 403 -NOT_FOUND = 404 -METHOD_NOT_ALLOWED = 405 -NOT_ACCEPTABLE = 406 -PROXY_AUTHENTICATION_REQUIRED = 407 -REQUEST_TIMEOUT = 408 -CONFLICT = 409 -GONE = 410 -LENGTH_REQUIRED = 411 -PRECONDITION_FAILED = 412 -REQUEST_ENTITY_TOO_LARGE = 413 -REQUEST_URI_TOO_LONG = 414 -UNSUPPORTED_MEDIA_TYPE = 415 -REQUESTED_RANGE_NOT_SATISFIABLE = 416 -EXPECTATION_FAILED = 417 -UNPROCESSABLE_ENTITY = 422 -LOCKED = 423 -FAILED_DEPENDENCY = 424 -UPGRADE_REQUIRED 
= 426 - -# server error -INTERNAL_SERVER_ERROR = 500 -NOT_IMPLEMENTED = 501 -BAD_GATEWAY = 502 -SERVICE_UNAVAILABLE = 503 -GATEWAY_TIMEOUT = 504 -HTTP_VERSION_NOT_SUPPORTED = 505 -INSUFFICIENT_STORAGE = 507 -NOT_EXTENDED = 510 - -# Mapping status codes to official W3C names -responses = { - 100: 'Continue', - 101: 'Switching Protocols', - - 200: 'OK', - 201: 'Created', - 202: 'Accepted', - 203: 'Non-Authoritative Information', - 204: 'No Content', - 205: 'Reset Content', - 206: 'Partial Content', - - 300: 'Multiple Choices', - 301: 'Moved Permanently', - 302: 'Found', - 303: 'See Other', - 304: 'Not Modified', - 305: 'Use Proxy', - 306: '(Unused)', - 307: 'Temporary Redirect', - - 400: 'Bad Request', - 401: 'Unauthorized', - 402: 'Payment Required', - 403: 'Forbidden', - 404: 'Not Found', - 405: 'Method Not Allowed', - 406: 'Not Acceptable', - 407: 'Proxy Authentication Required', - 408: 'Request Timeout', - 409: 'Conflict', - 410: 'Gone', - 411: 'Length Required', - 412: 'Precondition Failed', - 413: 'Request Entity Too Large', - 414: 'Request-URI Too Long', - 415: 'Unsupported Media Type', - 416: 'Requested Range Not Satisfiable', - 417: 'Expectation Failed', - - 500: 'Internal Server Error', - 501: 'Not Implemented', - 502: 'Bad Gateway', - 503: 'Service Unavailable', - 504: 'Gateway Timeout', - 505: 'HTTP Version Not Supported', -} - -# maximal amount of data to read at one time in _safe_read -MAXAMOUNT = 1048576 - -# maximal line length when calling readline(). -_MAXLINE = 65536 - -class HTTPMessage(mimetools.Message): - - def addheader(self, key, value): - """Add header for field key handling repeats.""" - prev = self.dict.get(key) - if prev is None: - self.dict[key] = value - else: - combined = ", ".join((prev, value)) - self.dict[key] = combined - - def addcontinue(self, key, more): - """Add more field data from a continuation line.""" - prev = self.dict[key] - self.dict[key] = prev + "\n " + more - - def readheaders(self): - """Read header lines. 
- - Read header lines up to the entirely blank line that terminates them. - The (normally blank) line that ends the headers is skipped, but not - included in the returned list. If a non-header line ends the headers, - (which is an error), an attempt is made to backspace over it; it is - never included in the returned list. - - The variable self.status is set to the empty string if all went well, - otherwise it is an error message. The variable self.headers is a - completely uninterpreted list of lines contained in the header (so - printing them will reproduce the header exactly as it appears in the - file). - - If multiple header fields with the same name occur, they are combined - according to the rules in RFC 2616 sec 4.2: - - Appending each subsequent field-value to the first, each separated - by a comma. The order in which header fields with the same field-name - are received is significant to the interpretation of the combined - field value. - """ - # XXX The implementation overrides the readheaders() method of - # rfc822.Message. The base class design isn't amenable to - # customized behavior here so the method here is a copy of the - # base class code with a few small changes. 
- - self.dict = {} - self.unixfrom = '' - self.headers = hlist = [] - self.status = '' - headerseen = "" - firstline = 1 - startofline = unread = tell = None - if hasattr(self.fp, 'unread'): - unread = self.fp.unread - elif self.seekable: - tell = self.fp.tell - while True: - if tell: - try: - startofline = tell() - except IOError: - startofline = tell = None - self.seekable = 0 - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - if not line: - self.status = 'EOF in headers' - break - # Skip unix From name time lines - if firstline and line.startswith('From '): - self.unixfrom = self.unixfrom + line - continue - firstline = 0 - if headerseen and line[0] in ' \t': - # XXX Not sure if continuation lines are handled properly - # for http and/or for repeating headers - # It's a continuation line. - hlist.append(line) - self.addcontinue(headerseen, line.strip()) - continue - elif self.iscomment(line): - # It's a comment. Ignore it. - continue - elif self.islast(line): - # Note! No pushback here! The delimiter line gets eaten. - break - headerseen = self.isheader(line) - if headerseen: - # It's a legal header line, save it. - hlist.append(line) - self.addheader(headerseen, line[len(headerseen)+1:].strip()) - continue - else: - # It's not a header line; throw it back and stop here. - if not self.dict: - self.status = 'No headers' - else: - self.status = 'Non-header line where header expected' - # Try to undo the read. - if unread: - unread(line) - elif tell: - self.fp.seek(startofline) - else: - self.status = self.status + '; bad seek' - break - -class HTTPResponse: - - # strict: If true, raise BadStatusLine if the status line can't be - # parsed as a valid HTTP/1.0 or 1.1 status line. By default it is - # false because it prevents clients from talking to HTTP/0.9 - # servers. Note that a response with a sufficiently corrupted - # status line will look like an HTTP/0.9 response. 
- - # See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details. - - def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False): - if buffering: - # The caller won't be using any sock.recv() calls, so buffering - # is fine and recommended for performance. - self.fp = sock.makefile('rb') - else: - # The buffer size is specified as zero, because the headers of - # the response are read with readline(). If the reads were - # buffered the readline() calls could consume some of the - # response, which make be read via a recv() on the underlying - # socket. - self.fp = sock.makefile('rb', 0) - self.debuglevel = debuglevel - self.strict = strict - self._method = method - - self.msg = None - - # from the Status-Line of the response - self.version = _UNKNOWN # HTTP-Version - self.status = _UNKNOWN # Status-Code - self.reason = _UNKNOWN # Reason-Phrase - - self.chunked = _UNKNOWN # is "chunked" being used? - self.chunk_left = _UNKNOWN # bytes left to read in current chunk - self.length = _UNKNOWN # number of bytes left in response - self.will_close = _UNKNOWN # conn will close at end of response - - def __str__ (self): - return "" % \ - (self.status, self.reason, self.chunked, self.length) - - def _read_status(self): - # Initialize with Simple-Response defaults - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - if self.debuglevel > 0: - print >>sys.stderr, "reply:", repr(line) - if not line: - # Presumably, the server closed the connection before - # sending a valid response. - raise BadStatusLine(line) - try: - [version, status, reason] = line.split(None, 2) - except ValueError: - try: - [version, status] = line.split(None, 1) - reason = "" - except ValueError: - # empty version will cause next test to fail and status - # will be treated as 0.9 response. 
- version = "" - if not version.startswith('HTTP/'): - if self.strict: - self.close() - raise BadStatusLine(line) - else: - # assume it's a Simple-Response from an 0.9 server - self.fp = LineAndFileWrapper(line, self.fp) - return "HTTP/0.9", 200, "" - - # The status code is a three-digit number - try: - status = int(status) - if status < 100 or status > 999: - raise BadStatusLine(line) - except ValueError: - raise BadStatusLine(line) - return version, status, reason - - def begin(self): - if self.msg is not None: - # we've already started reading the response - return - - # read until we get a non-100 response - while True: - version, status, reason = self._read_status() - if status != CONTINUE: - break - # skip the header from the 100 response - while True: - skip = self.fp.readline(_MAXLINE + 1) - if len(skip) > _MAXLINE: - raise LineTooLong("header line") - skip = skip.strip() - if not skip: - break - if self.debuglevel > 0: - print >>sys.stderr, "header:", skip - - self.status = status - self.reason = reason.strip() - if version == 'HTTP/1.0': - self.version = 10 - elif version.startswith('HTTP/1.'): - self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1 - elif version == 'HTTP/0.9': - self.version = 9 - else: - raise UnknownProtocol(version) - - if self.version == 9: - self.length = None - self.chunked = 0 - self.will_close = 1 - self.msg = HTTPMessage(StringIO()) - return - - self.msg = HTTPMessage(self.fp, 0) - if self.debuglevel > 0: - for hdr in self.msg.headers: - print >>sys.stderr, "header:", hdr, - - # don't let the msg keep an fp - self.msg.fp = None - - # are we using the chunked-style of transfer encoding? - tr_enc = self.msg.getheader('transfer-encoding') - if tr_enc and tr_enc.lower() == "chunked": - self.chunked = 1 - self.chunk_left = None - else: - self.chunked = 0 - - # will the connection close at the end of the response? - self.will_close = self._check_close() - - # do we have a Content-Length? 
- # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked" - length = self.msg.getheader('content-length') - if length and not self.chunked: - try: - self.length = int(length) - except ValueError: - self.length = None - else: - if self.length < 0: # ignore nonsensical negative lengths - self.length = None - else: - self.length = None - - # does the body have a fixed length? (of zero) - if (status == NO_CONTENT or status == NOT_MODIFIED or - 100 <= status < 200 or # 1xx codes - self._method == 'HEAD'): - self.length = 0 - - # if the connection remains open, and we aren't using chunked, and - # a content-length was not provided, then assume that the connection - # WILL close. - if not self.will_close and \ - not self.chunked and \ - self.length is None: - self.will_close = 1 - - def _check_close(self): - conn = self.msg.getheader('connection') - if self.version == 11: - # An HTTP/1.1 proxy is assumed to stay open unless - # explicitly closed. - conn = self.msg.getheader('connection') - if conn and "close" in conn.lower(): - return True - return False - - # Some HTTP/1.0 implementations have support for persistent - # connections, using rules different than HTTP/1.1. - - # For older HTTP, Keep-Alive indicates persistent connection. - if self.msg.getheader('keep-alive'): - return False - - # At least Akamai returns a "Connection: Keep-Alive" header, - # which was supposed to be sent by the client. - if conn and "keep-alive" in conn.lower(): - return False - - # Proxy-Connection is a netscape hack. - pconn = self.msg.getheader('proxy-connection') - if pconn and "keep-alive" in pconn.lower(): - return False - - # otherwise, assume it will close - return True - - def close(self): - if self.fp: - self.fp.close() - self.fp = None - - def isclosed(self): - # NOTE: it is possible that we will not ever call self.close(). This - # case occurs when will_close is TRUE, length is None, and we - # read up to the last byte, but NOT past it. 
- - # IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be - # called, meaning self.isclosed() is meaningful. - return self.fp is None - - # XXX It would be nice to have readline and __iter__ for this, too. - - def read(self, amt=None): - if self.fp is None: - return '' - - if self._method == 'HEAD': - self.close() - return '' - - if self.chunked: - return self._read_chunked(amt) - - if amt is None: - # unbounded read - if self.length is None: - s = self.fp.read() - else: - try: - s = self._safe_read(self.length) - except IncompleteRead: - self.close() - raise - self.length = 0 - self.close() # we read everything - return s - - if self.length is not None: - if amt > self.length: - # clip the read to the "end of response" - amt = self.length - - # we do not use _safe_read() here because this may be a .will_close - # connection, and the user is reading more bytes than will be provided - # (for example, reading in 1k chunks) - s = self.fp.read(amt) - if not s: - # Ideally, we would raise IncompleteRead if the content-length - # wasn't satisfied, but it might break compatibility. 
- self.close() - if self.length is not None: - self.length -= len(s) - if not self.length: - self.close() - - return s - - def _read_chunked(self, amt): - assert self.chunked != _UNKNOWN - chunk_left = self.chunk_left - value = [] - - while True: - if chunk_left is None: - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("chunk size") - i = line.find(';') - if i >= 0: - line = line[:i] # strip chunk-extensions - try: - chunk_left = int(line, 16) - except ValueError: - # close the connection as protocol synchronisation is - # probably lost - self.close() - raise IncompleteRead("Invalid chunk length at %r" % line) - if chunk_left == 0: - break - if amt is None: - value.append(self._safe_read(chunk_left)) - elif amt < chunk_left: - value.append(self._safe_read(amt)) - self.chunk_left = chunk_left - amt - return ''.join(value) - elif amt == chunk_left: - value.append(self._safe_read(amt)) - self._safe_read(2) # toss the CRLF at the end of the chunk - self.chunk_left = None - return ''.join(value) - else: - value.append(self._safe_read(chunk_left)) - amt -= chunk_left - - # we read the whole chunk, get another - self._safe_read(2) # toss the CRLF at the end of the chunk - chunk_left = None - - # read and discard trailer up to the CRLF terminator - ### note: we shouldn't have any trailers! - while True: - line = self.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("trailer line") - if not line: - # a vanishingly small number of sites EOF without - # sending the trailer - break - if line == '\r\n': - break - - # we read everything; close the "file" - self.close() - - return ''.join(value) - - def _safe_read(self, amt): - """Read the number of bytes requested, compensating for partial reads. - - Normally, we have a blocking socket, but a read() can be interrupted - by a signal (resulting in a partial read). - - Note that we cannot distinguish between EOF and an interrupt when zero - bytes have been read. 
IncompleteRead() will be raised in this - situation. - - This function should be used when bytes "should" be present for - reading. If the bytes are truly not available (due to EOF), then the - IncompleteRead exception can be used to detect the problem. - """ - # NOTE(gps): As of svn r74426 socket._fileobject.read(x) will never - # return less than x bytes unless EOF is encountered. It now handles - # signal interruptions (socket.error EINTR) internally. This code - # never caught that exception anyways. It seems largely pointless. - # self.fp.read(amt) will work fine. - s = [] - while amt > 0: - chunk = self.fp.read(min(amt, MAXAMOUNT)) - if not chunk: - raise IncompleteRead(''.join(s), amt) - s.append(chunk) - amt -= len(chunk) - return ''.join(s) - - def fileno(self): - return self.fp.fileno() - - def getheader(self, name, default=None): - if self.msg is None: - raise ResponseNotReady("Response msg is None") - return self.msg.getheader(name, default) - - def getheaders(self): - """Return list of (header, value) tuples.""" - if self.msg is None: - raise ResponseNotReady("Response msg is None") - return self.msg.items() - - -class HTTPConnection: - - _http_vsn = 11 - _http_vsn_str = 'HTTP/1.1' - - response_class = HTTPResponse - default_port = HTTP_PORT - auto_open = 1 - debuglevel = 0 - strict = 0 - - def __init__(self, host, port=None, strict=None, - timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): - self.timeout = timeout - self.source_address = source_address - self.sock = None - self._buffer = [] - self.__response = None - self.__state = _CS_IDLE - self._method = None - self._tunnel_host = None - self._tunnel_port = None - self._tunnel_headers = {} - - self._set_hostport(host, port) - if strict is not None: - self.strict = strict - - def __str__ (self): - return "" % \ - (self.__state, self._method, self.sock, self._buffer, self.__response) - - def set_tunnel(self, host, port=None, headers=None): - """ Sets up the host and the port for the HTTP 
CONNECT Tunnelling. - - The headers argument should be a mapping of extra HTTP headers - to send with the CONNECT request. - """ - self._tunnel_host = host - self._tunnel_port = port - if headers: - self._tunnel_headers = headers - else: - self._tunnel_headers.clear() - - def _set_hostport(self, host, port): - if port is None: - i = host.rfind(':') - j = host.rfind(']') # ipv6 addresses have [...] - if i > j: - try: - port = int(host[i+1:]) - except ValueError: - if host[i+1:] == "": # http://foo.com:/ == http://foo.com/ - port = self.default_port - else: - raise InvalidURL("nonnumeric port: '%s'" % host[i+1:]) - host = host[:i] - else: - port = self.default_port - if host and host[0] == '[' and host[-1] == ']': - host = host[1:-1] - self.host = host - self.port = port - - def set_debuglevel(self, level): - self.debuglevel = level - - def _tunnel(self): - self._set_hostport(self._tunnel_host, self._tunnel_port) - self.send("CONNECT %s:%d HTTP/1.0\r\n" % (self.host, self.port)) - for header, value in self._tunnel_headers.iteritems(): - self.send("%s: %s\r\n" % (header, value)) - self.send("\r\n") - response = self.response_class(self.sock, strict = self.strict, - method = self._method) - (version, code, message) = response._read_status() - - if code != 200: - self.close() - raise socket.error("Tunnel connection failed: %d %s" % (code, - message.strip())) - while True: - line = response.fp.readline(_MAXLINE + 1) - if len(line) > _MAXLINE: - raise LineTooLong("header line") - if not line: - # for sites which EOF without sending trailer - break - if line == '\r\n': - break - - - def connect(self): - """Connect to the host and port specified in __init__.""" - self.sock = socket.create_connection((self.host,self.port), - self.timeout, self.source_address) - - if self._tunnel_host: - self._tunnel() - - def close(self): - """Close the connection to the HTTP server.""" - if self.sock: - self.sock.close() # close it manually... 
there may be other refs - self.sock = None - if self.__response: - self.__response.close() - self.__response = None - self.__state = _CS_IDLE - - def send(self, data): - """Send `data' to the server.""" - if self.sock is None: - if self.auto_open: - self.connect() - else: - raise NotConnected("cannot send when not connected") - - # send the data to the server. if we get a broken pipe, then close - # the socket. we want to reconnect when somebody tries to send again. - - # NOTE: we DO propagate the error, though, because we cannot simply - # ignore the error... the caller will know if they can retry. - if self.debuglevel > 0: - print >>sys.stderr, "send:", repr(data) - try: - blocksize=8192 - if hasattr(data,'read') and not isinstance(data, array): - if self.debuglevel > 0: print >>sys.stderr, "sendIng a read()able" - datablock=data.read(blocksize) - while datablock: - self.sock.sendall(datablock) - datablock=data.read(blocksize) - else: - self.sock.sendall(data) - except socket.error, v: - if v.args[0] == 32: # Broken pipe - self.close() - raise - - def _output(self, s): - """Add a line of output to the current request buffer. - - Assumes that the line does *not* end with \\r\\n. - """ - self._buffer.append(s) - - def _send_output(self, message_body=None): - """Send the currently buffered request and clear the buffer. - - Appends an extra \\r\\n to the buffer. - A message_body may be specified, to be appended to the request. - """ - self._buffer.extend(("", "")) - msg = "\r\n".join(self._buffer) - del self._buffer[:] - # If msg and message_body are sent in a single send() call, - # it will avoid performance problems caused by the interaction - # between delayed ack and the Nagle algorithm. - if isinstance(message_body, str): - msg += message_body - message_body = None - self.send(msg) - if message_body is not None: - #message_body was not a string (i.e. 
it is a file) and - #we must run the risk of Nagle - self.send(message_body) - - def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): - """Send a request to the server. - - `method' specifies an HTTP request method, e.g. 'GET'. - `url' specifies the object being requested, e.g. '/index.html'. - `skip_host' if True does not add automatically a 'Host:' header - `skip_accept_encoding' if True does not add automatically an - 'Accept-Encoding:' header - """ - - # if a prior response has been completed, then forget about it. - if self.__response and self.__response.isclosed(): - self.__response = None - - - # in certain cases, we cannot issue another request on this connection. - # this occurs when: - # 1) we are in the process of sending a request. (_CS_REQ_STARTED) - # 2) a response to a previous request has signalled that it is going - # to close the connection upon completion. - # 3) the headers for the previous response have not been read, thus - # we cannot determine whether point (2) is true. (_CS_REQ_SENT) - - # if there is no prior response, then we can request at will. - - # if point (2) is true, then we will have passed the socket to the - # response (effectively meaning, "there is no prior response"), and - # will open a new one when a new request is made. - - # Note: if a prior response exists, then we *can* start a new request. - # We are not allowed to begin fetching the response to this new - # request, however, until that prior response is complete. 
- - if self.__state == _CS_IDLE: - self.__state = _CS_REQ_STARTED - else: - raise CannotSendRequest("cannot send request in state %s" % self.__state) - - # Save the method we use, we need it later in the response phase - self._method = method - if not url: - url = '/' - hdr = '%s %s %s' % (method, url, self._http_vsn_str) - - self._output(hdr) - - if self._http_vsn == 11: - # Issue some standard headers for better HTTP/1.1 compliance - - if not skip_host: - # this header is issued *only* for HTTP/1.1 - # connections. more specifically, this means it is - # only issued when the client uses the new - # HTTPConnection() class. backwards-compat clients - # will be using HTTP/1.0 and those clients may be - # issuing this header themselves. we should NOT issue - # it twice; some web servers (such as Apache) barf - # when they see two Host: headers - - # If we need a non-standard port,include it in the - # header. If the request is going through a proxy, - # but the host of the actual URL, not the host of the - # proxy. - - netloc = '' - if url.startswith('http'): - nil, netloc, nil, nil, nil = urlsplit(url) - - if netloc: - try: - netloc_enc = netloc.encode("ascii") - except UnicodeEncodeError: - netloc_enc = netloc.encode("idna") - self.putheader('Host', netloc_enc) - else: - try: - host_enc = self.host.encode("ascii") - except UnicodeEncodeError: - host_enc = self.host.encode("idna") - # Wrap the IPv6 Host Header with [] (RFC 2732) - if host_enc.find(':') >= 0: - host_enc = "[" + host_enc + "]" - if self.port == self.default_port: - self.putheader('Host', host_enc) - else: - self.putheader('Host', "%s:%s" % (host_enc, self.port)) - - # note: we are assuming that clients will not attempt to set these - # headers since *this* library must deal with the - # consequences. this also means that when the supporting - # libraries are updated to recognize other forms, then this - # code should be changed (removed or updated). 
- - # we only want a Content-Encoding of "identity" since we don't - # support encodings such as x-gzip or x-deflate. - if not skip_accept_encoding: - self.putheader('Accept-Encoding', 'identity') - - # we can accept "chunked" Transfer-Encodings, but no others - # NOTE: no TE header implies *only* "chunked" - #self.putheader('TE', 'chunked') - - # if TE is supplied in the header, then it must appear in a - # Connection header. - #self.putheader('Connection', 'TE') - - else: - # For HTTP/1.0, the server will assume "not chunked" - pass - - def putheader(self, header, *values): - """Send a request header line to the server. - - For example: h.putheader('Accept', 'text/html') - """ - if self.__state != _CS_REQ_STARTED: - raise CannotSendHeader("cannot send request in state %s" % self.__state) - - hdr = '%s: %s' % (header, '\r\n\t'.join([str(v) for v in values])) - self._output(hdr) - - def endheaders(self, message_body=None): - """Indicate that the last header line has been sent to the server. - - This method sends the request to the server. The optional - message_body argument can be used to pass a message body - associated with the request. The message body will be sent in - the same packet as the message headers if it is a string, otherwise it is - sent as a separate packet. - """ - - if self.__state == _CS_REQ_STARTED: - self.__state = _CS_REQ_SENT - else: - raise CannotSendHeader("cannot send request in state %s" % self.__state) - - self._send_output(message_body) - - def request(self, method, url, body=None, headers={}): - """Send a complete request to the server.""" - - try: - self._send_request(method, url, body, headers) - except socket.error, v: - # trap 'Broken pipe' if we're allowed to automatically reconnect - if v.args[0] != errno.EPIPE or not self.auto_open: - raise - # try one more time - self._send_request(method, url, body, headers) - - def _set_content_length(self, body): - # Set the content-length based on the body. 
- thelen = None - try: - thelen = str(len(body)) - except TypeError: - # If this is a file-like object, try to - # fstat its file descriptor - try: - thelen = str(os.fstat(body.fileno()).st_size) - except (AttributeError, OSError): - # Don't send a length if this failed - if self.debuglevel > 0: print >>sys.stderr, "Cannot stat!!" - - if thelen is not None: - self.putheader('Content-Length', thelen) - - def _send_request(self, method, url, body, headers): - # Honor explicitly requested Host: and Accept-Encoding: headers. - header_names = dict.fromkeys([k.lower() for k in headers]) - skips = {} - if 'host' in header_names: - skips['skip_host'] = 1 - if 'accept-encoding' in header_names: - skips['skip_accept_encoding'] = 1 - - self.putrequest(method, url, **skips) - - if body is not None and 'content-length' not in header_names: - self._set_content_length(body) - for hdr, value in headers.iteritems(): - self.putheader(hdr, value) - self.endheaders(body) - - def getresponse(self, buffering=False): - "Get the response from the server." - - # if a prior response has been completed, then forget about it. - if self.__response and self.__response.isclosed(): - self.__response = None - - - # if a prior response exists, then it must be completed (otherwise, we - # cannot read this response's header to determine the connection-close - # behavior) - - # note: if a prior response existed, but was connection-close, then the - # socket and response were made independent of this HTTPConnection - # object since a new request requires that we open a whole new - # connection - - # this means the prior response had one of two states: - # 1) will_close: this connection was reset and the prior socket and - # response operate independently - # 2) persistent: the response was retained and we await its - # isclosed() status to become true. 
- - if self.__state != _CS_REQ_SENT or self.__response: - msg = "State %s, Response %s" % (self.__state, self.__response) - raise ResponseNotReady(msg) - - args = (self.sock,) - kwds = {"strict":self.strict, "method":self._method} - if self.debuglevel > 0: - args += (self.debuglevel,) - if buffering: - #only add this keyword if non-default, for compatibility with - #other response_classes. - kwds["buffering"] = True; - response = self.response_class(*args, **kwds) - - response.begin() - assert response.will_close != _UNKNOWN - self.__state = _CS_IDLE - - if response.will_close: - # this effectively passes the connection to the response - self.close() - else: - # remember this, so we can tell when it is complete - self.__response = response - - return response - - def is_idle (self): - return self.__state == _CS_IDLE - - -class HTTP: - "Compatibility class with httplib.py from 1.5." - - _http_vsn = 10 - _http_vsn_str = 'HTTP/1.0' - - debuglevel = 0 - - _connection_class = HTTPConnection - - def __init__(self, host='', port=None, strict=None): - "Provide a default host, since the superclass requires one." - - # some joker passed 0 explicitly, meaning default port - if port == 0: - port = None - - # Note that we may pass an empty string as the host; this will throw - # an error when we attempt to connect. Presumably, the client code - # will call connect before then, with a proper host. - self._setup(self._connection_class(host, port, strict)) - - def _setup(self, conn): - self._conn = conn - - # set up delegation to flesh out interface - self.send = conn.send - self.putrequest = conn.putrequest - self.putheader = conn.putheader - self.endheaders = conn.endheaders - self.set_debuglevel = conn.set_debuglevel - - conn._http_vsn = self._http_vsn - conn._http_vsn_str = self._http_vsn_str - - self.file = None - - def connect(self, host=None, port=None): - "Accept arguments to set the host/port, since the superclass doesn't." 
- - if host is not None: - self._conn._set_hostport(host, port) - self._conn.connect() - - def getfile(self): - "Provide a getfile, since the superclass' does not use this concept." - return self.file - - def getreply(self, buffering=False): - """Compat definition since superclass does not define it. - - Returns a tuple consisting of: - - server status code (e.g. '200' if all goes well) - - server "reason" corresponding to status code - - any RFC822 headers in the response from the server - """ - try: - if not buffering: - response = self._conn.getresponse() - else: - #only add this keyword if non-default for compatibility - #with other connection classes - response = self._conn.getresponse(buffering) - except BadStatusLine, e: - ### hmm. if getresponse() ever closes the socket on a bad request, - ### then we are going to have problems with self.sock - - ### should we keep this behavior? do people use it? - # keep the socket open (as a file), and return it - self.file = self._conn.sock.makefile('rb', 0) - - # close our socket -- we want to restart after any protocol error - self.close() - - self.headers = None - return -1, e.line, None - - self.headers = response.msg - self.file = response.fp - return response.status, response.reason, response.msg - - def close(self): - self._conn.close() - - # note that self.file == response.fp, which gets closed by the - # superclass. just clear the object ref here. - ### hmm. messy. if status==-1, then self.file is owned by us. - ### well... we aren't explicitly closing, but losing this ref will - ### do it - self.file = None - -try: - import ssl -except ImportError: - pass -else: - class HTTPSConnection(HTTPConnection): - "This class allows communication via SSL." 
- - default_port = HTTPS_PORT - - def __init__(self, host, port=None, key_file=None, cert_file=None, - strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, - source_address=None, ca_certs=None): - HTTPConnection.__init__(self, host, port, strict, timeout, - source_address) - self.key_file = key_file - self.cert_file = cert_file - self.ca_certs = ca_certs - if self.ca_certs: - self.cert_reqs = ssl.CERT_REQUIRED - else: - self.cert_reqs = ssl.CERT_NONE - - def connect(self): - "Connect to a host on a given (SSL) port." - - sock = socket.create_connection((self.host, self.port), - self.timeout, self.source_address) - if self._tunnel_host: - self.sock = sock - self._tunnel() - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=self.cert_reqs, - ca_certs=self.ca_certs) - - __all__.append("HTTPSConnection") - - class HTTPS(HTTP): - """Compatibility with 1.5 httplib interface - - Python 1.5.2 did not have an HTTPS class, but it defined an - interface for sending http requests that is also useful for - https. - """ - - _connection_class = HTTPSConnection - - def __init__(self, host='', port=None, key_file=None, cert_file=None, - strict=None): - # provide a default host, pass the X509 cert info - - # urf. compensate for bad input. - if port == 0: - port = None - self._setup(self._connection_class(host, port, key_file, - cert_file, strict)) - - # we never actually use these for anything, but we keep them - # here for compatibility with post-1.5.2 CVS. - self.key_file = key_file - self.cert_file = cert_file - - - def FakeSocket (sock, sslobj): - warnings.warn("FakeSocket is deprecated, and won't be in 3.x. " + - "Use the result of ssl.wrap_socket() directly instead.", - DeprecationWarning, stacklevel=2) - return sslobj - - -class HTTPException(Exception): - # Subclasses that define an __init__ must call Exception.__init__ - # or define self.args. Otherwise, str() will fail. 
- pass - -class NotConnected(HTTPException): - pass - -class InvalidURL(HTTPException): - pass - -class UnknownProtocol(HTTPException): - def __init__(self, version): - self.args = version, - self.version = version - -class UnknownTransferEncoding(HTTPException): - pass - -class UnimplementedFileMode(HTTPException): - pass - -class IncompleteRead(HTTPException): - def __init__(self, partial, expected=None): - self.args = partial, - self.partial = partial - self.expected = expected - def __repr__(self): - if self.expected is not None: - e = ', %i more expected' % self.expected - else: - e = '' - return 'IncompleteRead(%i bytes read%s)' % (len(self.partial), e) - def __str__(self): - return repr(self) - -class ImproperConnectionState(HTTPException): - pass - -class CannotSendRequest(ImproperConnectionState): - pass - -class CannotSendHeader(ImproperConnectionState): - pass - -class ResponseNotReady(ImproperConnectionState): - pass - -class BadStatusLine(HTTPException): - def __init__(self, line): - if not line: - line = repr(line) - self.args = line, - self.line = line - -class LineTooLong(HTTPException): - def __init__(self, line_type): - HTTPException.__init__(self, "got more than %d bytes when reading %s" - % (_MAXLINE, line_type)) - -# for backwards compatibility -error = HTTPException - -class LineAndFileWrapper: - """A limited file-like object for HTTP/0.9 responses.""" - - # The status-line parsing code calls readline(), which normally - # get the HTTP status line. For a 0.9 response, however, this is - # actually the first line of the body! Clients need to get a - # readable file object that contains that line. - - def __init__(self, line, file): - self._line = line - self._file = file - self._line_consumed = 0 - self._line_offset = 0 - self._line_left = len(line) - - def __getattr__(self, attr): - return getattr(self._file, attr) - - def _done(self): - # called when the last byte is read from the line. 
After the - # call, all read methods are delegated to the underlying file - # object. - self._line_consumed = 1 - self.read = self._file.read - self.readline = self._file.readline - self.readlines = self._file.readlines - - def read(self, amt=None): - if self._line_consumed: - return self._file.read(amt) - assert self._line_left - if amt is None or amt > self._line_left: - s = self._line[self._line_offset:] - self._done() - if amt is None: - return s + self._file.read() - else: - return s + self._file.read(amt - len(s)) - else: - assert amt <= self._line_left - i = self._line_offset - j = i + amt - s = self._line[i:j] - self._line_offset = j - self._line_left -= amt - if self._line_left == 0: - self._done() - return s - - def readline(self): - if self._line_consumed: - return self._file.readline() - assert self._line_left - s = self._line[self._line_offset:] - self._done() - return s - - def readlines(self, size=None): - if self._line_consumed: - return self._file.readlines(size) - assert self._line_left - L = [self._line[self._line_offset:]] - self._done() - if size is None: - return L + self._file.readlines() - else: - return L + self._file.readlines(size) diff --git a/linkcheck/httputil.py b/linkcheck/httputil.py index 5b09ed5e4..1dc6f7e87 100644 --- a/linkcheck/httputil.py +++ b/linkcheck/httputil.py @@ -1,86 +1,9 @@ # -*- coding: iso-8859-1 -*- # Various HTTP utils with a free license -from cStringIO import StringIO -from . import gzip2 as gzip -from . import httplib2 as httplib -from . import log, LOG_CHECK, fileutil -import re -import zlib -import urllib -import urllib2 +from . 
import fileutil import base64 -########################################################################### -# urlutils.py - Simplified urllib handling -# -# Written by Chris Lawrence -# (C) 1999-2002 Chris Lawrence -# -# This program is freely distributable per the following license: -# -## Permission to use, copy, modify, and distribute this software and its -## documentation for any purpose and without fee is hereby granted, -## provided that the above copyright notice appears in all copies and that -## both that copyright notice and this permission notice appear in -## supporting documentation. -## -## I DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL -## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL I -## BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY -## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, -## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -## ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS -## SOFTWARE. 
- -def decode (page): - """Gunzip or deflate a compressed page.""" - log.debug(LOG_CHECK, "page info %d %s", page.code, str(page.info())) - encoding = page.info().get("Content-Encoding") - if encoding in ('gzip', 'x-gzip', 'deflate'): - # cannot seek in socket descriptors, so must get content now - content = page.read() - try: - if encoding == 'deflate': - fp = StringIO(zlib.decompress(content)) - else: - fp = gzip.GzipFile('', 'rb', 9, StringIO(content)) - except zlib.error as msg: - log.debug(LOG_CHECK, "uncompressing had error " - "%s, assuming non-compressed content", str(msg)) - fp = StringIO(content) - # remove content-encoding header - headers = httplib.HTTPMessage(StringIO("")) - ceheader = re.compile(r"(?i)content-encoding:") - for h in page.info().keys(): - if not ceheader.match(h): - headers[h] = page.info()[h] - newpage = urllib.addinfourl(fp, headers, page.geturl()) - newpage.code = page.code - newpage.msg = page.msg - return newpage - return page - - -class HttpWithGzipHandler (urllib2.HTTPHandler): - """Support gzip encoding.""" - def http_open (self, req): - """Send request and decode answer.""" - return decode(urllib2.HTTPHandler.http_open(self, req)) - - -if hasattr(httplib, 'HTTPS'): - class HttpsWithGzipHandler (urllib2.HTTPSHandler): - """Support gzip encoding.""" - - def https_open (self, req): - """Send request and decode answer.""" - return decode(urllib2.HTTPSHandler.https_open(self, req)) - -# end of urlutils.py routines -########################################################################### - - def encode_multipart_formdata(fields, files=None): """ From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306 diff --git a/linkcheck/i18n.py b/linkcheck/i18n.py index c237f7ddd..65523a60c 100644 --- a/linkcheck/i18n.py +++ b/linkcheck/i18n.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can 
redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/lc_cgi.py b/linkcheck/lc_cgi.py index e916ea0d3..9e28d5a47 100644 --- a/linkcheck/lc_cgi.py +++ b/linkcheck/lc_cgi.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -172,7 +172,7 @@ def get_configuration(form, out): config["logger"] = config.logger_new('html', fd=out, encoding=HTML_ENCODING) config["threads"] = 2 if "anchors" in form: - config["anchors"] = True + config["enabledplugins"].append("AnchorCheck") if "errors" not in form: config["verbose"] = True # avoid checking of local files or other nasty stuff @@ -246,15 +246,16 @@ def format_error (why): @return: HTML page content @rtype: unicode """ - return _(""" + return _(""" + LinkChecker Online Error
-Error: %s
+Error: %s
The LinkChecker Online script has encountered an error. Please ensure that your provided URL link begins with http:// and -contains only these characters: A-Za-z0-9./_~-

+contains only these characters: A-Za-z0-9./_~-

Errors are logged.
diff --git a/linkcheck/loader.py b/linkcheck/loader.py index 2b8cf66ae..357c85a0e 100644 --- a/linkcheck/loader.py +++ b/linkcheck/loader.py @@ -4,7 +4,7 @@ Functions to load plugin modules. Example usage: - modules = loader.get_modules('plugins') + modules = loader.get_package_modules('plugins') plugins = loader.get_plugins(modules, PluginClass) """ from __future__ import print_function @@ -12,6 +12,8 @@ import sys import zipfile import importlib +import imp +from .fileutil import is_writable_by_others def is_frozen (): @@ -20,10 +22,10 @@ def is_frozen (): return hasattr(sys, "frozen") -def get_modules(folder): - """Find all valid modules in the given folder which must be in - in the same directory as this loader.py module. A valid module - has a .py extension, and is importable. +def get_package_modules(packagename): + """Find all valid modules in the given package which must be a folder + in the same directory as this loader.py module. A valid module has + a .py extension, and is importable. 
@return: all loaded valid modules @rtype: iterator of module """ @@ -32,22 +34,36 @@ def get_modules(folder): zipname = os.path.dirname(os.path.dirname(__file__)) parentmodule = os.path.basename(os.path.dirname(__file__)) with zipfile.ZipFile(zipname, 'r') as f: - prefix = "%s/%s/" % (parentmodule, folder) + prefix = "%s/%s/" % (parentmodule, packagename) modnames = [os.path.splitext(n[len(prefix):])[0] for n in f.namelist() if n.startswith(prefix) and "__init__" not in n] else: - dirname = os.path.join(os.path.dirname(__file__), folder) - modnames = get_importable_modules(dirname) + dirname = os.path.join(os.path.dirname(__file__), packagename) + modnames = [x[:-3] for x in get_importable_files(dirname)] for modname in modnames: try: - name ="..%s.%s" % (folder, modname) + name ="..%s.%s" % (packagename, modname) yield importlib.import_module(name, __name__) except ImportError as msg: - print("ERROR: could not load module %s: %s" % (modname, msg)) + print("WARN: could not load module %s: %s" % (modname, msg)) -def get_importable_modules(folder): +def get_folder_modules(folder, parentpackage): + """.""" + if is_writable_by_others(folder): + print("ERROR: refuse to load modules from world writable folder %r" % folder) + return + for filename in get_importable_files(folder): + fullname = os.path.join(folder, filename) + modname = parentpackage+"."+filename[:-3] + try: + yield imp.load_source(modname, fullname) + except ImportError as msg: + print("WARN: could not load file %s: %s" % (fullname, msg)) + + +def get_importable_files(folder): """Find all module files in the given folder that end with '.py' and don't start with an underscore. 
@return module names @@ -55,22 +71,26 @@ def get_importable_modules(folder): """ for fname in os.listdir(folder): if fname.endswith('.py') and not fname.startswith('_'): - yield fname[:-3] + fullname = os.path.join(folder, fname) + if is_writable_by_others(fullname): + print("ERROR: refuse to load module from world writable file %r" % fullname) + else: + yield fname -def get_plugins(modules, classobj): - """Find all class objects in all modules. +def get_plugins(modules, classes): + """Find all given (sub-)classes in all modules. @param modules: the modules to search @ptype modules: iterator of modules @return: found classes @rytpe: iterator of class objects """ for module in modules: - for plugin in get_module_plugins(module, classobj): + for plugin in get_module_plugins(module, classes): yield plugin -def get_module_plugins(module, classobj): +def get_module_plugins(module, classes): """Return all subclasses of a class in the module. If the module defines __all__, only those entries will be searched, otherwise all objects not starting with '_' will be searched. 
@@ -85,7 +105,8 @@ def get_module_plugins(module, classobj): except AttributeError: continue try: - if issubclass(obj, classobj): - yield obj + for classobj in classes: + if issubclass(obj, classobj): + yield obj except TypeError: continue diff --git a/linkcheck/lock.py b/linkcheck/lock.py index 988faefe3..52921a9f1 100644 --- a/linkcheck/lock.py +++ b/linkcheck/lock.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2005-2012 Bastian Kleineidam +# Copyright (C) 2005-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/logger/__init__.py b/linkcheck/logger/__init__.py index 982e49de7..15e1e34e9 100644 --- a/linkcheck/logger/__init__.py +++ b/linkcheck/logger/__init__.py @@ -61,7 +61,6 @@ class LogStatistics (object): """Gather log statistics: - number of errors, warnings and valid links - type of contents (image, video, audio, text, ...) - - number of different domains - URL lengths """ @@ -83,8 +82,6 @@ def reset (self): self.warnings_printed = 0 # number of internal errors self.internal_errors = 0 - # the set of checked domains - self.domains = set() # link types self.link_types = ContentTypes.copy() # URL length statistics @@ -92,10 +89,6 @@ def reset (self): self.min_url_length = 0 self.avg_url_length = 0.0 self.avg_number = 0 - # download stats - self.downloaded_bytes = None - # cache stats - self.robots_txt_stats = None def log_url (self, url_data, do_print): """Log URL statistics.""" @@ -108,7 +101,6 @@ def log_url (self, url_data, do_print): self.warnings += num_warnings if do_print: self.warnings_printed += num_warnings - self.domains.add(url_data.domain) if url_data.content_type: key = url_data.content_type.split('/', 1)[0].lower() if key not in self.link_types: @@ -442,11 +434,6 @@ def log_internal_error (self): log.warn(LOG_CHECK, "internal error occurred") self.stats.log_internal_error() - def 
add_statistics(self, robots_txt_stats, download_stats): - """Add cache and download statistics.""" - self.stats.robots_txt_stats = robots_txt_stats - self.stats.downloaded_bytes = download_stats - def format_modified(self, modified, sep=" "): """Format modification date in UTC if it's not None. @param modified: modification date in UTC @@ -461,8 +448,8 @@ def format_modified(self, modified, sep=" "): def _get_loggers(): """Return list of Logger classes.""" from .. import loader - modules = loader.get_modules('logger') - return list(loader.get_plugins(modules, _Logger)) + modules = loader.get_package_modules('logger') + return list(loader.get_plugins(modules, [_Logger])) LoggerClasses = _get_loggers() diff --git a/linkcheck/logger/csvlog.py b/linkcheck/logger/csvlog.py index 39a58f51a..d0986b761 100644 --- a/linkcheck/logger/csvlog.py +++ b/linkcheck/logger/csvlog.py @@ -26,7 +26,7 @@ Columns = ( u"urlname", u"parentname", u"baseref", u"result", u"warningstring", u"infostring", u"valid", u"url", u"line", u"column", u"name", - u"dltime", u"dlsize", u"checktime", u"cached", u"level", u"modified", + u"dltime", u"size", u"checktime", u"cached", u"level", u"modified", ) @@ -117,7 +117,7 @@ def log_url (self, url_data): if self.has_part("dltime"): row.append(url_data.dltime) if self.has_part("dlsize"): - row.append(url_data.dlsize) + row.append(url_data.size) if self.has_part("checktime"): row.append(url_data.checktime) if self.has_part("cached"): diff --git a/linkcheck/logger/customxml.py b/linkcheck/logger/customxml.py index 41f512105..106027236 100644 --- a/linkcheck/logger/customxml.py +++ b/linkcheck/logger/customxml.py @@ -66,8 +66,8 @@ def log_url (self, url_data): self.xml_tag(u"extern", u"%d" % (1 if url_data.extern else 0)) if url_data.dltime >= 0 and self.has_part("dltime"): self.xml_tag(u"dltime", u"%f" % url_data.dltime) - if url_data.dlsize >= 0 and self.has_part("dlsize"): - self.xml_tag(u"dlsize", u"%d" % url_data.dlsize) + if url_data.size >= 0 and 
self.has_part("dlsize"): + self.xml_tag(u"dlsize", u"%d" % url_data.size) if url_data.checktime and self.has_part("checktime"): self.xml_tag(u"checktime", u"%f" % url_data.checktime) if self.has_part("level"): diff --git a/linkcheck/logger/dot.py b/linkcheck/logger/dot.py index fc39c9b4c..777846118 100644 --- a/linkcheck/logger/dot.py +++ b/linkcheck/logger/dot.py @@ -59,8 +59,8 @@ def log_url (self, url_data): self.writeln(u' href="%s",' % dotquote(node["url"])) if node["dltime"] >= 0 and self.has_part("dltime"): self.writeln(u" dltime=%d," % node["dltime"]) - if node["dlsize"] >= 0 and self.has_part("dlsize"): - self.writeln(u" dlsize=%d," % node["dlsize"]) + if node["size"] >= 0 and self.has_part("dlsize"): + self.writeln(u" size=%d," % node["size"]) if node["checktime"] and self.has_part("checktime"): self.writeln(u" checktime=%d," % node["checktime"]) if self.has_part("extern"): diff --git a/linkcheck/logger/gml.py b/linkcheck/logger/gml.py index 229141883..1b5c9e469 100644 --- a/linkcheck/logger/gml.py +++ b/linkcheck/logger/gml.py @@ -55,8 +55,8 @@ def log_url (self, url_data): self.writeln(u' url "%s"' % node["url"]) if node["dltime"] >= 0 and self.has_part("dltime"): self.writeln(u" dltime %d" % node["dltime"]) - if node["dlsize"] >= 0 and self.has_part("dlsize"): - self.writeln(u" dlsize %d" % node["dlsize"]) + if node["size"] >= 0 and self.has_part("dlsize"): + self.writeln(u" size %d" % node["size"]) if node["checktime"] and self.has_part("checktime"): self.writeln(u" checktime %d" % node["checktime"]) if self.has_part("extern"): diff --git a/linkcheck/logger/graph.py b/linkcheck/logger/graph.py index 388cd0360..f5aed8576 100644 --- a/linkcheck/logger/graph.py +++ b/linkcheck/logger/graph.py @@ -55,7 +55,7 @@ def get_node (self, url_data): "label": quote(url_data.title if url_data.title else url_data.name), "extern": 1 if url_data.extern else 0, "checktime": url_data.checktime, - "dlsize": url_data.dlsize, + "size": url_data.size, "dltime": 
url_data.dltime, "edge": quote(url_data.name), "valid": 1 if url_data.valid else 0, diff --git a/linkcheck/logger/gxml.py b/linkcheck/logger/gxml.py index f7efe87a6..04e7cb725 100644 --- a/linkcheck/logger/gxml.py +++ b/linkcheck/logger/gxml.py @@ -58,8 +58,8 @@ def log_url (self, url_data): self.xml_starttag(u"data") if node["dltime"] >= 0 and self.has_part("dltime"): self.xml_tag(u"dltime", u"%f" % node["dltime"]) - if node["dlsize"] >= 0 and self.has_part("dlsize"): - self.xml_tag(u"dlsize", u"%d" % node["dlsize"]) + if node["size"] >= 0 and self.has_part("dlsize"): + self.xml_tag(u"size", u"%d" % node["size"]) if node["checktime"] and self.has_part("checktime"): self.xml_tag(u"checktime", u"%f" % node["checktime"]) if self.has_part("extern"): diff --git a/linkcheck/logger/html.py b/linkcheck/logger/html.py index 4414792f9..93300218b 100644 --- a/linkcheck/logger/html.py +++ b/linkcheck/logger/html.py @@ -30,23 +30,31 @@ validate_css = "http://jigsaw.w3.org/css-validator/validator?" \ "uri=%(uri)s&warning=1&profile=css2&usermedium=all" -HTML_HEADER = """ +HTML_HEADER = """ - + %(title)s - + """ @@ -99,16 +107,20 @@ def start_output (self): "link": self.colorlink, "vlink": self.colorlink, "alink": self.colorlink, + "url": self.colorurl, + "error": self.colorerror, + "valid": self.colorok, + "warning": self.colorwarning, } self.write(HTML_HEADER % header) self.comment("Generated by %s" % configuration.App) if self.has_part('intro'): - self.write(u"

"+configuration.App+ - "


"+ - configuration.Freeware+"

"+ + self.write(u"

"+configuration.App+ + "


"+ + configuration.Freeware+"

"+ (_("Start checking at %s") % strformat.strtime(self.starttime))+ - os.linesep+"
") + os.linesep+"
") self.check_date() self.flush() @@ -127,8 +139,8 @@ def log_url (self, url_data): self.write_real(url_data) if url_data.dltime >= 0 and self.has_part("dltime"): self.write_dltime(url_data) - if url_data.dlsize >= 0 and self.has_part("dlsize"): - self.write_dlsize(url_data) + if url_data.size >= 0 and self.has_part("dlsize"): + self.write_size(url_data) if url_data.checktime and self.has_part("checktime"): self.write_checktime(url_data) if url_data.info and self.has_part("info"): @@ -144,20 +156,11 @@ def log_url (self, url_data): def write_table_start (self): """Start html table.""" - self.writeln(u'

') - self.writeln(u'' % self.colorborder) - self.writeln(u"") - self.writeln(u"
") - self.writeln(u'' % - (cgi.escape(_("checked link")), self.colorbackground)) + self.writeln(u'

') def write_table_end (self): """End html table.""" - self.write(u'

') + self.write(u'
') def write_id (self): """Write ID for current URL.""" @@ -168,9 +171,8 @@ def write_id (self): def write_url (self, url_data): """Write url_data.base_url.""" self.writeln(u"") - self.writeln(u'%s' % - (self.colorurl, self.part("url"))) - self.write(u'' % self.colorurl) + self.writeln(u'%s' % self.part("url")) + self.write(u'') self.write(u"`%s'" % cgi.escape(url_data.base_url)) self.writeln(u"") @@ -213,10 +215,10 @@ def write_dltime (self, url_data): (_("%.3f seconds") % url_data.dltime)+ u"") - def write_dlsize (self, url_data): - """Write url_data.dlsize.""" + def write_size (self, url_data): + """Write url_data.size.""" self.writeln(u""+self.part("dlsize")+u""+ - strformat.strsize(url_data.dlsize)+ + strformat.strsize(url_data.size)+ u"") def write_checktime (self, url_data): @@ -226,7 +228,7 @@ def write_checktime (self, url_data): def write_info (self, url_data): """Write url_data.info.""" - sep = u"
"+os.linesep + sep = u"
"+os.linesep text = sep.join(cgi.escape(x) for x in url_data.info) self.writeln(u'' + self.part("info")+ u""+text+u"") @@ -239,24 +241,23 @@ def write_modified(self, url_data): def write_warning (self, url_data): """Write url_data.warnings.""" - sep = u"
"+os.linesep + sep = u"
"+os.linesep text = sep.join(cgi.escape(x[1]) for x in url_data.warnings) - self.writeln(u'' + self.part("warning") + - u'' + - text + u"") + u'' + text + u"") def write_result (self, url_data): """Write url_data.result.""" if url_data.valid: - self.write(u'' % self.colorok) + self.write(u'') self.write(self.part("result")) - self.write(u'' % self.colorok) + self.write(u'') self.write(cgi.escape(_("Valid"))) else: - self.write(u'' % self.colorerror) + self.write(u'') self.write(self.part("result")) - self.write(u'' % self.colorerror) + self.write(u'') self.write(cgi.escape(_("Error"))) if url_data.result: self.write(u": "+cgi.escape(url_data.result)) @@ -264,27 +265,24 @@ def write_result (self, url_data): def write_stats (self): """Write check statistic infos.""" - self.writeln(u'
%s
' % _("Statistics")) - if len(self.stats.domains) > 1: - self.writeln(_("Number of domains: %d") % len(self.stats.domains)) - self.writeln(u"
") + self.writeln(u'
%s
' % _("Statistics")) if self.stats.number > 0: self.writeln(_( "Content types: %(image)d image, %(text)d text, %(video)d video, " "%(audio)d audio, %(application)d application, %(mail)d mail" " and %(other)d other.") % self.stats.link_types) - self.writeln(u"
") + self.writeln(u"
") self.writeln(_("URL lengths: min=%(min)d, max=%(max)d, avg=%(avg)d.") % dict(min=self.stats.min_url_length, max=self.stats.max_url_length, avg=self.stats.avg_url_length)) else: self.writeln(_("No statistics available since no URLs were checked.")) - self.writeln(u"
") + self.writeln(u"
") def write_outro (self): """Write end of check message.""" - self.writeln(u"
") + self.writeln(u"
") self.write(_("That's it.")+" ") if self.stats.number >= 0: self.write(_n("%d link checked.", "%d links checked.", @@ -302,25 +300,25 @@ def write_outro (self): self.write(_(" (%d duplicates not printed)") % (self.stats.errors - self.stats.errors_printed)) self.writeln(u".") - self.writeln(u"
") + self.writeln(u"
") num = self.stats.internal_errors if num: self.write(_n("There was %(num)d internal error.", "There were %(num)d internal errors.", num) % {"num": num}) - self.writeln(u"
") + self.writeln(u"
") self.stoptime = time.time() duration = self.stoptime - self.starttime self.writeln(_("Stopped checking at %(time)s (%(duration)s)") % {"time": strformat.strtime(self.stoptime), "duration": strformat.strduration_long(duration)}) - self.writeln(u'


'+ - configuration.HtmlAppInfo+u"
") + self.writeln(u'


'+ + configuration.HtmlAppInfo+u"
") self.writeln(_("Get the newest version at %s") % (u''+ - configuration.Url+u".
")) + configuration.Url+u".
")) self.writeln(_("Write comments and bugs to %s") % (u''+ - configuration.SupportUrl+u".
")) + configuration.SupportUrl+u".
")) self.writeln(_("Support this project at %s") % (u''+ configuration.DonateUrl+u".")) diff --git a/linkcheck/logger/sql.py b/linkcheck/logger/sql.py index 3072c1698..0a8c9c6a3 100644 --- a/linkcheck/logger/sql.py +++ b/linkcheck/logger/sql.py @@ -90,7 +90,7 @@ def log_url (self, url_data): """ self.writeln(u"insert into %(table)s(urlname," "parentname,baseref,valid,result,warning,info,url,line,col," - "name,checktime,dltime,dlsize,cached,level) values (" + "name,checktime,dltime,size,cached,level) values (" "%(base_url)s," "%(url_parent)s," "%(base_ref)s," @@ -104,7 +104,7 @@ def log_url (self, url_data): "%(name)s," "%(checktime)d," "%(dltime)d," - "%(dlsize)d," + "%(size)d," "%(cached)d," "%(level)d," "%(modified)s" @@ -123,7 +123,7 @@ def log_url (self, url_data): 'name': sqlify(url_data.name), 'checktime': url_data.checktime, 'dltime': url_data.dltime, - 'dlsize': url_data.dlsize, + 'size': url_data.size, 'cached': 0, 'separator': self.separator, "level": url_data.level, diff --git a/linkcheck/logger/text.py b/linkcheck/logger/text.py index 690deae44..c9a5f4a68 100644 --- a/linkcheck/logger/text.py +++ b/linkcheck/logger/text.py @@ -123,8 +123,8 @@ def log_url (self, url_data): self.write_checktime(url_data) if url_data.dltime >= 0 and self.has_part('dltime'): self.write_dltime(url_data) - if url_data.dlsize >= 0 and self.has_part('dlsize'): - self.write_dlsize(url_data) + if url_data.size >= 0 and self.has_part('dlsize'): + self.write_size(url_data) if url_data.info and self.has_part('info'): self.write_info(url_data) if url_data.modified and self.has_part('modified'): @@ -176,10 +176,10 @@ def write_dltime (self, url_data): self.writeln(_("%.3f seconds") % url_data.dltime, color=self.colordltime) - def write_dlsize (self, url_data): - """Write url_data.dlsize.""" + def write_size (self, url_data): + """Write url_data.size.""" self.write(self.part("dlsize") + self.spaces("dlsize")) - self.writeln(strformat.strsize(url_data.dlsize), + 
self.writeln(strformat.strsize(url_data.size), color=self.colordlsize) def write_checktime (self, url_data): @@ -260,12 +260,6 @@ def write_stats (self): """Write check statistic info.""" self.writeln() self.writeln(_("Statistics:")) - if self.stats.downloaded_bytes > 0: - self.writeln(_("Downloaded: %s") % strformat.strsize(self.stats.downloaded_bytes)) - hitsmisses = strformat.str_cache_stats(*self.stats.robots_txt_stats) - self.writeln(_("Robots.txt cache: %s") % hitsmisses) - if len(self.stats.domains) > 1: - self.writeln(_("Number of domains: %d") % len(self.stats.domains)) if self.stats.number > 0: self.writeln(_( "Content types: %(image)d image, %(text)d text, %(video)d video, " diff --git a/linkcheck/memoryutil.py b/linkcheck/memoryutil.py index 513b85765..5dab9656e 100644 --- a/linkcheck/memoryutil.py +++ b/linkcheck/memoryutil.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2012 Bastian Kleineidam +# Copyright (C) 2012-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/network/_network.c b/linkcheck/network/_network.c index ec169a4c2..94f3d8176 100644 --- a/linkcheck/network/_network.c +++ b/linkcheck/network/_network.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000-2010 Bastian Kleineidam +/* Copyright (C) 2000-2014 Bastian Kleineidam This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/linkcheck/network/iputil.py b/linkcheck/network/iputil.py index 3ab4c9e59..2d99462ae 100644 --- a/linkcheck/network/iputil.py +++ b/linkcheck/network/iputil.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2003-2012 Bastian Kleineidam +# Copyright (C) 2003-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as 
published by @@ -233,7 +233,7 @@ def hosts2map (hosts): elif is_valid_ip(host): hostset.add(expand_ip(host)[0]) else: - hostset |= resolve_host(host) + hostset |= set(resolve_host(host)) return (hostset, nets) @@ -264,29 +264,32 @@ def lookup_ips (ips): def resolve_host (host): """ @host: hostname or IP address - Return set of ip numbers for given host. + Return list of ip numbers for given host. """ - ips = set() + ips = [] try: for res in socket.getaddrinfo(host, None, 0, socket.SOCK_STREAM): # res is a tuple (address family, socket type, protocol, # canonical name, socket address) # add first ip of socket address - ips.add(res[4][0]) + ips.append(res[4][0]) except socket.error: log.info(LOG_CHECK, "Ignored invalid host %r", host) return ips -def obfuscate_ip (ip): +def obfuscate_ip(ip): """Obfuscate given host in IP form. @ip: IPv4 address string @return: hexadecimal IP string ('0x1ab...') @raise: ValueError on invalid IP addresses """ - if not is_valid_ipv4(ip): - raise ValueError('Invalid IPv4 value %r' % ip) - return "0x%s" % "".join(hex(int(x))[2:] for x in ip.split(".")) + if is_valid_ipv4(ip): + res = "0x%s" % "".join(hex(int(x))[2:] for x in ip.split(".")) + else: + raise ValueError('Invalid IP value %r' % ip) + assert is_obfuscated_ip(res), '%r obfuscation error' % res + return res is_obfuscated_ip = re.compile(r"^(0x[a-f0-9]+|[0-9]+)$").match diff --git a/linkcheck/parser/__init__.py b/linkcheck/parser/__init__.py new file mode 100644 index 000000000..6bf9566f4 --- /dev/null +++ b/linkcheck/parser/__init__.py @@ -0,0 +1,186 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2000-2014 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +Main functions for link parsing +""" +from ..checker import get_url_from +from .. import winutil, fileutil, log, LOG_CHECK, strformat +from ..htmlutil import linkparse +from ..HtmlParser import htmlsax +from ..bookmarks import firefox + +ContentMimetypes = { + "text/html": "html", + "application/xhtml+xml": "html", + # Include PHP file which helps when checking local .php files. + # It does not harm other URL schemes like HTTP since HTTP servers + # should not send this content type. They send text/html instead. + "application/x-httpd-php": "html", + "text/css": "css", + "application/x-shockwave-flash": "swf", + "application/msword": "word", + "text/plain+linkchecker": "text", + "text/plain+opera": "opera", + "text/plain+chromium": "chromium", + "application/x-plist+safari": "safari", + "text/vnd.wap.wml": "wml", + } + +def parse_url(url_data): + """Parse a URL.""" + if url_data.is_directory(): + # both ftp and file links present directories as HTML data + return parse_html(url_data) + if url_data.is_file() and firefox.has_sqlite and firefox.extension.search(url_data.url): + return parse_firefox(url_data) + # determine parse routine according to content types + mime = url_data.get_content_type() + key = ContentMimetypes[mime] + return globals()["parse_"+key](url_data) + + +def parse_html (url_data): + """Parse into HTML content and search for URLs to check. + Found URLs are added to the URL queue. 
+ """ + find_links(url_data, url_data.add_url) + + +def parse_opera (url_data): + """Parse an opera bookmark file.""" + from .bookmarks.opera import parse_bookmark_data + for url, name, lineno in parse_bookmark_data(url_data.get_content()): + url_data.add_url(url, line=lineno, name=name) + +def parse_chromium (url_data): + """Parse a Chromium or Google Chrome bookmark file.""" + from .bookmarks.chromium import parse_bookmark_data + for url, name in parse_bookmark_data(url_data.get_content()): + url_data.add_url(url, name=name) + +def parse_safari (url_data): + """Parse a Safari bookmark file.""" + from ..bookmarks.safari import parse_bookmark_data + for url, name in parse_bookmark_data(url_data.get_content()): + url_data.add_url(url, name=name) + +def parse_text (url_data): + """Parse a text file with one url per line; comment and blank + lines are ignored.""" + lineno = 0 + for line in url_data.get_content().splitlines(): + lineno += 1 + line = line.strip() + if not line or line.startswith('#'): + continue + url_data.add_url(line, line=lineno) + + +def parse_css (url_data): + """ + Parse a CSS file for url() patterns. 
+ """ + lineno = 0 + linkfinder = linkparse.css_url_re.finditer + strip_comments = linkparse.strip_c_comments + for line in strip_comments(url_data.get_content()).splitlines(): + lineno += 1 + for mo in linkfinder(line): + column = mo.start("url") + url = strformat.unquote(mo.group("url").strip()) + url_data.add_url(url, line=lineno, column=column) + +def parse_swf (url_data): + """Parse a SWF file for URLs.""" + linkfinder = linkparse.swf_url_re.finditer + for mo in linkfinder(url_data.get_content()): + url = mo.group() + url_data.add_url(url) + +def parse_word (url_data): + """Parse a word file for hyperlinks.""" + if not winutil.has_word(): + return + filename = get_temp_filename() + # open word file and parse hyperlinks + try: + app = winutil.get_word_app() + try: + doc = winutil.open_wordfile(app, filename) + if doc is None: + raise winutil.Error("could not open word file %r" % filename) + try: + for link in doc.Hyperlinks: + url_data.add_url(link.Address, name=link.TextToDisplay) + finally: + winutil.close_wordfile(doc) + finally: + winutil.close_word_app(app) + except winutil.Error as msg: + log.warn(LOG_CHECK, "Error parsing word file: %s", msg) + +def parse_wml (url_data): + """Parse into WML content and search for URLs to check. + Found URLs are added to the URL queue. + """ + find_links(url_data, url_data.add_url, tags=linkparse.WmlTags) + + +def get_temp_filename (content): + """Get temporary filename for content to parse.""" + # store content in temporary file + fd, filename = fileutil.get_temp_file(mode='wb', suffix='.doc', + prefix='lc_') + try: + fd.write(content) + finally: + fd.close() + return filename + + +def find_links (url_data, callback, tags=None): + """Parse into content and search for URLs to check. + Found URLs are added to the URL queue. 
+ """ + # construct parser object + handler = linkparse.LinkFinder(callback, tags=tags) + parser = htmlsax.parser(handler) + if url_data.charset: + parser.encoding = url_data.charset + handler.parser = parser + # parse + try: + parser.feed(url_data.get_content()) + parser.flush() + except linkparse.StopParse as msg: + log.debug(LOG_CHECK, "Stopped parsing: %s", msg) + pass + # break cyclic dependencies + handler.parser = None + parser.handler = None + + +def parse_firefox (url_data): + """Parse a Firefox3 bookmark file.""" + filename = url_data.get_os_filename() + for url, name in firefox.parse_bookmark_file(filename): + # XXX use add_url + url_data = get_url_from(url, url_data.recursion_level+1, + url_data.aggregate, parent_url=url_data.url, name=name) + url_data.aggregate.urlqueue.put(url_data) + + diff --git a/linkcheck/plugins/__init__.py b/linkcheck/plugins/__init__.py new file mode 100644 index 000000000..51868e7f2 --- /dev/null +++ b/linkcheck/plugins/__init__.py @@ -0,0 +1,103 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2014 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +Module for plugin management. +""" +from .. 
import loader, log, LOG_PLUGIN + + +class _PluginBase(object): + """Basic plugin class featuring plugin identification and + helper functions.""" + + def __init__(self, config): + """Add plugin-specific configuration.""" + pass + + def check(self, url_data): + """Common check method run for all plugins.""" + pass + + @classmethod + def read_config(cls, configparser): + """Read configuration file options.""" + pass + + +class _ConnectionPlugin(_PluginBase): + """Plugins run after connection checks.""" + pass + + +class _ContentPlugin(_PluginBase): + """Plugins run for valid URLs with content.""" + pass + + +def get_plugin_modules(folders, package='plugins', + parentpackage='linkcheck.dummy'): + """Get plugin modules for given folders.""" + for folder in folders: + for module in loader.get_folder_modules(folder, parentpackage): + yield module + for module in loader.get_package_modules(package): + yield module + + +def get_plugin_classes(modules): + """Get plugin classes for given modules.""" + classes = (_ConnectionPlugin, _ContentPlugin) + return loader.get_plugins(modules, classes) + + +class PluginManager(object): + """Manage all connection and content plugins.""" + + def __init__(self, config): + """Load enabled plugins.""" + self.connection_plugins = [] + self.content_plugins = [] + folders = config["pluginfolders"] + modules = get_plugin_modules(folders) + self.load_modules(modules, config) + + def load_modules(self, modules, config): + """Load plugin modules.""" + for pluginclass in get_plugin_classes(modules): + name = pluginclass.__name__ + if name in config["enabledplugins"]: + if issubclass(pluginclass, _ConnectionPlugin): + log.debug(LOG_PLUGIN, "Enable connection plugin %s", name) + self.connection_plugins.append(pluginclass(config[name])) + else: + log.debug(LOG_PLUGIN, "Enable content plugin %s", name) + self.content_plugins.append(pluginclass(config[name])) + + def run_connection_plugins(self, url_data): + """Run all connection plugins.""" + 
run_plugins(self.connection_plugins, url_data) + + def run_content_plugins(self, url_data): + """Run all content plugins.""" + run_plugins(self.content_plugins, url_data) + + +def run_plugins(plugins, url_data): + """Run the check(url_data) method of given plugins.""" + for plugin in plugins: + log.debug(LOG_PLUGIN, "Run plugin %s", plugin.__class__.__name__) + plugin.check(url_data) diff --git a/linkcheck/plugins/anchorcheck.py b/linkcheck/plugins/anchorcheck.py new file mode 100644 index 000000000..0e9ac39b6 --- /dev/null +++ b/linkcheck/plugins/anchorcheck.py @@ -0,0 +1,63 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2000-2014 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +Check page content for virus infection with clamav. +""" +from . import _ContentPlugin +from .. 
import log, LOG_PLUGIN, url as urlutil +from ..htmlutil import linkparse +from ..parser import find_links + + +class AnchorCheck(_ContentPlugin): + """Checks validity of HTML anchors.""" + + def check(self, url_data): + """Check content for invalid anchors.""" + if not url_data.is_html(): + # not an HTML page + return + if not url_data.anchor: + # no anchor + return + log.debug(LOG_PLUGIN, "checking content for invalid anchors") + # list of parsed anchors + self.anchors = [] + find_links(url_data, self.add_anchor, tags=linkparse.AnchorTags) + self.check_anchor(url_data) + + def add_anchor (self, url, line, column, name, base): + """Add anchor URL.""" + self.anchors.append((url, line, column, name, base)) + + def check_anchor(self, url_data): + """If URL is valid, parseable and has an anchor, check it. + A warning is logged and True is returned if the anchor is not found. + """ + log.debug(LOG_PLUGIN, "checking anchor %r in %s", url_data.anchor, self.anchors) + enc = lambda anchor: urlutil.url_quote_part(anchor, encoding=url_data.encoding) + if any(x for x in self.anchors if enc(x[0]) == url_data.anchor): + return + if self.anchors: + anchornames = sorted(set(u"`%s'" % x[0] for x in self.anchors)) + anchors = u", ".join(anchornames) + else: + anchors = u"-" + args = {"name": url_data.anchor, "anchors": anchors} + msg = u"%s %s" % (_("Anchor `%(name)s' not found.") % args, + _("Available anchors: %(anchors)s.") % args) + url_data.add_warning(msg) diff --git a/linkcheck/geoip.py b/linkcheck/plugins/locationinfo.py similarity index 74% rename from linkcheck/geoip.py rename to linkcheck/plugins/locationinfo.py index 13d36ac21..2e95fe959 100644 --- a/linkcheck/geoip.py +++ b/linkcheck/plugins/locationinfo.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2011 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as 
published by @@ -17,13 +17,28 @@ """ Store and retrieve country names for IPs. """ +from . import _ConnectionPlugin import os import sys import socket -from .lock import get_lock -from .decorators import synchronized -from .strformat import unicode_safe -from . import log, LOG_CHECK +from ..lock import get_lock +from ..decorators import synchronized +from ..strformat import unicode_safe +from .. import log, LOG_PLUGIN + +class LocationInfo(_ConnectionPlugin): + """Adds the country and if possible city name of the URL host as info. + Needs GeoIP or pygeoip and a local country or city lookup DB installed.""" + + def check(self, url_data): + """Try to ask GeoIP database for country info.""" + if not url_data.valid: + return + if url_data.host and geoip: + location = get_location(url_data.host) + if location: + url_data.add_info(_("URL is located in %(location)s.") % + {"location": _(location)}) # It is unknown if the geoip library is already thread-safe, so # no risks should be taken here by using a lock. @@ -64,8 +79,8 @@ def get_geoip_dat (): @synchronized(_lock) -def get_country (host): - """Get translated country name. +def get_location (host): + """Get translated country and optional city name. 
@return: country with optional city or an boolean False if not found """ @@ -75,7 +90,7 @@ def get_country (host): try: record = get_geoip_record(host) except (geoip_error, socket.error): - log.debug(LOG_CHECK, "Geoip error for %r", host, exception=True) + log.debug(LOG_PLUGIN, "Geoip error for %r", host, exception=True) # ignore lookup errors return None value = u"" diff --git a/linkcheck/plugins/regexcheck.py b/linkcheck/plugins/regexcheck.py new file mode 100644 index 000000000..d9f34618e --- /dev/null +++ b/linkcheck/plugins/regexcheck.py @@ -0,0 +1,76 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2000-2014 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +Check page content with regular expression. +""" +import re +from . import _ContentPlugin +from .. import log, LOG_PLUGIN + + +class RegexCheck(_ContentPlugin): + """Define a regular expression which prints a warning if it matches + any content of the checked link. This applies only to valid pages, + so we can get their content. + + Use this to check for pages that contain some form of error + message, for example 'This page has moved' or 'Oracle + Application error'. 
+ + Note that multiple values can be combined in the regular expression, + for example "(This page has moved|Oracle Application error)".""" + + def __init__(self, config): + """Set warning regex from config.""" + super(RegexCheck, self).__init__(config) + if config["warningregex"]: + self.warningregex = re.compile(config["warningregex"]) + else: + self.warningregex = None + + def check(self, url_data): + """Check content.""" + if not self.warningregex: + return + if url_data.extern[0]: + # only scan internal pages for warnings + return + if not url_data.is_parseable(): + return + log.debug(LOG_PLUGIN, "checking content for warning regex") + content = url_data.get_content() + # add warnings for found matches, up to the maximum allowed number + match = self.warningregex.search(content) + if match: + # calculate line number for match + line = content.count('\n', 0, match.start()) + # add a warning message + msg = _("Found %(match)r at line %(line)d in link contents.") + url_data.add_warning(msg % {"match": match.group(), "line": line}) + + @classmethod + def read_config(cls, configparser): + """Read configuration file options.""" + config = dict() + section = cls.__name__ + option = "warningregex" + if configparser.has_option(section, option): + value = configparser.get(section, option) + else: + value = None + config[option] = value + return config diff --git a/linkcheck/plugins/sslcertcheck.py b/linkcheck/plugins/sslcertcheck.py new file mode 100644 index 000000000..1669d9391 --- /dev/null +++ b/linkcheck/plugins/sslcertcheck.py @@ -0,0 +1,118 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2000-2014 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +Handle https links. +""" +import time +import threading +from . import _ContentPlugin +from .. import log, LOG_PLUGIN, strformat, LinkCheckerError +from ..decorators import synchronized + +_lock = threading.Lock() + +# configuration option names +sslcertwarndays = "sslcertwarndays" + +class SslCertificateCheck(_ContentPlugin): + """Check SSL certificate expiration date. Only internal https: links + will be checked. A domain will only be checked once to avoid duplicate + warnings. + The expiration warning time can be configured with the sslcertwarndays + option.""" + + def __init__(self, config): + """Initialize ssl certificate check configuration.""" + super(SslCertificateCheck, self).__init__(config) + self.warn_ssl_cert_secs_valid = config[sslcertwarndays] * strformat.SECONDS_PER_DAY + # do not check hosts multiple times + self.checked_hosts = set() + + @synchronized(_lock) + def check(self, url_data): + """Run all SSL certificate checks that have not yet been done. + OpenSSL already checked the SSL notBefore and notAfter dates. 
+ """ + if url_data.extern[0]: + # only check internal pages + return + if not url_data.valid: + return + if url_data.scheme != 'https': + return + host = url_data.urlparts[1] + if host in self.checked_hosts: + return + self.checked_hosts.add(host) + ssl_sock = url_data.url_connection.raw._connection.sock + cert = ssl_sock.getpeercert() + log.debug(LOG_PLUGIN, "Got SSL certificate %s", cert) + #if not cert: + # return + if 'notAfter' in cert: + self.check_ssl_valid_date(url_data, ssl_sock, cert) + else: + msg = _('certificate did not include "notAfter" information') + self.add_ssl_warning(url_data, ssl_sock, msg) + + def check_ssl_valid_date(self, url_data, ssl_sock, cert): + """Check if the certificate is still valid, or if configured check + if it's at least a number of days valid. + """ + import ssl + try: + notAfter = ssl.cert_time_to_seconds(cert['notAfter']) + except ValueError as msg: + msg = _('invalid certficate "notAfter" value %r') % cert['notAfter'] + self.add_ssl_warning(url_data, ssl_sock, msg) + return + curTime = time.time() + # Calculate seconds until certifcate expires. Can be negative if + # the certificate is already expired. + secondsValid = notAfter - curTime + if secondsValid < 0: + msg = _('certficate is expired on %s') % cert['notAfter'] + self.add_ssl_warning(url_data, ssl_sock, msg) + elif secondsValid < self.warn_ssl_cert_secs_valid: + strSecondsValid = strformat.strduration_long(secondsValid) + msg = _('certificate is only %s valid') % strSecondsValid + self.add_ssl_warning(url_data, ssl_sock, msg) + + def add_ssl_warning(self, url_data, ssl_sock, msg): + """Add a warning message about an SSL certificate error.""" + cipher_name, ssl_protocol, secret_bits = ssl_sock.cipher() + err = _(u"SSL warning: %(msg)s. 
Cipher %(cipher)s, %(protocol)s.") + attrs = dict(msg=msg, cipher=cipher_name, protocol=ssl_protocol) + url_data.add_warning(err % attrs) + + @classmethod + def read_config(cls, configparser): + """Read configuration file options.""" + config = dict() + section = cls.__name__ + option = sslcertwarndays + if configparser.has_option(section, option): + num = configparser.getint(section, option) + if num > 0: + config[option] = num + else: + msg = _("invalid value for %s: %d must not be less than %d") % (option, num, 0) + raise LinkCheckerError(msg) + else: + # set the default + config[option] = 30 + return config diff --git a/linkcheck/plugins/syntaxchecks.py b/linkcheck/plugins/syntaxchecks.py new file mode 100644 index 000000000..70a06c4b1 --- /dev/null +++ b/linkcheck/plugins/syntaxchecks.py @@ -0,0 +1,144 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2000-2014 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +import threading +import time +import requests +from xml.dom.minidom import parseString +from . import _ContentPlugin +from .. 
import log, LOG_PLUGIN +from ..decorators import synchronized + + +_w3_time_lock = threading.Lock() + + +class W3Timer(object): + """Ensure W3C apis are not hammered.""" + + # every X seconds + SleepSeconds = 2 + + def __init__(self): + """Remember last API call.""" + self.last_w3_call = 0 + + @synchronized(_w3_time_lock) + def check_w3_time (self): + """Make sure the W3C validators are at most called once a second.""" + if time.time() - self.last_w3_call < W3Timer.SleepSeconds: + time.sleep(W3Timer.SleepSeconds) + self.last_w3_call = time.time() + + +class HtmlSyntaxCheck(_ContentPlugin): + """Check the syntax of HTML pages with the online W3C HTML validator. + See http://validator.w3.org/docs/api.html. + """ + def __init__(self, config): + """Initialize plugin.""" + super(HtmlSyntaxCheck, self).__init__(config) + self.timer = W3Timer() + + def check(self, url_data): + """Check HTML syntax of given URL.""" + if url_data.extern[0]: + # only check internal pages + return + if not url_data.is_html(): + # only check HTML pages + return + self.timer.check_w3_time() + session = url_data.session + try: + body = {'uri': url_data.url, 'output': 'soap12'} + response = session.post('http://validator.w3.org/check', data=body) + response.raise_for_status() + if response.headers.get('x-w3c-validator-status', 'Invalid') == 'Valid': + url_data.add_info(u"W3C Validator: %s" % _("valid HTML syntax")) + return + check_w3_errors(url_data, response.text, "W3C HTML") + except requests.exceptions.RequestException: + pass # ignore service failures + except Exception as msg: + log.warn(LOG_PLUGIN, + _("HTML syntax check plugin error: %(msg)s ") % {"msg": msg}) + + +class CssSyntaxCheck(_ContentPlugin): + """Check the syntax of HTML pages with the online W3C CSS validator. + See http://jigsaw.w3.org/css-validator/manual.html#expert. 
+ """ + + def __init__(self, config): + """Initialize plugin.""" + super(CssSyntaxCheck, self).__init__(config) + self.timer = W3Timer() + + def check(self, url_data): + """Check CSS syntax of given URL.""" + if url_data.extern[0]: + # only check internal pages + return + if not url_data.is_css(): + # only check CSS pages + return + self.timer.check_w3_time() + session = url_data.session + try: + url = 'http://jigsaw.w3.org/css-validator/validator' + params = { + 'uri': url_data.url, + 'warning': '2', + 'output': 'soap12', + } + response = session.get(url, params=params) + response.raise_for_status() + if response.headers.get('X-W3C-Validator-Status', 'Invalid') == 'Valid': + url_data.add_info(u"W3C Validator: %s" % _("valid CSS syntax")) + return + check_w3_errors(url_data, response.text, "W3C HTML") + except requests.exceptions.RequestException: + pass # ignore service failures + except Exception as msg: + log.warn(LOG_PLUGIN, + _("CSS syntax check plugin error: %(msg)s ") % {"msg": msg}) + + +def check_w3_errors (url_data, xml, w3type): + """Add warnings for W3C HTML or CSS errors in xml format. + w3type is either "W3C HTML" or "W3C CSS".""" + dom = parseString(xml) + for error in dom.getElementsByTagName('m:error'): + warnmsg = _("%(w3type)s validation error at line %(line)s col %(column)s: %(msg)s") + attrs = { + "w3type": w3type, + "line": getXmlText(error, "m:line"), + "column": getXmlText(error, "m:col"), + "msg": getXmlText(error, "m:message"), + } + url_data.add_warning(warnmsg % attrs) + + +def getXmlText (parent, tag): + """Return XML content of given tag in parent element.""" + elem = parent.getElementsByTagName(tag)[0] + # Yes, the DOM standard is awful. 
+ rc = [] + for node in elem.childNodes: + if node.nodeType == node.TEXT_NODE: + rc.append(node.data) + return ''.join(rc) diff --git a/linkcheck/clamav.py b/linkcheck/plugins/viruscheck.py similarity index 76% rename from linkcheck/clamav.py rename to linkcheck/plugins/viruscheck.py index 04a51eb52..4b4e26692 100644 --- a/linkcheck/clamav.py +++ b/linkcheck/plugins/viruscheck.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -14,11 +14,57 @@ # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -import socket +""" +Check page content for virus infection with clamav. +""" import os -from . import log, LOG_ROOT -from .socketutil import create_socket +import socket +from . import _ContentPlugin +from .. import log, LOG_PLUGIN +from ..socketutil import create_socket + + +class VirusCheck(_ContentPlugin): + """Checks the page content for virus infections with clamav. 
+ A local clamav daemon must be installed.""" + + def __init__(self, config): + """Initialize clamav configuration.""" + super(VirusCheck, self).__init__(config) + # XXX read config + self.clamav_conf = get_clamav_conf(canonical_clamav_conf()) + + def check(self, url_data): + """Scan URL content for viruses with clamav.""" + if url_data.extern[0]: + # only scan internal pages for viruses + return + if not self.clamav_conf: + # No clamav available + return + data = url_data.get_content() + infected, errors = scan(data, self.clamav_conf) + if infected or errors: + for msg in infected: + url_data.add_warning(u"Virus scan infection: %s" % msg) + for msg in errors: + url_data.add_warning(u"Virus scan error: %s" % msg) + else: + url_data.add_info("No viruses in data found.") + + @classmethod + def read_config(cls, configparser): + """Read configuration file options.""" + config = dict() + section = cls.__name__ + option = "clamavconf" + if configparser.has_option(section, option): + value = configparser.get(section, option) + else: + value = None + config[option] = value + return config + class ClamavError (Exception): """Raised on clamav errors.""" @@ -91,22 +137,11 @@ def canonical_clamav_conf (): return clamavconf -_clamav_conf = None -def init_clamav_conf (conf): +def get_clamav_conf(filename): """Initialize clamav configuration.""" - if not conf: - # clamav was not configured - return - if os.path.isfile(conf): - global _clamav_conf - _clamav_conf = ClamavConfig(conf) - else: - log.warn(LOG_ROOT, "No ClamAV config file found at %r.", conf) - - -def get_clamav_conf (): - """Get the ClamavConfig instance.""" - return _clamav_conf + if os.path.isfile(filename): + return ClamavConfig(filename) + log.warn(LOG_PLUGIN, "No ClamAV config file found at %r.", filename) def get_sockinfo (host, port=None): @@ -181,12 +216,11 @@ def create_tcp_socket (self, host): return sock -def scan (data): +def scan (data, clamconf): """Scan data for viruses. 
@return (infection msgs, errors) @rtype ([], []) """ - clamconf = ClamavConfig(canonical_clamav_conf()) try: scanner = ClamdScanner(clamconf) except socket.error: diff --git a/linkcheck/robotparser2.py b/linkcheck/robotparser2.py index d39ce647a..7bd52192c 100644 --- a/linkcheck/robotparser2.py +++ b/linkcheck/robotparser2.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,12 +22,8 @@ """ import urlparse import urllib -import urllib2 import time -import socket -import sys -from . import httplib2 as httplib -from . import url as urlutil +import requests from . import log, LOG_CHECK, configuration __all__ = ["RobotFileParser"] @@ -79,71 +75,32 @@ def set_url (self, url): def read (self): """Read the robots.txt URL and feeds it to the parser.""" self._reset() - data = None headers = { 'User-Agent': configuration.UserAgent, 'Accept-Encoding': ACCEPT_ENCODING, } - req = urllib2.Request(self.url, data, headers) try: - self._read_content(req) - except urllib2.HTTPError, x: - if x.code in (401, 403): + response = requests.get(self.url, headers=headers) + response.raise_for_status() + content_type = response.headers.get('content-type') + if content_type and content_type.lower().startswith('text/plain'): + self.parse(response.iter_lines()) + else: + log.debug(LOG_CHECK, "%r allow all (no text content)", self.url) + self.allow_all = True + except requests.HTTPError, x: + if x.response.status_code in (401, 403): self.disallow_all = True - log.debug(LOG_CHECK, "%r disallow all (code %d)", - self.url, x.code) + log.debug(LOG_CHECK, "%r disallow all (code %d)", self.url, x.response.status_code) else: self.allow_all = True log.debug(LOG_CHECK, "%r allow all (HTTP error)", self.url) - except socket.timeout: + except requests.exceptions.Timeout: 
raise - except urllib2.URLError: - x = sys.exc_info()[1] - if isinstance(x.reason, socket.timeout): - raise - self.allow_all = True - log.debug(LOG_CHECK, "%r allow all (URL error)", self.url) - except (socket.gaierror, socket.error): - # no network + except requests.exceptions.RequestException: + # no network or other failure self.allow_all = True - log.debug(LOG_CHECK, "%r allow all (socket error)", self.url) - except IOError: - self.allow_all = True - log.debug(LOG_CHECK, "%r allow all (I/O error)", self.url) - except httplib.HTTPException: - self.allow_all = True - log.debug(LOG_CHECK, "%r allow all (HTTP exception)", self.url) - except ValueError: - # urllib2 could raise ValueError on invalid data - self.disallow_all = True - log.debug(LOG_CHECK, "%r disallow all (value error)", self.url) - - def _read_content (self, req): - """Read robots.txt content. - @raise: urllib2.HTTPError on HTTP failure codes - @raise: socket.gaierror, socket.error, urllib2.URLError on network - errors - @raise: httplib.HTTPException, IOError on HTTP errors - @raise: ValueError on bad digest auth (a bug) - """ - if log.is_debug(LOG_CHECK): - debuglevel = 1 - else: - debuglevel = 0 - f = urlutil.get_opener(user=self.user, password=self.password, - proxy=self.proxy, debuglevel=debuglevel) - res = None - try: - res = f.open(req) - ct = res.info().get("Content-Type") - if ct and ct.lower().startswith("text/plain"): - self.parse([line.strip() for line in res]) - else: - log.debug(LOG_CHECK, "%r allow all (no text content)", self.url) - self.allow_all = True - finally: - if res is not None: - res.close() + log.debug(LOG_CHECK, "%r allow all (request error)", self.url) def _add_entry (self, entry): """Add a parsed entry to entry list. 
@@ -163,18 +120,17 @@ def parse (self, lines): @return: None """ - log.debug(LOG_CHECK, "%r parse %d lines", self.url, len(lines)) + log.debug(LOG_CHECK, "%r parse lines", self.url) state = 0 linenumber = 0 entry = Entry() for line in lines: + line = line.strip() linenumber += 1 if not line: if state == 1: - log.debug(LOG_CHECK, - "%r line %d: allow or disallow directives without" \ - " any user-agent line", self.url, linenumber) + log.debug(LOG_CHECK, "%r line %d: allow or disallow directives without any user-agent line", self.url, linenumber) entry = Entry() state = 0 elif state == 2: @@ -194,49 +150,42 @@ def parse (self, lines): line[1] = urllib.unquote(line[1].strip()) if line[0] == "user-agent": if state == 2: - log.debug(LOG_CHECK, - "%r line %d: missing blank line before user-agent" \ - " directive", self.url, linenumber) + log.debug(LOG_CHECK, "%r line %d: missing blank line before user-agent directive", self.url, linenumber) self._add_entry(entry) entry = Entry() entry.useragents.append(line[1]) state = 1 elif line[0] == "disallow": if state == 0: - log.debug(LOG_CHECK, - "%r line %d: missing user-agent directive before" \ - " this line", self.url, linenumber) + log.debug(LOG_CHECK, "%r line %d: missing user-agent directive before this line", self.url, linenumber) + pass else: entry.rulelines.append(RuleLine(line[1], False)) state = 2 elif line[0] == "allow": if state == 0: - log.debug(LOG_CHECK, - "%r line %d: missing user-agent directive before" \ - " this line", self.url, linenumber) + log.debug(LOG_CHECK, "%r line %d: missing user-agent directive before this line", self.url, linenumber) + pass else: entry.rulelines.append(RuleLine(line[1], True)) state = 2 elif line[0] == "crawl-delay": if state == 0: - log.debug(LOG_CHECK, - "%r line %d: missing user-agent directive before" \ - " this line", self.url, linenumber) + log.debug(LOG_CHECK, "%r line %d: missing user-agent directive before this line", self.url, linenumber) + pass else: try: 
entry.crawldelay = max(0, int(line[1])) state = 2 except ValueError: - log.debug(LOG_CHECK, - "%r line %d: invalid delay number %r", - self.url, linenumber, line[1]) + log.debug(LOG_CHECK, "%r line %d: invalid delay number %r", self.url, linenumber, line[1]) pass else: - log.debug(LOG_CHECK, "%r line %d: unknown key %r", - self.url, linenumber, line[0]) + log.debug(LOG_CHECK, "%r line %d: unknown key %r", self.url, linenumber, line[0]) + pass else: - log.debug(LOG_CHECK, "%r line %d: malformed line %r", - self.url, linenumber, line) + log.debug(LOG_CHECK, "%r line %d: malformed line %r", self.url, linenumber, line) + pass if state in (1, 2): self.entries.append(entry) self.modified() @@ -248,8 +197,7 @@ def can_fetch (self, useragent, url): @return: True if agent can fetch url, else False @rtype: bool """ - log.debug(LOG_CHECK, "%r check allowance for:\n" \ - " user agent: %r\n url: %r ...", self.url, useragent, url) + log.debug(LOG_CHECK, "%r check allowance for:\n user agent: %r\n url: %r ...", self.url, useragent, url) if not isinstance(useragent, str): useragent = useragent.encode("ascii", "ignore") if not isinstance(url, str): diff --git a/linkcheck/socketutil.py b/linkcheck/socketutil.py index 24ccc4895..e82d48239 100644 --- a/linkcheck/socketutil.py +++ b/linkcheck/socketutil.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2008-2012 Bastian Kleineidam +# Copyright (C) 2008-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/strformat.py b/linkcheck/strformat.py index c42705f85..8ab9ae1cc 100644 --- a/linkcheck/strformat.py +++ b/linkcheck/strformat.py @@ -139,7 +139,8 @@ def wrap (text, width, **kwargs): return text ret = [] for para in get_paragraphs(text): - ret.extend(textwrap.wrap(para.strip(), width, **kwargs)) + text = " ".join(para.strip().split()) + ret.extend(textwrap.wrap(text, width, **kwargs)) 
return os.linesep.join(ret) @@ -319,20 +320,6 @@ def format_feature_warning (**kwargs): return _("Could not import %(module)s for %(feature)s. Install %(module)s from %(url)s to use this feature.") % kwargs -def str_cache_stats(hits, misses): - """Format hits and misses string for cache statistics. - @param hits: number of hits - @ptype hits: int - @param misses: number of cache misses - @ptype misses: int - @return: string with this and misses - @rtype: unicode - """ - strhits = _n("%d hit", "%d hits", hits) % hits - strmisses = _n("%d miss", "%d misses", misses) % misses - return u"%s, %s" % (strhits, strmisses) - - def strip_control_chars(text): """Remove console control characters from text.""" if text: diff --git a/linkcheck/threader.py b/linkcheck/threader.py index 0a4e14b21..ffd62e9ff 100644 --- a/linkcheck/threader.py +++ b/linkcheck/threader.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/trace.py b/linkcheck/trace.py index ba8f22a22..eac56f267 100644 --- a/linkcheck/trace.py +++ b/linkcheck/trace.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by diff --git a/linkcheck/updater.py b/linkcheck/updater.py index 4f8ff6450..d1c4dd2fe 100644 --- a/linkcheck/updater.py +++ b/linkcheck/updater.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2011-2013 Bastian Kleineidam +# Copyright (C) 2011-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -58,9 
+58,9 @@ def get_online_version (): """Download update info and parse it.""" # prevent getting a cached answer headers = {'Pragma': 'no-cache', 'Cache-Control': 'no-cache'} - info, content = get_content(UPDATE_URL, addheaders=headers) - if info is None: - return None, _('could not download update information') + content, info = get_content(UPDATE_URL, addheaders=headers) + if content is None: + return content, info version, url = None, None for line in content.splitlines(): if line.startswith(VERSION_TAG): diff --git a/linkcheck/url.py b/linkcheck/url.py index fe659df8a..23371a56d 100644 --- a/linkcheck/url.py +++ b/linkcheck/url.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2000-2012 Bastian Kleineidam +# Copyright (C) 2000-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -22,9 +22,7 @@ import os import urlparse import urllib -import urllib2 -import socket -from . import httplib2 as httplib +import requests from . import log, LOG_CHECK for scheme in ('ldap', 'irc'): @@ -504,92 +502,37 @@ def splitport (host, port=0): return host, port -class PasswordManager (object): - """Simple password manager storing username and password. Suitable - for use as an AuthHandler instance in urllib2.""" - - def __init__ (self, user, password): - """Store given username and password.""" - self.user = user - self.password = password - - def add_password (self, realm, uri, user, passwd): - """Does nothing since username and password are already stored. - - @return: None - """ - pass - - def find_user_password (self, realm, authuri): - """Get stored username and password. - - @return: A tuple (user, password) - @rtype: tuple - """ - return self.user, self.password - - -def get_opener (user=None, password=None, proxy=None, debuglevel=0): - """Construct an URL opener object. It considers the given credentials - and proxy. 
- - @return: URL opener - @rtype: urllib2.OpenerDirector - """ - from . import httputil - pwd_manager = PasswordManager(user, password) - handlers = [ - urllib2.UnknownHandler, - httputil.HttpWithGzipHandler(debuglevel=debuglevel), - urllib2.HTTPBasicAuthHandler(pwd_manager), - urllib2.HTTPDigestAuthHandler(pwd_manager), - ] - if proxy: - handlers.insert(0, - urllib2.ProxyHandler({"http": proxy, "https": proxy})) - handlers.extend([ - urllib2.ProxyBasicAuthHandler(pwd_manager), - urllib2.ProxyDigestAuthHandler(pwd_manager), - ]) - if hasattr(httplib, 'HTTPS'): - handlers.append(httputil.HttpsWithGzipHandler(debuglevel=debuglevel)) - return urllib2.build_opener(*handlers) - - -def get_content (url, user=None, password=None, proxy=None, data=None, - addheaders=None): +def get_content(url, user=None, password=None, proxy=None, data=None, + addheaders=None): """Get URL content and info. - @return: (url info, content), or (None, None) on error. - @rtype: tuple (string, string) + @return: (decoded text content of URL, headers) or + (None, errmsg) on error. + @rtype: tuple (String, dict) or (None, String) """ from . 
import configuration - if log.is_debug(LOG_CHECK): - debuglevel = 1 - else: - debuglevel = 0 headers = { 'User-Agent': configuration.UserAgent, } if addheaders: headers.update(addheaders) - req = urllib2.Request(url, data, headers) + method = 'GET' + kwargs = dict(headers=headers) + if user and password: + kwargs['auth'] = (user, password) + if data: + kwargs['data'] = data + method = 'POST' + if proxy: + kwargs['proxy'] = dict(http=proxy) try: - f = get_opener(user=user, password=password, proxy=proxy, - debuglevel=debuglevel) - res = None - try: - res = f.open(req) - return (res.info(), res.read()) - finally: - if res is not None: - res.close() - except (urllib2.HTTPError, socket.timeout, urllib2.URLError, - socket.gaierror, socket.error, IOError, httplib.HTTPException, - ValueError), msg: + response = requests.request(method, url, **kwargs) + return response.text, response.headers + except (requests.exceptions.RequestException, + requests.exceptions.BaseHTTPError) as msg: log.warn(LOG_CHECK, ("Could not get content of URL %(url)s: %(msg)s.") \ % {"url": url, "msg": str(msg)}) - return (None, None) + return None, str(msg) def shorten_duplicate_content_url(url): diff --git a/linkchecker b/linkchecker index 595e6788a..a00dbc069 100755 --- a/linkchecker +++ b/linkchecker @@ -22,7 +22,6 @@ client. Run this file with the -h option to see how it's done. 
import sys import codecs -import re import os import pprint import argparse @@ -32,7 +31,8 @@ import linkcheck # override argparse gettext method with the one from linkcheck.init_i18n() #argparse._ = _ # now import the rest of the linkchecker gang -from linkcheck.cmdline import print_version, print_usage, aggregate_url, LCArgumentParser +from linkcheck.cmdline import print_version, print_usage, aggregate_url, \ + LCArgumentParser, print_plugins from linkcheck import log, LOG_CMDLINE, i18n, strformat import linkcheck.checker import linkcheck.configuration @@ -163,7 +163,7 @@ You can skip the "ftp://" url part if the domain starts with "ftp.": LoggerTypes = _(r"""OUTPUT TYPES Note that by default only errors and warnings are logged. You should use the --verbose option to see valid URLs, -and --complete when outputting a sitemap graph format. +and when outputting a sitemap graph format. text Standard text output, logging URLs in keyword: argument fashion. html Log URLs in keyword: argument fashion, formatted as HTML. @@ -214,17 +214,6 @@ def viewprof(): sys.exit(0) -def try_compile_re (arg): - """Try to compile the regular expression. On error print an error - message and exit.""" - try: - return re.compile(arg) - except re.error as msg: - log.error(LOG_CMDLINE, - _("Syntax error in %(arg)r: %(msg)s") % {"arg": arg, "msg": msg}) - sys.exit(1) - - def has_encoding (encoding): """Detect if Python can encode in a certain encoding.""" try: @@ -257,21 +246,24 @@ group.add_argument("-t", "--threads", type=int, metavar="NUMBER", of threads is 10. 
To disable threading specify a non-positive number.""")) group.add_argument("-V", "--version", action="store_true", help=_("""Print version and exit.""")) +group.add_argument("--list-plugins", action="store_true", dest="listplugins", + help=_( +"""Print available check plugins and exit.""")) group.add_argument("--stdin", action="store_true", help=_( """Read list of white-space separated URLs to check from stdin.""")) ################# output options ################## group = argparser.add_argument_group(_("Output options")) +# XXX deprecated: --check-css moved to plugin CssSyntaxCheck group.add_argument("--check-css", action="store_true", dest="checkcss", - help=_( -"""Check syntax of CSS URLs with the W3C online validator.""")) + help=argparse.SUPPRESS) +# XXX deprecated: --check-html moved to plugin HtmlSyntaxCheck group.add_argument("--check-html", action="store_true", dest="checkhtml", - help=_( -"""Check syntax of HTML URLs with the W3C online validator.""")) + help=argparse.SUPPRESS) +# XXX deprecated: --complete is removed group.add_argument("--complete", action="store_true", dest="complete", - help=_("""Log all URLs, including duplicates. -Default is to log duplicate URLs only once.""")) + help=argparse.SUPPRESS) group.add_argument("-D", "--debug", action="append", metavar="STRING", help=_("""Print debugging output for the given logger. Available loggers are %(lognamelist)s. @@ -315,52 +307,40 @@ group.add_argument("-q", "--quiet", action="store_true", dest="quiet", help=_( """Quiet operation, an alias for '-o none'. 
This is only useful with -F.""")) +# XXX deprecated: moved to plugin VirusCheck group.add_argument("--scan-virus", action="store_true", dest="scanvirus", - help=_( -"""Scan content of URLs with ClamAV virus scanner.""")) + help=argparse.SUPPRESS) group.add_argument("--trace", action="store_true", dest="trace", - help=_("""Print tracing information.""")) + help=argparse.SUPPRESS) group.add_argument("-v", "--verbose", action="store_true", dest="verbose", help=_( """Log all URLs. Default is to log only errors and warnings.""")) group.add_argument("--viewprof", action="store_true", dest="viewprof", help=argparse.SUPPRESS) +# XXX deprecated: moved to plugin RegexCheck group.add_argument("-W", "--warning-regex", dest="warningregex", metavar="REGEX", - help=_( -"""Define a regular expression which prints a warning if it matches -any content of the checked link. This applies only to valid pages, -so we can get their content. - -Use this to check for pages that contain some form of error -message, for example 'This page has moved' or 'Oracle -Application error'. - -Note that multiple values can be combined in the regular expression, -for example "(This page has moved|Oracle Application error)".""")) + help=argparse.SUPPRESS) +# XXX deprecated: removed group.add_argument("--warning-size-bytes", dest="warningsizebytes", metavar="NUMBER", - help=_( -"""Print a warning if content size info is available and exceeds the -given number of bytes.""")) + help=argparse.SUPPRESS) ################# checking options ################## group = argparser.add_argument_group(_("Checking options")) +# XXX deprecated: moved to plugin AnchorCheck group.add_argument("-a", "--anchors", action="store_true", dest="anchors", - help=_( -"""Check HTTP anchor references. Default is not to check anchors. 
-This option enables logging of the warning 'url-anchor-not-found'.""")) + help=argparse.SUPPRESS) +# XXX deprecated: replaced with requests session cookie handling group.add_argument("-C", "--cookies", action="store_true", dest="cookies", - help=_( -"""Accept and send HTTP cookies according to RFC 2109. Only cookies -which are sent back to the originating server are accepted. -Sent and accepted cookies are provided as additional logging -information.""")) + help=argparse.SUPPRESS) group.add_argument("--cookiefile", dest="cookiefile", metavar="FILENAME", help=_( """Read a file with initial cookie data. The cookie data format is explained below.""")) +group.add_argument("--check-extern", action="store_true", + dest="checkextern", help=_("""Check external URLs.""")) group.add_argument("--ignore-url", action="append", metavar="REGEX", dest="externstrict", help=_( """Only check syntax of URLs matching the given regular expression. @@ -380,11 +360,10 @@ group.add_argument("-p", "--password", action="store_false", dest="password", """Read a password from console and use it for HTTP and FTP authorization. For FTP the default password is 'anonymous@'. For HTTP there is no default password. See also -u.""")) +# XXX deprecated: replaced with numrequestsperpage group.add_argument("-P", "--pause", type=int, dest="pause", metavar="NUMBER", - help=_( -"""Pause the given number of seconds between two subsequent connection -requests to the same host. 
Default is no pause between requests.""")) + help=argparse.SUPPRESS) group.add_argument("-r", "--recursion-level", type=int, dest="recursionlevel", metavar="NUMBER", help=_( @@ -463,8 +442,6 @@ constructauth = False do_profile = False if options.warnings: config["warnings"] = options.warnings -if options.anchors: - config["anchors"] = options.anchors if options.externstrict: pats = [linkcheck.get_link_pat(arg, strict=True) \ for arg in options.externstrict] @@ -472,6 +449,8 @@ if options.externstrict: if options.extern: pats = [linkcheck.get_link_pat(arg) for arg in options.extern] config["externlinks"].extend(pats) +if options.checkextern: + config["checkextern"] = True if options.output: if "/" in options.output: logtype, encoding = options.output.split("/", 1) @@ -536,12 +515,6 @@ if options.password: msg = _("Enter LinkChecker HTTP/FTP password:") _password = getpass.getpass(console.encode(msg)) constructauth = True -if options.pause is not None: - if options.pause >= 0: - config["wait"] = options.pause - else: - print_usage(_("Illegal argument %(arg)r for option %(option)s") % \ - {"arg": options.pause, "option": "'-P, --pause'"}) if options.profile: do_profile = options.profile if options.quiet: @@ -562,33 +535,18 @@ if options.timeout is not None: {"arg": options.timeout, "option": "'--timeout'"}) if options.version: print_version() +if options.listplugins: + print_plugins(config["pluginfolders"]) if options.verbose: if options.verbose: config["verbose"] = True config["warnings"] = True -if options.complete: - if options.complete: - config["complete"] = True - config["verbose"] = True - config["warnings"] = True if options.viewprof: viewprof() -if options.warningregex is not None: - config["warningregex"] = try_compile_re(options.warningregex) - config["warnings"] = True -if options.warningsizebytes is not None: - config["warnsizebytes"] = options.warningsizebytes -if options.cookies: - config['storecookies'] = config['sendcookies'] = options.cookies if 
options.cookiefile is not None: config['cookiefile'] = options.cookiefile - config['storecookies'] = config['sendcookies'] = True if constructauth: config.add_auth(pattern=".+", user=_username, password=_password) -# boolean options -for option in ("checkhtml", "checkcss", "scanvirus"): - if getattr(options, option): - config[option] = getattr(options, option) # read missing passwords for entry in config["authentication"]: if entry["password"] is None: @@ -599,6 +557,12 @@ for entry in config["authentication"]: entry["password"] = getpass.getpass(console.encode(msg)) if options.useragent is not None: config["useragent"] = options.useragent +if options.cookiefile is not None: + if linkcheck.fileutil.is_readable(options.cookiefile): + config["cookiefile"] = options.cookiefile + else: + msg = _("Could not read cookie file %s") % options.cookiefile + log.error(LOG_CMDLINE, msg) # now sanitize the configuration config.sanitize() @@ -607,18 +571,10 @@ log.debug(LOG_CMDLINE, "configuration: %s", # prepare checking queue aggregate = get_aggregate(config) -if options.cookiefile is not None: - try: - cookies = linkcheck.cookies.from_file(options.cookiefile) - for headers, scheme, host, path in cookies: - aggregate.cookies.add(headers, scheme, host, path) - config["sendcookies"] = True - except StandardError: - log.error(LOG_CMDLINE, - _("Could not parse cookie file: %s"), sys.exc_info()[1]) - sys.exit(1) if options.trace: + # enable thread tracing config["trace"] = True + # start trace in mainthread import linkcheck.trace linkcheck.trace.trace_filter([r"^linkcheck"]) linkcheck.trace.trace_on() diff --git a/linkchecker-gui b/linkchecker-gui index ac0eabc6a..a0eabebf5 100755 --- a/linkchecker-gui +++ b/linkchecker-gui @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: iso-8859-1 -*- -# Copyright (C) 2008-2012 Bastian Kleineidam +# Copyright (C) 2008-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU 
General Public License as published by diff --git a/linkchecker-nagios b/linkchecker-nagios index d93faac8f..de378b90d 100755 --- a/linkchecker-nagios +++ b/linkchecker-nagios @@ -137,8 +137,6 @@ if args.verbose < 0: else: config["verbose"] = True config["warnings"] = True - if args.verbose >= 2: - config["complete"] = True # check missing passwords for entry in config["authentication"]: diff --git a/po/de.po b/po/de.po index 703e1c743..55eab0928 100644 --- a/po/de.po +++ b/po/de.po @@ -5,7 +5,7 @@ msgid "" msgstr "" "Project-Id-Version: $Id$\n" "Report-Msgid-Bugs-To: bastian.kleineidam@web.de\n" -"POT-Creation-Date: 2013-02-27 10:40+0100\n" +"POT-Creation-Date: 2014-02-25 16:10+0100\n" "PO-Revision-Date: 2012-11-13 18:13+0100\n" "Last-Translator: Bastian Kleineidam \n" "Language-Team: de \n" @@ -15,11 +15,11 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=2; plural=(n != 1);\n" -#: ../linkcheck/director/aggregator.py:76 +#: ../linkcheck/director/aggregator.py:86 msgid "These URLs are still active:" msgstr "Folgende URLs sind noch aktiv:" -#: ../linkcheck/director/aggregator.py:83 +#: ../linkcheck/director/aggregator.py:93 #, python-format msgid "" "%(num)d URLs are still active. 
After a timeout of %(timeout)s the active " @@ -101,7 +101,7 @@ msgstr "Standard Locale:" msgid "System info:" msgstr "Systeminformation:" -#: ../linkcheck/director/console.py:145 ../linkchecker:441 +#: ../linkcheck/director/console.py:145 ../linkchecker:408 #, python-format msgid "Python %(version)s on %(platform)s" msgstr "Python %(version)s auf %(platform)s" @@ -149,10 +149,6 @@ msgstr "ein weiter Abbruch beendet dieses Programm sofort" msgid "user abort; force shutdown" msgstr "Benutzerabbruch; erzwinge Programmende" -#: ../linkcheck/updater.py:63 -msgid "could not download update information" -msgstr "konnte Update-Informationen nicht herunterladen" - #: ../linkcheck/configuration/confparse.py:62 #, python-format msgid "Error parsing configuration: %s" @@ -173,22 +169,22 @@ msgstr "ungültiger Wert für %s: %d darf nicht kleiner als %d sein" msgid "invalid value for %s: %d must not be greater than %d" msgstr "ungültiger Wert für %s: %d darf nicht größer als %d sein" -#: ../linkcheck/configuration/confparse.py:184 +#: ../linkcheck/configuration/confparse.py:185 #, python-format msgid "missing auth part in entry %(val)r" msgstr "fehlende Authentifizierung in entry %(val)r" -#: ../linkcheck/configuration/confparse.py:190 +#: ../linkcheck/configuration/confparse.py:191 #, python-format msgid "invalid login URL `%s'. Only HTTP and HTTPS URLs are supported." msgstr "ungültige Login URL `%s'. Nur HTTP und HTTPS URLs sind unterstützt." -#: ../linkcheck/configuration/confparse.py:216 +#: ../linkcheck/configuration/confparse.py:217 #, python-format msgid "For example execute 'chmod go-rw %s'." msgstr "Führen Sie zum Beispiel 'chmod go-rw %s' aus." -#: ../linkcheck/configuration/confparse.py:218 +#: ../linkcheck/configuration/confparse.py:219 msgid "" "See http://support.microsoft.com/kb/308419 for more info on setting file " "permissions." 
@@ -196,63 +192,64 @@ msgstr "" "Siehe http://support.microsoft.com/kb/308419 für mehr Informationen über das " "Setzen von Dateiberechtigungen." -#: ../linkcheck/configuration/__init__.py:321 +#: ../linkcheck/configuration/__init__.py:264 #, python-format msgid "Configuration file %r does not exist." msgstr "Konfigurationsdatei %r existiert nicht." -#: ../linkcheck/configuration/__init__.py:323 +#: ../linkcheck/configuration/__init__.py:266 #, python-format msgid "Configuration file %r is not readable." msgstr "Konfigurationsdatei %r ist nicht lesbar." -#: ../linkcheck/configuration/__init__.py:334 +#: ../linkcheck/configuration/__init__.py:277 msgid "missing user or URL pattern in authentication data." msgstr "" "Fehlender Benutzer oder regulärer URL Ausdruck in Authentifizierungsdaten." -#: ../linkcheck/configuration/__init__.py:382 +#: ../linkcheck/configuration/__init__.py:319 msgid "activating text logger output." msgstr "aktiviere Loggerausgabe text." -#: ../linkcheck/configuration/__init__.py:392 -msgid "Clamav could not be initialized" -msgstr "Clamav konnte nicht initialisiert werden" - -#: ../linkcheck/configuration/__init__.py:398 +#: ../linkcheck/configuration/__init__.py:326 msgid "activating sendcookies." msgstr "aktiviere Option sendcookies." -#: ../linkcheck/configuration/__init__.py:401 +#: ../linkcheck/configuration/__init__.py:329 msgid "activating storecookies." msgstr "aktiviere Option storecookies." -#: ../linkcheck/configuration/__init__.py:410 +#: ../linkcheck/configuration/__init__.py:338 msgid "no CGI password fieldname given for login URL." msgstr " kein CGI Passwort Feldname für Login URL angegeben." -#: ../linkcheck/configuration/__init__.py:414 +#: ../linkcheck/configuration/__init__.py:342 msgid "no CGI user fieldname given for login URL." msgstr "kein CGI Benutzer Feldname für Login URL angegeben." 
-#: ../linkcheck/configuration/__init__.py:418 +#: ../linkcheck/configuration/__init__.py:346 msgid "no user/password authentication data found for login URL." msgstr "keine Benutzer/Passwort-Authentifizierung für Login URL gefunden." -#: ../linkcheck/configuration/__init__.py:421 +#: ../linkcheck/configuration/__init__.py:349 msgid "login URL is not a HTTP URL." msgstr "Login URL ist keine HTTP URL." -#: ../linkcheck/configuration/__init__.py:425 +#: ../linkcheck/configuration/__init__.py:353 msgid "login URL is incomplete." msgstr "Login URL ist unvollständig." -#: ../linkcheck/configuration/__init__.py:429 +#: ../linkcheck/configuration/__init__.py:357 #, python-format msgid "disabling login URL %(url)s." msgstr "deaktiviere Login URL %(url)s." -#: ../linkcheck/configuration/__init__.py:476 +#: ../linkcheck/configuration/__init__.py:391 +#, fuzzy, python-format +msgid "could not create plugin directory %(dirname)r: %(errmsg)r" +msgstr "Konnte Projekt %(filename)s nicht laden: %(err)s" + +#: ../linkcheck/configuration/__init__.py:429 #, python-format msgid "" "could not copy initial configuration file %(src)r to %(dst)r: %(errmsg)r" @@ -260,52 +257,132 @@ msgstr "" "Konnte initiale Konfigurationsdatei %(src)r nicht nach %(dst)r kopieren: " "%(errmsg)r" -#: ../linkcheck/logger/html.py:95 ../linkcheck/logger/text.py:89 +#: ../linkcheck/plugins/regexcheck.py:61 +#, python-format +msgid "Found %(match)r at line %(line)d in link contents." +msgstr "Habe %(match)r in Zeile %(line)d im Inhalt der Verknüpfung gefunden." + +#: ../linkcheck/plugins/countryinfo.py:38 +#, python-format +msgid "URL is located in %(country)s." +msgstr "URL befindet sich in %(country)s." 
+ +#: ../linkcheck/plugins/sslcertcheck.py:62 +msgid "certificate did not include \"notAfter\" information" +msgstr "Zertifikat besitzt keine \"notAfter\"-Information" + +#: ../linkcheck/plugins/sslcertcheck.py:73 +#, python-format +msgid "invalid certficate \"notAfter\" value %r" +msgstr "ungültiger \"notAfter\" Zertifikatwert %r" + +#: ../linkcheck/plugins/sslcertcheck.py:81 +#, python-format +msgid "certficate is expired on %s" +msgstr "Zertifikat ist am %s abgelaufen" + +#: ../linkcheck/plugins/sslcertcheck.py:85 +#, python-format +msgid "certificate is only %s valid" +msgstr "Zertifikat ist nur noch %s gültig" + +#: ../linkcheck/plugins/sslcertcheck.py:91 +#, python-format +msgid "SSL warning: %(msg)s. Cipher %(cipher)s, %(protocol)s." +msgstr "" +"SSL Warnung: %(msg)s. Verschlüsselungsverfahren %(cipher)s, %(protocol)s." + +#: ../linkcheck/plugins/anchorcheck.py:65 +#, python-format +msgid "Anchor `%(name)s' not found." +msgstr "Anker `%(name)s' nicht gefunden." + +#: ../linkcheck/plugins/anchorcheck.py:66 +#, python-format +msgid "Available anchors: %(anchors)s." +msgstr "Verfügbare Anker: %(anchors)s." + +#: ../linkcheck/plugins/viruscheck.py:97 +msgid "clamd is not ready for stream scanning" +msgstr "clamd ist nicht bereit, einen Stream zu prüfen" + +#: ../linkcheck/plugins/viruscheck.py:156 +msgid "ScannerDaemonOutputFormat must be disabled" +msgstr "ScannerDaemonOutputFormat muss deaktiviert sein" + +#: ../linkcheck/plugins/viruscheck.py:158 +msgid "only one of TCPSocket and LocalSocket must be enabled" +msgstr "nur einer von TCPSocket oder LocalSocket muss aktiviert sein" + +#: ../linkcheck/plugins/viruscheck.py:187 +msgid "one of TCPSocket or LocalSocket must be enabled" +msgstr "einer von TCPSocket oder LocalSocket muss aktiviert sein" + +#: ../linkcheck/plugins/viruscheck.py:222 +msgid "Could not connect to ClamAV daemon." +msgstr "Konnte nicht zu ClamAV verbinden." 
+ +#: ../linkcheck/plugins/syntaxchecks.py:66 +msgid "valid HTML syntax" +msgstr "gültige HTML Syntax" + +#: ../linkcheck/plugins/syntaxchecks.py:74 +#, python-format +msgid "HTML W3C validation caused error: %(msg)s " +msgstr "HTML W3C Validierung verursachte Fehler: %(msg)s" + +#: ../linkcheck/plugins/syntaxchecks.py:113 +msgid "valid CSS syntax" +msgstr "gültige CSS Syntax" + +#: ../linkcheck/plugins/syntaxchecks.py:121 +#, python-format +msgid "CSS W3C validation caused error: %(msg)s " +msgstr "CSS W3C Validierung verursachte Fehler: %(msg)s" + +#: ../linkcheck/plugins/syntaxchecks.py:130 +#, python-format +msgid "%(w3type)s validation error at line %(line)s col %(column)s: %(msg)s" +msgstr "" +"%(w3type)s Validierungsfehler in Zeile %(line)s Spalte %(column)s: %(msg)s" + +#: ../linkcheck/logger/html.py:121 ../linkcheck/logger/text.py:106 #, python-format msgid "Start checking at %s" msgstr "Beginne Prüfen am %s" -#: ../linkcheck/logger/html.py:142 -msgid "checked link" -msgstr "geprüfte Verknüpfung" - -#: ../linkcheck/logger/html.py:174 ../linkcheck/logger/text.py:142 +#: ../linkcheck/logger/html.py:190 ../linkcheck/logger/text.py:159 #: ../linkcheck/gui/urlmodel.py:78 #, python-format msgid ", line %d" msgstr ", Zeile %d" -#: ../linkcheck/logger/html.py:175 ../linkcheck/logger/text.py:143 +#: ../linkcheck/logger/html.py:191 ../linkcheck/logger/text.py:160 #: ../linkcheck/gui/urlmodel.py:79 #, python-format msgid ", col %d" msgstr ", Spalte %d" -#: ../linkcheck/logger/html.py:199 ../linkcheck/logger/html.py:211 -#: ../linkcheck/logger/text.py:159 ../linkcheck/logger/text.py:171 +#: ../linkcheck/logger/html.py:215 ../linkcheck/logger/html.py:227 +#: ../linkcheck/logger/text.py:176 ../linkcheck/logger/text.py:188 #: ../linkcheck/gui/properties.py:36 ../linkcheck/gui/properties.py:38 #, python-format msgid "%.3f seconds" msgstr "%.3f Sekunden" -#: ../linkcheck/logger/html.py:241 ../linkcheck/logger/text.py:195 +#: ../linkcheck/logger/html.py:256 
../linkcheck/logger/text.py:212 msgid "Valid" msgstr "Gültig" -#: ../linkcheck/logger/html.py:246 ../linkcheck/logger/text.py:198 +#: ../linkcheck/logger/html.py:261 ../linkcheck/logger/text.py:215 msgid "Error" msgstr "Fehler" -#: ../linkcheck/logger/html.py:253 +#: ../linkcheck/logger/html.py:268 msgid "Statistics" msgstr "Statistik" -#: ../linkcheck/logger/html.py:255 ../linkcheck/logger/text.py:251 -#, python-format -msgid "Number of domains: %d" -msgstr "Anzahl von Domains: %d" - -#: ../linkcheck/logger/html.py:259 ../linkcheck/logger/text.py:254 +#: ../linkcheck/logger/html.py:271 ../linkcheck/logger/text.py:265 #, python-format msgid "" "Content types: %(image)d image, %(text)d text, %(video)d video, %(audio)d " @@ -314,107 +391,97 @@ msgstr "" "Inhalte: %(image)d Bild, %(text)d Text, %(video)d Video, %(audio)d Audio, " "%(application)d Anwendung, %(mail)d E-Mail und %(other)d andere Inhalte." -#: ../linkcheck/logger/html.py:263 ../linkcheck/logger/text.py:257 +#: ../linkcheck/logger/html.py:275 ../linkcheck/logger/text.py:268 #, python-format msgid "URL lengths: min=%(min)d, max=%(max)d, avg=%(avg)d." msgstr "URL Längen: min=%(min)d, max=%(max)d, mittel=%(avg)d" -#: ../linkcheck/logger/html.py:268 ../linkcheck/logger/text.py:262 +#: ../linkcheck/logger/html.py:280 ../linkcheck/logger/text.py:273 msgid "No statistics available since no URLs were checked." msgstr "Keine Statistik verfügbar, da keine URLs geprüft wurden." -#: ../linkcheck/logger/html.py:274 ../linkcheck/logger/text.py:206 +#: ../linkcheck/logger/html.py:286 ../linkcheck/logger/text.py:223 msgid "That's it." msgstr "Das war's." -#: ../linkcheck/logger/html.py:276 ../linkcheck/logger/text.py:207 +#: ../linkcheck/logger/html.py:288 ../linkcheck/logger/text.py:224 #, python-format msgid "%d link checked." msgid_plural "%d links checked." msgstr[0] "%d Verknüpfung überprüft." msgstr[1] "%d Verknüpfungen überprüft." 
-#: ../linkcheck/logger/html.py:279 ../linkcheck/logger/text.py:210 +#: ../linkcheck/logger/html.py:291 ../linkcheck/logger/text.py:227 #, python-format msgid "%d warning found" msgid_plural "%d warnings found" msgstr[0] "%d Warnung gefunden" msgstr[1] "%d Warnungen gefunden" -#: ../linkcheck/logger/html.py:282 ../linkcheck/logger/text.py:218 +#: ../linkcheck/logger/html.py:294 ../linkcheck/logger/text.py:235 #, python-format msgid " (%d ignored or duplicates not printed)" msgstr " (%d ignorierte oder doppelte Vorkommen nicht ausgegeben)" -#: ../linkcheck/logger/html.py:285 ../linkcheck/logger/text.py:221 +#: ../linkcheck/logger/html.py:297 ../linkcheck/logger/text.py:238 #, python-format msgid "%d error found" msgid_plural "%d errors found" msgstr[0] "%d Fehler gefunden" msgstr[1] "%d Fehler gefunden" -#: ../linkcheck/logger/html.py:288 ../linkcheck/logger/text.py:229 +#: ../linkcheck/logger/html.py:300 ../linkcheck/logger/text.py:246 #, python-format msgid " (%d duplicates not printed)" msgstr " (%d doppelte Vorkommen nicht ausgegeben)" -#: ../linkcheck/logger/html.py:294 ../linkcheck/logger/text.py:234 +#: ../linkcheck/logger/html.py:306 ../linkcheck/logger/text.py:251 #, python-format msgid "There was %(num)d internal error." msgid_plural "There were %(num)d internal errors." msgstr[0] "Es gab %(num)d internen Fehler." msgstr[1] "Es gab %(num)d interne Fehler." 
-#: ../linkcheck/logger/html.py:299 ../linkcheck/logger/text.py:238 -#: ../linkcheck/logger/__init__.py:386 +#: ../linkcheck/logger/html.py:311 ../linkcheck/logger/text.py:255 +#: ../linkcheck/logger/__init__.py:390 #, python-format msgid "Stopped checking at %(time)s (%(duration)s)" msgstr "Beende Prüfen am %(time)s (%(duration)s)" -#: ../linkcheck/logger/html.py:304 +#: ../linkcheck/logger/html.py:316 #, python-format msgid "Get the newest version at %s" msgstr "Die neueste Version gibt es unter %s" -#: ../linkcheck/logger/html.py:307 +#: ../linkcheck/logger/html.py:319 #, python-format msgid "Write comments and bugs to %s" msgstr "Schreiben Sie Kommentare und Fehler an %s" -#: ../linkcheck/logger/html.py:310 +#: ../linkcheck/logger/html.py:322 #, python-format msgid "Support this project at %s" msgstr "Unterstütze dieses Projekt unter %s" -#: ../linkcheck/logger/text.py:81 ../linkcheck/logger/__init__.py:374 +#: ../linkcheck/logger/text.py:98 ../linkcheck/logger/__init__.py:378 #, python-format msgid "Get the newest version at %(url)s" msgstr "Die neueste Version gibt es unter %(url)s" -#: ../linkcheck/logger/text.py:83 ../linkcheck/logger/__init__.py:376 +#: ../linkcheck/logger/text.py:100 ../linkcheck/logger/__init__.py:380 #, python-format msgid "Write comments and bugs to %(url)s" msgstr "Schreiben Sie Kommentare und Fehler an %(url)s" -#: ../linkcheck/logger/text.py:85 ../linkcheck/logger/__init__.py:378 +#: ../linkcheck/logger/text.py:102 ../linkcheck/logger/__init__.py:382 #, python-format msgid "Support this project at %(url)s" msgstr "Unterstütze dieses Projekt unter %(url)s" -#: ../linkcheck/logger/text.py:245 +#: ../linkcheck/logger/text.py:262 msgid "Statistics:" msgstr "Statistik:" -#: ../linkcheck/logger/text.py:247 -#, python-format -msgid "Downloaded: %s" -msgstr "Heruntergeladen: %s" - -#: ../linkcheck/logger/text.py:249 -#, python-format -msgid "Robots.txt cache: %s" -msgstr "Robots.txt-Cache: %s" - #: ../linkcheck/logger/__init__.py:31 msgid 
"Real URL" msgstr "Tats. URL" @@ -424,22 +491,22 @@ msgid "Cache key" msgstr "Cache Schlüssel" #: ../linkcheck/logger/__init__.py:33 -#: ../linkcheck/gui/linkchecker_ui_main.py:816 ../linkcheck/gui/urlmodel.py:22 +#: ../linkcheck/gui/linkchecker_ui_main.py:793 ../linkcheck/gui/urlmodel.py:22 msgid "Result" msgstr "Ergebnis" #: ../linkcheck/logger/__init__.py:34 -#: ../linkcheck/gui/linkchecker_ui_main.py:810 +#: ../linkcheck/gui/linkchecker_ui_main.py:787 msgid "Base" msgstr "Basis" #: ../linkcheck/logger/__init__.py:35 -#: ../linkcheck/gui/linkchecker_ui_main.py:808 ../linkcheck/gui/urlmodel.py:22 +#: ../linkcheck/gui/linkchecker_ui_main.py:785 ../linkcheck/gui/urlmodel.py:22 msgid "Name" msgstr "Name" #: ../linkcheck/logger/__init__.py:36 -#: ../linkcheck/gui/linkchecker_ui_main.py:809 +#: ../linkcheck/gui/linkchecker_ui_main.py:786 msgid "Parent URL" msgstr "Vater URL" @@ -448,32 +515,32 @@ msgid "Extern" msgstr "Extern" #: ../linkcheck/logger/__init__.py:38 -#: ../linkcheck/gui/linkchecker_ui_main.py:814 +#: ../linkcheck/gui/linkchecker_ui_main.py:791 msgid "Info" msgstr "Info" #: ../linkcheck/logger/__init__.py:39 -#: ../linkcheck/gui/linkchecker_ui_main.py:815 +#: ../linkcheck/gui/linkchecker_ui_main.py:792 msgid "Warning" msgstr "Warnung" #: ../linkcheck/logger/__init__.py:40 -#: ../linkcheck/gui/linkchecker_ui_main.py:812 +#: ../linkcheck/gui/linkchecker_ui_main.py:789 msgid "D/L time" msgstr "D/L Zeit" #: ../linkcheck/logger/__init__.py:41 -#: ../linkcheck/gui/linkchecker_ui_main.py:813 +#: ../linkcheck/gui/linkchecker_ui_main.py:790 msgid "Size" msgstr "Größe" #: ../linkcheck/logger/__init__.py:42 -#: ../linkcheck/gui/linkchecker_ui_main.py:811 +#: ../linkcheck/gui/linkchecker_ui_main.py:788 msgid "Check time" msgstr "Prüfzeit" #: ../linkcheck/logger/__init__.py:43 -#: ../linkcheck/gui/linkchecker_ui_main.py:807 ../linkcheck/gui/urlmodel.py:22 +#: ../linkcheck/gui/linkchecker_ui_main.py:784 ../linkcheck/gui/urlmodel.py:22 msgid "URL" msgstr "URL" @@ 
-482,202 +549,135 @@ msgid "Level" msgstr "Tiefe" #: ../linkcheck/logger/__init__.py:45 -#: ../linkcheck/gui/linkchecker_ui_main.py:817 +#: ../linkcheck/gui/linkchecker_ui_main.py:794 msgid "Modified" msgstr "Geändert" -#: ../linkcheck/logger/__init__.py:272 +#: ../linkcheck/logger/__init__.py:276 #, python-format msgid "Happy birthday for LinkChecker, I'm %d years old today!" msgstr "" "Herzlichen Glückwunsch zum Geburtstag, LinkChecker, ich bin heute %d Jahre " "alt geworden!" -#: ../linkcheck/logger/__init__.py:371 +#: ../linkcheck/logger/__init__.py:375 #, python-format msgid "created by %(app)s at %(time)s" msgstr "erstellt von %(app)s am %(time)s" -#: ../linkcheck/clamav.py:56 -msgid "clamd is not ready for stream scanning" -msgstr "clamd ist nicht bereit, einen Stream zu prüfen" - -#: ../linkcheck/clamav.py:126 -msgid "ScannerDaemonOutputFormat must be disabled" -msgstr "ScannerDaemonOutputFormat muss deaktiviert sein" - -#: ../linkcheck/clamav.py:128 -msgid "only one of TCPSocket and LocalSocket must be enabled" -msgstr "nur einer von TCPSocket oder LocalSocket muss aktiviert sein" - -#: ../linkcheck/clamav.py:157 -msgid "one of TCPSocket or LocalSocket must be enabled" -msgstr "einer von TCPSocket oder LocalSocket muss aktiviert sein" - -#: ../linkcheck/clamav.py:193 -msgid "Could not connect to ClamAV daemon." -msgstr "Konnte nicht zu ClamAV verbinden." - #: ../linkcheck/checker/telneturl.py:53 msgid "Host is empty" msgstr "Rechnername ist leer" #: ../linkcheck/checker/unknownurl.py:144 -msgid "Outside of domain filter, checked only syntax." -msgstr "Außerhalb des Domain Filters; prüfe lediglich Syntax." - -#: ../linkcheck/checker/unknownurl.py:146 #, python-format msgid "%(scheme)s URL ignored." msgstr "%(scheme)s URL ignoriert." 
-#: ../linkcheck/checker/unknownurl.py:150 +#: ../linkcheck/checker/unknownurl.py:148 msgid "URL is unrecognized or has invalid syntax" msgstr "URL ist unbekannt oder besitzt ungültige Syntax" -#: ../linkcheck/checker/const.py:117 +#: ../linkcheck/checker/const.py:106 msgid "The effective URL is different from the original." msgstr "Die effektive URL unterscheidet sich vom Original." -#: ../linkcheck/checker/const.py:119 +#: ../linkcheck/checker/const.py:108 msgid "Could not get the content of the URL." msgstr "Konnte den Inhalt der URL nicht bekommen." -#: ../linkcheck/checker/const.py:120 -msgid "URL anchor was not found." -msgstr "URL Anker wurde nicht gefunden." - -#: ../linkcheck/checker/const.py:122 -msgid "The warning regular expression was found in the URL contents." -msgstr "" -"Der reguläre Ausdruck für Warnungen wurde in den URL Inhalten gefunden." - -#: ../linkcheck/checker/const.py:123 -msgid "The URL content is a duplicate of another URL." -msgstr "Der URL-Inhalte ist ein Duplikat einer anderen URL." - -#: ../linkcheck/checker/const.py:124 +#: ../linkcheck/checker/const.py:109 msgid "The URL content size is too large." msgstr "Der URL Inhalt ist zu groß." -#: ../linkcheck/checker/const.py:125 +#: ../linkcheck/checker/const.py:110 msgid "The URL content size is zero." msgstr "Der URL Inhaltsgrößenangabe ist Null." -#: ../linkcheck/checker/const.py:126 +#: ../linkcheck/checker/const.py:111 msgid "The URL content size and download size are unequal." msgstr "" "Der URL Inhaltsgrößenangabe und die Download-Größe sind unterschiedlich." -#: ../linkcheck/checker/const.py:127 +#: ../linkcheck/checker/const.py:112 msgid "The URL is longer than the recommended size." msgstr "Die URL ist länger als die empfohlene Länge." -#: ../linkcheck/checker/const.py:128 +#: ../linkcheck/checker/const.py:113 msgid "The URL contains leading or trailing whitespace." msgstr "Die URL %(url)s enthält Leerzeichen am Anfang oder Ende." 
-#: ../linkcheck/checker/const.py:129 +#: ../linkcheck/checker/const.py:114 msgid "The file: URL is missing a trailing slash." msgstr "Der file: URL fehlt ein abschließender Schrägstrich." -#: ../linkcheck/checker/const.py:131 +#: ../linkcheck/checker/const.py:116 msgid "The file: path is not the same as the system specific path." msgstr "Der file: Pfad ist nicht derselbe wie der Systempfad." -#: ../linkcheck/checker/const.py:132 +#: ../linkcheck/checker/const.py:117 msgid "The ftp: URL is missing a trailing slash." msgstr "Der ftp: URL fehlt ein abschließender Schrägstrich." -#: ../linkcheck/checker/const.py:133 -msgid "The http: URL checking has been denied." -msgstr "Die http: URL-Überprüfung wurde verweigert." - -#: ../linkcheck/checker/const.py:134 -msgid "The URL has moved permanently." -msgstr "Die URL wurde dauerhaft verschoben." - -#: ../linkcheck/checker/const.py:136 -msgid "The URL has been redirected to an URL of a different type." -msgstr "Die URL wurde zu einem anderen URL-Typ umgeleitet." - -#: ../linkcheck/checker/const.py:137 +#: ../linkcheck/checker/const.py:118 msgid "The URL had no content." msgstr "Die URL besitzt keinen Inhalt." -#: ../linkcheck/checker/const.py:139 +#: ../linkcheck/checker/const.py:120 msgid "An error occurred while storing a cookie." msgstr "Ein Fehler trat auf während des Speicherns eines Cookies." -#: ../linkcheck/checker/const.py:141 +#: ../linkcheck/checker/const.py:122 msgid "An error occurred while decompressing the URL content." msgstr "Ein Fehler trat beim Dekomprimieren des URL Inhalts auf." -#: ../linkcheck/checker/const.py:143 +#: ../linkcheck/checker/const.py:124 msgid "The URL content is encoded with an unknown encoding." msgstr "Der URL-Inhalt ist in einer unbekannten Kodierung verfasst." -#: ../linkcheck/checker/const.py:145 +#: ../linkcheck/checker/const.py:126 msgid "Unsupported HTTP authentication method." msgstr "Nicht unterstützte HTTP Authentifizierungsmethode." 
-#: ../linkcheck/checker/const.py:147 ../linkcheck/checker/httpurl.py:243 +#: ../linkcheck/checker/const.py:128 msgid "Unauthorized access without HTTP authentication." msgstr "Unauthorisierter Zugriff ohne HTTP-Authentifizierung." -#: ../linkcheck/checker/const.py:148 +#: ../linkcheck/checker/const.py:129 msgid "The SSL certificate is invalid or expired." msgstr "Das SSL-Zertifikat ist ungültig oder abgelaufen." -#: ../linkcheck/checker/const.py:149 +#: ../linkcheck/checker/const.py:130 msgid "The URL has been ignored." msgstr "Die URL wurde ignoriert." -#: ../linkcheck/checker/const.py:150 +#: ../linkcheck/checker/const.py:131 msgid "The mail MX host could not be found." msgstr "Der MX Mail-Rechner konnte nicht gefunden werden." -#: ../linkcheck/checker/const.py:152 -msgid "The mailto: address could not be verified." -msgstr "Die mailto: Addresse konnte nicht überprüft werden." - -#: ../linkcheck/checker/const.py:154 -msgid "No connection to a MX host could be established." -msgstr "Es konnte keine Verbindung zu einem MX-Rechner hergestellt werden." - -#: ../linkcheck/checker/const.py:155 +#: ../linkcheck/checker/const.py:132 msgid "No NNTP server was found." msgstr "Es wurde kein NNTP Server gefunden." -#: ../linkcheck/checker/const.py:156 +#: ../linkcheck/checker/const.py:133 msgid "The NNTP newsgroup could not be found." msgstr "Die NNTP Nachrichtengruppe konnte nicht gefunden werden." -#: ../linkcheck/checker/const.py:157 +#: ../linkcheck/checker/const.py:134 msgid "The IP is obfuscated." msgstr "Die IP-Adresse ist verschleiert." -#: ../linkcheck/checker/const.py:158 -#, fuzzy -msgid "HTML syntax error." -msgstr "gültige HTML Syntax" - -#: ../linkcheck/checker/const.py:159 -#, fuzzy -msgid "CSS syntax error." -msgstr "gültige CSS Syntax" - -#: ../linkcheck/checker/mailtourl.py:87 +#: ../linkcheck/checker/mailtourl.py:84 #, python-format msgid "No mail addresses found in `%(url)s'." msgstr "Keine Adressen wurden in `%(url)s' gefunden." 
-#: ../linkcheck/checker/mailtourl.py:126 +#: ../linkcheck/checker/mailtourl.py:123 #, python-format msgid "Error parsing CGI values: %s" msgstr "Fehler beim Parsen der CGI-Werte: %s" -#: ../linkcheck/checker/mailtourl.py:149 +#: ../linkcheck/checker/mailtourl.py:146 #, python-format msgid "" "Mail address `%(addr)s' too long. Allowed 256 chars, was %(length)d chars." @@ -685,22 +685,22 @@ msgstr "" "E-Mail-Adresse `%(addr)s' ist zu lang. Erlaubt sind 256 Zeichen, es waren " "aber %(length)d Zeichen." -#: ../linkcheck/checker/mailtourl.py:153 +#: ../linkcheck/checker/mailtourl.py:150 #, python-format msgid "Missing `@' in mail address `%(addr)s'." msgstr "Fehlendes `@' in E-Mail-Adresse `%(addr)s'." -#: ../linkcheck/checker/mailtourl.py:159 +#: ../linkcheck/checker/mailtourl.py:156 #, python-format msgid "Missing local part of mail address `%(addr)s'." msgstr "Fehlender lokaler Teil der E-Mail-Adresse `%(addr)s'." -#: ../linkcheck/checker/mailtourl.py:163 +#: ../linkcheck/checker/mailtourl.py:160 #, python-format msgid "Missing domain part of mail address `%(addr)s'." msgstr "Fehlender Domänen-Teil der E-Mail-Adresse `%(addr)s'." -#: ../linkcheck/checker/mailtourl.py:167 +#: ../linkcheck/checker/mailtourl.py:164 #, python-format msgid "" "Local part of mail address `%(addr)s' too long. Allowed 64 chars, was " @@ -709,7 +709,7 @@ msgstr "" "Lokaler Teil der E-Mail-Adresse `%(addr)s' ist zu lang. Erlaubt sind 64 " "Zeichen, es waren aber %(length)d Zeichen." -#: ../linkcheck/checker/mailtourl.py:171 +#: ../linkcheck/checker/mailtourl.py:168 #, python-format msgid "" "Domain part of mail address `%(addr)s' too long. Allowed 255 chars, was " @@ -718,35 +718,35 @@ msgstr "" "Domänen-Teil der E-Mail-Adresse `%(addr)s' ist zu lang. Erlaubt sind 255 " "Zeichen, es waren aber %(length)d Zeichen." -#: ../linkcheck/checker/mailtourl.py:180 +#: ../linkcheck/checker/mailtourl.py:177 #, python-format msgid "Unquoted double quote or backslash in mail address `%(addr)s'." 
msgstr "" "Nicht kodiertes doppeltes Anführungszeichen oder Escape in E-Mail-Adresse `" "%(addr)s'." -#: ../linkcheck/checker/mailtourl.py:185 +#: ../linkcheck/checker/mailtourl.py:182 #, python-format msgid "Local part of mail address `%(addr)s' may not start with a dot." msgstr "" "Der lokale Teil der E-Mail-Adresse `%(addr)s' darf nicht mit einem Punkt " "beginnen." -#: ../linkcheck/checker/mailtourl.py:189 +#: ../linkcheck/checker/mailtourl.py:186 #, python-format msgid "Local part of mail address `%(addr)s' may not end with a dot." msgstr "" "Der lokale Teil der E-Mail-Adresse `%(addr)s' darf nicht mit einem Punkt " "enden." -#: ../linkcheck/checker/mailtourl.py:193 +#: ../linkcheck/checker/mailtourl.py:190 #, python-format msgid "Local part of mail address `%(addr)s' may not contain two dots." msgstr "" "Der lokale Teil der E-Mail-Adresse `%(addr)s' darf nicht zwei Punkte " "beinhalten." -#: ../linkcheck/checker/mailtourl.py:198 +#: ../linkcheck/checker/mailtourl.py:195 #, python-format msgid "" "Local part of mail address `%(addr)s' contains unquoted character `%(char)s." @@ -754,179 +754,129 @@ msgstr "" "Lokaler Teil der E-Mail-Adresse `%(addr)s' beinhaltet ein nicht kodiertes " "Zeichen `%(char)s." -#: ../linkcheck/checker/mailtourl.py:210 +#: ../linkcheck/checker/mailtourl.py:207 #, python-format msgid "Domain part of mail address `%(addr)s' has invalid IP." msgstr "" "Domänen-Teil der E-Mail-Adresse `%(addr)s' besitzt eine ungültige IP-Adresse." -#: ../linkcheck/checker/mailtourl.py:216 +#: ../linkcheck/checker/mailtourl.py:213 #, python-format msgid "Invalid domain part of mail address `%(addr)s'." msgstr "Ungültige Domänen-Teil der E-Mail-Adresse `%(addr)s'." -#: ../linkcheck/checker/mailtourl.py:220 +#: ../linkcheck/checker/mailtourl.py:217 #, python-format msgid "Invalid top level domain part of mail address `%(addr)s'." msgstr "Ungültige Toplevel-Domänen-Teil der E-Mail-Adresse `%(addr)s'." 
-#: ../linkcheck/checker/mailtourl.py:258 +#: ../linkcheck/checker/mailtourl.py:255 #, python-format msgid "No MX mail host for %(domain)s found." msgstr "Kein MX mail host für %(domain)s gefunden." -#: ../linkcheck/checker/mailtourl.py:266 +#: ../linkcheck/checker/mailtourl.py:263 #, python-format msgid "No host for %(domain)s found." msgstr "Kein Rechner für %(domain)s gefunden." -#: ../linkcheck/checker/mailtourl.py:280 +#: ../linkcheck/checker/mailtourl.py:277 #, python-format msgid "Got invalid DNS answer %(answer)s for %(domain)s." msgstr "Ungültige DNS Antwort %(answer)s für %(domain)s erhalten." -#: ../linkcheck/checker/mailtourl.py:324 -#, python-format -msgid "Verified address %(mail)s: %(info)s." -msgstr "Gültige Adresse %(mail)s: %(info)s." - -#: ../linkcheck/checker/mailtourl.py:328 -#, python-format -msgid "Unverified but presumably valid address %(mail)s: %(info)s." -msgstr "Unverifizierte aber wahrscheinlich gültige Adresse %(mail)s: %(info)s." - -#: ../linkcheck/checker/mailtourl.py:331 -#, python-format -msgid "Unverified address: %(info)s." -msgstr "Unverifizierte Adresse: %(info)s." - -#: ../linkcheck/checker/mailtourl.py:335 -#, python-format -msgid "MX mail host %(host)s did not accept connections: %(error)s." -msgstr "" -"Der MX mail host %(host)s akzeptierte keine SMTP Verbindungen: %(error)s." 
- -#: ../linkcheck/checker/mailtourl.py:341 -msgid "Could not connect, but syntax is correct" -msgstr "Konnte nicht konnektieren, aber die Syntax ist korrekt" - -#: ../linkcheck/checker/mailtourl.py:344 -#, python-format -msgid "Found MX mail host %(host)s" -msgstr "MX Mail host %(host)s gefunden" +#: ../linkcheck/checker/mailtourl.py:289 +#, fuzzy +msgid "Valid mail address syntax" +msgstr "Ungültige Mail Syntax" -#: ../linkcheck/checker/urlbase.py:70 +#: ../linkcheck/checker/urlbase.py:67 #, python-format msgid "URL has unparsable domain name: %(name)s" msgstr "URL besitzt einen nicht analysierbaren Rechnernamen: %(name)s" -#: ../linkcheck/checker/urlbase.py:161 +#: ../linkcheck/checker/urlbase.py:147 #, python-format msgid "Leading or trailing whitespace in URL `%(url)s'." msgstr "Die URL %(url)s enthält Leerzeichen am Anfang oder Ende." -#: ../linkcheck/checker/urlbase.py:388 +#: ../linkcheck/checker/urlbase.py:340 msgid "URL is missing" msgstr "URL fehlt" -#: ../linkcheck/checker/urlbase.py:391 +#: ../linkcheck/checker/urlbase.py:343 msgid "URL is empty" msgstr "URL ist leer" -#: ../linkcheck/checker/urlbase.py:405 +#: ../linkcheck/checker/urlbase.py:357 #, python-format msgid "Effective URL %(url)r." msgstr "Effektive URL %(url)r." -#: ../linkcheck/checker/urlbase.py:411 +#: ../linkcheck/checker/urlbase.py:363 #, python-format msgid "URL length %(len)d is longer than maximum of %(max)d." msgstr "URL-Länge %(len)d ist länger als das Maximum von %(max)d." -#: ../linkcheck/checker/urlbase.py:414 +#: ../linkcheck/checker/urlbase.py:366 #, python-format msgid "URL length %(len)d is longer than %(warn)d." msgstr "URL-Länge %(len)d ist länger als %(warn)d." 
-#: ../linkcheck/checker/urlbase.py:466 +#: ../linkcheck/checker/urlbase.py:411 #, python-format msgid "URL host %(host)r has invalid port" msgstr "URL Rechner %(host)r hat eine ungültige Portnummer" -#: ../linkcheck/checker/urlbase.py:470 +#: ../linkcheck/checker/urlbase.py:418 msgid "URL has empty hostname" msgstr "URL hat leeren Rechnernamen" -#: ../linkcheck/checker/urlbase.py:481 +#: ../linkcheck/checker/urlbase.py:440 #, python-format msgid "URL %(url)s has obfuscated IP address %(ip)s" msgstr "URL %(url)s besitzt die verschleierte IP-Adresse %(ip)s" -#: ../linkcheck/checker/urlbase.py:508 -#, python-format -msgid "URL is located in %(country)s." -msgstr "URL befindet sich in %(country)s." - -#: ../linkcheck/checker/urlbase.py:533 +#: ../linkcheck/checker/urlbase.py:478 msgid "Hostname not found" msgstr "Rechnername nicht gefunden" -#: ../linkcheck/checker/urlbase.py:536 ../linkcheck/checker/urlbase.py:554 -#, python-format -msgid "Bad HTTP response %(line)r" -msgstr "Ungültige HTTP Antwort %(line)r" - -#: ../linkcheck/checker/urlbase.py:539 +#: ../linkcheck/checker/urlbase.py:481 #, fuzzy, python-format msgid "Bad hostname %(host)r: %(msg)s" msgstr "konnte Rechnernamen %(host)r nicht parsen: %(msg)s" -#: ../linkcheck/checker/urlbase.py:555 +#: ../linkcheck/checker/urlbase.py:494 #, python-format msgid "could not get content: %(msg)s" msgstr "konnte Inhalt nicht lesen: %(msg)s" -#: ../linkcheck/checker/urlbase.py:700 -#, python-format -msgid "Anchor `%(name)s' not found." -msgstr "Anker `%(name)s' nicht gefunden." +#: ../linkcheck/checker/urlbase.py:625 +#, fuzzy +msgid "The URL is outside of the domain filter, checked only syntax." +msgstr "" +"Die Weiterleitungs-URL ist außerhalb des Domain Filters; prüfe lediglich " +"Syntax." -#: ../linkcheck/checker/urlbase.py:701 -#, python-format -msgid "Available anchors: %(anchors)s." -msgstr "Verfügbare Anker: %(anchors)s." 
+#: ../linkcheck/checker/urlbase.py:628 +msgid "filtered" +msgstr "gefiltert" -#: ../linkcheck/checker/urlbase.py:760 ../linkcheck/checker/urlbase.py:763 -#: ../linkcheck/checker/fileurl.py:207 ../linkcheck/checker/httpurl.py:674 -#: ../linkcheck/checker/httpurl.py:682 +#: ../linkcheck/checker/urlbase.py:646 ../linkcheck/checker/urlbase.py:660 +#: ../linkcheck/checker/fileurl.py:208 ../linkcheck/checker/httpurl.py:227 msgid "File size too large" msgstr "Dateigröße ist zu groß" -#: ../linkcheck/checker/urlbase.py:780 -#, python-format -msgid " with %s" -msgstr " mit %s" - -#: ../linkcheck/checker/urlbase.py:782 -#, python-format -msgid "Content%(size)s is the same as in URLs (%(urls)s)." -msgstr "Inhalt%(size)s ist derselbe wie in den URLs (%(urls)s)." - -#: ../linkcheck/checker/urlbase.py:822 -#, python-format -msgid "Found %(match)r at line %(line)d in link contents." -msgstr "Habe %(match)r in Zeile %(line)d im Inhalt der Verknüpfung gefunden." - -#: ../linkcheck/checker/urlbase.py:838 +#: ../linkcheck/checker/urlbase.py:679 msgid "Content size is zero." msgstr "Größe des Inhalts ist Null." -#: ../linkcheck/checker/urlbase.py:844 +#: ../linkcheck/checker/urlbase.py:685 #, python-format msgid "Content size %(dlsize)s is larger than %(maxbytes)s." msgstr "Inhalt %(dlsize)s is größer als %(maxbytes)s." -#: ../linkcheck/checker/urlbase.py:849 +#: ../linkcheck/checker/urlbase.py:690 #, python-format msgid "" "Download size (%(dlsize)d Byte) does not equal content size (%(size)d Byte)." @@ -934,38 +884,12 @@ msgstr "" "Download Grüße (%(dlsize)d Byte) ist ungleich der Inhaltsgröße (%(size)d " "Byte)." -#: ../linkcheck/checker/urlbase.py:861 +#: ../linkcheck/checker/urlbase.py:710 #, python-format -msgid "%(w3type)s validation error at line %(line)s col %(column)s: %(msg)s" +msgid "More than %(num)d child URLs found, skipping the rest." 
msgstr "" -"%(w3type)s Validierungsfehler in Zeile %(line)s Spalte %(column)s: %(msg)s" - -#: ../linkcheck/checker/urlbase.py:882 -msgid "valid HTML syntax" -msgstr "gültige HTML Syntax" - -#: ../linkcheck/checker/urlbase.py:890 -#, python-format -msgid "HTML W3C validation caused error: %(msg)s " -msgstr "HTML W3C Validierung verursachte Fehler: %(msg)s" - -#: ../linkcheck/checker/urlbase.py:917 -msgid "valid CSS syntax" -msgstr "gültige CSS Syntax" -#: ../linkcheck/checker/urlbase.py:925 -#, python-format -msgid "CSS W3C validation caused error: %(msg)s " -msgstr "CSS W3C Validierung verursachte Fehler: %(msg)s" - -#: ../linkcheck/checker/urlbase.py:978 -#, python-format -msgid "%(num)d URL parsed." -msgid_plural "%(num)d URLs parsed." -msgstr[0] "%(num)d URL geparst." -msgstr[1] "%(num)d URLs geparst." - -#: ../linkcheck/checker/urlbase.py:1124 +#: ../linkcheck/checker/urlbase.py:772 #, python-format msgid "URL has unparsable domain name: %(domain)s" msgstr "URL besitzt einen nicht analysierbaren Rechnernamen: %(domain)s" @@ -995,58 +919,20 @@ msgstr "" msgid "%(host)r could not be resolved" msgstr "Der MX Mail-Rechner konnte nicht gefunden werden." -#: ../linkcheck/checker/httpsurl.py:38 -#, python-format -msgid "%s URL ignored." -msgstr "%s URL ignoriert." 
- -#: ../linkcheck/checker/httpsurl.py:55 -msgid "empty or no certificate found" -msgstr "leeres oder kein Zertifikat gefunden" - -#: ../linkcheck/checker/httpsurl.py:61 -msgid "certificate did not include \"subject\" information" -msgstr "Zertifikat besitzt keine \"subject\"-Information" - -#: ../linkcheck/checker/httpsurl.py:66 -msgid "certificate did not include \"notAfter\" information" -msgstr "Zertifikat besitzt keine \"notAfter\"-Information" - -#: ../linkcheck/checker/httpsurl.py:87 -#, python-format -msgid "invalid certficate \"notAfter\" value %r" -msgstr "ungültiger \"notAfter\" Zertifikatwert %r" - -#: ../linkcheck/checker/httpsurl.py:95 -#, python-format -msgid "certficate is expired on %s" -msgstr "Zertifikat ist am %s abgelaufen" - -#: ../linkcheck/checker/httpsurl.py:100 -#, python-format -msgid "certificate is only %s valid" -msgstr "Zertifikat ist nur noch %s gültig" - -#: ../linkcheck/checker/httpsurl.py:106 -#, python-format -msgid "SSL warning: %(msg)s. Cipher %(cipher)s, %(protocol)s." -msgstr "" -"SSL Warnung: %(msg)s. Verschlüsselungsverfahren %(cipher)s, %(protocol)s." - -#: ../linkcheck/checker/ftpurl.py:96 +#: ../linkcheck/checker/ftpurl.py:91 msgid "Got no answer from FTP server" msgstr "Keine Antwort vom FTP Server" -#: ../linkcheck/checker/ftpurl.py:99 +#: ../linkcheck/checker/ftpurl.py:94 #, python-format msgid "Remote host has closed connection: %(msg)s" msgstr "Entfernter Rechner hat die Verbindung geschlossen: %(msg)s" -#: ../linkcheck/checker/ftpurl.py:142 +#: ../linkcheck/checker/ftpurl.py:137 msgid "Missing trailing directory slash in ftp url." msgstr "Fehlender / am Ende der FTP url." 
-#: ../linkcheck/checker/ftpurl.py:224 +#: ../linkcheck/checker/ftpurl.py:209 msgid "FTP file size too large" msgstr "FTP Dateigröße ist zu groß" @@ -1071,7 +957,7 @@ msgstr "" msgid "directory" msgstr "Verzeichnis" -#: ../linkcheck/checker/fileurl.py:197 +#: ../linkcheck/checker/fileurl.py:198 #, python-format msgid "" "The URL path %(path)r is not the same as the system path %(realpath)r. You " @@ -1080,127 +966,26 @@ msgstr "" "Der URL Pfad %(path)r ist nicht derselbe wie der Systempfad %(realpath)r. " "Sie sollten immer den Systempfad in URLs benutzen." -#: ../linkcheck/checker/httpurl.py:142 -msgid "Access denied by robots.txt, skipping content checks." -msgstr "" -"Zugriff verweigert durch robots.txt; lasse das Prüfen des URL Inhalts aus." - -#: ../linkcheck/checker/httpurl.py:191 -#, python-format -msgid "Enforced proxy `%(name)s'." -msgstr "Erzwungener Proxy `%(name)s'." - -#: ../linkcheck/checker/httpurl.py:197 -msgid "Missing 'Location' header with enforced proxy status 305, aborting." -msgstr "" -"Fehlender 'Location'-Header mit erzwungenem Proxystatus 305, breche ab." - -#: ../linkcheck/checker/httpurl.py:202 -msgid "Empty 'Location' header value with enforced proxy status 305, aborting." -msgstr "Leerer 'Location'-Header mit erzwungenem Proxystatus 305, breche ab." - -#: ../linkcheck/checker/httpurl.py:227 -#, python-format -msgid "more than %d redirections, aborting" -msgstr "mehr als %d Weiterleitungen, breche ab" - -#: ../linkcheck/checker/httpurl.py:250 -#, python-format -msgid "" -"Unsupported HTTP authentication `%(auth)s', only `Basic' authentication is " -"supported." -msgstr "" -"Nicht unterstützte HTTP Authentifizierungsmethode `%(auth)s', nur `Basic' " -"Authentifizierung ist unterstützt." - -#: ../linkcheck/checker/httpurl.py:331 -#, python-format -msgid "Redirected to `%(url)s'." -msgstr "Zu `%(url)s' umgeleitet." - -#: ../linkcheck/checker/httpurl.py:373 -#, python-format -msgid "Redirection to url `%(newurl)s' is not allowed." 
-msgstr "Umleitung zu `%(newurl)s' ist nicht erlaubt." - -#: ../linkcheck/checker/httpurl.py:375 ../linkcheck/checker/httpurl.py:409 -#: ../linkcheck/checker/httpurl.py:443 -msgid "syntax OK" -msgstr "Syntax OK" - -#: ../linkcheck/checker/httpurl.py:394 -msgid "" -"The redirected URL is outside of the domain filter, checked only syntax." -msgstr "" -"Die Weiterleitungs-URL ist außerhalb des Domain Filters; prüfe lediglich " -"Syntax." - -#: ../linkcheck/checker/httpurl.py:396 -msgid "filtered" -msgstr "gefiltert" - -#: ../linkcheck/checker/httpurl.py:407 -msgid "Access to redirected URL denied by robots.txt, checked only syntax." +#: ../linkcheck/checker/httpurl.py:120 +#, fuzzy +msgid "Access denied by robots.txt, checked only syntax." msgstr "" "Zugriff zur Weiterleitungs-URL verweigert durch robots.txt; prüfe lediglich " "Syntax." -#: ../linkcheck/checker/httpurl.py:425 -#, python-format -msgid "" -"recursive redirection encountered:\n" -" %(urls)s" -msgstr "" -"Rekursive Weiterleitung entdeckt:\n" -" %(urls)s" +#: ../linkcheck/checker/httpurl.py:121 +msgid "syntax OK" +msgstr "Syntax OK" -#: ../linkcheck/checker/httpurl.py:438 +#: ../linkcheck/checker/httpurl.py:184 #, python-format -msgid "" -"Redirection to URL `%(newurl)s' with different scheme found; the original " -"URL was `%(url)s'." -msgstr "" -"Weiterleitung zu URL `%(newurl)s' mit anderem Schema gefunden; die Original-" -"URL war `%(url)s'." - -#: ../linkcheck/checker/httpurl.py:453 -msgid "HTTP 301 (moved permanent) encountered: you should update this link." -msgstr "" -"HTTP 301 (moved permanent) gefunden: sie sollten diesen Link aktualisieren." +msgid "Redirected to `%(url)s'." +msgstr "Zu `%(url)s' umgeleitet." -#: ../linkcheck/checker/httpurl.py:484 +#: ../linkcheck/checker/httpurl.py:219 msgid "OK" msgstr "OK" -#: ../linkcheck/checker/httpurl.py:583 -#, python-format -msgid "Sent Cookie: %(cookie)s." -msgstr "Gesendetes Cookie: %(cookie)s." 
- -#: ../linkcheck/checker/httpurl.py:589 -#, python-format -msgid "Could not store cookies from headers: %(error)s." -msgstr "Konnte Cookies nicht aus Kopfdaten speichern: %(error)s." - -#: ../linkcheck/checker/httpurl.py:648 -#, python-format -msgid "Unsupported HTTP url scheme `%(scheme)s'" -msgstr "Nicht unterstütztes HTTP URL Schema `%(scheme)s'" - -#: ../linkcheck/checker/httpurl.py:669 -msgid "Redirection error" -msgstr "Weiterleitungsfehler" - -#: ../linkcheck/checker/httpurl.py:695 -#, python-format -msgid "Decompress error %(err)s" -msgstr "Entkomprimierungsfehler %(err)s" - -#: ../linkcheck/checker/httpurl.py:710 -#, python-format -msgid "Unsupported content encoding `%(encoding)s'." -msgstr "Content-Encoding `%(encoding)s' wird nicht unterstützt." - #: ../linkcheck/checker/nntpurl.py:45 msgid "No NNTP server was specified, skipping this URL." msgstr "Kein NNTP Server angegeben; prüfe lediglich Syntax." @@ -1224,35 +1009,35 @@ msgstr "Keine Newsgroup in der NNTP URL spezifiziert." msgid "NNTP server too busy; tried more than %d times." msgstr "NNTP Server zu beschäftigt; habe es mehr als %d mal versucht." -#: ../linkcheck/__init__.py:150 +#: ../linkcheck/__init__.py:152 msgid "CRITICAL" msgstr "KRITISCH" -#: ../linkcheck/__init__.py:151 +#: ../linkcheck/__init__.py:153 msgid "ERROR" msgstr "FEHLER" -#: ../linkcheck/__init__.py:152 +#: ../linkcheck/__init__.py:154 msgid "WARN" msgstr "WARN" -#: ../linkcheck/__init__.py:153 +#: ../linkcheck/__init__.py:155 msgid "WARNING" msgstr "WARNUNG" -#: ../linkcheck/__init__.py:154 +#: ../linkcheck/__init__.py:156 msgid "INFO" msgstr "INFO" -#: ../linkcheck/__init__.py:155 +#: ../linkcheck/__init__.py:157 msgid "DEBUG" msgstr "DEBUG" -#: ../linkcheck/__init__.py:156 +#: ../linkcheck/__init__.py:158 msgid "NOTSET" msgstr "NICHTS" -#: ../linkcheck/__init__.py:167 +#: ../linkcheck/__init__.py:169 msgid "Running as root user; dropping privileges by changing user to nobody." 
msgstr "" "Laufe als Benutzer root; Privilegien werden aufgegeben indem auf Benutzer " @@ -1262,240 +1047,236 @@ msgstr "" msgid "LinkChecker debug log" msgstr "LinkChecker Debugausgabe" -#: ../linkcheck/gui/linkchecker_ui_main.py:793 +#: ../linkcheck/gui/linkchecker_ui_main.py:770 msgid "LinkChecker" msgstr "LinkChecker" -#: ../linkcheck/gui/linkchecker_ui_main.py:794 +#: ../linkcheck/gui/linkchecker_ui_main.py:771 msgid "URL:" msgstr "URL:" -#: ../linkcheck/gui/linkchecker_ui_main.py:795 +#: ../linkcheck/gui/linkchecker_ui_main.py:772 msgid "Start checking the given URL." msgstr "Beginne, die gegebene URL zu Prüfen" -#: ../linkcheck/gui/linkchecker_ui_main.py:797 +#: ../linkcheck/gui/linkchecker_ui_main.py:774 msgid "URLs: " msgstr "URLs:" -#: ../linkcheck/gui/linkchecker_ui_main.py:798 +#: ../linkcheck/gui/linkchecker_ui_main.py:775 msgid "active" msgstr "aktiv" -#: ../linkcheck/gui/linkchecker_ui_main.py:799 -#: ../linkcheck/gui/linkchecker_ui_main.py:801 -#: ../linkcheck/gui/linkchecker_ui_main.py:803 +#: ../linkcheck/gui/linkchecker_ui_main.py:776 +#: ../linkcheck/gui/linkchecker_ui_main.py:778 +#: ../linkcheck/gui/linkchecker_ui_main.py:780 msgid "0" msgstr "0" -#: ../linkcheck/gui/linkchecker_ui_main.py:800 +#: ../linkcheck/gui/linkchecker_ui_main.py:777 msgid "queued" msgstr "wartend" -#: ../linkcheck/gui/linkchecker_ui_main.py:802 +#: ../linkcheck/gui/linkchecker_ui_main.py:779 msgid "checked" msgstr "geprüft" -#: ../linkcheck/gui/linkchecker_ui_main.py:804 +#: ../linkcheck/gui/linkchecker_ui_main.py:781 msgid "Info:" msgstr "Info:" -#: ../linkcheck/gui/linkchecker_ui_main.py:805 +#: ../linkcheck/gui/linkchecker_ui_main.py:782 msgid "-" msgstr "-" -#: ../linkcheck/gui/linkchecker_ui_main.py:806 +#: ../linkcheck/gui/linkchecker_ui_main.py:783 msgid "URL properties" msgstr "URL Eigenschaften" -#: ../linkcheck/gui/linkchecker_ui_main.py:818 +#: ../linkcheck/gui/linkchecker_ui_main.py:795 msgid "Check results" msgstr "Prüfergebnisse" -#: 
../linkcheck/gui/linkchecker_ui_main.py:819 +#: ../linkcheck/gui/linkchecker_ui_main.py:796 msgid "Valid URLs" msgstr "Gültige URLs" -#: ../linkcheck/gui/linkchecker_ui_main.py:820 +#: ../linkcheck/gui/linkchecker_ui_main.py:797 msgid "Warnings" msgstr "Warnungen" -#: ../linkcheck/gui/linkchecker_ui_main.py:821 +#: ../linkcheck/gui/linkchecker_ui_main.py:798 msgid "Invalid URLs" msgstr "Ungültige URLs" -#: ../linkcheck/gui/linkchecker_ui_main.py:822 +#: ../linkcheck/gui/linkchecker_ui_main.py:799 msgid "Content type statistics" msgstr "Inhaltstatistik" -#: ../linkcheck/gui/linkchecker_ui_main.py:823 +#: ../linkcheck/gui/linkchecker_ui_main.py:800 msgid "Image" msgstr "Bild" -#: ../linkcheck/gui/linkchecker_ui_main.py:824 +#: ../linkcheck/gui/linkchecker_ui_main.py:801 msgid "Text" msgstr "Text" -#: ../linkcheck/gui/linkchecker_ui_main.py:825 +#: ../linkcheck/gui/linkchecker_ui_main.py:802 msgid "Application" msgstr "Anwendung" -#: ../linkcheck/gui/linkchecker_ui_main.py:826 +#: ../linkcheck/gui/linkchecker_ui_main.py:803 msgid "Audio" msgstr "Audio" -#: ../linkcheck/gui/linkchecker_ui_main.py:827 +#: ../linkcheck/gui/linkchecker_ui_main.py:804 msgid "Video" msgstr "Video" -#: ../linkcheck/gui/linkchecker_ui_main.py:828 +#: ../linkcheck/gui/linkchecker_ui_main.py:805 msgid "Other" msgstr "Andere" -#: ../linkcheck/gui/linkchecker_ui_main.py:829 +#: ../linkcheck/gui/linkchecker_ui_main.py:806 msgid "Mail" msgstr "E-Mail" -#: ../linkcheck/gui/linkchecker_ui_main.py:830 +#: ../linkcheck/gui/linkchecker_ui_main.py:807 msgid "URL statistics" msgstr "URL Statistik" -#: ../linkcheck/gui/linkchecker_ui_main.py:831 +#: ../linkcheck/gui/linkchecker_ui_main.py:808 msgid "Min. length" msgstr "Min. Länge" -#: ../linkcheck/gui/linkchecker_ui_main.py:832 +#: ../linkcheck/gui/linkchecker_ui_main.py:809 msgid "Avg. length" msgstr "Durchschn. Länge" -#: ../linkcheck/gui/linkchecker_ui_main.py:833 +#: ../linkcheck/gui/linkchecker_ui_main.py:810 msgid "Max. length" msgstr "Max. 
Länge" -#: ../linkcheck/gui/linkchecker_ui_main.py:834 -msgid "Domains" -msgstr "Domains" - -#: ../linkcheck/gui/linkchecker_ui_main.py:835 +#: ../linkcheck/gui/linkchecker_ui_main.py:811 msgid "&Edit" msgstr "&Bearbeiten" -#: ../linkcheck/gui/linkchecker_ui_main.py:836 +#: ../linkcheck/gui/linkchecker_ui_main.py:812 #: ../linkcheck/gui/linkchecker_ui_editor.py:34 msgid "&File" msgstr "&Datei" -#: ../linkcheck/gui/linkchecker_ui_main.py:837 -#: ../linkcheck/gui/linkchecker_ui_main.py:840 +#: ../linkcheck/gui/linkchecker_ui_main.py:813 +#: ../linkcheck/gui/linkchecker_ui_main.py:816 msgid "&Help" msgstr "&Hilfe" -#: ../linkcheck/gui/linkchecker_ui_main.py:838 +#: ../linkcheck/gui/linkchecker_ui_main.py:814 msgid "A&bout" msgstr "Ü&ber" -#: ../linkcheck/gui/linkchecker_ui_main.py:839 +#: ../linkcheck/gui/linkchecker_ui_main.py:815 msgid "About" msgstr "Über" -#: ../linkcheck/gui/linkchecker_ui_main.py:841 +#: ../linkcheck/gui/linkchecker_ui_main.py:817 msgid "Help" msgstr "Hilfe" -#: ../linkcheck/gui/linkchecker_ui_main.py:842 +#: ../linkcheck/gui/linkchecker_ui_main.py:818 msgid "View online" msgstr "Online anschauen" -#: ../linkcheck/gui/linkchecker_ui_main.py:843 +#: ../linkcheck/gui/linkchecker_ui_main.py:819 msgid "View URL online" msgstr "URL Online anschauen" -#: ../linkcheck/gui/linkchecker_ui_main.py:844 +#: ../linkcheck/gui/linkchecker_ui_main.py:820 msgid "&Options" msgstr "&Optionen" -#: ../linkcheck/gui/linkchecker_ui_main.py:845 +#: ../linkcheck/gui/linkchecker_ui_main.py:821 #: ../linkcheck/gui/linkchecker_ui_options.py:137 msgid "Options" msgstr "Optionen" -#: ../linkcheck/gui/linkchecker_ui_main.py:846 +#: ../linkcheck/gui/linkchecker_ui_main.py:822 msgid "Copy to clipboard" msgstr "URL kopieren" -#: ../linkcheck/gui/linkchecker_ui_main.py:847 +#: ../linkcheck/gui/linkchecker_ui_main.py:823 msgid "Copy URL to clipboard" msgstr "URL in die Zwischenablage kopieren" -#: ../linkcheck/gui/linkchecker_ui_main.py:848 +#: 
../linkcheck/gui/linkchecker_ui_main.py:824 msgid "Ctrl+C" msgstr "Strg+C" -#: ../linkcheck/gui/linkchecker_ui_main.py:849 +#: ../linkcheck/gui/linkchecker_ui_main.py:825 msgid "View parent online" msgstr "Vater-URL Online anschauen" -#: ../linkcheck/gui/linkchecker_ui_main.py:850 +#: ../linkcheck/gui/linkchecker_ui_main.py:826 msgid "View parent URL online" msgstr "Vater-URL Online anschauen" -#: ../linkcheck/gui/linkchecker_ui_main.py:851 +#: ../linkcheck/gui/linkchecker_ui_main.py:827 msgid "View parent source" msgstr "Quellcode der Vater-URL anzeigen" -#: ../linkcheck/gui/linkchecker_ui_main.py:852 +#: ../linkcheck/gui/linkchecker_ui_main.py:828 msgid "View parent URL source" msgstr "Quellcode der Vater-URL anzeigen" -#: ../linkcheck/gui/linkchecker_ui_main.py:853 +#: ../linkcheck/gui/linkchecker_ui_main.py:829 msgid "Show debug" msgstr "Zeige Debug" -#: ../linkcheck/gui/linkchecker_ui_main.py:854 +#: ../linkcheck/gui/linkchecker_ui_main.py:830 msgid "View properties" msgstr "Eigenschaften anschauen" -#: ../linkcheck/gui/linkchecker_ui_main.py:855 +#: ../linkcheck/gui/linkchecker_ui_main.py:831 msgid "View URL properties" msgstr "URL Eigenschaften anschauen" -#: ../linkcheck/gui/linkchecker_ui_main.py:856 +#: ../linkcheck/gui/linkchecker_ui_main.py:832 msgid "Save &results..." msgstr "E&rgebnisse speichern..." -#: ../linkcheck/gui/linkchecker_ui_main.py:857 +#: ../linkcheck/gui/linkchecker_ui_main.py:833 msgid "&Quit" msgstr "&Beenden" -#: ../linkcheck/gui/linkchecker_ui_main.py:858 +#: ../linkcheck/gui/linkchecker_ui_main.py:834 msgid "Ctrl+Q" msgstr "Strg+Q" -#: ../linkcheck/gui/linkchecker_ui_main.py:859 +#: ../linkcheck/gui/linkchecker_ui_main.py:835 msgid "Check for updates" msgstr "Prüfe auf Updates" -#: ../linkcheck/gui/linkchecker_ui_main.py:860 +#: ../linkcheck/gui/linkchecker_ui_main.py:836 msgid "Donate" msgstr "Spenden" -#: ../linkcheck/gui/linkchecker_ui_main.py:861 +#: ../linkcheck/gui/linkchecker_ui_main.py:837 msgid "&Open project..." 
msgstr "&Projekt öffnen..." -#: ../linkcheck/gui/linkchecker_ui_main.py:862 +#: ../linkcheck/gui/linkchecker_ui_main.py:838 msgid "Open project" msgstr "Projekt öffnen" -#: ../linkcheck/gui/linkchecker_ui_main.py:863 +#: ../linkcheck/gui/linkchecker_ui_main.py:839 msgid "Ctrl+O" msgstr "Strg+O" -#: ../linkcheck/gui/linkchecker_ui_main.py:864 +#: ../linkcheck/gui/linkchecker_ui_main.py:840 msgid "&Save project..." msgstr "Projekt &speichern..." -#: ../linkcheck/gui/linkchecker_ui_main.py:865 +#: ../linkcheck/gui/linkchecker_ui_main.py:841 #: ../linkcheck/gui/linkchecker_ui_editor.py:50 msgid "Ctrl+S" msgstr "Strg+S" @@ -1584,23 +1365,23 @@ msgstr "Datei nicht gefunden" msgid "Parent" msgstr "Vater" -#: ../linkcheck/gui/urlsave.py:21 +#: ../linkcheck/gui/urlsave.py:19 msgid "HTML output (*.html)" msgstr "HTML Ausgabe (*.html)" -#: ../linkcheck/gui/urlsave.py:22 +#: ../linkcheck/gui/urlsave.py:20 msgid "Text output (*.txt)" msgstr "Text Ausgabe (*.txt)" -#: ../linkcheck/gui/urlsave.py:23 +#: ../linkcheck/gui/urlsave.py:21 msgid "XML output (*.xml)" msgstr "XML Ausgabe (*.xml)" -#: ../linkcheck/gui/urlsave.py:24 +#: ../linkcheck/gui/urlsave.py:22 msgid "CSV output (*.csv)" msgstr "CSV Ausgabe (*.csv)" -#: ../linkcheck/gui/urlsave.py:59 +#: ../linkcheck/gui/urlsave.py:68 msgid "Save check results" msgstr "Prüfergebnisse speichern" @@ -1612,36 +1393,36 @@ msgstr "LinkChecker Quellcodeanzeige" msgid "&Save" msgstr "&Speichern" -#: ../linkcheck/gui/__init__.py:160 ../linkcheck/gui/__init__.py:487 +#: ../linkcheck/gui/__init__.py:160 ../linkcheck/gui/__init__.py:492 msgid "Ready." msgstr "Bereit." -#: ../linkcheck/gui/__init__.py:178 +#: ../linkcheck/gui/__init__.py:180 msgid "Check finished." msgstr "Prüfung beendet." 
-#: ../linkcheck/gui/__init__.py:294 +#: ../linkcheck/gui/__init__.py:296 msgid "Start" msgstr "Start" -#: ../linkcheck/gui/__init__.py:320 +#: ../linkcheck/gui/__init__.py:322 msgid "Stop" msgstr "Anhalten" -#: ../linkcheck/gui/__init__.py:366 +#: ../linkcheck/gui/__init__.py:369 msgid "yes" msgstr "ja" -#: ../linkcheck/gui/__init__.py:366 +#: ../linkcheck/gui/__init__.py:369 msgid "no" msgstr "nein" -#: ../linkcheck/gui/__init__.py:369 +#: ../linkcheck/gui/__init__.py:372 #, python-format msgid "About %(appname)s" msgstr "Über %(appname)s" -#: ../linkcheck/gui/__init__.py:370 +#: ../linkcheck/gui/__init__.py:373 #, python-format msgid "" "
\n" @@ -1672,45 +1453,45 @@ msgstr "" "Spende in Betracht ziehen. Vielen Dank!\n" "" -#: ../linkcheck/gui/__init__.py:422 +#: ../linkcheck/gui/__init__.py:427 #, python-format msgid "Closing active URLs with timeout %s..." msgstr "Schließe aktive URLs mit Timeout %s..." -#: ../linkcheck/gui/__init__.py:437 ../linkchecker:663 +#: ../linkcheck/gui/__init__.py:442 ../linkchecker:624 msgid "Dumping memory statistics..." msgstr "Generiere Speicherabzug..." -#: ../linkcheck/gui/__init__.py:439 +#: ../linkcheck/gui/__init__.py:444 msgid "LinkChecker memory dump written" msgstr "LinkChecker Speicherabzug geschrieben" -#: ../linkcheck/gui/__init__.py:440 ../linkchecker:665 +#: ../linkcheck/gui/__init__.py:445 ../linkchecker:626 #, python-format msgid "The memory dump has been written to `%(filename)s'." msgstr "Der Speicherabzug wurde in Datei `%(filename)s' geschrieben." -#: ../linkcheck/gui/__init__.py:463 +#: ../linkcheck/gui/__init__.py:468 msgid "Error, empty URL" msgstr "Fehler, leere URL" -#: ../linkcheck/gui/__init__.py:465 +#: ../linkcheck/gui/__init__.py:470 #, python-format msgid "Checking '%s'." msgstr "Prüfe '%s'" -#: ../linkcheck/gui/__init__.py:484 +#: ../linkcheck/gui/__init__.py:489 #, python-format msgid "%d URL selected." msgid_plural "%d URLs selected" msgstr[0] "%4d Verknüpfung ausgewählt" msgstr[1] "%4d Verknüpfungen ausgewählt" -#: ../linkcheck/gui/__init__.py:571 +#: ../linkcheck/gui/__init__.py:576 msgid "LinkChecker internal error" msgstr "LinkChecker interner Fehler" -#: ../linkcheck/gui/linkchecker_ui_options.py:138 ../linkchecker:349 +#: ../linkcheck/gui/linkchecker_ui_options.py:138 ../linkchecker:319 msgid "Checking options" msgstr "Prüf-Optionen" @@ -1825,12 +1606,12 @@ msgstr "Konnte Projekt %(filename)s nicht laden: %(err)s" msgid "Project file %(filename)s loaded successfully." msgstr "Projektdatei %(filename)s erfolgreich geladen." 
-#: ../linkcheck/cmdline.py:36 +#: ../linkcheck/cmdline.py:59 #, python-format msgid "Error: %(msg)s" msgstr "Fehler: %(msg)s" -#: ../linkcheck/cmdline.py:37 +#: ../linkcheck/cmdline.py:60 #, python-format msgid "Execute '%(program)s -h' for help" msgstr "Führen Sie '%(program)s -h' aus, um Hilfe zu erhalten" @@ -1942,20 +1723,6 @@ msgstr "" "Konnte Modul %(module)s für %(feature)s nicht importieren. Installieren Sie " "%(module)s von %(url)s, um dieses Feature zu nutzen." -#: ../linkcheck/strformat.py:332 -#, python-format -msgid "%d hit" -msgid_plural "%d hits" -msgstr[0] "%d Teffer" -msgstr[1] "%d Treffer" - -#: ../linkcheck/strformat.py:333 -#, python-format -msgid "%d miss" -msgid_plural "%d misses" -msgstr[0] "%d Verfehlung" -msgstr[1] "%d Verfehlungen" - #: ../linkchecker:55 msgid "" "NOTES\n" @@ -2270,16 +2037,11 @@ msgstr "Konnte Profiling-Datei %(file)r nicht finden." msgid "Please run linkchecker with --profile to generate it." msgstr "Bitte starten Sie linkchecker mit --profile, um sie zu generieren." -#: ../linkchecker:224 -#, python-format -msgid "Syntax error in %(arg)r: %(msg)s" -msgstr "Syntaxfehler in %(arg)r: %(msg)s" - -#: ../linkchecker:247 +#: ../linkchecker:236 msgid "General options" msgstr "Allgemeine Optionen" -#: ../linkchecker:251 +#: ../linkchecker:240 #, fuzzy, python-format msgid "" "Use FILENAME as configuration file. Per default LinkChecker uses\n" @@ -2290,7 +2052,7 @@ msgstr "" " LinkChecker ~/.linkchecker/linkcheckerrc\n" "(unter Windows %HOMEPATH%\\.linkchecker\\linkcheckerrc)." -#: ../linkchecker:256 +#: ../linkchecker:245 msgid "" "Generate no more than the given number of threads. Default number\n" "of threads is 10. To disable threading specify a non-positive number." @@ -2299,43 +2061,35 @@ msgstr "" "von Threads ist 10. Geben Sie eine negative Zahl an, um Threading zu " "deaktivieren." -#: ../linkchecker:259 +#: ../linkchecker:248 msgid "Print version and exit." msgstr "Drucke die Version und beende das Programm." 
-#: ../linkchecker:262 +#: ../linkchecker:251 +#, fuzzy +msgid "Print available check plugins and exit." +msgstr "Drucke die Version und beende das Programm." + +#: ../linkchecker:254 msgid "Read list of white-space separated URLs to check from stdin." msgstr "" "Lese eine Liste von URLs zum Prüfen von der Standardeingabe, getrennt durch " "Leerzeichen." -#: ../linkchecker:265 +#: ../linkchecker:257 msgid "Output options" msgstr "Ausgabeoptionen" -#: ../linkchecker:268 -#, fuzzy -msgid "Check syntax of CSS URLs with the W3C online validator." -msgstr "" -"Prüfe Syntax von CSS URLs mit cssutils. Falls es nicht installiert ist,\n" -"prüfe mit dem W3C Online Validator." - -#: ../linkchecker:271 +#: ../linkchecker:259 #, fuzzy -msgid "Check syntax of HTML URLs with the W3C online validator." -msgstr "" -"Prüfe Syntax von HTML URLs mit HTML tidy. Falls es nicht installiert ist,\n" -"prüfe mit dem W3C Online Validator." - -#: ../linkchecker:273 msgid "" "Log all URLs, including duplicates.\n" -"Default is to log duplicate URLs only once." +"Default is to log URLs only once." msgstr "" "Logge alle URLs, inklusive Duplikate.\n" "Standard ist, doppelte URLs nur einmal zu loggen." -#: ../linkchecker:276 +#: ../linkchecker:262 #, python-format msgid "" "Print debugging output for the given logger.\n" @@ -2355,7 +2109,7 @@ msgstr "" "\n" "Für exakte Resultate wird Threading während Debugläufen deaktiviert." -#: ../linkchecker:287 +#: ../linkchecker:273 #, python-format msgid "" "Output to a file linkchecker-out.TYPE, $HOME/.linkchecker/blacklist for\n" @@ -2385,15 +2139,15 @@ msgstr "" "Standard ist keine Dateiausgabe. Beachten Sie dass die Option\n" "'-o none' jegliche Ausgaben auf der Konsole verhindert." -#: ../linkchecker:301 +#: ../linkchecker:287 msgid "Do not print check status messages." msgstr "Gebe keine Statusmeldungen aus." -#: ../linkchecker:303 +#: ../linkchecker:289 msgid "Don't log warnings. Default is to log warnings." msgstr "Gebe keine Warnungen aus. 
Standard ist die Ausgabe von Warnungen." -#: ../linkchecker:306 +#: ../linkchecker:292 #, python-format msgid "" "Specify output as %(loggertypes)s. Default output type is text.\n" @@ -2408,7 +2162,7 @@ msgstr "" "Gültige Enkodierungen sind unter http://docs.python.org/lib/standard-" "encodings.html aufgeführt." -#: ../linkchecker:316 +#: ../linkchecker:302 msgid "" "Quiet operation, an alias for '-o none'.\n" "This is only useful with -F." @@ -2416,43 +2170,15 @@ msgstr "" "Keine Ausgabe, ein Alias für '-o none'.\n" "Dies ist nur in Verbindung mit -F nützlich." -#: ../linkchecker:320 -msgid "Scan content of URLs with ClamAV virus scanner." -msgstr "Prüfe Inhalt von URLs mit dem ClamAV Antivirus Programm." - -#: ../linkchecker:322 +#: ../linkchecker:305 msgid "Print tracing information." msgstr "Trace-Information ausgeben." -#: ../linkchecker:325 +#: ../linkchecker:308 msgid "Log all URLs. Default is to log only errors and warnings." msgstr "Logge alle URLs. Standard ist es, nur fehlerhafte URLs zu loggen." -#: ../linkchecker:331 -msgid "" -"Define a regular expression which prints a warning if it matches\n" -"any content of the checked link. This applies only to valid pages,\n" -"so we can get their content.\n" -"\n" -"Use this to check for pages that contain some form of error\n" -"message, for example 'This page has moved' or 'Oracle\n" -"Application error'.\n" -"\n" -"Note that multiple values can be combined in the regular expression,\n" -"for example \"(This page has moved|Oracle Application error)\"." 
-msgstr "" -"Definiere einen regulären Ausdruck, der eine Warnung ausgibt\n" -"falls er den Inhalt einer geprüften URL matcht.\n" -"Dies gilt nur für gültige Seiten deren Inhalt wir bekommen können.\n" -"\n" -"Sie können dies verwenden, um Seiten mit Fehlermeldungen wie z.B.\n" -"'Diese Seite ist umgezogen' oder 'Oracle Applikationsfehler'.\n" -"\n" -"Man beachte, dass mehrere Werte in dem regulären Ausdruck kombiniert\n" -"werden können, zum Beispiel \"(Diese Seite ist umgezogen|Oracle " -"Applikationsfehler)\"." - -#: ../linkchecker:344 +#: ../linkchecker:314 msgid "" "Print a warning if content size info is available and exceeds the\n" "given number of bytes." @@ -2460,15 +2186,7 @@ msgstr "" "Gebe eine Warnung aus wenn die Inhaltsgröße bekannt ist und die\n" "angegebene Anzahl an Bytes übersteigt." -#: ../linkchecker:352 -msgid "" -"Check HTTP anchor references. Default is not to check anchors.\n" -"This option enables logging of the warning 'url-anchor-not-found'." -msgstr "" -"Prüfe HTTP Anker Verweise. Standard ist keine Überprüfung.\n" -"Diese Option aktiviert die Ausgabe der Warnung 'url-anchor-not-found'." - -#: ../linkchecker:356 +#: ../linkchecker:322 msgid "" "Accept and send HTTP cookies according to RFC 2109. Only cookies\n" "which are sent back to the originating server are accepted.\n" @@ -2481,7 +2199,7 @@ msgstr "" "Gesendete und akzeptierte Cookies werden als zusätzlich geloggte\n" "Information aufgeführt." -#: ../linkchecker:362 +#: ../linkchecker:328 msgid "" "Read a file with initial cookie data. The cookie data format is\n" "explained below." @@ -2489,7 +2207,11 @@ msgstr "" "Lese eine Datei mit Cookie-Daten. Das Datenformat\n" "ist weiter unten erklärt." -#: ../linkchecker:366 +#: ../linkchecker:331 +msgid "Check also external URLs." +msgstr "" + +#: ../linkchecker:334 msgid "" "Only check syntax of URLs matching the given regular expression.\n" " This option can be given multiple times." 
@@ -2497,15 +2219,7 @@ msgstr "" "Prüfe lediglich den Syntax der URLs, welche auf den angegebenen regulären " "Ausdruck zutreffen. Diese Option kann mehrmals angegebenen werden." -#: ../linkchecker:370 -msgid "" -"Check but do not recurse into URLs matching the given regular\n" -"expression. This option can be given multiple times." -msgstr "" -"Prüfe URLs die auf den angegebenen regulären Ausdruck zutreffen, aber steige " -"nicht rekursiv in sie hinab. Diese Option kann mehrmals angegeben werden." - -#: ../linkchecker:374 +#: ../linkchecker:341 msgid "" "Specify an NNTP server for 'news:...' links. Default is the\n" "environment variable NNTP_SERVER. If no host is given,\n" @@ -2515,7 +2229,7 @@ msgstr "" "Umgebungsvariable NNTP_SERVER. Falls kein Rechner angegeben ist,\n" "wird lediglich auf korrekte Syntax des Links geprüft." -#: ../linkchecker:380 +#: ../linkchecker:347 msgid "" "Read a password from console and use it for HTTP and FTP authorization.\n" "For FTP the default password is 'anonymous@'. For HTTP there is\n" @@ -2527,7 +2241,7 @@ msgstr "" "Standardpasswort.\n" "Siehe auch -u." -#: ../linkchecker:386 +#: ../linkchecker:353 msgid "" "Pause the given number of seconds between two subsequent connection\n" "requests to the same host. Default is no pause between requests." @@ -2537,7 +2251,7 @@ msgstr "" "Verbindungen zum demselben Rechner. Standard ist keine Pause zwischen " "Verbindungen." -#: ../linkchecker:391 +#: ../linkchecker:358 msgid "" "Check recursively all links up to given depth. A negative depth\n" "will enable infinite recursion. Default depth is infinite." @@ -2546,7 +2260,7 @@ msgstr "" "negative Tiefe erwirkt unendliche Rekursion. Standard Tiefe ist\n" "unendlich." -#: ../linkchecker:396 +#: ../linkchecker:363 #, python-format msgid "" "Set the timeout for connection attempts in seconds. The default\n" @@ -2555,7 +2269,7 @@ msgstr "" "Setze den Timeout für Verbindungen in Sekunden. Der Standard\n" "Timeout ist %d Sekunden." 
-#: ../linkchecker:400 +#: ../linkchecker:367 msgid "" "Try the given username for HTTP and FTP authorization.\n" "For FTP the default username is 'anonymous'. For HTTP there is\n" @@ -2565,7 +2279,7 @@ msgstr "" "Authorisation. Für FTP ist der Standardname 'anonymous'. Für HTTP gibt es " "kein Standardnamen. Siehe auch -p." -#: ../linkchecker:405 +#: ../linkchecker:372 msgid "" "Specify the User-Agent string to send to the HTTP server, for example\n" "\"Mozilla/4.0\". The default is \"LinkChecker/X.Y\" where X.Y is the " @@ -2576,60 +2290,60 @@ msgstr "" "z.B. \"Mozilla/4.0\". Der Standard ist \"LinkChecker/X.Y\", wobei X.Y\n" "die aktuelle Version von LinkChecker ist." -#: ../linkchecker:439 +#: ../linkchecker:406 #, python-format msgid "Invalid debug level %(level)r" msgstr "Ungültiger Debuglevel %(level)r" -#: ../linkchecker:452 +#: ../linkchecker:419 #, python-format msgid "Unreadable config file: %r" msgstr "Nicht lesbare Konfigurationsdatei: %r" -#: ../linkchecker:460 +#: ../linkchecker:427 msgid "Running with python -O disables debugging." msgstr "Die Option python -O verhindert das Debuggen." 
-#: ../linkchecker:483 ../linkchecker:515 +#: ../linkchecker:451 ../linkchecker:483 #, python-format msgid "Unknown logger type %(type)r in %(output)r for option %(option)s" msgstr "Unbekannter Logtyp %(type)r in %(output)r für Option %(option)s" -#: ../linkchecker:487 ../linkchecker:521 +#: ../linkchecker:455 ../linkchecker:489 #, python-format msgid "Unknown encoding %(encoding)r in %(output)r for option %(option)s" msgstr "Unbekanntes Encoding %(encoding)r in %(output)r für Option %(option)s" -#: ../linkchecker:533 +#: ../linkchecker:501 #, python-format msgid "Enter LinkChecker HTTP/FTP password for user %(user)s:" msgstr "Gebe LinkChecker HTTP/FTP Passwort für Benutzer %(user)s ein:" -#: ../linkchecker:536 +#: ../linkchecker:504 msgid "Enter LinkChecker HTTP/FTP password:" msgstr "Gebe LinkChecker HTTP/FTP Passwort ein:" -#: ../linkchecker:543 ../linkchecker:561 +#: ../linkchecker:511 ../linkchecker:529 #, python-format msgid "Illegal argument %(arg)r for option %(option)s" msgstr "Ungültiges Argument %(arg)r für Option %(option)s" -#: ../linkchecker:599 +#: ../linkchecker:560 #, python-format msgid "Enter LinkChecker password for user %(user)s at %(strpattern)s:" msgstr "" "Gebe LinkChecker Passwort für Benutzer %(user)s bei %(strpattern)s ein:" -#: ../linkchecker:620 +#: ../linkchecker:581 #, python-format msgid "Could not parse cookie file: %s" msgstr "Konnte Cookie-Datei nicht parsen: %s" -#: ../linkchecker:635 +#: ../linkchecker:596 msgid "no files or URLs given" msgstr "keine Dateien oder URLs angegeben" -#: ../linkchecker:640 +#: ../linkchecker:601 #, python-format msgid "" "Overwrite profiling file %(file)r?\n" @@ -2638,11 +2352,11 @@ msgstr "" "Profildatei %(file)r überschreiben?\n" "Drücken Sie Strg-C zum Abbrechen, EINGABETASTE zum Fortfahren." -#: ../linkchecker:646 +#: ../linkchecker:607 msgid "Canceled." msgstr "Abgebrochen." 
-#: ../linkchecker:650
+#: ../linkchecker:611
 msgid ""
 "The `cProfile' Python module is not installed, therefore the --profile "
 "option is disabled."
@@ -2805,6 +2519,239 @@ msgstr "Option %s: ungültige Auswahl: %r (wähle von %s)"
 msgid "%s: error: %s\n"
 msgstr "ignore%d: Syntaxfehler %s\n"

+#~ msgid "could not download update information"
+#~ msgstr "konnte Update-Informationen nicht herunterladen"
+
+#~ msgid "Clamav could not be initialized"
+#~ msgstr "Clamav konnte nicht initialisiert werden"
+
+#~ msgid "checked link"
+#~ msgstr "geprüfte Verknüpfung"
+
+#~ msgid "Number of domains: %d"
+#~ msgstr "Anzahl von Domains: %d"
+
+#~ msgid "Downloaded: %s"
+#~ msgstr "Heruntergeladen: %s"
+
+#~ msgid "Robots.txt cache: %s"
+#~ msgstr "Robots.txt-Cache: %s"
+
+#~ msgid "Outside of domain filter, checked only syntax."
+#~ msgstr "Außerhalb des Domain Filters; prüfe lediglich Syntax."
+
+#~ msgid "URL anchor was not found."
+#~ msgstr "URL Anker wurde nicht gefunden."
+
+#~ msgid "The warning regular expression was found in the URL contents."
+#~ msgstr ""
+#~ "Der reguläre Ausdruck für Warnungen wurde in den URL Inhalten gefunden."
+
+#~ msgid "The URL content is a duplicate of another URL."
+#~ msgstr "Der URL-Inhalt ist ein Duplikat einer anderen URL."
+
+#~ msgid "The http: URL checking has been denied."
+#~ msgstr "Die http: URL-Überprüfung wurde verweigert."
+
+#~ msgid "The URL has moved permanently."
+#~ msgstr "Die URL wurde dauerhaft verschoben."
+
+#~ msgid "The URL has been redirected to an URL of a different type."
+#~ msgstr "Die URL wurde zu einem anderen URL-Typ umgeleitet."
+
+#~ msgid "The mailto: address could not be verified."
+#~ msgstr "Die mailto: Adresse konnte nicht überprüft werden."
+
+#~ msgid "No connection to a MX host could be established."
+#~ msgstr "Es konnte keine Verbindung zu einem MX-Rechner hergestellt werden."
+
+#, fuzzy
+#~ msgid "HTML syntax error."
+#~ msgstr "gültige HTML Syntax"
+
+#, fuzzy
+#~ msgid "CSS syntax error."
+#~ msgstr "gültige CSS Syntax" + +#~ msgid "Verified address %(mail)s: %(info)s." +#~ msgstr "Gültige Adresse %(mail)s: %(info)s." + +#~ msgid "Unverified but presumably valid address %(mail)s: %(info)s." +#~ msgstr "" +#~ "Unverifizierte aber wahrscheinlich gültige Adresse %(mail)s: %(info)s." + +#~ msgid "Unverified address: %(info)s." +#~ msgstr "Unverifizierte Adresse: %(info)s." + +#~ msgid "MX mail host %(host)s did not accept connections: %(error)s." +#~ msgstr "" +#~ "Der MX mail host %(host)s akzeptierte keine SMTP Verbindungen: %(error)s." + +#~ msgid "Could not connect, but syntax is correct" +#~ msgstr "Konnte nicht konnektieren, aber die Syntax ist korrekt" + +#~ msgid "Found MX mail host %(host)s" +#~ msgstr "MX Mail host %(host)s gefunden" + +#~ msgid "Bad HTTP response %(line)r" +#~ msgstr "Ungültige HTTP Antwort %(line)r" + +#~ msgid " with %s" +#~ msgstr " mit %s" + +#~ msgid "Content%(size)s is the same as in URLs (%(urls)s)." +#~ msgstr "Inhalt%(size)s ist derselbe wie in den URLs (%(urls)s)." + +#~ msgid "%(num)d URL parsed." +#~ msgid_plural "%(num)d URLs parsed." +#~ msgstr[0] "%(num)d URL geparst." +#~ msgstr[1] "%(num)d URLs geparst." + +#~ msgid "%s URL ignored." +#~ msgstr "%s URL ignoriert." + +#~ msgid "empty or no certificate found" +#~ msgstr "leeres oder kein Zertifikat gefunden" + +#~ msgid "certificate did not include \"subject\" information" +#~ msgstr "Zertifikat besitzt keine \"subject\"-Information" + +#~ msgid "Access denied by robots.txt, skipping content checks." +#~ msgstr "" +#~ "Zugriff verweigert durch robots.txt; lasse das Prüfen des URL Inhalts aus." + +#~ msgid "Enforced proxy `%(name)s'." +#~ msgstr "Erzwungener Proxy `%(name)s'." + +#~ msgid "Missing 'Location' header with enforced proxy status 305, aborting." +#~ msgstr "" +#~ "Fehlender 'Location'-Header mit erzwungenem Proxystatus 305, breche ab." + +#~ msgid "" +#~ "Empty 'Location' header value with enforced proxy status 305, aborting." 
+#~ msgstr "" +#~ "Leerer 'Location'-Header mit erzwungenem Proxystatus 305, breche ab." + +#~ msgid "more than %d redirections, aborting" +#~ msgstr "mehr als %d Weiterleitungen, breche ab" + +#~ msgid "" +#~ "Unsupported HTTP authentication `%(auth)s', only `Basic' authentication " +#~ "is supported." +#~ msgstr "" +#~ "Nicht unterstützte HTTP Authentifizierungsmethode `%(auth)s', nur `Basic' " +#~ "Authentifizierung ist unterstützt." + +#~ msgid "Redirection to url `%(newurl)s' is not allowed." +#~ msgstr "Umleitung zu `%(newurl)s' ist nicht erlaubt." + +#~ msgid "" +#~ "recursive redirection encountered:\n" +#~ " %(urls)s" +#~ msgstr "" +#~ "Rekursive Weiterleitung entdeckt:\n" +#~ " %(urls)s" + +#~ msgid "" +#~ "Redirection to URL `%(newurl)s' with different scheme found; the original " +#~ "URL was `%(url)s'." +#~ msgstr "" +#~ "Weiterleitung zu URL `%(newurl)s' mit anderem Schema gefunden; die " +#~ "Original-URL war `%(url)s'." + +#~ msgid "HTTP 301 (moved permanent) encountered: you should update this link." +#~ msgstr "" +#~ "HTTP 301 (moved permanent) gefunden: sie sollten diesen Link " +#~ "aktualisieren." + +#~ msgid "Sent Cookie: %(cookie)s." +#~ msgstr "Gesendetes Cookie: %(cookie)s." + +#~ msgid "Could not store cookies from headers: %(error)s." +#~ msgstr "Konnte Cookies nicht aus Kopfdaten speichern: %(error)s." + +#~ msgid "Unsupported HTTP url scheme `%(scheme)s'" +#~ msgstr "Nicht unterstütztes HTTP URL Schema `%(scheme)s'" + +#~ msgid "Redirection error" +#~ msgstr "Weiterleitungsfehler" + +#~ msgid "Decompress error %(err)s" +#~ msgstr "Entkomprimierungsfehler %(err)s" + +#~ msgid "Unsupported content encoding `%(encoding)s'." +#~ msgstr "Content-Encoding `%(encoding)s' wird nicht unterstützt." 
+
+#~ msgid "Domains"
+#~ msgstr "Domains"
+
+#~ msgid "%d hit"
+#~ msgid_plural "%d hits"
+#~ msgstr[0] "%d Treffer"
+#~ msgstr[1] "%d Treffer"
+
+#~ msgid "%d miss"
+#~ msgid_plural "%d misses"
+#~ msgstr[0] "%d Verfehlung"
+#~ msgstr[1] "%d Verfehlungen"
+
+#~ msgid "Syntax error in %(arg)r: %(msg)s"
+#~ msgstr "Syntaxfehler in %(arg)r: %(msg)s"
+
+#, fuzzy
+#~ msgid "Check syntax of CSS URLs with the W3C online validator."
+#~ msgstr ""
+#~ "Prüfe Syntax von CSS URLs mit cssutils. Falls es nicht installiert ist,\n"
+#~ "prüfe mit dem W3C Online Validator."
+
+#, fuzzy
+#~ msgid "Check syntax of HTML URLs with the W3C online validator."
+#~ msgstr ""
+#~ "Prüfe Syntax von HTML URLs mit HTML tidy. Falls es nicht installiert "
+#~ "ist,\n"
+#~ "prüfe mit dem W3C Online Validator."
+
+#~ msgid "Scan content of URLs with ClamAV virus scanner."
+#~ msgstr "Prüfe Inhalt von URLs mit dem ClamAV Antivirus Programm."
+
+#~ msgid ""
+#~ "Define a regular expression which prints a warning if it matches\n"
+#~ "any content of the checked link. This applies only to valid pages,\n"
+#~ "so we can get their content.\n"
+#~ "\n"
+#~ "Use this to check for pages that contain some form of error\n"
+#~ "message, for example 'This page has moved' or 'Oracle\n"
+#~ "Application error'.\n"
+#~ "\n"
+#~ "Note that multiple values can be combined in the regular expression,\n"
+#~ "for example \"(This page has moved|Oracle Application error)\"."
+#~ msgstr ""
+#~ "Definiere einen regulären Ausdruck, der eine Warnung ausgibt\n"
+#~ "falls er den Inhalt einer geprüften URL matcht.\n"
+#~ "Dies gilt nur für gültige Seiten deren Inhalt wir bekommen können.\n"
+#~ "\n"
+#~ "Sie können dies verwenden, um Seiten mit Fehlermeldungen wie z.B.\n"
+#~ "'Diese Seite ist umgezogen' oder 'Oracle Applikationsfehler'.\n"
+#~ "\n"
+#~ "Man beachte, dass mehrere Werte in dem regulären Ausdruck kombiniert\n"
+#~ "werden können, zum Beispiel \"(Diese Seite ist umgezogen|Oracle "
+#~ "Applikationsfehler)\"."
+ +#~ msgid "" +#~ "Check HTTP anchor references. Default is not to check anchors.\n" +#~ "This option enables logging of the warning 'url-anchor-not-found'." +#~ msgstr "" +#~ "Prüfe HTTP Anker Verweise. Standard ist keine Überprüfung.\n" +#~ "Diese Option aktiviert die Ausgabe der Warnung 'url-anchor-not-found'." + +#~ msgid "" +#~ "Check but do not recurse into URLs matching the given regular\n" +#~ "expression. This option can be given multiple times." +#~ msgstr "" +#~ "Prüfe URLs die auf den angegebenen regulären Ausdruck zutreffen, aber " +#~ "steige nicht rekursiv in sie hinab. Diese Option kann mehrmals angegeben " +#~ "werden." + #~ msgid "USAGE\tlinkchecker [options] [file-or-url]..." #~ msgstr "BENUTZUNG\tlinkchecker [Optionen] [datei-oder-url]..." @@ -3170,9 +3117,6 @@ msgstr "ignore%d: Syntaxfehler %s\n" #~ msgid "Could not split the mail address" #~ msgstr "konnte die Mail Adresse nicht splitten" -#~ msgid "Invalid mail syntax" -#~ msgstr "Ungültige Mail Syntax" - #~ msgid "Stopping." #~ msgstr "Halte an." diff --git a/po/linkchecker.pot b/po/linkchecker.pot index 5a20278e3..04a5f4094 100644 --- a/po/linkchecker.pot +++ b/po/linkchecker.pot @@ -1,5 +1,5 @@ # SOME DESCRIPTIVE TITLE. -# Copyright (C) 2013-2014 Bastian Kleineidam +# Copyright (C) 2014 Bastian Kleineidam # This file is distributed under the same license as the PACKAGE package. # FIRST AUTHOR , YEAR. 
# @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: bastian.kleineidam@web.de\n" -"POT-Creation-Date: 2013-02-27 10:40+0100\n" +"POT-Creation-Date: 2014-02-25 16:10+0100\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -18,11 +18,11 @@ msgstr "" "Content-Transfer-Encoding: 8bit\n" "Plural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n" -#: ../linkcheck/director/aggregator.py:76 +#: ../linkcheck/director/aggregator.py:86 msgid "These URLs are still active:" msgstr "" -#: ../linkcheck/director/aggregator.py:83 +#: ../linkcheck/director/aggregator.py:93 #, python-format msgid "" "%(num)d URLs are still active. After a timeout of %(timeout)s the active URLs " @@ -87,7 +87,7 @@ msgstr "" msgid "System info:" msgstr "" -#: ../linkcheck/director/console.py:145 ../linkchecker:441 +#: ../linkcheck/director/console.py:145 ../linkchecker:408 #, python-format msgid "Python %(version)s on %(platform)s" msgstr "" @@ -133,10 +133,6 @@ msgstr "" msgid "user abort; force shutdown" msgstr "" -#: ../linkcheck/updater.py:63 -msgid "could not download update information" -msgstr "" - #: ../linkcheck/configuration/confparse.py:62 #, python-format msgid "Error parsing configuration: %s" @@ -157,240 +153,309 @@ msgstr "" msgid "invalid value for %s: %d must not be greater than %d" msgstr "" -#: ../linkcheck/configuration/confparse.py:184 +#: ../linkcheck/configuration/confparse.py:185 #, python-format msgid "missing auth part in entry %(val)r" msgstr "" -#: ../linkcheck/configuration/confparse.py:190 +#: ../linkcheck/configuration/confparse.py:191 #, python-format msgid "invalid login URL `%s'. Only HTTP and HTTPS URLs are supported." msgstr "" -#: ../linkcheck/configuration/confparse.py:216 +#: ../linkcheck/configuration/confparse.py:217 #, python-format msgid "For example execute 'chmod go-rw %s'." 
msgstr "" -#: ../linkcheck/configuration/confparse.py:218 +#: ../linkcheck/configuration/confparse.py:219 msgid "" "See http://support.microsoft.com/kb/308419 for more info on setting file " "permissions." msgstr "" -#: ../linkcheck/configuration/__init__.py:321 +#: ../linkcheck/configuration/__init__.py:264 #, python-format msgid "Configuration file %r does not exist." msgstr "" -#: ../linkcheck/configuration/__init__.py:323 +#: ../linkcheck/configuration/__init__.py:266 #, python-format msgid "Configuration file %r is not readable." msgstr "" -#: ../linkcheck/configuration/__init__.py:334 +#: ../linkcheck/configuration/__init__.py:277 msgid "missing user or URL pattern in authentication data." msgstr "" -#: ../linkcheck/configuration/__init__.py:382 +#: ../linkcheck/configuration/__init__.py:319 msgid "activating text logger output." msgstr "" -#: ../linkcheck/configuration/__init__.py:392 -msgid "Clamav could not be initialized" -msgstr "" - -#: ../linkcheck/configuration/__init__.py:398 +#: ../linkcheck/configuration/__init__.py:326 msgid "activating sendcookies." msgstr "" -#: ../linkcheck/configuration/__init__.py:401 +#: ../linkcheck/configuration/__init__.py:329 msgid "activating storecookies." msgstr "" -#: ../linkcheck/configuration/__init__.py:410 +#: ../linkcheck/configuration/__init__.py:338 msgid "no CGI password fieldname given for login URL." msgstr "" -#: ../linkcheck/configuration/__init__.py:414 +#: ../linkcheck/configuration/__init__.py:342 msgid "no CGI user fieldname given for login URL." msgstr "" -#: ../linkcheck/configuration/__init__.py:418 +#: ../linkcheck/configuration/__init__.py:346 msgid "no user/password authentication data found for login URL." msgstr "" -#: ../linkcheck/configuration/__init__.py:421 +#: ../linkcheck/configuration/__init__.py:349 msgid "login URL is not a HTTP URL." msgstr "" -#: ../linkcheck/configuration/__init__.py:425 +#: ../linkcheck/configuration/__init__.py:353 msgid "login URL is incomplete." 
msgstr "" -#: ../linkcheck/configuration/__init__.py:429 +#: ../linkcheck/configuration/__init__.py:357 #, python-format msgid "disabling login URL %(url)s." msgstr "" -#: ../linkcheck/configuration/__init__.py:476 +#: ../linkcheck/configuration/__init__.py:391 +#, python-format +msgid "could not create plugin directory %(dirname)r: %(errmsg)r" +msgstr "" + +#: ../linkcheck/configuration/__init__.py:429 #, python-format msgid "could not copy initial configuration file %(src)r to %(dst)r: %(errmsg)r" msgstr "" -#: ../linkcheck/logger/html.py:95 ../linkcheck/logger/text.py:89 +#: ../linkcheck/plugins/regexcheck.py:61 #, python-format -msgid "Start checking at %s" +msgid "Found %(match)r at line %(line)d in link contents." +msgstr "" + +#: ../linkcheck/plugins/countryinfo.py:38 +#, python-format +msgid "URL is located in %(country)s." +msgstr "" + +#: ../linkcheck/plugins/sslcertcheck.py:62 +msgid "certificate did not include \"notAfter\" information" +msgstr "" + +#: ../linkcheck/plugins/sslcertcheck.py:73 +#, python-format +msgid "invalid certficate \"notAfter\" value %r" +msgstr "" + +#: ../linkcheck/plugins/sslcertcheck.py:81 +#, python-format +msgid "certficate is expired on %s" +msgstr "" + +#: ../linkcheck/plugins/sslcertcheck.py:85 +#, python-format +msgid "certificate is only %s valid" +msgstr "" + +#: ../linkcheck/plugins/sslcertcheck.py:91 +#, python-format +msgid "SSL warning: %(msg)s. Cipher %(cipher)s, %(protocol)s." +msgstr "" + +#: ../linkcheck/plugins/anchorcheck.py:65 +#, python-format +msgid "Anchor `%(name)s' not found." +msgstr "" + +#: ../linkcheck/plugins/anchorcheck.py:66 +#, python-format +msgid "Available anchors: %(anchors)s." 
+msgstr "" + +#: ../linkcheck/plugins/viruscheck.py:97 +msgid "clamd is not ready for stream scanning" +msgstr "" + +#: ../linkcheck/plugins/viruscheck.py:156 +msgid "ScannerDaemonOutputFormat must be disabled" +msgstr "" + +#: ../linkcheck/plugins/viruscheck.py:158 +msgid "only one of TCPSocket and LocalSocket must be enabled" +msgstr "" + +#: ../linkcheck/plugins/viruscheck.py:187 +msgid "one of TCPSocket or LocalSocket must be enabled" +msgstr "" + +#: ../linkcheck/plugins/viruscheck.py:222 +msgid "Could not connect to ClamAV daemon." +msgstr "" + +#: ../linkcheck/plugins/syntaxchecks.py:66 +msgid "valid HTML syntax" +msgstr "" + +#: ../linkcheck/plugins/syntaxchecks.py:74 +#, python-format +msgid "HTML W3C validation caused error: %(msg)s " +msgstr "" + +#: ../linkcheck/plugins/syntaxchecks.py:113 +msgid "valid CSS syntax" +msgstr "" + +#: ../linkcheck/plugins/syntaxchecks.py:121 +#, python-format +msgid "CSS W3C validation caused error: %(msg)s " msgstr "" -#: ../linkcheck/logger/html.py:142 -msgid "checked link" +#: ../linkcheck/plugins/syntaxchecks.py:130 +#, python-format +msgid "%(w3type)s validation error at line %(line)s col %(column)s: %(msg)s" +msgstr "" + +#: ../linkcheck/logger/html.py:121 ../linkcheck/logger/text.py:106 +#, python-format +msgid "Start checking at %s" msgstr "" -#: ../linkcheck/logger/html.py:174 ../linkcheck/logger/text.py:142 +#: ../linkcheck/logger/html.py:190 ../linkcheck/logger/text.py:159 #: ../linkcheck/gui/urlmodel.py:78 #, python-format msgid ", line %d" msgstr "" -#: ../linkcheck/logger/html.py:175 ../linkcheck/logger/text.py:143 +#: ../linkcheck/logger/html.py:191 ../linkcheck/logger/text.py:160 #: ../linkcheck/gui/urlmodel.py:79 #, python-format msgid ", col %d" msgstr "" -#: ../linkcheck/logger/html.py:199 ../linkcheck/logger/html.py:211 -#: ../linkcheck/logger/text.py:159 ../linkcheck/logger/text.py:171 +#: ../linkcheck/logger/html.py:215 ../linkcheck/logger/html.py:227 +#: ../linkcheck/logger/text.py:176 
../linkcheck/logger/text.py:188 #: ../linkcheck/gui/properties.py:36 ../linkcheck/gui/properties.py:38 #, python-format msgid "%.3f seconds" msgstr "" -#: ../linkcheck/logger/html.py:241 ../linkcheck/logger/text.py:195 +#: ../linkcheck/logger/html.py:256 ../linkcheck/logger/text.py:212 msgid "Valid" msgstr "" -#: ../linkcheck/logger/html.py:246 ../linkcheck/logger/text.py:198 +#: ../linkcheck/logger/html.py:261 ../linkcheck/logger/text.py:215 msgid "Error" msgstr "" -#: ../linkcheck/logger/html.py:253 +#: ../linkcheck/logger/html.py:268 msgid "Statistics" msgstr "" -#: ../linkcheck/logger/html.py:255 ../linkcheck/logger/text.py:251 -#, python-format -msgid "Number of domains: %d" -msgstr "" - -#: ../linkcheck/logger/html.py:259 ../linkcheck/logger/text.py:254 +#: ../linkcheck/logger/html.py:271 ../linkcheck/logger/text.py:265 #, python-format msgid "" "Content types: %(image)d image, %(text)d text, %(video)d video, %(audio)d " "audio, %(application)d application, %(mail)d mail and %(other)d other." msgstr "" -#: ../linkcheck/logger/html.py:263 ../linkcheck/logger/text.py:257 +#: ../linkcheck/logger/html.py:275 ../linkcheck/logger/text.py:268 #, python-format msgid "URL lengths: min=%(min)d, max=%(max)d, avg=%(avg)d." msgstr "" -#: ../linkcheck/logger/html.py:268 ../linkcheck/logger/text.py:262 +#: ../linkcheck/logger/html.py:280 ../linkcheck/logger/text.py:273 msgid "No statistics available since no URLs were checked." msgstr "" -#: ../linkcheck/logger/html.py:274 ../linkcheck/logger/text.py:206 +#: ../linkcheck/logger/html.py:286 ../linkcheck/logger/text.py:223 msgid "That's it." msgstr "" -#: ../linkcheck/logger/html.py:276 ../linkcheck/logger/text.py:207 +#: ../linkcheck/logger/html.py:288 ../linkcheck/logger/text.py:224 #, python-format msgid "%d link checked." msgid_plural "%d links checked." 
msgstr[0] "" msgstr[1] "" -#: ../linkcheck/logger/html.py:279 ../linkcheck/logger/text.py:210 +#: ../linkcheck/logger/html.py:291 ../linkcheck/logger/text.py:227 #, python-format msgid "%d warning found" msgid_plural "%d warnings found" msgstr[0] "" msgstr[1] "" -#: ../linkcheck/logger/html.py:282 ../linkcheck/logger/text.py:218 +#: ../linkcheck/logger/html.py:294 ../linkcheck/logger/text.py:235 #, python-format msgid " (%d ignored or duplicates not printed)" msgstr "" -#: ../linkcheck/logger/html.py:285 ../linkcheck/logger/text.py:221 +#: ../linkcheck/logger/html.py:297 ../linkcheck/logger/text.py:238 #, python-format msgid "%d error found" msgid_plural "%d errors found" msgstr[0] "" msgstr[1] "" -#: ../linkcheck/logger/html.py:288 ../linkcheck/logger/text.py:229 +#: ../linkcheck/logger/html.py:300 ../linkcheck/logger/text.py:246 #, python-format msgid " (%d duplicates not printed)" msgstr "" -#: ../linkcheck/logger/html.py:294 ../linkcheck/logger/text.py:234 +#: ../linkcheck/logger/html.py:306 ../linkcheck/logger/text.py:251 #, python-format msgid "There was %(num)d internal error." msgid_plural "There were %(num)d internal errors." 
msgstr[0] "" msgstr[1] "" -#: ../linkcheck/logger/html.py:299 ../linkcheck/logger/text.py:238 -#: ../linkcheck/logger/__init__.py:386 +#: ../linkcheck/logger/html.py:311 ../linkcheck/logger/text.py:255 +#: ../linkcheck/logger/__init__.py:390 #, python-format msgid "Stopped checking at %(time)s (%(duration)s)" msgstr "" -#: ../linkcheck/logger/html.py:304 +#: ../linkcheck/logger/html.py:316 #, python-format msgid "Get the newest version at %s" msgstr "" -#: ../linkcheck/logger/html.py:307 +#: ../linkcheck/logger/html.py:319 #, python-format msgid "Write comments and bugs to %s" msgstr "" -#: ../linkcheck/logger/html.py:310 +#: ../linkcheck/logger/html.py:322 #, python-format msgid "Support this project at %s" msgstr "" -#: ../linkcheck/logger/text.py:81 ../linkcheck/logger/__init__.py:374 +#: ../linkcheck/logger/text.py:98 ../linkcheck/logger/__init__.py:378 #, python-format msgid "Get the newest version at %(url)s" msgstr "" -#: ../linkcheck/logger/text.py:83 ../linkcheck/logger/__init__.py:376 +#: ../linkcheck/logger/text.py:100 ../linkcheck/logger/__init__.py:380 #, python-format msgid "Write comments and bugs to %(url)s" msgstr "" -#: ../linkcheck/logger/text.py:85 ../linkcheck/logger/__init__.py:378 +#: ../linkcheck/logger/text.py:102 ../linkcheck/logger/__init__.py:382 #, python-format msgid "Support this project at %(url)s" msgstr "" -#: ../linkcheck/logger/text.py:245 +#: ../linkcheck/logger/text.py:262 msgid "Statistics:" msgstr "" -#: ../linkcheck/logger/text.py:247 -#, python-format -msgid "Downloaded: %s" -msgstr "" - -#: ../linkcheck/logger/text.py:249 -#, python-format -msgid "Robots.txt cache: %s" -msgstr "" - #: ../linkcheck/logger/__init__.py:31 msgid "Real URL" msgstr "" @@ -400,22 +465,22 @@ msgid "Cache key" msgstr "" #: ../linkcheck/logger/__init__.py:33 -#: ../linkcheck/gui/linkchecker_ui_main.py:816 ../linkcheck/gui/urlmodel.py:22 +#: ../linkcheck/gui/linkchecker_ui_main.py:793 ../linkcheck/gui/urlmodel.py:22 msgid "Result" msgstr "" #: 
../linkcheck/logger/__init__.py:34 -#: ../linkcheck/gui/linkchecker_ui_main.py:810 +#: ../linkcheck/gui/linkchecker_ui_main.py:787 msgid "Base" msgstr "" #: ../linkcheck/logger/__init__.py:35 -#: ../linkcheck/gui/linkchecker_ui_main.py:808 ../linkcheck/gui/urlmodel.py:22 +#: ../linkcheck/gui/linkchecker_ui_main.py:785 ../linkcheck/gui/urlmodel.py:22 msgid "Name" msgstr "" #: ../linkcheck/logger/__init__.py:36 -#: ../linkcheck/gui/linkchecker_ui_main.py:809 +#: ../linkcheck/gui/linkchecker_ui_main.py:786 msgid "Parent URL" msgstr "" @@ -424,32 +489,32 @@ msgid "Extern" msgstr "" #: ../linkcheck/logger/__init__.py:38 -#: ../linkcheck/gui/linkchecker_ui_main.py:814 +#: ../linkcheck/gui/linkchecker_ui_main.py:791 msgid "Info" msgstr "" #: ../linkcheck/logger/__init__.py:39 -#: ../linkcheck/gui/linkchecker_ui_main.py:815 +#: ../linkcheck/gui/linkchecker_ui_main.py:792 msgid "Warning" msgstr "" #: ../linkcheck/logger/__init__.py:40 -#: ../linkcheck/gui/linkchecker_ui_main.py:812 +#: ../linkcheck/gui/linkchecker_ui_main.py:789 msgid "D/L time" msgstr "" #: ../linkcheck/logger/__init__.py:41 -#: ../linkcheck/gui/linkchecker_ui_main.py:813 +#: ../linkcheck/gui/linkchecker_ui_main.py:790 msgid "Size" msgstr "" #: ../linkcheck/logger/__init__.py:42 -#: ../linkcheck/gui/linkchecker_ui_main.py:811 +#: ../linkcheck/gui/linkchecker_ui_main.py:788 msgid "Check time" msgstr "" #: ../linkcheck/logger/__init__.py:43 -#: ../linkcheck/gui/linkchecker_ui_main.py:807 ../linkcheck/gui/urlmodel.py:22 +#: ../linkcheck/gui/linkchecker_ui_main.py:784 ../linkcheck/gui/urlmodel.py:22 msgid "URL" msgstr "" @@ -458,463 +523,321 @@ msgid "Level" msgstr "" #: ../linkcheck/logger/__init__.py:45 -#: ../linkcheck/gui/linkchecker_ui_main.py:817 +#: ../linkcheck/gui/linkchecker_ui_main.py:794 msgid "Modified" msgstr "" -#: ../linkcheck/logger/__init__.py:272 +#: ../linkcheck/logger/__init__.py:276 #, python-format msgid "Happy birthday for LinkChecker, I'm %d years old today!" 
msgstr "" -#: ../linkcheck/logger/__init__.py:371 +#: ../linkcheck/logger/__init__.py:375 #, python-format msgid "created by %(app)s at %(time)s" msgstr "" -#: ../linkcheck/clamav.py:56 -msgid "clamd is not ready for stream scanning" -msgstr "" - -#: ../linkcheck/clamav.py:126 -msgid "ScannerDaemonOutputFormat must be disabled" -msgstr "" - -#: ../linkcheck/clamav.py:128 -msgid "only one of TCPSocket and LocalSocket must be enabled" -msgstr "" - -#: ../linkcheck/clamav.py:157 -msgid "one of TCPSocket or LocalSocket must be enabled" -msgstr "" - -#: ../linkcheck/clamav.py:193 -msgid "Could not connect to ClamAV daemon." -msgstr "" - #: ../linkcheck/checker/telneturl.py:53 msgid "Host is empty" msgstr "" #: ../linkcheck/checker/unknownurl.py:144 -msgid "Outside of domain filter, checked only syntax." -msgstr "" - -#: ../linkcheck/checker/unknownurl.py:146 #, python-format msgid "%(scheme)s URL ignored." msgstr "" -#: ../linkcheck/checker/unknownurl.py:150 +#: ../linkcheck/checker/unknownurl.py:148 msgid "URL is unrecognized or has invalid syntax" msgstr "" -#: ../linkcheck/checker/const.py:117 +#: ../linkcheck/checker/const.py:106 msgid "The effective URL is different from the original." msgstr "" -#: ../linkcheck/checker/const.py:119 +#: ../linkcheck/checker/const.py:108 msgid "Could not get the content of the URL." msgstr "" -#: ../linkcheck/checker/const.py:120 -msgid "URL anchor was not found." -msgstr "" - -#: ../linkcheck/checker/const.py:122 -msgid "The warning regular expression was found in the URL contents." -msgstr "" - -#: ../linkcheck/checker/const.py:123 -msgid "The URL content is a duplicate of another URL." -msgstr "" - -#: ../linkcheck/checker/const.py:124 +#: ../linkcheck/checker/const.py:109 msgid "The URL content size is too large." msgstr "" -#: ../linkcheck/checker/const.py:125 +#: ../linkcheck/checker/const.py:110 msgid "The URL content size is zero." 
msgstr "" -#: ../linkcheck/checker/const.py:126 +#: ../linkcheck/checker/const.py:111 msgid "The URL content size and download size are unequal." msgstr "" -#: ../linkcheck/checker/const.py:127 +#: ../linkcheck/checker/const.py:112 msgid "The URL is longer than the recommended size." msgstr "" -#: ../linkcheck/checker/const.py:128 +#: ../linkcheck/checker/const.py:113 msgid "The URL contains leading or trailing whitespace." msgstr "" -#: ../linkcheck/checker/const.py:129 +#: ../linkcheck/checker/const.py:114 msgid "The file: URL is missing a trailing slash." msgstr "" -#: ../linkcheck/checker/const.py:131 +#: ../linkcheck/checker/const.py:116 msgid "The file: path is not the same as the system specific path." msgstr "" -#: ../linkcheck/checker/const.py:132 +#: ../linkcheck/checker/const.py:117 msgid "The ftp: URL is missing a trailing slash." msgstr "" -#: ../linkcheck/checker/const.py:133 -msgid "The http: URL checking has been denied." -msgstr "" - -#: ../linkcheck/checker/const.py:134 -msgid "The URL has moved permanently." -msgstr "" - -#: ../linkcheck/checker/const.py:136 -msgid "The URL has been redirected to an URL of a different type." -msgstr "" - -#: ../linkcheck/checker/const.py:137 +#: ../linkcheck/checker/const.py:118 msgid "The URL had no content." msgstr "" -#: ../linkcheck/checker/const.py:139 +#: ../linkcheck/checker/const.py:120 msgid "An error occurred while storing a cookie." msgstr "" -#: ../linkcheck/checker/const.py:141 +#: ../linkcheck/checker/const.py:122 msgid "An error occurred while decompressing the URL content." msgstr "" -#: ../linkcheck/checker/const.py:143 +#: ../linkcheck/checker/const.py:124 msgid "The URL content is encoded with an unknown encoding." msgstr "" -#: ../linkcheck/checker/const.py:145 +#: ../linkcheck/checker/const.py:126 msgid "Unsupported HTTP authentication method." 
msgstr "" -#: ../linkcheck/checker/const.py:147 ../linkcheck/checker/httpurl.py:243 +#: ../linkcheck/checker/const.py:128 msgid "Unauthorized access without HTTP authentication." msgstr "" -#: ../linkcheck/checker/const.py:148 +#: ../linkcheck/checker/const.py:129 msgid "The SSL certificate is invalid or expired." msgstr "" -#: ../linkcheck/checker/const.py:149 +#: ../linkcheck/checker/const.py:130 msgid "The URL has been ignored." msgstr "" -#: ../linkcheck/checker/const.py:150 +#: ../linkcheck/checker/const.py:131 msgid "The mail MX host could not be found." msgstr "" -#: ../linkcheck/checker/const.py:152 -msgid "The mailto: address could not be verified." -msgstr "" - -#: ../linkcheck/checker/const.py:154 -msgid "No connection to a MX host could be established." -msgstr "" - -#: ../linkcheck/checker/const.py:155 +#: ../linkcheck/checker/const.py:132 msgid "No NNTP server was found." msgstr "" -#: ../linkcheck/checker/const.py:156 +#: ../linkcheck/checker/const.py:133 msgid "The NNTP newsgroup could not be found." msgstr "" -#: ../linkcheck/checker/const.py:157 +#: ../linkcheck/checker/const.py:134 msgid "The IP is obfuscated." msgstr "" -#: ../linkcheck/checker/const.py:158 -msgid "HTML syntax error." -msgstr "" - -#: ../linkcheck/checker/const.py:159 -msgid "CSS syntax error." -msgstr "" - -#: ../linkcheck/checker/mailtourl.py:87 +#: ../linkcheck/checker/mailtourl.py:84 #, python-format msgid "No mail addresses found in `%(url)s'." msgstr "" -#: ../linkcheck/checker/mailtourl.py:126 +#: ../linkcheck/checker/mailtourl.py:123 #, python-format msgid "Error parsing CGI values: %s" msgstr "" -#: ../linkcheck/checker/mailtourl.py:149 +#: ../linkcheck/checker/mailtourl.py:146 #, python-format msgid "" "Mail address `%(addr)s' too long. Allowed 256 chars, was %(length)d chars." msgstr "" -#: ../linkcheck/checker/mailtourl.py:153 +#: ../linkcheck/checker/mailtourl.py:150 #, python-format msgid "Missing `@' in mail address `%(addr)s'." 
msgstr "" -#: ../linkcheck/checker/mailtourl.py:159 +#: ../linkcheck/checker/mailtourl.py:156 #, python-format msgid "Missing local part of mail address `%(addr)s'." msgstr "" -#: ../linkcheck/checker/mailtourl.py:163 +#: ../linkcheck/checker/mailtourl.py:160 #, python-format msgid "Missing domain part of mail address `%(addr)s'." msgstr "" -#: ../linkcheck/checker/mailtourl.py:167 +#: ../linkcheck/checker/mailtourl.py:164 #, python-format msgid "" "Local part of mail address `%(addr)s' too long. Allowed 64 chars, was " "%(length)d chars." msgstr "" -#: ../linkcheck/checker/mailtourl.py:171 +#: ../linkcheck/checker/mailtourl.py:168 #, python-format msgid "" "Domain part of mail address `%(addr)s' too long. Allowed 255 chars, was " "%(length)d chars." msgstr "" -#: ../linkcheck/checker/mailtourl.py:180 +#: ../linkcheck/checker/mailtourl.py:177 #, python-format msgid "Unquoted double quote or backslash in mail address `%(addr)s'." msgstr "" -#: ../linkcheck/checker/mailtourl.py:185 +#: ../linkcheck/checker/mailtourl.py:182 #, python-format msgid "Local part of mail address `%(addr)s' may not start with a dot." msgstr "" -#: ../linkcheck/checker/mailtourl.py:189 +#: ../linkcheck/checker/mailtourl.py:186 #, python-format msgid "Local part of mail address `%(addr)s' may not end with a dot." msgstr "" -#: ../linkcheck/checker/mailtourl.py:193 +#: ../linkcheck/checker/mailtourl.py:190 #, python-format msgid "Local part of mail address `%(addr)s' may not contain two dots." msgstr "" -#: ../linkcheck/checker/mailtourl.py:198 +#: ../linkcheck/checker/mailtourl.py:195 #, python-format msgid "" "Local part of mail address `%(addr)s' contains unquoted character `%(char)s." msgstr "" -#: ../linkcheck/checker/mailtourl.py:210 +#: ../linkcheck/checker/mailtourl.py:207 #, python-format msgid "Domain part of mail address `%(addr)s' has invalid IP." 
msgstr "" -#: ../linkcheck/checker/mailtourl.py:216 +#: ../linkcheck/checker/mailtourl.py:213 #, python-format msgid "Invalid domain part of mail address `%(addr)s'." msgstr "" -#: ../linkcheck/checker/mailtourl.py:220 +#: ../linkcheck/checker/mailtourl.py:217 #, python-format msgid "Invalid top level domain part of mail address `%(addr)s'." msgstr "" -#: ../linkcheck/checker/mailtourl.py:258 +#: ../linkcheck/checker/mailtourl.py:255 #, python-format msgid "No MX mail host for %(domain)s found." msgstr "" -#: ../linkcheck/checker/mailtourl.py:266 +#: ../linkcheck/checker/mailtourl.py:263 #, python-format msgid "No host for %(domain)s found." msgstr "" -#: ../linkcheck/checker/mailtourl.py:280 +#: ../linkcheck/checker/mailtourl.py:277 #, python-format msgid "Got invalid DNS answer %(answer)s for %(domain)s." msgstr "" -#: ../linkcheck/checker/mailtourl.py:324 -#, python-format -msgid "Verified address %(mail)s: %(info)s." +#: ../linkcheck/checker/mailtourl.py:289 +msgid "Valid mail address syntax" msgstr "" -#: ../linkcheck/checker/mailtourl.py:328 -#, python-format -msgid "Unverified but presumably valid address %(mail)s: %(info)s." -msgstr "" - -#: ../linkcheck/checker/mailtourl.py:331 -#, python-format -msgid "Unverified address: %(info)s." -msgstr "" - -#: ../linkcheck/checker/mailtourl.py:335 -#, python-format -msgid "MX mail host %(host)s did not accept connections: %(error)s." -msgstr "" - -#: ../linkcheck/checker/mailtourl.py:341 -msgid "Could not connect, but syntax is correct" -msgstr "" - -#: ../linkcheck/checker/mailtourl.py:344 -#, python-format -msgid "Found MX mail host %(host)s" -msgstr "" - -#: ../linkcheck/checker/urlbase.py:70 +#: ../linkcheck/checker/urlbase.py:67 #, python-format msgid "URL has unparsable domain name: %(name)s" msgstr "" -#: ../linkcheck/checker/urlbase.py:161 +#: ../linkcheck/checker/urlbase.py:147 #, python-format msgid "Leading or trailing whitespace in URL `%(url)s'." 
msgstr "" -#: ../linkcheck/checker/urlbase.py:388 +#: ../linkcheck/checker/urlbase.py:340 msgid "URL is missing" msgstr "" -#: ../linkcheck/checker/urlbase.py:391 +#: ../linkcheck/checker/urlbase.py:343 msgid "URL is empty" msgstr "" -#: ../linkcheck/checker/urlbase.py:405 +#: ../linkcheck/checker/urlbase.py:357 #, python-format msgid "Effective URL %(url)r." msgstr "" -#: ../linkcheck/checker/urlbase.py:411 +#: ../linkcheck/checker/urlbase.py:363 #, python-format msgid "URL length %(len)d is longer than maximum of %(max)d." msgstr "" -#: ../linkcheck/checker/urlbase.py:414 +#: ../linkcheck/checker/urlbase.py:366 #, python-format msgid "URL length %(len)d is longer than %(warn)d." msgstr "" -#: ../linkcheck/checker/urlbase.py:466 +#: ../linkcheck/checker/urlbase.py:411 #, python-format msgid "URL host %(host)r has invalid port" msgstr "" -#: ../linkcheck/checker/urlbase.py:470 +#: ../linkcheck/checker/urlbase.py:418 msgid "URL has empty hostname" msgstr "" -#: ../linkcheck/checker/urlbase.py:481 +#: ../linkcheck/checker/urlbase.py:440 #, python-format msgid "URL %(url)s has obfuscated IP address %(ip)s" msgstr "" -#: ../linkcheck/checker/urlbase.py:508 -#, python-format -msgid "URL is located in %(country)s." -msgstr "" - -#: ../linkcheck/checker/urlbase.py:533 +#: ../linkcheck/checker/urlbase.py:478 msgid "Hostname not found" msgstr "" -#: ../linkcheck/checker/urlbase.py:536 ../linkcheck/checker/urlbase.py:554 -#, python-format -msgid "Bad HTTP response %(line)r" -msgstr "" - -#: ../linkcheck/checker/urlbase.py:539 +#: ../linkcheck/checker/urlbase.py:481 #, python-format msgid "Bad hostname %(host)r: %(msg)s" msgstr "" -#: ../linkcheck/checker/urlbase.py:555 +#: ../linkcheck/checker/urlbase.py:494 #, python-format msgid "could not get content: %(msg)s" msgstr "" -#: ../linkcheck/checker/urlbase.py:700 -#, python-format -msgid "Anchor `%(name)s' not found." 
+#: ../linkcheck/checker/urlbase.py:625 +msgid "The URL is outside of the domain filter, checked only syntax." msgstr "" -#: ../linkcheck/checker/urlbase.py:701 -#, python-format -msgid "Available anchors: %(anchors)s." +#: ../linkcheck/checker/urlbase.py:628 +msgid "filtered" msgstr "" -#: ../linkcheck/checker/urlbase.py:760 ../linkcheck/checker/urlbase.py:763 -#: ../linkcheck/checker/fileurl.py:207 ../linkcheck/checker/httpurl.py:674 -#: ../linkcheck/checker/httpurl.py:682 +#: ../linkcheck/checker/urlbase.py:646 ../linkcheck/checker/urlbase.py:660 +#: ../linkcheck/checker/fileurl.py:208 ../linkcheck/checker/httpurl.py:227 msgid "File size too large" msgstr "" -#: ../linkcheck/checker/urlbase.py:780 -#, python-format -msgid " with %s" -msgstr "" - -#: ../linkcheck/checker/urlbase.py:782 -#, python-format -msgid "Content%(size)s is the same as in URLs (%(urls)s)." -msgstr "" - -#: ../linkcheck/checker/urlbase.py:822 -#, python-format -msgid "Found %(match)r at line %(line)d in link contents." -msgstr "" - -#: ../linkcheck/checker/urlbase.py:838 +#: ../linkcheck/checker/urlbase.py:679 msgid "Content size is zero." msgstr "" -#: ../linkcheck/checker/urlbase.py:844 +#: ../linkcheck/checker/urlbase.py:685 #, python-format msgid "Content size %(dlsize)s is larger than %(maxbytes)s." msgstr "" -#: ../linkcheck/checker/urlbase.py:849 +#: ../linkcheck/checker/urlbase.py:690 #, python-format msgid "" "Download size (%(dlsize)d Byte) does not equal content size (%(size)d Byte)." 
msgstr "" -#: ../linkcheck/checker/urlbase.py:861 -#, python-format -msgid "%(w3type)s validation error at line %(line)s col %(column)s: %(msg)s" -msgstr "" - -#: ../linkcheck/checker/urlbase.py:882 -msgid "valid HTML syntax" -msgstr "" - -#: ../linkcheck/checker/urlbase.py:890 +#: ../linkcheck/checker/urlbase.py:710 #, python-format -msgid "HTML W3C validation caused error: %(msg)s " -msgstr "" - -#: ../linkcheck/checker/urlbase.py:917 -msgid "valid CSS syntax" +msgid "More than %(num)d child URLs found, skipping the rest." msgstr "" -#: ../linkcheck/checker/urlbase.py:925 -#, python-format -msgid "CSS W3C validation caused error: %(msg)s " -msgstr "" - -#: ../linkcheck/checker/urlbase.py:978 -#, python-format -msgid "%(num)d URL parsed." -msgid_plural "%(num)d URLs parsed." -msgstr[0] "" -msgstr[1] "" - -#: ../linkcheck/checker/urlbase.py:1124 +#: ../linkcheck/checker/urlbase.py:772 #, python-format msgid "URL has unparsable domain name: %(domain)s" msgstr "" @@ -944,57 +867,20 @@ msgstr "" msgid "%(host)r could not be resolved" msgstr "" -#: ../linkcheck/checker/httpsurl.py:38 -#, python-format -msgid "%s URL ignored." -msgstr "" - -#: ../linkcheck/checker/httpsurl.py:55 -msgid "empty or no certificate found" -msgstr "" - -#: ../linkcheck/checker/httpsurl.py:61 -msgid "certificate did not include \"subject\" information" -msgstr "" - -#: ../linkcheck/checker/httpsurl.py:66 -msgid "certificate did not include \"notAfter\" information" -msgstr "" - -#: ../linkcheck/checker/httpsurl.py:87 -#, python-format -msgid "invalid certficate \"notAfter\" value %r" -msgstr "" - -#: ../linkcheck/checker/httpsurl.py:95 -#, python-format -msgid "certficate is expired on %s" -msgstr "" - -#: ../linkcheck/checker/httpsurl.py:100 -#, python-format -msgid "certificate is only %s valid" -msgstr "" - -#: ../linkcheck/checker/httpsurl.py:106 -#, python-format -msgid "SSL warning: %(msg)s. Cipher %(cipher)s, %(protocol)s." 
-msgstr "" - -#: ../linkcheck/checker/ftpurl.py:96 +#: ../linkcheck/checker/ftpurl.py:91 msgid "Got no answer from FTP server" msgstr "" -#: ../linkcheck/checker/ftpurl.py:99 +#: ../linkcheck/checker/ftpurl.py:94 #, python-format msgid "Remote host has closed connection: %(msg)s" msgstr "" -#: ../linkcheck/checker/ftpurl.py:142 +#: ../linkcheck/checker/ftpurl.py:137 msgid "Missing trailing directory slash in ftp url." msgstr "" -#: ../linkcheck/checker/ftpurl.py:224 +#: ../linkcheck/checker/ftpurl.py:209 msgid "FTP file size too large" msgstr "" @@ -1017,120 +903,30 @@ msgstr "" msgid "directory" msgstr "" -#: ../linkcheck/checker/fileurl.py:197 +#: ../linkcheck/checker/fileurl.py:198 #, python-format msgid "" "The URL path %(path)r is not the same as the system path %(realpath)r. You " "should always use the system path in URLs." msgstr "" -#: ../linkcheck/checker/httpurl.py:142 -msgid "Access denied by robots.txt, skipping content checks." -msgstr "" - -#: ../linkcheck/checker/httpurl.py:191 -#, python-format -msgid "Enforced proxy `%(name)s'." -msgstr "" - -#: ../linkcheck/checker/httpurl.py:197 -msgid "Missing 'Location' header with enforced proxy status 305, aborting." -msgstr "" - -#: ../linkcheck/checker/httpurl.py:202 -msgid "Empty 'Location' header value with enforced proxy status 305, aborting." +#: ../linkcheck/checker/httpurl.py:120 +msgid "Access denied by robots.txt, checked only syntax." msgstr "" -#: ../linkcheck/checker/httpurl.py:227 -#, python-format -msgid "more than %d redirections, aborting" -msgstr "" - -#: ../linkcheck/checker/httpurl.py:250 -#, python-format -msgid "" -"Unsupported HTTP authentication `%(auth)s', only `Basic' authentication is " -"supported." -msgstr "" - -#: ../linkcheck/checker/httpurl.py:331 -#, python-format -msgid "Redirected to `%(url)s'." -msgstr "" - -#: ../linkcheck/checker/httpurl.py:373 -#, python-format -msgid "Redirection to url `%(newurl)s' is not allowed." 
-msgstr "" - -#: ../linkcheck/checker/httpurl.py:375 ../linkcheck/checker/httpurl.py:409 -#: ../linkcheck/checker/httpurl.py:443 +#: ../linkcheck/checker/httpurl.py:121 msgid "syntax OK" msgstr "" -#: ../linkcheck/checker/httpurl.py:394 -msgid "The redirected URL is outside of the domain filter, checked only syntax." -msgstr "" - -#: ../linkcheck/checker/httpurl.py:396 -msgid "filtered" -msgstr "" - -#: ../linkcheck/checker/httpurl.py:407 -msgid "Access to redirected URL denied by robots.txt, checked only syntax." -msgstr "" - -#: ../linkcheck/checker/httpurl.py:425 +#: ../linkcheck/checker/httpurl.py:184 #, python-format -msgid "" -"recursive redirection encountered:\n" -" %(urls)s" -msgstr "" - -#: ../linkcheck/checker/httpurl.py:438 -#, python-format -msgid "" -"Redirection to URL `%(newurl)s' with different scheme found; the original URL " -"was `%(url)s'." -msgstr "" - -#: ../linkcheck/checker/httpurl.py:453 -msgid "HTTP 301 (moved permanent) encountered: you should update this link." +msgid "Redirected to `%(url)s'." msgstr "" -#: ../linkcheck/checker/httpurl.py:484 +#: ../linkcheck/checker/httpurl.py:219 msgid "OK" msgstr "" -#: ../linkcheck/checker/httpurl.py:583 -#, python-format -msgid "Sent Cookie: %(cookie)s." -msgstr "" - -#: ../linkcheck/checker/httpurl.py:589 -#, python-format -msgid "Could not store cookies from headers: %(error)s." -msgstr "" - -#: ../linkcheck/checker/httpurl.py:648 -#, python-format -msgid "Unsupported HTTP url scheme `%(scheme)s'" -msgstr "" - -#: ../linkcheck/checker/httpurl.py:669 -msgid "Redirection error" -msgstr "" - -#: ../linkcheck/checker/httpurl.py:695 -#, python-format -msgid "Decompress error %(err)s" -msgstr "" - -#: ../linkcheck/checker/httpurl.py:710 -#, python-format -msgid "Unsupported content encoding `%(encoding)s'." -msgstr "" - #: ../linkcheck/checker/nntpurl.py:45 msgid "No NNTP server was specified, skipping this URL." 
msgstr "" @@ -1154,35 +950,35 @@ msgstr "" msgid "NNTP server too busy; tried more than %d times." msgstr "" -#: ../linkcheck/__init__.py:150 +#: ../linkcheck/__init__.py:152 msgid "CRITICAL" msgstr "" -#: ../linkcheck/__init__.py:151 +#: ../linkcheck/__init__.py:153 msgid "ERROR" msgstr "" -#: ../linkcheck/__init__.py:152 +#: ../linkcheck/__init__.py:154 msgid "WARN" msgstr "" -#: ../linkcheck/__init__.py:153 +#: ../linkcheck/__init__.py:155 msgid "WARNING" msgstr "" -#: ../linkcheck/__init__.py:154 +#: ../linkcheck/__init__.py:156 msgid "INFO" msgstr "" -#: ../linkcheck/__init__.py:155 +#: ../linkcheck/__init__.py:157 msgid "DEBUG" msgstr "" -#: ../linkcheck/__init__.py:156 +#: ../linkcheck/__init__.py:158 msgid "NOTSET" msgstr "" -#: ../linkcheck/__init__.py:167 +#: ../linkcheck/__init__.py:169 msgid "Running as root user; dropping privileges by changing user to nobody." msgstr "" @@ -1190,240 +986,236 @@ msgstr "" msgid "LinkChecker debug log" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:793 +#: ../linkcheck/gui/linkchecker_ui_main.py:770 msgid "LinkChecker" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:794 +#: ../linkcheck/gui/linkchecker_ui_main.py:771 msgid "URL:" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:795 +#: ../linkcheck/gui/linkchecker_ui_main.py:772 msgid "Start checking the given URL." 
msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:797 +#: ../linkcheck/gui/linkchecker_ui_main.py:774 msgid "URLs: " msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:798 +#: ../linkcheck/gui/linkchecker_ui_main.py:775 msgid "active" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:799 -#: ../linkcheck/gui/linkchecker_ui_main.py:801 -#: ../linkcheck/gui/linkchecker_ui_main.py:803 +#: ../linkcheck/gui/linkchecker_ui_main.py:776 +#: ../linkcheck/gui/linkchecker_ui_main.py:778 +#: ../linkcheck/gui/linkchecker_ui_main.py:780 msgid "0" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:800 +#: ../linkcheck/gui/linkchecker_ui_main.py:777 msgid "queued" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:802 +#: ../linkcheck/gui/linkchecker_ui_main.py:779 msgid "checked" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:804 +#: ../linkcheck/gui/linkchecker_ui_main.py:781 msgid "Info:" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:805 +#: ../linkcheck/gui/linkchecker_ui_main.py:782 msgid "-" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:806 +#: ../linkcheck/gui/linkchecker_ui_main.py:783 msgid "URL properties" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:818 +#: ../linkcheck/gui/linkchecker_ui_main.py:795 msgid "Check results" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:819 +#: ../linkcheck/gui/linkchecker_ui_main.py:796 msgid "Valid URLs" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:820 +#: ../linkcheck/gui/linkchecker_ui_main.py:797 msgid "Warnings" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:821 +#: ../linkcheck/gui/linkchecker_ui_main.py:798 msgid "Invalid URLs" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:822 +#: ../linkcheck/gui/linkchecker_ui_main.py:799 msgid "Content type statistics" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:823 +#: ../linkcheck/gui/linkchecker_ui_main.py:800 msgid "Image" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:824 +#: 
../linkcheck/gui/linkchecker_ui_main.py:801 msgid "Text" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:825 +#: ../linkcheck/gui/linkchecker_ui_main.py:802 msgid "Application" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:826 +#: ../linkcheck/gui/linkchecker_ui_main.py:803 msgid "Audio" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:827 +#: ../linkcheck/gui/linkchecker_ui_main.py:804 msgid "Video" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:828 +#: ../linkcheck/gui/linkchecker_ui_main.py:805 msgid "Other" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:829 +#: ../linkcheck/gui/linkchecker_ui_main.py:806 msgid "Mail" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:830 +#: ../linkcheck/gui/linkchecker_ui_main.py:807 msgid "URL statistics" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:831 +#: ../linkcheck/gui/linkchecker_ui_main.py:808 msgid "Min. length" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:832 +#: ../linkcheck/gui/linkchecker_ui_main.py:809 msgid "Avg. length" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:833 +#: ../linkcheck/gui/linkchecker_ui_main.py:810 msgid "Max. 
length" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:834 -msgid "Domains" -msgstr "" - -#: ../linkcheck/gui/linkchecker_ui_main.py:835 +#: ../linkcheck/gui/linkchecker_ui_main.py:811 msgid "&Edit" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:836 +#: ../linkcheck/gui/linkchecker_ui_main.py:812 #: ../linkcheck/gui/linkchecker_ui_editor.py:34 msgid "&File" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:837 -#: ../linkcheck/gui/linkchecker_ui_main.py:840 +#: ../linkcheck/gui/linkchecker_ui_main.py:813 +#: ../linkcheck/gui/linkchecker_ui_main.py:816 msgid "&Help" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:838 +#: ../linkcheck/gui/linkchecker_ui_main.py:814 msgid "A&bout" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:839 +#: ../linkcheck/gui/linkchecker_ui_main.py:815 msgid "About" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:841 +#: ../linkcheck/gui/linkchecker_ui_main.py:817 msgid "Help" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:842 +#: ../linkcheck/gui/linkchecker_ui_main.py:818 msgid "View online" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:843 +#: ../linkcheck/gui/linkchecker_ui_main.py:819 msgid "View URL online" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:844 +#: ../linkcheck/gui/linkchecker_ui_main.py:820 msgid "&Options" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:845 +#: ../linkcheck/gui/linkchecker_ui_main.py:821 #: ../linkcheck/gui/linkchecker_ui_options.py:137 msgid "Options" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:846 +#: ../linkcheck/gui/linkchecker_ui_main.py:822 msgid "Copy to clipboard" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:847 +#: ../linkcheck/gui/linkchecker_ui_main.py:823 msgid "Copy URL to clipboard" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:848 +#: ../linkcheck/gui/linkchecker_ui_main.py:824 msgid "Ctrl+C" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:849 +#: ../linkcheck/gui/linkchecker_ui_main.py:825 
msgid "View parent online" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:850 +#: ../linkcheck/gui/linkchecker_ui_main.py:826 msgid "View parent URL online" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:851 +#: ../linkcheck/gui/linkchecker_ui_main.py:827 msgid "View parent source" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:852 +#: ../linkcheck/gui/linkchecker_ui_main.py:828 msgid "View parent URL source" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:853 +#: ../linkcheck/gui/linkchecker_ui_main.py:829 msgid "Show debug" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:854 +#: ../linkcheck/gui/linkchecker_ui_main.py:830 msgid "View properties" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:855 +#: ../linkcheck/gui/linkchecker_ui_main.py:831 msgid "View URL properties" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:856 +#: ../linkcheck/gui/linkchecker_ui_main.py:832 msgid "Save &results..." msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:857 +#: ../linkcheck/gui/linkchecker_ui_main.py:833 msgid "&Quit" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:858 +#: ../linkcheck/gui/linkchecker_ui_main.py:834 msgid "Ctrl+Q" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:859 +#: ../linkcheck/gui/linkchecker_ui_main.py:835 msgid "Check for updates" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:860 +#: ../linkcheck/gui/linkchecker_ui_main.py:836 msgid "Donate" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:861 +#: ../linkcheck/gui/linkchecker_ui_main.py:837 msgid "&Open project..." msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:862 +#: ../linkcheck/gui/linkchecker_ui_main.py:838 msgid "Open project" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:863 +#: ../linkcheck/gui/linkchecker_ui_main.py:839 msgid "Ctrl+O" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:864 +#: ../linkcheck/gui/linkchecker_ui_main.py:840 msgid "&Save project..." 
msgstr "" -#: ../linkcheck/gui/linkchecker_ui_main.py:865 +#: ../linkcheck/gui/linkchecker_ui_main.py:841 #: ../linkcheck/gui/linkchecker_ui_editor.py:50 msgid "Ctrl+S" msgstr "" @@ -1507,23 +1299,23 @@ msgstr "" msgid "Parent" msgstr "" -#: ../linkcheck/gui/urlsave.py:21 +#: ../linkcheck/gui/urlsave.py:19 msgid "HTML output (*.html)" msgstr "" -#: ../linkcheck/gui/urlsave.py:22 +#: ../linkcheck/gui/urlsave.py:20 msgid "Text output (*.txt)" msgstr "" -#: ../linkcheck/gui/urlsave.py:23 +#: ../linkcheck/gui/urlsave.py:21 msgid "XML output (*.xml)" msgstr "" -#: ../linkcheck/gui/urlsave.py:24 +#: ../linkcheck/gui/urlsave.py:22 msgid "CSV output (*.csv)" msgstr "" -#: ../linkcheck/gui/urlsave.py:59 +#: ../linkcheck/gui/urlsave.py:68 msgid "Save check results" msgstr "" @@ -1535,36 +1327,36 @@ msgstr "" msgid "&Save" msgstr "" -#: ../linkcheck/gui/__init__.py:160 ../linkcheck/gui/__init__.py:487 +#: ../linkcheck/gui/__init__.py:160 ../linkcheck/gui/__init__.py:492 msgid "Ready." msgstr "" -#: ../linkcheck/gui/__init__.py:178 +#: ../linkcheck/gui/__init__.py:180 msgid "Check finished." msgstr "" -#: ../linkcheck/gui/__init__.py:294 +#: ../linkcheck/gui/__init__.py:296 msgid "Start" msgstr "" -#: ../linkcheck/gui/__init__.py:320 +#: ../linkcheck/gui/__init__.py:322 msgid "Stop" msgstr "" -#: ../linkcheck/gui/__init__.py:366 +#: ../linkcheck/gui/__init__.py:369 msgid "yes" msgstr "" -#: ../linkcheck/gui/__init__.py:366 +#: ../linkcheck/gui/__init__.py:369 msgid "no" msgstr "" -#: ../linkcheck/gui/__init__.py:369 +#: ../linkcheck/gui/__init__.py:372 #, python-format msgid "About %(appname)s" msgstr "" -#: ../linkcheck/gui/__init__.py:370 +#: ../linkcheck/gui/__init__.py:373 #, python-format msgid "" "
\n" @@ -1582,45 +1374,45 @@ msgid "" "
" msgstr "" -#: ../linkcheck/gui/__init__.py:422 +#: ../linkcheck/gui/__init__.py:427 #, python-format msgid "Closing active URLs with timeout %s..." msgstr "" -#: ../linkcheck/gui/__init__.py:437 ../linkchecker:663 +#: ../linkcheck/gui/__init__.py:442 ../linkchecker:624 msgid "Dumping memory statistics..." msgstr "" -#: ../linkcheck/gui/__init__.py:439 +#: ../linkcheck/gui/__init__.py:444 msgid "LinkChecker memory dump written" msgstr "" -#: ../linkcheck/gui/__init__.py:440 ../linkchecker:665 +#: ../linkcheck/gui/__init__.py:445 ../linkchecker:626 #, python-format msgid "The memory dump has been written to `%(filename)s'." msgstr "" -#: ../linkcheck/gui/__init__.py:463 +#: ../linkcheck/gui/__init__.py:468 msgid "Error, empty URL" msgstr "" -#: ../linkcheck/gui/__init__.py:465 +#: ../linkcheck/gui/__init__.py:470 #, python-format msgid "Checking '%s'." msgstr "" -#: ../linkcheck/gui/__init__.py:484 +#: ../linkcheck/gui/__init__.py:489 #, python-format msgid "%d URL selected." msgid_plural "%d URLs selected" msgstr[0] "" msgstr[1] "" -#: ../linkcheck/gui/__init__.py:571 +#: ../linkcheck/gui/__init__.py:576 msgid "LinkChecker internal error" msgstr "" -#: ../linkcheck/gui/linkchecker_ui_options.py:138 ../linkchecker:349 +#: ../linkcheck/gui/linkchecker_ui_options.py:138 ../linkchecker:319 msgid "Checking options" msgstr "" @@ -1728,12 +1520,12 @@ msgstr "" msgid "Project file %(filename)s loaded successfully." msgstr "" -#: ../linkcheck/cmdline.py:36 +#: ../linkcheck/cmdline.py:59 #, python-format msgid "Error: %(msg)s" msgstr "" -#: ../linkcheck/cmdline.py:37 +#: ../linkcheck/cmdline.py:60 #, python-format msgid "Execute '%(program)s -h' for help" msgstr "" @@ -1831,20 +1623,6 @@ msgid "" "to use this feature." 
msgstr "" -#: ../linkcheck/strformat.py:332 -#, python-format -msgid "%d hit" -msgid_plural "%d hits" -msgstr[0] "" -msgstr[1] "" - -#: ../linkcheck/strformat.py:333 -#, python-format -msgid "%d miss" -msgid_plural "%d misses" -msgstr[0] "" -msgstr[1] "" - #: ../linkchecker:55 msgid "" "NOTES\n" @@ -2016,16 +1794,11 @@ msgstr "" msgid "Please run linkchecker with --profile to generate it." msgstr "" -#: ../linkchecker:224 -#, python-format -msgid "Syntax error in %(arg)r: %(msg)s" -msgstr "" - -#: ../linkchecker:247 +#: ../linkchecker:236 msgid "General options" msgstr "" -#: ../linkchecker:251 +#: ../linkchecker:240 #, python-format msgid "" "Use FILENAME as configuration file. Per default LinkChecker uses\n" @@ -2033,39 +1806,35 @@ msgid "" "%%HOMEPATH%%\\.linkchecker\\linkcheckerrc)." msgstr "" -#: ../linkchecker:256 +#: ../linkchecker:245 msgid "" "Generate no more than the given number of threads. Default number\n" "of threads is 10. To disable threading specify a non-positive number." msgstr "" -#: ../linkchecker:259 +#: ../linkchecker:248 msgid "Print version and exit." msgstr "" -#: ../linkchecker:262 -msgid "Read list of white-space separated URLs to check from stdin." -msgstr "" - -#: ../linkchecker:265 -msgid "Output options" +#: ../linkchecker:251 +msgid "Print available check plugins and exit." msgstr "" -#: ../linkchecker:268 -msgid "Check syntax of CSS URLs with the W3C online validator." +#: ../linkchecker:254 +msgid "Read list of white-space separated URLs to check from stdin." msgstr "" -#: ../linkchecker:271 -msgid "Check syntax of HTML URLs with the W3C online validator." +#: ../linkchecker:257 +msgid "Output options" msgstr "" -#: ../linkchecker:273 +#: ../linkchecker:259 msgid "" "Log all URLs, including duplicates.\n" -"Default is to log duplicate URLs only once." +"Default is to log URLs only once." 
msgstr "" -#: ../linkchecker:276 +#: ../linkchecker:262 #, python-format msgid "" "Print debugging output for the given logger.\n" @@ -2077,7 +1846,7 @@ msgid "" "For accurate results, threading will be disabled during debug runs." msgstr "" -#: ../linkchecker:287 +#: ../linkchecker:273 #, python-format msgid "" "Output to a file linkchecker-out.TYPE, $HOME/.linkchecker/blacklist for\n" @@ -2094,15 +1863,15 @@ msgid "" "suppress all console output with the option '-o none'." msgstr "" -#: ../linkchecker:301 +#: ../linkchecker:287 msgid "Do not print check status messages." msgstr "" -#: ../linkchecker:303 +#: ../linkchecker:289 msgid "Don't log warnings. Default is to log warnings." msgstr "" -#: ../linkchecker:306 +#: ../linkchecker:292 #, python-format msgid "" "Specify output as %(loggertypes)s. Default output type is text.\n" @@ -2112,51 +1881,27 @@ msgid "" "html." msgstr "" -#: ../linkchecker:316 +#: ../linkchecker:302 msgid "" "Quiet operation, an alias for '-o none'.\n" "This is only useful with -F." msgstr "" -#: ../linkchecker:320 -msgid "Scan content of URLs with ClamAV virus scanner." -msgstr "" - -#: ../linkchecker:322 +#: ../linkchecker:305 msgid "Print tracing information." msgstr "" -#: ../linkchecker:325 +#: ../linkchecker:308 msgid "Log all URLs. Default is to log only errors and warnings." msgstr "" -#: ../linkchecker:331 -msgid "" -"Define a regular expression which prints a warning if it matches\n" -"any content of the checked link. This applies only to valid pages,\n" -"so we can get their content.\n" -"\n" -"Use this to check for pages that contain some form of error\n" -"message, for example 'This page has moved' or 'Oracle\n" -"Application error'.\n" -"\n" -"Note that multiple values can be combined in the regular expression,\n" -"for example \"(This page has moved|Oracle Application error)\"." 
-msgstr "" - -#: ../linkchecker:344 +#: ../linkchecker:314 msgid "" "Print a warning if content size info is available and exceeds the\n" "given number of bytes." msgstr "" -#: ../linkchecker:352 -msgid "" -"Check HTTP anchor references. Default is not to check anchors.\n" -"This option enables logging of the warning 'url-anchor-not-found'." -msgstr "" - -#: ../linkchecker:356 +#: ../linkchecker:322 msgid "" "Accept and send HTTP cookies according to RFC 2109. Only cookies\n" "which are sent back to the originating server are accepted.\n" @@ -2164,135 +1909,133 @@ msgid "" "information." msgstr "" -#: ../linkchecker:362 +#: ../linkchecker:328 msgid "" "Read a file with initial cookie data. The cookie data format is\n" "explained below." msgstr "" -#: ../linkchecker:366 -msgid "" -"Only check syntax of URLs matching the given regular expression.\n" -" This option can be given multiple times." +#: ../linkchecker:331 +msgid "Check also external URLs." msgstr "" -#: ../linkchecker:370 +#: ../linkchecker:334 msgid "" -"Check but do not recurse into URLs matching the given regular\n" -"expression. This option can be given multiple times." +"Only check syntax of URLs matching the given regular expression.\n" +" This option can be given multiple times." msgstr "" -#: ../linkchecker:374 +#: ../linkchecker:341 msgid "" "Specify an NNTP server for 'news:...' links. Default is the\n" "environment variable NNTP_SERVER. If no host is given,\n" "only the syntax of the link is checked." msgstr "" -#: ../linkchecker:380 +#: ../linkchecker:347 msgid "" "Read a password from console and use it for HTTP and FTP authorization.\n" "For FTP the default password is 'anonymous@'. For HTTP there is\n" "no default password. See also -u." msgstr "" -#: ../linkchecker:386 +#: ../linkchecker:353 msgid "" "Pause the given number of seconds between two subsequent connection\n" "requests to the same host. Default is no pause between requests." 
msgstr "" -#: ../linkchecker:391 +#: ../linkchecker:358 msgid "" "Check recursively all links up to given depth. A negative depth\n" "will enable infinite recursion. Default depth is infinite." msgstr "" -#: ../linkchecker:396 +#: ../linkchecker:363 #, python-format msgid "" "Set the timeout for connection attempts in seconds. The default\n" "timeout is %d seconds." msgstr "" -#: ../linkchecker:400 +#: ../linkchecker:367 msgid "" "Try the given username for HTTP and FTP authorization.\n" "For FTP the default username is 'anonymous'. For HTTP there is\n" "no default username. See also -p." msgstr "" -#: ../linkchecker:405 +#: ../linkchecker:372 msgid "" "Specify the User-Agent string to send to the HTTP server, for example\n" "\"Mozilla/4.0\". The default is \"LinkChecker/X.Y\" where X.Y is the current\n" "version of LinkChecker." msgstr "" -#: ../linkchecker:439 +#: ../linkchecker:406 #, python-format msgid "Invalid debug level %(level)r" msgstr "" -#: ../linkchecker:452 +#: ../linkchecker:419 #, python-format msgid "Unreadable config file: %r" msgstr "" -#: ../linkchecker:460 +#: ../linkchecker:427 msgid "Running with python -O disables debugging." 
msgstr "" -#: ../linkchecker:483 ../linkchecker:515 +#: ../linkchecker:451 ../linkchecker:483 #, python-format msgid "Unknown logger type %(type)r in %(output)r for option %(option)s" msgstr "" -#: ../linkchecker:487 ../linkchecker:521 +#: ../linkchecker:455 ../linkchecker:489 #, python-format msgid "Unknown encoding %(encoding)r in %(output)r for option %(option)s" msgstr "" -#: ../linkchecker:533 +#: ../linkchecker:501 #, python-format msgid "Enter LinkChecker HTTP/FTP password for user %(user)s:" msgstr "" -#: ../linkchecker:536 +#: ../linkchecker:504 msgid "Enter LinkChecker HTTP/FTP password:" msgstr "" -#: ../linkchecker:543 ../linkchecker:561 +#: ../linkchecker:511 ../linkchecker:529 #, python-format msgid "Illegal argument %(arg)r for option %(option)s" msgstr "" -#: ../linkchecker:599 +#: ../linkchecker:560 #, python-format msgid "Enter LinkChecker password for user %(user)s at %(strpattern)s:" msgstr "" -#: ../linkchecker:620 +#: ../linkchecker:581 #, python-format msgid "Could not parse cookie file: %s" msgstr "" -#: ../linkchecker:635 +#: ../linkchecker:596 msgid "no files or URLs given" msgstr "" -#: ../linkchecker:640 +#: ../linkchecker:601 #, python-format msgid "" "Overwrite profiling file %(file)r?\n" "Press Ctrl-C to cancel, RETURN to continue." msgstr "" -#: ../linkchecker:646 +#: ../linkchecker:607 msgid "Canceled." msgstr "" -#: ../linkchecker:650 +#: ../linkchecker:611 msgid "" "The `cProfile' Python module is not installed, therefore the --profile option " "is disabled." 
diff --git a/requirements.txt b/requirements.txt index 784a63985..bf73c5320 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ # required: +requests # optional: argcomplete twill diff --git a/setup.py b/setup.py index f32405186..5c863626e 100644 --- a/setup.py +++ b/setup.py @@ -97,7 +97,7 @@ has_py2app = False # the application version -AppVersion = "8.7" +AppVersion = "9.0" # the application name AppName = "LinkChecker" @@ -915,6 +915,8 @@ def build_post_data(self, action): 'linkcheck.HtmlParser', 'linkcheck.logger', 'linkcheck.network', + 'linkcheck.parser', + 'linkcheck.plugins', 'linkcheck_dns.dns', 'linkcheck_dns.dns.rdtypes', 'linkcheck_dns.dns.rdtypes.ANY', diff --git a/tests/__init__.py b/tests/__init__.py index 28537ee5b..fb9a16f7a 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -24,6 +24,10 @@ from linkcheck import LinkCheckerInterrupt, winutil +basedir = os.path.dirname(__file__) +linkchecker_cmd = os.path.join(os.path.dirname(basedir), "linkchecker") + + class memoized (object): """Decorator that caches a function's return value each time it is called. If called later with the same arguments, the cached value is returned, and @@ -49,16 +53,32 @@ def __repr__(self): return self.func.__doc__ -def _run (cmd): +def run (cmd, verbosity=0, **kwargs): + """Run command without error checking. 
+ @return: command return code""" + if kwargs.get("shell"): + # for shell calls the command must be a string + cmd = " ".join(cmd) + return subprocess.call(cmd, **kwargs) + + +def run_checked (cmd, ret_ok=(0,), **kwargs): + """Run command and raise OSError on error.""" + retcode = run(cmd, **kwargs) + if retcode not in ret_ok: + msg = "Command `%s' returned non-zero exit status %d" % (cmd, retcode) + raise OSError(msg) + return retcode + + + +def run_silent (cmd): """Run given command without output.""" null = open(os.name == 'nt' and ':NUL' or "/dev/null", 'w') try: - try: - return subprocess.call(cmd, stdout=null, stderr=subprocess.STDOUT) - finally: - null.close() - except OSError: - return -1 + return run(cmd, stdout=null, stderr=subprocess.STDOUT) + finally: + null.close() def _need_func (testfunc, name): @@ -91,7 +111,7 @@ def has_network (): @memoized def has_msgfmt (): """Test if msgfmt is available.""" - return _run(["msgfmt", "-V"]) == 0 + return run_silent(["msgfmt", "-V"]) == 0 need_msgfmt = _need_func(has_msgfmt, "msgfmt") diff --git a/tests/checker/__init__.py b/tests/checker/__init__.py index a271d80c2..13a7024f0 100644 --- a/tests/checker/__init__.py +++ b/tests/checker/__init__.py @@ -127,13 +127,12 @@ def get_test_aggregate (confargs, logargs): add_fileoutput_config(config) # uncomment for debugging #config.init_logging(None, debug=["all"]) - config["anchors"] = True config["verbose"] = True - config["complete"] = True config['threads'] = 0 config['status'] = False - config['cookies'] = True + config["checkextern"] = True config.update(confargs) + config.sanitize() return linkcheck.director.get_aggregate(config) diff --git a/tests/checker/data/Bookmarks.result b/tests/checker/data/Bookmarks.result index 0810797fb..eb1e2af29 100644 --- a/tests/checker/data/Bookmarks.result +++ b/tests/checker/data/Bookmarks.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/Bookmarks cache key file://%(curdir)s/%(datadir)s/Bookmarks real url 
file://%(curdir)s/%(datadir)s/Bookmarks name %(datadir)s/Bookmarks -info 1 URL parsed. valid url http://example.com/ diff --git a/tests/checker/data/a b/bl.html b/tests/checker/data/a b/bl.html index 0765d8305..1c87a3965 100644 --- a/tests/checker/data/a b/bl.html +++ b/tests/checker/data/a b/bl.html @@ -1,3 +1,2 @@ -Broken link -Broken link target +link External link diff --git a/tests/checker/data/a b/el.html b/tests/checker/data/a b/el.html index 82d3502f3..ef5dd7183 100644 --- a/tests/checker/data/a b/el.html +++ b/tests/checker/data/a b/el.html @@ -1,2 +1 @@ -Broken link -Broken link target +External link diff --git a/tests/checker/data/anchor.html b/tests/checker/data/anchor.html index eb3d6e293..7bab4f61b 100644 --- a/tests/checker/data/anchor.html +++ b/tests/checker/data/anchor.html @@ -1,6 +1,3 @@ -Bla diff --git a/tests/checker/data/anchor.html.result b/tests/checker/data/anchor.html.result deleted file mode 100644 index 9c420884c..000000000 --- a/tests/checker/data/anchor.html.result +++ /dev/null @@ -1,26 +0,0 @@ -url file://%(curdir)s/%(datadir)s/anchor.html -cache key file://%(curdir)s/%(datadir)s/anchor.html -real url file://%(curdir)s/%(datadir)s/anchor.html -name %(datadir)s/anchor.html -info 3 URLs parsed. -valid - -url #myid%%3A -cache key file://%(curdir)s/%(datadir)s/anchor.html#myid%%3A -real url file://%(curdir)s/%(datadir)s/anchor.html -name Bla -valid - -url #broken -cache key file://%(curdir)s/%(datadir)s/anchor.html#broken -real url file://%(curdir)s/%(datadir)s/anchor.html -name Bla3 -warning Anchor `broken' not found. Available anchors: `myid:'. -valid - -url #broken2 -cache key file://%(curdir)s/%(datadir)s/anchor.html#broken2 -real url file://%(curdir)s/%(datadir)s/anchor.html -name Bla4 -warning Anchor `broken2' not found. Available anchors: `myid:'. 
-valid diff --git a/tests/checker/data/archive.html.result b/tests/checker/data/archive.html.result index a4cd630e8..f6cf9af81 100644 --- a/tests/checker/data/archive.html.result +++ b/tests/checker/data/archive.html.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/archive.html cache key file://%(curdir)s/%(datadir)s/archive.html real url file://%(curdir)s/%(datadir)s/archive.html name %(datadir)s/archive.html -info 2 URLs parsed. valid url file.html diff --git a/tests/checker/data/base1.html.result b/tests/checker/data/base1.html.result index 4cf167198..a5f8249b3 100644 --- a/tests/checker/data/base1.html.result +++ b/tests/checker/data/base1.html.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/base1.html cache key file://%(curdir)s/%(datadir)s/base1.html real url file://%(curdir)s/%(datadir)s/base1.html name %(datadir)s/base1.html -info 2 URLs parsed. valid url base2.html diff --git a/tests/checker/data/base2.html.result b/tests/checker/data/base2.html.result index 442f30a74..b35747778 100644 --- a/tests/checker/data/base2.html.result +++ b/tests/checker/data/base2.html.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/base2.html cache key file://%(curdir)s/%(datadir)s/base2.html real url file://%(curdir)s/%(datadir)s/base2.html name %(datadir)s/base2.html -info 1 URL parsed. valid url test.txt diff --git a/tests/checker/data/base3.html.result b/tests/checker/data/base3.html.result index 98e338671..d34a07d9a 100644 --- a/tests/checker/data/base3.html.result +++ b/tests/checker/data/base3.html.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/base3.html cache key file://%(curdir)s/%(datadir)s/base3.html real url file://%(curdir)s/%(datadir)s/base3.html name %(datadir)s/base3.html -info 1 URL parsed. 
valid url test.txt diff --git a/tests/checker/data/base4.html.result b/tests/checker/data/base4.html.result index 87463be9f..574be5fb1 100644 --- a/tests/checker/data/base4.html.result +++ b/tests/checker/data/base4.html.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/base4.html cache key file://%(curdir)s/%(datadir)s/base4.html real url file://%(curdir)s/%(datadir)s/base4.html name %(datadir)s/base4.html -info 1 URL parsed. valid url test.txt diff --git a/tests/checker/data/dir.result b/tests/checker/data/dir.result index fc9c385d3..1b8935025 100644 --- a/tests/checker/data/dir.result +++ b/tests/checker/data/dir.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/dir/ cache key file://%(curdir)s/%(datadir)s/dir/ real url file://%(curdir)s/%(datadir)s/dir/ name %(datadir)s/dir -info 1 URL parsed. valid url %%C3%%AD%%C2%%BB%%C2%%AD%%C2%%AF%%C2%%BF.dat diff --git a/tests/checker/data/file.css.result b/tests/checker/data/file.css.result index 297f06171..9029bd0bc 100644 --- a/tests/checker/data/file.css.result +++ b/tests/checker/data/file.css.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/file.css cache key file://%(curdir)s/%(datadir)s/file.css real url file://%(curdir)s/%(datadir)s/file.css name %(datadir)s/file.css -info 2 URLs parsed. valid url file.html diff --git a/tests/checker/data/file.doc.result b/tests/checker/data/file.doc.result index b2b13da2e..e539130b7 100644 --- a/tests/checker/data/file.doc.result +++ b/tests/checker/data/file.doc.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/file.doc cache key file://%(curdir)s/%(datadir)s/file.doc real url file://%(curdir)s/%(datadir)s/file.doc name %(datadir)s/file.doc -info 1 URL parsed. 
valid url http://www.example.org/ diff --git a/tests/checker/data/file.html b/tests/checker/data/file.html index 61eff6858..f5356b514 100644 --- a/tests/checker/data/file.html +++ b/tests/checker/data/file.html @@ -1,6 +1,3 @@ relative url -bad anchor -good anchor javascript url - anchor diff --git a/tests/checker/data/file.html.result b/tests/checker/data/file.html.result index ed7373aa6..a43ee61df 100644 --- a/tests/checker/data/file.html.result +++ b/tests/checker/data/file.html.result @@ -2,25 +2,11 @@ url file://%(curdir)s/%(datadir)s/file.html cache key file://%(curdir)s/%(datadir)s/file.html real url file://%(curdir)s/%(datadir)s/file.html name %(datadir)s/file.html -info 4 URLs parsed. -valid - -url file.html#isnix -cache key file://%(curdir)s/%(datadir)s/file.html#isnix -real url file://%(curdir)s/%(datadir)s/file.html -name bad anchor -warning Anchor `isnix' not found. Available anchors: `iswas'. -valid - -url file.html#iswas -cache key file://%(curdir)s/%(datadir)s/file.html#iswas -real url file://%(curdir)s/%(datadir)s/file.html -name good anchor valid url javascript:loadthis() cache key javascript:loadthis() real url javascript:loadthis() name javascript url -warning Javascript URL ignored. +info Javascript URL ignored. valid diff --git a/tests/checker/data/file.php.result b/tests/checker/data/file.php.result index 741431099..20d7a2ed0 100644 --- a/tests/checker/data/file.php.result +++ b/tests/checker/data/file.php.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/file.php cache key file://%(curdir)s/%(datadir)s/file.php real url file://%(curdir)s/%(datadir)s/file.php name %(datadir)s/file.php -info 3 URLs parsed. valid url anchor.html @@ -15,12 +14,12 @@ url test_ cache key file://%(curdir)s/%(datadir)s/test_%%3C?%%20echo%%20%%24module%%20?%%3E real url file://%(curdir)s/%(datadir)s/test_%%3C?%%20echo%%20%%24module%%20?%%3E name PHP 1 -warning File URL ignored. +info File URL ignored. 
valid url test_ cache key file://%(curdir)s/%(datadir)s/test_%%3C?php%%20echo%%20%%24module%%20?%%3E real url file://%(curdir)s/%(datadir)s/test_%%3C?php%%20echo%%20%%24module%%20?%%3E name PHP 2 -warning File URL ignored. +info File URL ignored. valid diff --git a/tests/checker/data/file.wml.result b/tests/checker/data/file.wml.result index 4de180346..881eef52b 100644 --- a/tests/checker/data/file.wml.result +++ b/tests/checker/data/file.wml.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/file.wml cache key file://%(curdir)s/%(datadir)s/file.wml real url file://%(curdir)s/%(datadir)s/file.wml name %(datadir)s/file.wml -info 2 URLs parsed. valid url file.html diff --git a/tests/checker/data/frames.html.result b/tests/checker/data/frames.html.result index 45e52d66f..660bc4e6b 100644 --- a/tests/checker/data/frames.html.result +++ b/tests/checker/data/frames.html.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/frames.html cache key file://%(curdir)s/%(datadir)s/frames.html real url file://%(curdir)s/%(datadir)s/frames.html name %(datadir)s/frames.html -info 2 URLs parsed. valid url file.html diff --git a/tests/checker/data/html5.html.result b/tests/checker/data/html5.html.result index ff3954ec8..911a0ad39 100644 --- a/tests/checker/data/html5.html.result +++ b/tests/checker/data/html5.html.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/html5.html cache key file://%(curdir)s/%(datadir)s/html5.html real url file://%(curdir)s/%(datadir)s/html5.html name %(datadir)s/html5.html -info 6 URLs parsed. 
valid url file.asc diff --git a/tests/checker/data/http.html b/tests/checker/data/http.html index 6dae00da0..c53104961 100644 --- a/tests/checker/data/http.html +++ b/tests/checker/data/http.html @@ -22,5 +22,5 @@ - -UnicodeError + +Error diff --git a/tests/checker/data/http.html.result b/tests/checker/data/http.html.result index 1de280908..27debb667 100644 --- a/tests/checker/data/http.html.result +++ b/tests/checker/data/http.html.result @@ -1,7 +1,6 @@ url http://localhost:%(port)d/%(datadir)s/http.html cache key http://localhost:%(port)d/%(datadir)s/http.html real url http://localhost:%(port)d/%(datadir)s/http.html -info 14 URLs parsed. valid url dns://www.example.org @@ -63,18 +62,16 @@ valid url clsid:12345-67890 cache key clsid:12345-67890 real url clsid:12345-67890 -warning Clsid URL ignored. +info Clsid URL ignored. valid url http://example.org/foo/ #a=1,2,3 -cache key http://example.org/foo/%%20#a%%3D1%%2C2%%2C3 +cache key http://example.org/foo/%%20 real url http://example.org/foo/%%20#a%%3D1%%2C2%%2C3 -warning Anchor `a%%3D1%%2C2%%2C3' not found. Available anchors: -. -valid +error url http://.example.org/ cache key http://.example.org/ real url http://.example.org/ -name UnicodeError -warning Access denied by robots.txt, skipping content checks. +name Error error diff --git a/tests/checker/data/http.xhtml.result b/tests/checker/data/http.xhtml.result index 82fe08ba5..bc741a464 100644 --- a/tests/checker/data/http.xhtml.result +++ b/tests/checker/data/http.xhtml.result @@ -1,7 +1,6 @@ url http://localhost:%(port)d/%(datadir)s/http.xhtml cache key http://localhost:%(port)d/%(datadir)s/http.xhtml real url http://localhost:%(port)d/%(datadir)s/http.xhtml -info 1 URL parsed. 
valid url http.html diff --git a/tests/checker/data/http_file.html.result b/tests/checker/data/http_file.html.result index 7f4b1d95e..f2af97a88 100644 --- a/tests/checker/data/http_file.html.result +++ b/tests/checker/data/http_file.html.result @@ -1,7 +1,6 @@ url http://localhost:%(port)d/%(datadir)s/http_file.html cache key http://localhost:%(port)d/%(datadir)s/http_file.html real url http://localhost:%(port)d/%(datadir)s/http_file.html -info 1 URL parsed. valid url file:///example/file diff --git a/tests/checker/data/http_lowercase.html.result b/tests/checker/data/http_lowercase.html.result index 56e81a361..246c9c7e3 100644 --- a/tests/checker/data/http_lowercase.html.result +++ b/tests/checker/data/http_lowercase.html.result @@ -1,7 +1,6 @@ url http://localhost:%(port)d/%(datadir)s/http_lowercase.html cache key http://localhost:%(port)d/%(datadir)s/http_lowercase.html real url http://localhost:%(port)d/%(datadir)s/http_lowercase.html -info 2 URLs parsed. valid url HtTP://WwW.ExaMple.cOm/ diff --git a/tests/checker/data/http_quotes.html.result b/tests/checker/data/http_quotes.html.result index c0ac00107..224e7b889 100644 --- a/tests/checker/data/http_quotes.html.result +++ b/tests/checker/data/http_quotes.html.result @@ -1,7 +1,6 @@ url http://localhost:%(port)d/%(datadir)s/http_quotes.html cache key http://localhost:%(port)d/%(datadir)s/http_quotes.html real url http://localhost:%(port)d/%(datadir)s/http_quotes.html -info 3 URLs parsed. 
valid url http://example.com/ diff --git a/tests/checker/data/http_slash.html b/tests/checker/data/http_slash.html index 392e8034c..b507fca30 100644 --- a/tests/checker/data/http_slash.html +++ b/tests/checker/data/http_slash.html @@ -1,7 +1,7 @@ -ok example -one slash example -no slash example -no scheme example +ok example +one slash example +no slash example +no scheme example no url no url, one slash no url, no slash diff --git a/tests/checker/data/http_slash.html.result b/tests/checker/data/http_slash.html.result index 9e3b490d3..c1829be64 100644 --- a/tests/checker/data/http_slash.html.result +++ b/tests/checker/data/http_slash.html.result @@ -1,48 +1,41 @@ url http://localhost:%(port)d/%(datadir)s/http_slash.html cache key http://localhost:%(port)d/%(datadir)s/http_slash.html real url http://localhost:%(port)d/%(datadir)s/http_slash.html -info 7 URLs parsed. valid -url http://www.example.com/1 -cache key http://www.example.com/1 -real url http://www.example.com/1 -name ok example -valid - -url http:/www.example.com/2 +url http: cache key None -real url http:///www.example.com/2 -name one slash example +real url http:// +name no url, no slash error -url http:www.example.com/3 +url http:/ cache key None -real url http:///www.example.com/3 -name no slash example +real url http:/// +name no url, one slash error -url //www.example.com/4 -cache key http://www.example.com/4 -real url http://www.example.com/4 -name no scheme example -valid - url http:// cache key None real url http:// name no url error -url http:/ +url http:www.example.com/ cache key None -real url http:/// -name no url, one slash +real url http:///www.example.com/ +name no slash example error -url http: +url http:/www.example.com/ cache key None -real url http:// -name no url, no slash +real url http:///www.example.com/ +name one slash example error +url http://www.example.com/ +cache key http://www.example.com/ +real url http://www.example.com/ +name ok example +valid + diff --git 
a/tests/checker/data/misc.html.result b/tests/checker/data/misc.html.result index 89ae37fe7..1ea86e3a3 100644 --- a/tests/checker/data/misc.html.result +++ b/tests/checker/data/misc.html.result @@ -2,9 +2,13 @@ url file://%(curdir)s/%(datadir)s/misc.html cache key file://%(curdir)s/%(datadir)s/misc.html real url file://%(curdir)s/%(datadir)s/misc.html name %(datadir)s/misc.html -info 5 URLs parsed. valid +url +cache key None +real url +error + url http://www.example.com/ cache key http://www.example.com/ real url http://www.example.com/ @@ -15,11 +19,6 @@ cache key file://%(curdir)s/%(datadir)s/favicon.ico real url file://%(curdir)s/%(datadir)s/favicon.ico valid -url -cache key None -real url -error - url test.swf cache key file://%(curdir)s/%(datadir)s/test.swf real url file://%(curdir)s/%(datadir)s/test.swf diff --git a/tests/checker/data/opera6.adr.result b/tests/checker/data/opera6.adr.result index d47f1b2bc..15b73d53d 100644 --- a/tests/checker/data/opera6.adr.result +++ b/tests/checker/data/opera6.adr.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/opera6.adr cache key file://%(curdir)s/%(datadir)s/opera6.adr real url file://%(curdir)s/%(datadir)s/opera6.adr name %(datadir)s/opera6.adr -info 1 URL parsed. valid url file.css diff --git a/tests/checker/data/plist_binary/Bookmarks.plist.result b/tests/checker/data/plist_binary/Bookmarks.plist.result index ea00f43f2..1c4ea430b 100644 --- a/tests/checker/data/plist_binary/Bookmarks.plist.result +++ b/tests/checker/data/plist_binary/Bookmarks.plist.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist cache key file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist real url file://%(curdir)s/%(datadir)s/plist_binary/Bookmarks.plist name %(datadir)s/plist_binary/Bookmarks.plist -info 2 URLs parsed. 
valid url http://www.example.com/ diff --git a/tests/checker/data/plist_xml/Bookmarks.plist.result b/tests/checker/data/plist_xml/Bookmarks.plist.result index 22f050389..04644259d 100644 --- a/tests/checker/data/plist_xml/Bookmarks.plist.result +++ b/tests/checker/data/plist_xml/Bookmarks.plist.result @@ -4,7 +4,6 @@ url file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist cache key file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist real url file://%(curdir)s/%(datadir)s/plist_xml/Bookmarks.plist name %(datadir)s/plist_xml/Bookmarks.plist -info 2 URLs parsed. valid url http://www.example.com/ diff --git a/tests/checker/data/redir.html.result b/tests/checker/data/redir.html.result index 1ea7438d8..02a5a93c2 100644 --- a/tests/checker/data/redir.html.result +++ b/tests/checker/data/redir.html.result @@ -1,7 +1,6 @@ url http://localhost:%(port)d/%(datadir)s/redir.html cache key http://localhost:%(port)d/%(datadir)s/redir.html real url http://localhost:%(port)d/%(datadir)s/redir.html -info 1 URL parsed. valid url redirect_newhost.html diff --git a/tests/checker/data/urllist.txt b/tests/checker/data/urllist.txt index 466f29430..735789cd7 100644 --- a/tests/checker/data/urllist.txt +++ b/tests/checker/data/urllist.txt @@ -3,7 +3,5 @@ # comments are ignored file.html -file.html#isnix -file.html#iswas javascript:loadthis() diff --git a/tests/checker/data/urllist.txt.result b/tests/checker/data/urllist.txt.result index 45dda334f..5de36144a 100644 --- a/tests/checker/data/urllist.txt.result +++ b/tests/checker/data/urllist.txt.result @@ -2,7 +2,6 @@ url file://%(curdir)s/%(datadir)s/urllist.txt cache key file://%(curdir)s/%(datadir)s/urllist.txt real url file://%(curdir)s/%(datadir)s/urllist.txt name %(datadir)s/urllist.txt -info 4 URLs parsed. 
valid url file.html @@ -10,19 +9,8 @@ cache key file://%(curdir)s/%(datadir)s/file.html real url file://%(curdir)s/%(datadir)s/file.html valid -url file.html#isnix -cache key file://%(curdir)s/%(datadir)s/file.html#isnix -real url file://%(curdir)s/%(datadir)s/file.html -warning Anchor `isnix' not found. Available anchors: `iswas'. -valid - -url file.html#iswas -cache key file://%(curdir)s/%(datadir)s/file.html#iswas -real url file://%(curdir)s/%(datadir)s/file.html -valid - url javascript:loadthis() cache key javascript:loadthis() real url javascript:loadthis() -warning Javascript URL ignored. +info Javascript URL ignored. valid diff --git a/tests/checker/httpserver.py b/tests/checker/httpserver.py index a617150f8..94fc2cb1f 100644 --- a/tests/checker/httpserver.py +++ b/tests/checker/httpserver.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2012 Bastian Kleineidam +# Copyright (C) 2004-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -235,7 +235,7 @@ def redirect_newhost (self): def redirect_newscheme (self): """Redirect request to a new scheme.""" if "file" in self.path: - path = "file:README" + path = "file:README.md" else: path = "ftp://example.com/" self.send_response(302) diff --git a/tests/checker/test_anchor.py b/tests/checker/test_anchor.py index 03367455b..23c0122e7 100644 --- a/tests/checker/test_anchor.py +++ b/tests/checker/test_anchor.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2009 Bastian Kleineidam +# Copyright (C) 2004-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -26,4 +26,17 @@ class TestAnchor (LinkCheckTest): """ def test_anchor (self): - self.file_test("anchor.html") + confargs = {"enabledplugins": ["AnchorCheck"]} + url = 
u"file://%(curdir)s/%(datadir)s/anchor.html" % self.get_attrs() + nurl = self.norm(url) + anchor = "broken" + urlanchor = url + "#" + anchor + resultlines = [ + u"url %s" % urlanchor, + u"cache key %s" % nurl, + u"real url %s" % nurl, + u"warning Anchor `%s' not found. Available anchors: `myid:'." % anchor, + u"valid", + ] + self.direct(urlanchor, resultlines, confargs=confargs) + diff --git a/tests/checker/test_error.py b/tests/checker/test_error.py index c77a4898c..5836ac97d 100644 --- a/tests/checker/test_error.py +++ b/tests/checker/test_error.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2011 Bastian Kleineidam +# Copyright (C) 2004-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -40,7 +40,7 @@ def test_unrecognized (self): def test_invalid1 (self): # invalid scheme chars - url = u"äöü?:" + url = u"äöü:" attrs = self.get_attrs(url=url) attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs) resultlines = [ @@ -54,7 +54,7 @@ def test_invalid1 (self): def test_invalid2 (self): # missing scheme alltogether - url = u"?äöü?" 
+ url = u"äöü" attrs = self.get_attrs(url=url) attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs) resultlines = [ diff --git a/tests/checker/test_file.py b/tests/checker/test_file.py index 12be5a269..c8fe837ae 100644 --- a/tests/checker/test_file.py +++ b/tests/checker/test_file.py @@ -138,30 +138,22 @@ def test_good_dir_space (self): nurl = self.norm(url) url2 = u"file://%(curdir)s/%(datadir)s/a b/el.html" % self.get_attrs() nurl2 = self.norm(url2) + url3 = u"file://%(curdir)s/%(datadir)s/a b/t.txt" % self.get_attrs() + nurl3 = self.norm(url3) resultlines = [ u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % nurl, - u"info 2 URLs parsed.", - u"valid", - u"url bl.html#bl", - u"cache key %s#bl" % nurl, - u"real url %s" % nurl, - u"name Broken link", - u"info 2 URLs parsed.", - u"warning Anchor `bl' not found. Available anchors: `BL'.", u"valid", u"url el.html", u"cache key %s" % nurl2, u"real url %s" % nurl2, u"name External link", - u"info 1 URL parsed.", u"valid", - u"url #bl", - u"cache key %s#bl" % nurl2, - u"real url %s" % nurl2, - u"name Broken link", - u"warning Anchor `bl' not found. 
Available anchors: `BL'.", + u"url t.txt", + u"cache key %s" % nurl3, + u"real url %s" % nurl3, + u"name External link", u"valid", ] self.direct(url, resultlines, recursionlevel=2) diff --git a/tests/checker/test_http.py b/tests/checker/test_http.py index 6d07dadd4..623780f7d 100644 --- a/tests/checker/test_http.py +++ b/tests/checker/test_http.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2012 Bastian Kleineidam +# Copyright (C) 2004-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -48,14 +48,9 @@ def _test_status(self, status): ] if status in (204,): resultlines.append(u"warning No Content") - elif status == 401: - resultlines.append(u"warning Unauthorized access without HTTP authentication.") - elif status in (301, 302): - resultlines.append(u"info Redirected to `%s'." % url) - if (status != 101 and status < 200) or status >= 400 or status in (301, 302, 305): + if (status != 101 and status < 200) or status >= 400: result = u"error" else: result = u"valid" resultlines.append(result) self.direct(url, resultlines, recursionlevel=0) - diff --git a/tests/checker/test_http_misc.py b/tests/checker/test_http_misc.py index 7a9f49fda..096c8aca9 100644 --- a/tests/checker/test_http_misc.py +++ b/tests/checker/test_http_misc.py @@ -35,7 +35,6 @@ def swf_test (self): u"url %s" % url, u"cache key %s" % url, u"real url %s" % url, - u"info 1 URL parsed.", u"valid", u"url http://www.example.org/", u"cache key http://www.example.org/", @@ -47,15 +46,15 @@ def swf_test (self): def obfuscate_test (self): if os.name != "posix" or sys.platform != 'linux2': return - host = "www.google.de" - ip = iputil.resolve_host(host).pop() + host = "www.heise.de" + ip = iputil.resolve_host(host)[0] url = u"http://%s/" % iputil.obfuscate_ip(ip) - rurl = u"http://%s/" % host + rurl = u"http://%s/" % ip resultlines = [ u"url %s" % url, - u"cache key %s" % 
url, + u"cache key %s" % rurl, u"real url %s" % rurl, - u"info Redirected to `%s'." % rurl, + u"info Access denied by robots.txt, checked only syntax.", u"warning URL %s has obfuscated IP address %s" % (url, ip), u"valid", ] diff --git a/tests/checker/test_http_redirect.py b/tests/checker/test_http_redirect.py index 7ba2fd692..163b1058b 100644 --- a/tests/checker/test_http_redirect.py +++ b/tests/checker/test_http_redirect.py @@ -56,13 +56,11 @@ def redirect2 (self): u"cache key %s" % nurl, u"real url %s" % rurl, u"info Redirected to `%s'." % rurl, - u"info 1 URL parsed.", u"valid", u"url newurl.html", u"cache key %s" % rurl, u"real url %s" % rurl, u"name Recursive Redirect", - u"info 1 URL parsed.", u"valid", ] self.direct(url, resultlines, recursionlevel=99) @@ -75,16 +73,17 @@ def redirect3 (self): def redirect4 (self): url = u"http://localhost:%d/redirect_newscheme_ftp" % self.port nurl = url - rurl = u"ftp://example.com/" + #rurl = u"ftp://example.com/" resultlines = [ u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % nurl, - u"info Redirected to `%s'." % rurl, - u"valid", - u"url %s" % rurl, - u"cache key %s" % rurl, - u"real url %s" % rurl, + # don't allow ftp redirects + #u"info Redirected to `%s'." % rurl, + #u"valid", + #u"url %s" % rurl, + #u"cache key %s" % rurl, + #u"real url %s" % rurl, u"error", ] self.direct(url, resultlines, recursionlevel=99) @@ -92,15 +91,15 @@ def redirect4 (self): def redirect5 (self): url = u"http://localhost:%d/redirect_newscheme_file" % self.port nurl = url - rurl = u"file:README" - rnurl = u"file:///README" + #rurl = u"file:README" + #rnurl = u"file:///README" resultlines = [ u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % nurl, - u"info Redirected to `%s'." % rurl, - u"warning Redirection to url `%s' is not allowed." % rnurl, - u"valid", + # don't allow file redirects + #u"info Redirected to `%s'." % rurl, + #u"warning Redirection to url `%s' is not allowed." 
% rnurl, + u"error", ] self.direct(url, resultlines, recursionlevel=99) - diff --git a/tests/checker/test_http_robots.py b/tests/checker/test_http_robots.py index da06aac8a..d41f1a57d 100644 --- a/tests/checker/test_http_robots.py +++ b/tests/checker/test_http_robots.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2012 Bastian Kleineidam +# Copyright (C) 2004-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -42,7 +42,7 @@ def robots_txt2_test (self): u"url %s" % url, u"cache key %s" % url, u"real url %s" % url, - u"warning Access denied by robots.txt, skipping content checks.", - u"error", + u"info Access denied by robots.txt, checked only syntax.", + u"valid", ] self.direct(url, resultlines, recursionlevel=5) diff --git a/tests/checker/test_https.py b/tests/checker/test_https.py index d21b85146..8de4ebf94 100644 --- a/tests/checker/test_https.py +++ b/tests/checker/test_https.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2012 Bastian Kleineidam +# Copyright (C) 2004-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -28,11 +28,13 @@ class TestHttps (LinkCheckTest): @need_network def test_https (self): - url = u"https://www.amazon.de/" + url = u"https://www.amazon.com/" + rurl = u"http://www.amazon.com/" resultlines = [ u"url %s" % url, u"cache key %s" % url, - u"real url %s" % url, + u"real url %s" % rurl, + u"info Redirected to `%s'." 
% rurl, u"valid", ] - self.direct(url, resultlines) + self.direct(url, resultlines, recursionlevel=0) diff --git a/tests/checker/test_https_redirect.py b/tests/checker/test_https_redirect.py index bee2e125d..7f30ca038 100644 --- a/tests/checker/test_https_redirect.py +++ b/tests/checker/test_https_redirect.py @@ -29,16 +29,17 @@ def __init__(self, methodName='runTest'): def test_redirect (self): url = u"http://localhost:%d/redirect1" % self.port nurl = url - rurl = u"https://localhost:%d/newurl1" % self.port + #rurl = u"https://localhost:%d/newurl1" % self.port resultlines = [ u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % url, - u"info Redirected to `%s'." % rurl.replace('http:', 'https:'), - u"valid", - u"url %s" % rurl, - u"cache key %s" % rurl, - u"real url %s" % rurl, + # XXX the redirect fails because this is not an SSL server + #u"info Redirected to `%s'." % rurl.replace('http:', 'https:'), + #u"valid", + #u"url %s" % rurl, + #u"cache key %s" % rurl, + #u"real url %s" % rurl, u"error", ] self.direct(url, resultlines, recursionlevel=0) diff --git a/tests/checker/test_mail_bad.py b/tests/checker/test_mail_bad.py index 97e30105b..b9280dfb5 100644 --- a/tests/checker/test_mail_bad.py +++ b/tests/checker/test_mail_bad.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2012 Bastian Kleineidam +# Copyright (C) 2004-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -18,6 +18,7 @@ Test mail checking of bad mail addresses. """ from . import MailTest +from linkcheck.checker.const import URL_WARN_LENGTH class TestMailBad (MailTest): @@ -28,11 +29,11 @@ def test_error_mail (self): self.mail_error(u"mailto:@") self.mail_error(u"mailto:@example.org") self.mail_error(u"mailto:a@") - url_too_long = "URL length %d is longer than 255." 
- url = u"mailto:%s@%s" % (u"a"*60, u"b"*200) + url_too_long = "URL length %%d is longer than %d." % URL_WARN_LENGTH + url = u"mailto:%s@%s" % (u"a"*60, u"b"*(URL_WARN_LENGTH - 60)) warning = url_too_long % len(url) self.mail_error(url, warning=warning) - url = u"mailto:a@%s" % (u"a"*256) + url = u"mailto:a@%s" % (u"a"*URL_WARN_LENGTH) warning = url_too_long % len(url) self.mail_error(url, warning=warning) self.mail_error(u"mailto:%s@example.org" % (u"a"*65)) diff --git a/tests/checker/test_mail_good.py b/tests/checker/test_mail_good.py index 932c88c14..377c492dd 100644 --- a/tests/checker/test_mail_good.py +++ b/tests/checker/test_mail_good.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2012 Bastian Kleineidam +# Copyright (C) 2004-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -35,7 +35,6 @@ def test_good_mail (self): u"url %s" % url, u"cache key mailto:calvin@users.sourceforge.net", u"real url %s" % url, - u"info Verified address calvin@users.sourceforge.net: 250 is deliverable.", u"valid", ] self.direct(url, resultlines) @@ -45,7 +44,6 @@ def test_good_mail (self): u"url %s" % url, u"cache key mailto:calvin@users.sourceforge.net", u"real url %s" % url, - u"info Verified address calvin@users.sourceforge.net: 250 is deliverable.", u"valid", ] self.direct(url, resultlines) @@ -54,7 +52,6 @@ def test_good_mail (self): u"url %s" % url, u"cache key mailto:calvin@users.sourceforge.net", u"real url %s" % url, - u"info Verified address calvin@users.sourceforge.net: 250 is deliverable.", u"valid", ] self.direct(url, resultlines) @@ -63,7 +60,6 @@ def test_good_mail (self): u"url %s" % url, u"cache key mailto:o'hara@users.sourceforge.net", u"real url %s" % url, - u"warning Unverified address: 550 Unrouteable address.", u"valid", ] self.direct(url, resultlines) @@ -74,9 +70,6 @@ def test_good_mail (self): u"cache key 
mailto:calvin@users.sourceforge.net," u"calvin_CC@users.sourceforge.net,calvin_cc@users.sourceforge.net", u"real url %s" % url, - u"info Verified address calvin@users.sourceforge.net: 250 is deliverable.", - u"warning Unverified address: 550 Unrouteable address.", - u"warning Unverified address: 550 Unrouteable address.", u"valid", ] self.direct(url, resultlines) @@ -87,7 +80,6 @@ def test_good_mail (self): u"url %s" % url, u"cache key mailto:news-admins@freecode.com", u"real url %s" % url, - u"warning Unverified address: 502 5.5.1 VRFY command is disabled.", u"valid", ] self.direct(url, resultlines) @@ -102,7 +94,6 @@ def test_warn_mail (self): u"url %s" % url, u"cache key mailto:calvin@users.sourceforge.net", u"real url %s" % qurl, - u"info Verified address calvin@users.sourceforge.net: 250 is deliverable.", u"valid", ] self.direct(url, resultlines) @@ -112,7 +103,6 @@ def test_warn_mail (self): u"url %s" % url, u"cache key mailto:calvin@users.sourceforge.net", u"real url %s" % qurl, - u"info Verified address calvin@users.sourceforge.net: 250 is deliverable.", u"valid", ] self.direct(url, resultlines) @@ -131,7 +121,6 @@ def _mail_valid_unverified(self, char): addr = u'abc%sdef@sourceforge.net' % char url = u"mailto:%s" % addr self.mail_valid(url, - warning=u"Unverified address: 550 <%s> Unrouteable address." 
% addr, cache_key=url) @need_network @@ -162,7 +151,6 @@ def test_unicode_mail (self): u"url %s" % url, u"cache key %s" % mailto, u"real url %s" % url, - u"warning Unverified address: 550 Unrouteable address.", u"valid", ] self.direct(url, resultlines) diff --git a/tests/checker/test_unknown.py b/tests/checker/test_unknown.py index 831754555..689f18263 100644 --- a/tests/checker/test_unknown.py +++ b/tests/checker/test_unknown.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2010-2012 Bastian Kleineidam +# Copyright (C) 2010-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -30,7 +30,7 @@ def test_skype (self): u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % nurl, - u"warning Skype URL ignored.", + u"info Skype URL ignored.", u"valid", ] self.direct(url, resultlines) @@ -42,7 +42,7 @@ def test_irc (self): u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % nurl, - u"warning Irc URL ignored.", + u"info Irc URL ignored.", u"valid", ] self.direct(url, resultlines) @@ -52,7 +52,7 @@ def test_irc (self): u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % nurl, - u"warning Ircs URL ignored.", + u"info Ircs URL ignored.", u"valid", ] self.direct(url, resultlines) @@ -64,7 +64,7 @@ def test_steam (self): u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % nurl, - u"warning Steam URL ignored.", + u"info Steam URL ignored.", u"valid", ] self.direct(url, resultlines) @@ -76,7 +76,7 @@ def test_feed (self): u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % nurl, - u"warning Feed URL ignored.", + u"info Feed URL ignored.", u"valid", ] self.direct(url, resultlines) @@ -86,7 +86,7 @@ def test_feed (self): u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % nurl, - u"warning Feed URL ignored.", + u"info Feed URL ignored.", u"valid", ] self.direct(url, resultlines) diff --git 
a/tests/checker/test_urllen.py b/tests/checker/test_urllen.py index 5bc70ebef..50b4faadc 100644 --- a/tests/checker/test_urllen.py +++ b/tests/checker/test_urllen.py @@ -18,6 +18,7 @@ Test URL length checks. """ from . import LinkCheckTest +from linkcheck.checker.const import URL_MAX_LENGTH, URL_WARN_LENGTH class TestURLLength(LinkCheckTest): @@ -26,19 +27,20 @@ class TestURLLength(LinkCheckTest): """ def test_url_warn(self): - url = u"http://www.example.org/%s" % (u"a" * 256) + url = u"http://www.example.org/" + (u"a" * URL_WARN_LENGTH) + urllen = len(url) attrs = self.get_attrs(url=url) resultlines = [ u"url %(url)s" % attrs, u"cache key %(url)s" % attrs, u"real url %(url)s" % attrs, - u"warning URL length 279 is longer than 255.", + u"warning URL length %d is longer than %d." % (urllen, URL_WARN_LENGTH), u"error", ] self.direct(url, resultlines) def test_url_error(self): - url = u"http://www.example.org/%s" % ("a" * 2000) + url = u"http://www.example.org/" + ("a" * URL_MAX_LENGTH) attrs = self.get_attrs(url=url) attrs['nurl'] = self.norm(url) resultlines = [ @@ -48,4 +50,3 @@ def test_url_error(self): u"error", ] self.direct(url, resultlines) - diff --git a/tests/configuration/data/config0.ini b/tests/configuration/data/config0.ini index 7420b56b5..7e5df6e0a 100644 --- a/tests/configuration/data/config0.ini +++ b/tests/configuration/data/config0.ini @@ -1,20 +1,20 @@ # config with test values [checking] +allowedschemes=http,https,ftp threads=5 timeout=42 -anchors=0 +aborttimeout=99 recursionlevel=1 -warningregex=Oracle DB Error -warnsizebytes=2000 nntpserver=example.org -cookies=1 cookiefile=blablabla useragent=Example/0.0 -pause=99 debugmemory=1 localwebroot=foo sslverify=/path/to/cacerts.crt -warnsslcertdaysvalid=99 +maxnumurls=1000 +maxrunseconds=1 +maxfilesizeparse=100 +maxfilesizedownload=100 [filtering] ignore= @@ -25,7 +25,8 @@ nofollow= # IMADOOFUS nofollow_imadoofus1 nofollow_imadoofus2 -ignorewarnings=url-unicode-domain,anchor-not-found 
+ignorewarnings=url-unicode-domain +checkextern=True [authentication] entry= @@ -46,7 +47,6 @@ debug=Thread status=0 log=xmL verbose=1 -complete=1 warnings=1 quiet=0 fileoutput = Text, html, Gml, sql,csv, xml, gxml, dot @@ -116,3 +116,12 @@ encoding=utf-8 filename=imadoofus.gxml parts=realurL encoding=utf-8 + +[AnchorCheck] +[CssSyntaxCheck] +[HtmlSyntaxCheck] +[LocationInfo] +[RegexCheck] +[SslCertificateCheck] +[VirusCheck] + diff --git a/tests/configuration/test_config.py b/tests/configuration/test_config.py index d217bd184..c29bb6df8 100644 --- a/tests/configuration/test_config.py +++ b/tests/configuration/test_config.py @@ -39,30 +39,31 @@ def test_confparse (self): files = [get_file("config0.ini")] config.read(files) # checking section + for scheme in ("http", "https", "ftp"): + self.assertTrue(scheme in config["allowedschemes"]) self.assertEqual(config["threads"], 5) self.assertEqual(config["timeout"], 42) - self.assertFalse(config["anchors"]) + self.assertEqual(config["aborttimeout"], 99) self.assertEqual(config["recursionlevel"], 1) - self.assertEqual(config["warningregex"].pattern, "Oracle DB Error") - self.assertEqual(config["warnsizebytes"], 2000) self.assertEqual(config["nntpserver"], "example.org") - self.assertTrue(config["sendcookies"]) - self.assertTrue(config["storecookies"]) self.assertEqual(config["cookiefile"], "blablabla") self.assertEqual(config["useragent"], "Example/0.0") - self.assertEqual(config["wait"], 99) self.assertEqual(config["debugmemory"], 1) self.assertEqual(config["localwebroot"], "foo") self.assertEqual(config["sslverify"], "/path/to/cacerts.crt") - self.assertEqual(config["warnsslcertdaysvalid"], 99) + self.assertEqual(config["maxnumurls"], 1000) + self.assertEqual(config["maxrunseconds"], 1) + self.assertEqual(config["maxfilesizeparse"], 100) + self.assertEqual(config["maxfilesizedownload"], 100) # filtering section patterns = [x["pattern"].pattern for x in config["externlinks"]] for prefix in ("ignore_", "nofollow_"): for 
suffix in ("1", "2"): key = "%simadoofus%s" % (prefix, suffix) self.assertTrue(key in patterns) - for key in ("url-unicode-domain", "anchor-not-found"): + for key in ("url-unicode-domain",): self.assertTrue(key in config["ignorewarnings"]) + self.assertTrue(config["checkextern"]) # authentication section patterns = [x["pattern"].pattern for x in config["authentication"]] for suffix in ("1", "2"): @@ -81,10 +82,12 @@ def test_confparse (self): self.assertFalse(config["status"]) self.assertTrue(isinstance(config["logger"], linkcheck.logger.customxml.CustomXMLLogger)) self.assertTrue(config["verbose"]) - self.assertTrue(config["complete"]) self.assertTrue(config["warnings"]) self.assertFalse(config["quiet"]) self.assertEqual(len(config["fileoutput"]), 8) + # plugins + for plugin in ("AnchorCheck", "CssSyntaxCheck", "HtmlSyntaxCheck", "LocationInfo", "RegexCheck", "SslCertificateCheck", "VirusCheck"): + self.assertTrue(plugin in config["enabledplugins"]) # text logger section self.assertEqual(config["text"]["filename"], "imadoofus.txt") self.assertEqual(config["text"]["parts"], ["realurl"]) diff --git a/tests/test_clamav.py b/tests/test_clamav.py index 05cd8f5b0..9fe339c87 100644 --- a/tests/test_clamav.py +++ b/tests/test_clamav.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2006-2012 Bastian Kleineidam +# Copyright (C) 2006-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,15 +19,18 @@ """ import unittest from tests import need_clamav -from linkcheck import clamav +from linkcheck.plugins import viruscheck as clamav class TestClamav (unittest.TestCase): + def setUp(self): + self.clamav_conf = clamav.get_clamav_conf("/etc/clamav/clamd.conf") + @need_clamav def testClean (self): data = "" - infected, errors = clamav.scan(data) + infected, errors = clamav.scan(data, self.clamav_conf) self.assertFalse(infected) 
self.assertFalse(errors) @@ -47,7 +50,7 @@ def testInfected (self): 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA' \ 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAW0NMQU1BVl' \ '0AEAAAABAAAAACAAABAAAAAAAAAAAAAAAAAAAAAAAAwA==">t' - infected, errors = clamav.scan(data) + infected, errors = clamav.scan(data, self.clamav_conf) msg = 'stream: ClamAV-Test-File(2d1206194bd704385e37000be6113f73:781) FOUND\n' self.assertTrue(msg in infected) self.assertFalse(errors) diff --git a/tests/test_cookies.py b/tests/test_cookies.py index 0cc204467..9091bb46c 100644 --- a/tests/test_cookies.py +++ b/tests/test_cookies.py @@ -25,237 +25,7 @@ class TestCookies (unittest.TestCase): """Test cookie routines.""" - def test_netscape_cookie1 (self): - data = ( - ("Foo", "Bar"), - ("Comment", "justatest"), - ("Max-Age", "1000"), - ("Path", "/"), - ("Version", "1"), - ) - value = "; ".join('%s="%s"' % (key, value) for key, value in data) - scheme = "http" - host = "localhost" - path = "/" - cookie = linkcheck.cookies.NetscapeCookie(value, scheme, host, path) - self.assertTrue(cookie.check_expired()) - self.assertTrue(cookie.is_valid_for("http", host, 80, "/")) - self.assertTrue(cookie.is_valid_for("https", host, 443, "/a")) - - def test_netscape_cookie2 (self): - data = ( - ("Foo", "Bar"), - ("Comment", "justatest"), - ("Max-Age", "0"), - ("Path", "/"), - ("Version", "1"), - ) - value = "; ".join('%s="%s"' % (key, value) for key, value in data) - scheme = "http" - host = "localhost" - path = "/" - cookie = linkcheck.cookies.NetscapeCookie(value, scheme, host, path) - self.assertTrue(cookie.is_expired()) - - def test_netscape_cookie3 (self): - # invalid port - data = ( - ("Foo", "Bar"), - ("Port", "hul,la"), - ) - value = "; ".join('%s="%s"' % (key, value) for key, value in data) - scheme = "http" - host = "localhost" - path = "/" - self.assertRaises(linkcheck.cookies.CookieError, - linkcheck.cookies.NetscapeCookie, value, scheme, host, path) - - def 
test_netscape_cookie4 (self): - data = ( - ("Foo", "Bar"), - ("Domain", "localhost"), - ("Port", "100,555,76"), - ) - value = "; ".join('%s="%s"' % (key, value) for key, value in data) - scheme = "http" - host = "localhost" - path = "/" - cookie = linkcheck.cookies.NetscapeCookie(value, scheme, host, path) - self.assertTrue(cookie.is_valid_for("http", host, 100, "/")) - - def test_netscape_cookie5 (self): - data = ( - ("Foo", "Bar"), - ("Domain", "example.org"), - ("Expires", "Wed, 12-Dec-2001 19:27:57 GMT"), - ("Path", "/"), - ) - # note: values are without quotes - value = "; ".join('%s=%s' % (key, value) for key, value in data) - scheme = "http" - host = "example.org" - path = "/" - cookie = linkcheck.cookies.NetscapeCookie(value, scheme, host, path) - self.assertTrue(cookie.is_expired()) - - def test_netscape_cookie6 (self): - data = ( - ("Foo", "Bar"), - ("Domain", "example.org"), - ("Path", "/"), - ) - # note: values are without quotes - value = "; ".join('%s=%s' % (key, value) for key, value in data) - scheme = "http" - host = "example.org" - path = "/" - cookie = linkcheck.cookies.NetscapeCookie(value, scheme, host, path) - self.assertTrue(cookie.is_valid_for("http", "example.org", 80, "/")) - self.assertTrue(cookie.is_valid_for("http", "www.example.org", 80, "/")) - self.assertFalse(cookie.is_valid_for("http", "www.b.example.org", 80, "/")) - - def test_netscape_cookie7 (self): - data1 = ( - ("Foo", "Bar"), - ("Domain", "example.org"), - ("Path", "/"), - ) - data2 = ( - ("FOO", "Baz"), - ("Domain", "example.org"), - ("Path", "/"), - ) - data3 = ( - ("FOOl", "Baz"), - ("Domain", "example.org"), - ("Path", "/"), - ) - # note: values are without quotes - value1 = "; ".join('%s=%s' % (key, value) for key, value in data1) - value2 = "; ".join('%s=%s' % (key, value) for key, value in data2) - value3 = "; ".join('%s=%s' % (key, value) for key, value in data3) - scheme = "http" - host = "example.org" - path = "/" - cookie1 = 
linkcheck.cookies.NetscapeCookie(value1, scheme, host, path) - cookie2 = linkcheck.cookies.NetscapeCookie(value2, scheme, host, path) - cookie3 = linkcheck.cookies.NetscapeCookie(value3, scheme, host, path) - self.assertEqual(cookie1, cookie2) - self.assertNotEqual(cookie1, cookie3) - self.assertNotEqual(cookie2, cookie3) - - def test_netscape_cookie8 (self): - value = "" - scheme = "http" - host = "localhost" - path = "/" - self.assertRaises(linkcheck.cookies.CookieError, - linkcheck.cookies.NetscapeCookie, value, scheme, host, path) - - def test_netscape_cookie9 (self): - # illegal expiration date - data = ( - ("Foo", "Bar"), - ("Domain", "example.org"), - ("Expires", "Thu, 08-Oct-3697739 18:36:07 GMT"), - ("Path", "/"), - ) - # note: values are without quotes - value = "; ".join('%s=%s' % (key, value) for key, value in data) - scheme = "http" - host = "example.org" - path = "/" - self.assertRaises(linkcheck.cookies.CookieError, - linkcheck.cookies.NetscapeCookie, value, scheme, host, path) - - def test_rfc_cookie1 (self): - data = ( - ("Foo", "Bar"), - ("Comment", "justatest"), - ("Max-Age", "1000"), - ("Path", "/"), - ("Version", "1"), - ) - value = "; ".join('%s="%s"' % (key, value) for key, value in data) - scheme = "http" - host = "localhost" - path = "/" - cookie = linkcheck.cookies.Rfc2965Cookie(value, scheme, host, path) - self.assertTrue(cookie.check_expired()) - self.assertTrue(cookie.is_valid_for("http", host, 80, "/")) - self.assertTrue(cookie.is_valid_for("https", host, 443, "/a")) - - def test_rfc_cookie2 (self): - data = ( - ("Foo", "Bar"), - ("Comment", "justatest"), - ("Max-Age", "0"), - ("Path", "/"), - ("Version", "1"), - ) - value = "; ".join('%s="%s"' % (key, value) for key, value in data) - scheme = "http" - host = "localhost" - path = "/" - cookie = linkcheck.cookies.Rfc2965Cookie(value, scheme, host, path) - self.assertTrue(cookie.is_expired()) - - def test_rfc_cookie3 (self): - # invalid port - data = ( - ("Foo", "Bar"), - ("Port", 
"hul,la"), - ) - value = "; ".join('%s="%s"' % (key, value) for key, value in data) - scheme = "http" - host = "localhost" - path = "/" - self.assertRaises(linkcheck.cookies.CookieError, - linkcheck.cookies.Rfc2965Cookie, value, scheme, host, path) - - def test_rfc_cookie4 (self): - data = ( - ("Foo", "Bar"), - ("Port", "100,555,76"), - ) - value = "; ".join('%s="%s"' % (key, value) for key, value in data) - scheme = "http" - host = "localhost" - path = "/" - cookie = linkcheck.cookies.Rfc2965Cookie(value, scheme, host, path) - self.assertTrue(cookie.is_valid_for("http", host, 100, "/")) - - def test_rfc_cookie5 (self): - data1 = ( - ("Foo", "Bar"), - ("Domain", "example.org"), - ("Path", "/"), - ) - data2 = ( - ("FOO", "Baz"), - ("Domain", "EXAMPLE.org"), - ("Path", "/"), - ) - data3 = ( - ("FOOl", "Baz"), - ("Domain", "EXAMPLE.org"), - ("Path", "/"), - ) - # note: values are without quotes - value1 = "; ".join('%s=%s' % (key, value) for key, value in data1) - value2 = "; ".join('%s=%s' % (key, value) for key, value in data2) - value3 = "; ".join('%s=%s' % (key, value) for key, value in data3) - scheme = "http" - host = "example.org" - path = "/" - cookie1 = linkcheck.cookies.Rfc2965Cookie(value1, scheme, host, path) - cookie2 = linkcheck.cookies.Rfc2965Cookie(value2, scheme, host, path) - cookie3 = linkcheck.cookies.Rfc2965Cookie(value3, scheme, host, path) - self.assertEqual(cookie1, cookie2) - self.assertNotEqual(cookie1, cookie3) - self.assertNotEqual(cookie2, cookie3) - - def test_cookie_parse1 (self): + def test_cookie_parse_multiple_headers (self): lines = [ 'Host: example.org', 'Path: /hello', @@ -263,33 +33,33 @@ def test_cookie_parse1 (self): 'Set-cookie: spam="egg"', ] from_headers = linkcheck.cookies.from_headers - headers, scheme, host, path = from_headers("\r\n".join(lines)) - self.assertEqual(scheme, "http") - self.assertEqual(host, "example.org") - self.assertEqual(path, "/hello") - self.assertEqual(len(headers.headers), 4) - - def 
test_cookie_parse2 (self): + cookies = from_headers("\r\n".join(lines)) + self.assertEqual(len(cookies), 2) + for cookie in cookies: + self.assertEqual(cookie.domain, "example.org") + self.assertEqual(cookie.path, "/hello") + self.assertEqual(cookies[0].name, 'ID') + self.assertEqual(cookies[0].value, 'smee') + self.assertEqual(cookies[1].name, 'spam') + self.assertEqual(cookies[1].value, 'egg') + + def test_cookie_parse_multiple_values (self): lines = [ - 'Scheme: https', 'Host: example.org', 'Set-cookie: baggage="elitist"; comment="hologram"', ] from_headers = linkcheck.cookies.from_headers - headers, scheme, host, path = from_headers("\r\n".join(lines)) - self.assertEqual(scheme, "https") - self.assertEqual(host, "example.org") - self.assertEqual(path, "/") - self.assertEqual(len(headers.headers), 3) - - def test_cookie_parse3 (self): - lines = [ - 'Scheme: https', - ] - from_headers = linkcheck.cookies.from_headers - self.assertRaises(ValueError, from_headers, "\r\n".join(lines)) - - def test_cookie_parse4 (self): + cookies = from_headers("\r\n".join(lines)) + self.assertEqual(len(cookies), 2) + for cookie in cookies: + self.assertEqual(cookie.domain, "example.org") + self.assertEqual(cookie.path, "/") + self.assertEqual(cookies[0].name, 'baggage') + self.assertEqual(cookies[0].value, 'elitist') + self.assertEqual(cookies[1].name, 'comment') + self.assertEqual(cookies[1].value, 'hologram') + + def test_cookie_parse_error (self): lines = [ ' Host: imaweevil.org', 'Set-cookie: baggage="elitist"; comment="hologram"', diff --git a/tests/test_linkchecker.py b/tests/test_linkchecker.py new file mode 100644 index 000000000..421545f56 --- /dev/null +++ b/tests/test_linkchecker.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2014 Bastian Kleineidam +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the 
License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +import unittest +import sys +from . import linkchecker_cmd, run_checked + + +def run_with_options(options, cmd=linkchecker_cmd): + """Run a command with given options.""" + run_checked([sys.executable, cmd] + options) + + +class TestLinkchecker (unittest.TestCase): + """Test the linkchecker commandline client.""" + + def test_linkchecker(self): + # test some single options + for option in ("-V", "--version", "-h", "--help", "--list-plugins", "-Dall"): + run_with_options([option]) + # unknown option + self.assertRaises(OSError, run_with_options, ['--imadoofus']) diff --git a/tests/test_network.py b/tests/test_network.py index a2352539d..922095a2d 100644 --- a/tests/test_network.py +++ b/tests/test_network.py @@ -43,7 +43,7 @@ def test_iputils (self): host = "dinsdale.python.org" ips = iputil.resolve_host(host) self.assertTrue(len(ips) > 0) - obfuscated = iputil.obfuscate_ip(ips.pop()) + obfuscated = iputil.obfuscate_ip(ips[0]) self.assertTrue(iputil.is_obfuscated_ip(obfuscated)) hosts = iputil.lookup_ips([obfuscated]) self.assertTrue(host in hosts) diff --git a/tests/test_url.py b/tests/test_url.py index 0b3b03fb6..92a2e8a22 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -1,5 +1,5 @@ # -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2012 Bastian Kleineidam +# Copyright (C) 2004-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -361,7 +361,7 @@ def test_norm_other (self): self.urlnormtest(url, nurl) # ldap url url = 
"ldap://[2001:db8::7]/c=GB?objectClass?one" - nurl = "ldap://%5B2001:db8::7%5D/c=GB%3FobjectClass%3Fone" + nurl = "ldap://%5B2001:db8::7%5D/c=GB?objectClass?one" self.urlnormtest(url, nurl) url = "tel:+1-816-555-1212" nurl = url