From fe18f7b47f5a59509eff6c54264b8a680fdda9d3 Mon Sep 17 00:00:00 2001 From: gitressa Date: Mon, 19 Oct 2020 12:34:40 +0200 Subject: [PATCH] Update CrawlCommand.php Increases the default max header size for the HTTP client from 8192 to 16384 bytes, to allow crawling of pages with a lot of content in the Cache-Tags header field, which some CMSes produce due to complex caching rules. --- lib/Console/Command/CrawlCommand.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Console/Command/CrawlCommand.php b/lib/Console/Command/CrawlCommand.php index 9b38ade..a5d9f15 100644 --- a/lib/Console/Command/CrawlCommand.php +++ b/lib/Console/Command/CrawlCommand.php @@ -98,7 +98,7 @@ protected function configure() $this->addOption(self::OPT_PUBLISHER, 'p', InputOption::VALUE_REQUIRED, 'Publisher to use: `json` or `csv`', 'json'); $this->addOption(self::OPT_DISPLAY_BUFSIZE, null, InputOption::VALUE_REQUIRED, 'Size of report buffer to display', 5); $this->addOption(self::OPT_CLIENT_MAX_BODY_SIZE, null, InputOption::VALUE_REQUIRED, 'Max body size for HTTP client (in bytes)', 10485760); - $this->addOption(self::OPT_CLIENT_MAX_HEADER_SIZE, null, InputOption::VALUE_REQUIRED, 'Max header size for HTTP client (in bytes)', 8192); + $this->addOption(self::OPT_CLIENT_MAX_HEADER_SIZE, null, InputOption::VALUE_REQUIRED, 'Max header size for HTTP client (in bytes)', 16384); $this->addOption(self::OPT_CLIENT_MAX_REDIRECTS, null, InputOption::VALUE_REQUIRED, 'Maximum number of redirects to follow', 5); $this->addOption(self::OPT_CLIENT_MAX_TIMEOUT, null, InputOption::VALUE_REQUIRED, 'Number of milliseconds to wait for URL', 15000); $this->addOption(self::OPT_CLIENT_SECURITY_LEVEL, null, InputOption::VALUE_REQUIRED, 'TLS Security level, see https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_security_level.html');