diff --git a/includes/class-feed.php b/includes/class-feed.php index 018939c9..a0899e9a 100644 --- a/includes/class-feed.php +++ b/includes/class-feed.php @@ -869,6 +869,8 @@ public function discover_available_feeds( $url ) { if ( isset( $available_feeds[ $link_url ]['parser'] ) || ! isset( $feed['parser'] ) ) { continue; } + } else { + $available_feeds[ $link_url ] = array(); } $available_feeds[ $link_url ] = array_merge( $available_feeds[ $link_url ], $feed ); $available_feeds[ $link_url ]['url'] = $link_url; diff --git a/libs/Mf2/Parser.php b/libs/Mf2/Parser.php index 33b2f04f..07e8b046 100644 --- a/libs/Mf2/Parser.php +++ b/libs/Mf2/Parser.php @@ -37,13 +37,13 @@ * } * * @param string|DOMDocument $input The HTML string or DOMDocument object to parse - * @param string $url The URL the input document was found at, for relative URL resolution - * @param bool $convertClassic whether or not to convert classic microformats + * @param string $url The URL the input document was found at, for relative URL resolution + * @param bool $convertClassic whether or not to convert classic microformats * @return array Canonical MF2 array structure */ -function parse( $input, $url = null, $convertClassic = true ) { - $parser = new Parser( $input, $url ); - return $parser->parse( $convertClassic ); +function parse($input, $url = null, $convertClassic = true) { + $parser = new Parser($input, $url); + return $parser->parse($convertClassic); } /** @@ -58,47 +58,42 @@ function parse( $input, $url = null, $convertClassic = true ) { * for the actual value. * * @param string $url The URL to fetch - * @param bool $convertClassic (optional, default true) whether or not to convert classic microformats - * @param array $curlInfo (optional) the results of curl_getinfo will be placed in this variable for debugging + * @param bool $convertClassic (optional, default true) whether or not to convert classic microformats + * @param &array $curlInfo (optional) the results of curl_getinfo will be placed in this variable for debugging * @return array|null canonical microformats2 array structure on success, null on failure */ -function fetch( $url, $convertClassic = true, &$curlInfo = null ) { - // Friends modification: use wp_safe_remote_get() instead of curl directly. - $response = wp_safe_remote_get( - $url, - array( - 'timeout' => 20, - 'redirection' => 5, - 'headers' => array( - 'Accept: text/html', - ), - ) - ); - if ( 200 !== wp_remote_retrieve_response_code( $response ) ) { - return null; - } - $html = wp_remote_retrieve_body( $response ); - $headers = wp_remote_retrieve_headers( $response ); - - if ( stripos( $headers['content-type'], 'html' ) === false ) { +function fetch($url, $convertClassic = true, &$curlInfo=null) { + $ch = curl_init(); + curl_setopt($ch, CURLOPT_URL, $url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($ch, CURLOPT_HEADER, 0); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); + curl_setopt($ch, CURLOPT_MAXREDIRS, 5); + curl_setopt($ch, CURLOPT_HTTPHEADER, array( + 'Accept: text/html' + )); + $html = curl_exec($ch); + $info = $curlInfo = curl_getinfo($ch); + curl_close($ch); + + if (strpos(strtolower($info['content_type']), 'html') === false) { // The content was not delivered as HTML, do not attempt to parse it. return null; } - // ensure the final URL is used to resolve relative URLs - $url = $response['http_response']->get_response_object()->url; - // end modification. - return parse( $html, $url, $convertClassic ); + # ensure the final URL is used to resolve relative URLs + $url = $info['url']; + + return parse($html, $url, $convertClassic); } /** * Unicode to HTML Entities - * * @param string $input String containing characters to convert into HTML entities * @return string */ -function unicodeToHtmlEntities( $input ) { - return \mb_convert_encoding( $input, 'HTML-ENTITIES', \mb_detect_encoding( $input ) ); +function unicodeToHtmlEntities($input) { + return mb_convert_encoding($input, 'HTML-ENTITIES', mb_detect_encoding($input)); } /** @@ -111,15 +106,15 @@ function unicodeToHtmlEntities( $input ) { * @param string $str * @return string */ -function collapseWhitespace( $str ) { - return preg_replace( '/[\s|\n]+/', ' ', $str ); +function collapseWhitespace($str) { + return preg_replace('/[\s|\n]+/', ' ', $str); } -function unicodeTrim( $str ) { +function unicodeTrim($str) { // this is cheating. TODO: find a better way if this causes any problems - $str = str_replace( \mb_convert_encoding( ' ', 'UTF-8', 'HTML-ENTITIES' ), ' ', $str ); - $str = preg_replace( '/^\s+/', '', $str ); - return preg_replace( '/\s+$/', '', $str ); + $str = str_replace(mb_convert_encoding(' ', 'UTF-8', 'HTML-ENTITIES'), ' ', $str); + $str = preg_replace('/^\s+/', '', $str); + return preg_replace('/\s+$/', '', $str); } /** @@ -132,17 +127,17 @@ function unicodeTrim( $str ) { * @param string $prefix The prefix to look for * @return string|array The prefixed name of the first microfomats class found or false */ -function mfNamesFromClass( $class, $prefix = 'h-' ) { - $class = str_replace( array( ' ', ' ', "\n" ), ' ', $class ); - $classes = explode( ' ', $class ); - $classes = preg_grep( '#^(h|p|u|dt|e)-([a-z0-9]+-)?[a-z]+(-[a-z]+)*$#', $classes ); +function mfNamesFromClass($class, $prefix='h-') { + $class = str_replace(array(' ', ' ', "\n"), ' ', $class); + $classes = explode(' ', $class); + $classes = preg_grep('#^(h|p|u|dt|e)-([a-z0-9]+-)?[a-z]+(-[a-z]+)*$#', $classes); $matches = array(); - foreach ( $classes as $classname ) { + foreach ($classes as $classname) { $compare_classname = ' ' . $classname; $compare_prefix = ' ' . $prefix; - if ( strstr( $compare_classname, $compare_prefix ) !== false && ( $compare_classname != $compare_prefix ) ) { - $matches[] = ( $prefix === 'h-' ) ? $classname : substr( $classname, strlen( $prefix ) ); + if (strstr($compare_classname, $compare_prefix) !== false && ($compare_classname != $compare_prefix)) { + $matches[] = ($prefix === 'h-') ? $classname : substr($classname, strlen($prefix)); } } @@ -151,12 +146,11 @@ function mfNamesFromClass( $class, $prefix = 'h-' ) { /** * Registered with the XPath object and used within XPaths for finding root elements. - * * @param string $class * @return bool */ -function classHasMf2RootClassname( $class ) { - return count( mfNamesFromClass( $class, 'h-' ) ) > 0; +function classHasMf2RootClassname($class) { + return count(mfNamesFromClass($class, 'h-')) > 0; } /** @@ -168,19 +162,19 @@ function classHasMf2RootClassname( $class ) { * @param string $class * @return array */ -function nestedMfPropertyNamesFromClass( $class ) { - $prefixes = array( 'p-', 'u-', 'dt-', 'e-' ); +function nestedMfPropertyNamesFromClass($class) { + $prefixes = array('p-', 'u-', 'dt-', 'e-'); $propertyNames = array(); - foreach ( $prefixes as $prefix ) { - $classes = mfNamesFromClass( $class, $prefix ); - foreach ( $classes as $property ) { - $propertyNames[ $property ][] = $prefix; + foreach ($prefixes as $prefix) { + $classes = mfNamesFromClass($class, $prefix); + foreach ($classes as $property) { + $propertyNames[$property][] = $prefix; } } - foreach ( $propertyNames as $property => $prefixes ) { - $propertyNames[ $property ] = array_unique( $prefixes ); + foreach ($propertyNames as $property => $prefixes) { + $propertyNames[$property] = array_unique($prefixes); } return $propertyNames; @@ -190,61 +184,61 @@ function nestedMfPropertyNamesFromClass( $class ) { * Wraps mfNamesFromClass to handle an element as input (common) * * @param DOMElement $e The element to get the classname for - * @param string $prefix The prefix to look for + * @param string $prefix The prefix to look for * @return mixed See return value of mf2\Parser::mfNameFromClass() */ -function mfNamesFromElement( \DOMElement $e, $prefix = 'h-' ) { - $class = $e->getAttribute( 'class' ); - return mfNamesFromClass( $class, $prefix ); +function mfNamesFromElement(\DOMElement $e, $prefix = 'h-') { + $class = $e->getAttribute('class'); + return mfNamesFromClass($class, $prefix); } /** * Wraps nestedMfPropertyNamesFromClass to handle an element as input */ -function nestedMfPropertyNamesFromElement( \DOMElement $e ) { - $class = $e->getAttribute( 'class' ); - return nestedMfPropertyNamesFromClass( $class ); +function nestedMfPropertyNamesFromElement(\DOMElement $e) { + $class = $e->getAttribute('class'); + return nestedMfPropertyNamesFromClass($class); } /** * Converts various time formats to HH:MM - * * @param string $time The time to convert * @return string */ -function convertTimeFormat( $time ) { +function convertTimeFormat($time) { $hh = $mm = $ss = ''; - preg_match( '/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches ); + preg_match('/(\d{1,2}):?(\d{2})?:?(\d{2})?(a\.?m\.?|p\.?m\.?)?/i', $time, $matches); // If no am/pm is specified: - if ( empty( $matches[4] ) ) { + if (empty($matches[4])) { return $time; } else { // Otherwise, am/pm is specified. - $meridiem = strtolower( str_replace( '.', '', $matches[4] ) ); + $meridiem = strtolower(str_replace('.', '', $matches[4])); // Hours. $hh = $matches[1]; // Add 12 to hours if pm applies. - if ( $meridiem == 'pm' && ( $hh < 12 ) ) { + if ($meridiem == 'pm' && ($hh < 12)) { $hh += 12; } - $hh = str_pad( $hh, 2, '0', STR_PAD_LEFT ); + $hh = str_pad($hh, 2, '0', STR_PAD_LEFT); // Minutes. - $mm = ( empty( $matches[2] ) ) ? '00' : $matches[2]; + $mm = (empty($matches[2]) ) ? '00' : $matches[2]; // Seconds, only if supplied. - if ( ! empty( $matches[3] ) ) { + if (!empty($matches[3])) { $ss = $matches[3]; } - if ( empty( $ss ) ) { - return sprintf( '%s:%s', $hh, $mm ); - } else { - return sprintf( '%s:%s:%s', $hh, $mm, $ss ); + if (empty($ss)) { + return sprintf('%s:%s', $hh, $mm); + } + else { + return sprintf('%s:%s:%s', $hh, $mm, $ss); } } } @@ -253,18 +247,17 @@ function convertTimeFormat( $time ) { * Normalize an ordinal date to YYYY-MM-DD * This function should only be called after validating the $dtValue * matches regex \d{4}-\d{2} - * * @param string $dtValue * @return string */ -function normalizeOrdinalDate( $dtValue ) { - list($year, $day) = explode( '-', $dtValue, 2 ); - $day = intval( $day ); - if ( $day < 367 && $day > 0 ) { - $date = \DateTime::createFromFormat( 'Y-z', $dtValue ); - $date->modify( '-1 day' ); // 'z' format is zero-based so need to adjust - if ( $date->format( 'Y' ) === $year ) { - return $date->format( 'Y-m-d' ); +function normalizeOrdinalDate($dtValue) { + list($year, $day) = explode('-', $dtValue, 2); + $day = intval($day); + if ($day < 367 && $day > 0) { + $date = \DateTime::createFromFormat('Y-z', $dtValue); + $date->modify('-1 day'); # 'z' format is zero-based so need to adjust + if ($date->format('Y') === $year) { + return $date->format('Y-m-d'); } } return ''; @@ -272,54 +265,44 @@ function normalizeOrdinalDate( $dtValue ) { /** * If a date value has a timezone offset, normalize it. - * * @param string $dtValue * @return string isolated, normalized TZ offset for implied TZ for other dt- properties */ -function normalizeTimezoneOffset( &$dtValue ) { - preg_match( '/Z|[+-]\d{1,2}:?(\d{2})?$/i', $dtValue, $matches ); +function normalizeTimezoneOffset(&$dtValue) { + preg_match('/Z|[+-]\d{1,2}:?(\d{2})?$/i', $dtValue, $matches); - if ( empty( $matches ) ) { + if (empty($matches)) { return null; } $timezoneOffset = null; if ( $matches[0] != 'Z' ) { - $timezoneString = str_replace( ':', '', $matches[0] ); - $plus_minus = substr( $timezoneString, 0, 1 ); - $timezoneOffset = substr( $timezoneString, 1 ); - if ( strlen( $timezoneOffset ) <= 2 ) { + $timezoneString = str_replace(':', '', $matches[0]); + $plus_minus = substr($timezoneString, 0, 1); + $timezoneOffset = substr($timezoneString, 1); + if ( strlen($timezoneOffset) <= 2 ) { $timezoneOffset .= '00'; } - $timezoneOffset = str_pad( $timezoneOffset, 4, 0, STR_PAD_LEFT ); + $timezoneOffset = str_pad($timezoneOffset, 4, 0, STR_PAD_LEFT); $timezoneOffset = $plus_minus . $timezoneOffset; - $dtValue = preg_replace( '/Z?[+-]\d{1,2}:?(\d{2})?$/i', $timezoneOffset, $dtValue ); + $dtValue = preg_replace('/Z?[+-]\d{1,2}:?(\d{2})?$/i', $timezoneOffset, $dtValue); } return $timezoneOffset; } -function applySrcsetUrlTransformation( $srcset, $transformation ) { - return implode( - ', ', - array_filter( - array_map( - function ( $srcsetPart ) use ( $transformation ) { - $parts = explode( " \t\n\r\0\x0B", trim( $srcsetPart ), 2 ); - $parts[0] = rtrim( $parts[0] ); +function applySrcsetUrlTransformation($srcset, $transformation) { + return implode(', ', array_filter(array_map(function ($srcsetPart) use ($transformation) { + $parts = explode(" \t\n\r\0\x0B", trim($srcsetPart), 2); + $parts[0] = rtrim($parts[0]); - if ( empty( $parts[0] ) ) { - return false; } + if (empty($parts[0])) { return false; } - $parts[0] = call_user_func( $transformation, $parts[0] ); + $parts[0] = call_user_func($transformation, $parts[0]); - return $parts[0] . ( empty( $parts[1] ) ? '' : ' ' . $parts[1] ); - }, - explode( ',', trim( $srcset ) ) - ) - ) - ); + return $parts[0] . (empty($parts[1]) ? '' : ' ' . $parts[1]); + }, explode(',', trim($srcset))))); } /** @@ -359,14 +342,12 @@ class Parser { /** * Elements upgraded to mf2 during backcompat - * * @var SplObjectStorage */ protected $upgraded; /** * Whether to convert classic microformats - * * @var bool */ public $convertClassic; @@ -375,44 +356,48 @@ class Parser { * Constructor * * @param DOMDocument|string $input The data to parse. A string of HTML or a DOMDocument - * @param string $url The URL of the parsed document, for relative URL resolution - * @param boolean $jsonMode Whether or not to use a stdClass instance for an empty `rels` dictionary. This breaks PHP looping over rels, but allows the output to be correctly serialized as JSON. + * @param string $url The URL of the parsed document, for relative URL resolution + * @param boolean $jsonMode Whether or not to use a stdClass instance for an empty `rels` dictionary. This breaks PHP looping over rels, but allows the output to be correctly serialized as JSON. */ - public function __construct( $input, $url = null, $jsonMode = false ) { + public function __construct($input, $url = null, $jsonMode = false) { $emptyDocDefault = ''; - libxml_use_internal_errors( true ); - set_error_handler( '__return_null' ); - if ( is_string( $input ) ) { - if ( empty( $input ) ) { + libxml_use_internal_errors(true); + if (is_string($input)) { + if (empty($input)) { $input = $emptyDocDefault; } - if ( class_exists( 'Masterminds\\HTML5' ) ) { - $doc = new \Masterminds\HTML5( array( 'disable_html_ns' => true ) ); - $doc = $doc->loadHTML( $input ); + if (class_exists('Masterminds\\HTML5')) { + $doc = new \Masterminds\HTML5(array('disable_html_ns' => true)); + $doc = $doc->loadHTML($input); } else { $doc = new DOMDocument(); - $doc->loadHTML( unicodeToHtmlEntities( $input ), \LIBXML_NOWARNING ); + @$doc->loadHTML(unicodeToHtmlEntities($input), \LIBXML_NOWARNING); } - } elseif ( is_a( $input, 'DOMDocument' ) ) { + } elseif (is_a($input, 'DOMDocument')) { $doc = clone $input; } else { $doc = new DOMDocument(); - @$doc->loadHTML( $emptyDocDefault ); + @$doc->loadHTML($emptyDocDefault); } - restore_error_handler(); // Create an XPath object and allow some PHP functions to be used within XPath queries. - $this->xpath = new DOMXPath( $doc ); - $this->xpath->registerNamespace( 'php', 'http://php.net/xpath' ); - $this->xpath->registerPhpFunctions( '\\Friends\\Mf2\\classHasMf2RootClassname' ); + $this->xpath = new DOMXPath($doc); + $this->xpath->registerNamespace('php', 'http://php.net/xpath'); + $this->xpath->registerPhpFunctions('\\Friends\\Mf2\\classHasMf2RootClassname'); $baseurl = $url; - foreach ( $this->xpath->query( '//base[@href]' ) as $base ) { - $baseElementUrl = $base->getAttribute( 'href' ); + foreach ($this->xpath->query('//base[@href]') as $base) { + $baseElementUrl = $base->getAttribute('href'); - if ( parse_url( $baseElementUrl, PHP_URL_SCHEME ) === null ) { - $baseurl = resolveUrl( $url, $baseElementUrl ); + if (parse_url($baseElementUrl, PHP_URL_SCHEME) === null) { + /* The base element URL is relative to the document URL. + * + * :/ + * + * Perhaps the author was high? */ + + $baseurl = resolveUrl($url, $baseElementUrl); } else { $baseurl = $baseElementUrl; } @@ -420,8 +405,8 @@ public function __construct( $input, $url = null, $jsonMode = false ) { } // Ignore