Full-Text RSS 3.7

This commit is contained in:
FiveFilters.org 2019-04-04 23:23:27 +02:00
parent 9658f6a00b
commit d3009c43e3
62 changed files with 1979 additions and 656 deletions

View File

@ -2,6 +2,26 @@ FiveFilters.org: Full-Text RSS
http://fivefilters.org/content-only/ http://fivefilters.org/content-only/
CHANGELOG CHANGELOG
------------------------------------ ------------------------------------
3.7 (2017-02-12)
- Request HTML5 output using HTML5-PHP - new config option $options->html5_output and new request parameter &content=html5
- Improve support for lazy-loading images
- Feed preview now displays RTL content correctly (added dir='auto' to feed.xsl)
- New request parameter images=0 to remove all images from extracted content
- Open Graph and Twitter card metadata now returned in JSON output (no longer in RSS output)
- Metadata now returned in extract.php even if article extraction fails
- Additional data returned in extract.php for developers: 'domain', 'word_count'
- HTML5-PHP library updated
- SimplePie library updated (fixes PHP 7.1 issue)
- New VPS Puppet script (ubuntu-16.04.pp) - installs PHP 7 and Gumbo PHP extension for faster HTML5 parsing
- Bug fix: Language detection now works correctly with PHP 7
- Bug fix: Take base href URL into account when following next_page/single_page links (thanks Lukas!)
- Bug fix: VPS Puppet script installs new version of PECL HTTP extension that fixes problem when requesting punycode encoded domains
- Site config files updated for better extraction
- Compatibility test file updated (will tell you if Gumbo PHP will be used)
- Tidy won't be used to repair HTML if using an HTML5 parser (unless explicitly requested in site config file - tidy: yes)
- New config option $options->blocked_message - set what a user will see when a URL is blocked by Full-Text RSS
- Other fixes/improvements
3.6 (2016-02-21) 3.6 (2016-02-21)
- Insert og:image (if we find one) at the top of the article when no images have been extracted - Insert og:image (if we find one) at the top of the article when no images have been extracted
- Additional lazy image load handling - helps preserve more images designed for JS-enabled browsers - Additional lazy image load handling - helps preserve more images designed for JS-enabled browsers

View File

@ -59,6 +59,20 @@ $options->max_entries = 10;
// from the output. // from the output.
$options->content = 'user'; $options->content = 'user';
// HTML5 output
// ----------------------
// By default, Full-Text RSS uses libxml to convert the parsed DOM tree back into HTML.
// If this is enabled, we'll use HTML5-PHP to produce the HTML. This will be a little
// slower, but might produce better results, adhering to the HTML5 spec.
//
// Note: in a future release we might make HTML5 output the default.
//
// Possible values...
// HTML5 (slower): true
// libxml (faster): false
// libxml unless user overrides (&content=html5): 'user' (default)
$options->html5_output = 'user';
// Excerpts // Excerpts
// ---------------------- // ----------------------
// By default Full-Text RSS does not include excerpts in the output. // By default Full-Text RSS does not include excerpts in the output.
@ -242,6 +256,16 @@ $options->allowed_urls = array();
// Note: for feeds, this option applies to both feed URLs and item URLs within those feeds. // Note: for feeds, this option applies to both feed URLs and item URLs within those feeds.
$options->blocked_urls = array(); $options->blocked_urls = array();
// Blocked message
// -----------------------
// If a request is blocked outright because of the two rules above, this is the message
// that is shown. Please note:
// * If the input URL is a feed and it's not blocked, feed items that are blocked will
// be skipped, and this message will not be shown.
// * If the input URL itself is blocked (feed or not), we will output this message instead
// of producing a feed.
$options->blocked_message = '<strong>URL blocked</strong>';
// Key holder(s) only? // Key holder(s) only?
// ---------------------- // ----------------------
// Set this to true if you want to restrict access only to // Set this to true if you want to restrict access only to
@ -313,7 +337,7 @@ $options->xss_filter = 'user';
// Use effective URL in place of item URL // Use effective URL in place of item URL
// ---------------------- // ----------------------
// When we extract content for feed items, we often end up at a different URL than the // When we extract content for feed items, we often end up at a different URL than the
// one in the original feed. This is often a result of URL shorteners being used or // one in the original feed. This is often a result of URL shorteners or
// tracking services being used by the feed publisher. We include the final // tracking services being used by the feed publisher. We include the final
// (effective) URL we reached to get the content inside the dc:identifier field. // (effective) URL we reached to get the content inside the dc:identifier field.
// If you enable this, we'll also use this URL in place of the original item URL // If you enable this, we'll also use this URL in place of the original item URL
@ -500,7 +524,7 @@ $options->cache_cleanup = 100;
/// DO NOT CHANGE ANYTHING BELOW THIS /////////// /// DO NOT CHANGE ANYTHING BELOW THIS ///////////
///////////////////////////////////////////////// /////////////////////////////////////////////////
if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.6'); if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.7');
if (basename(__FILE__) == 'config.php') { if (basename(__FILE__) == 'config.php') {
if (file_exists(dirname(__FILE__).'/custom_config.php')) { if (file_exists(dirname(__FILE__).'/custom_config.php')) {

View File

@ -10,13 +10,13 @@
<link rel="stylesheet" type="text/css" href="css/feed.css" /> <link rel="stylesheet" type="text/css" href="css/feed.css" />
</head> </head>
<body> <body>
<div id="explanation"> <div id="explanation" dir="auto">
<h1><xsl:value-of select="$title"/> <span class="small"> (full-text feed)</span></h1> <h1><xsl:value-of select="$title"/> <span class="small"> (full-text feed)</span></h1>
<p>You are viewing an auto-generated full-text <acronym title="Really Simple Syndication">RSS</acronym> feed. RSS feeds allow you to stay up to date with the latest news and features you want from websites.<br /><a href="{$subscribe}">Subscribe to this feed.</a></p> <p>You are viewing an auto-generated full-text <acronym title="Really Simple Syndication">RSS</acronym> feed. RSS feeds allow you to stay up to date with the latest news and features you want from websites.<br /><a href="{$subscribe}">Subscribe to this feed.</a></p>
<p>Below is the latest content available from this feed.</p> <p>Below is the latest content available from this feed.</p>
</div> </div>
<div id="content"> <div id="content" dir="auto">
<ul> <ul>
<xsl:for-each select="rss/channel/item"> <xsl:for-each select="rss/channel/item">
<div class="article"> <div class="article">

View File

@ -50,8 +50,12 @@ $_POST['accept'] = 'html';
$_POST['format'] = 'json'; $_POST['format'] = 'json';
// Enable excerpts // Enable excerpts
$_POST['summary'] = '1'; $_POST['summary'] = '1';
// Guess language if it's not already marked up
$_POST['lang'] = '2';
// Don't produce result if extraction fails // Don't produce result if extraction fails
$_POST['exc'] = '1'; // Additional meta elements might still be useful (e.g. og/twitter elements),
// so we're commenting this out from FTR 3.7
//$_POST['exc'] = '1';
// Enable XSS filtering (unless explicitly disabled) // Enable XSS filtering (unless explicitly disabled)
if (isset($_POST['xss']) && $_POST['xss'] !== '0') { if (isset($_POST['xss']) && $_POST['xss'] !== '0') {
$_POST['xss'] = '1'; $_POST['xss'] = '1';

View File

@ -16,7 +16,7 @@ SimplePie.org. We have kept most of their checks intact as we use SimplePie in o
http://github.com/simplepie/simplepie/tree/master/compatibility_test/ http://github.com/simplepie/simplepie/tree/master/compatibility_test/
*/ */
$app_name = 'Full-Text RSS 3.6'; $app_name = 'Full-Text RSS 3.7';
// Full-Text RSS is not yet compatible with HHVM, that's why we check for it with HHVM_VERSION. // Full-Text RSS is not yet compatible with HHVM, that's why we check for it with HHVM_VERSION.
//$php_ok = (function_exists('version_compare') && version_compare(phpversion(), '5.2.0', '>=') && !defined('HHVM_VERSION')); //$php_ok = (function_exists('version_compare') && version_compare(phpversion(), '5.2.0', '>=') && !defined('HHVM_VERSION'));
@ -31,6 +31,7 @@ $curl_ok = function_exists('curl_exec');
$parallel_ok = ((extension_loaded('http') && class_exists('http\Client\Request')) || ($curl_ok && function_exists('curl_multi_init'))); $parallel_ok = ((extension_loaded('http') && class_exists('http\Client\Request')) || ($curl_ok && function_exists('curl_multi_init')));
$allow_url_fopen_ok = (bool)ini_get('allow_url_fopen'); $allow_url_fopen_ok = (bool)ini_get('allow_url_fopen');
$filter_ok = extension_loaded('filter'); $filter_ok = extension_loaded('filter');
$gumbo_ok = class_exists('Layershifter\Gumbo\Parser');
if (extension_loaded('xmlreader')) { if (extension_loaded('xmlreader')) {
$xml_ok = true; $xml_ok = true;
@ -376,15 +377,16 @@ div.chunk {
?> ?>
<h4>HTML parser</h4> <h4>HTML parser</h4>
<p>Full-Text RSS uses the fast libxml parser (the default PHP parser) but it can also make use of HTML5-PHP (an HTML5 parser written in PHP) if your version of PHP supports it. The latter might produce better results for some sites, especially if Tidy is not available on your server, however, it is slower than libxml.</p> <p><?php echo $app_name; ?> uses the fast libxml parser (the default PHP parser) but it will automatically make use of Gumbo (a fast HTML5 parser) if the <a href="https://github.com/layershifter/gumbo-php">Gumbo PHP</a> extension is installed. Alternatively, HTML5-PHP (an HTML5 parser written in PHP) can be used by passing &amp;parser=html5 as a parameter. The latter might produce better results than libxml for some sites, but is a little slower.</p>
<?php <?php
if (version_compare(PHP_VERSION, '5.3.0') >= 0) { if ($gumbo_ok) {
echo '<p class="highlight"><strong>HTML5-PHP</strong> can be used on this server.</p>'; echo '<p class="highlight"><strong>Gumbo PHP</strong> will be used on this server.</p>';
} else { } else {
echo '<p class="highlight">You need at least PHP 5.3 to be able to use HTML5-PHP.</p>'; echo '<p class="highlight">libxml will be used by default, unless HTML5 parsing is requested.</p>';
} }
?> ?>
<!--
<h4>Language detection</h4> <h4>Language detection</h4>
<p>Full-Text RSS can detect the language of each article processed. This occurs using <a href="http://pear.php.net/package/Text_LanguageDetect">Text_LanguageDetect</a> or <a href="https://github.com/lstrojny/php-cld">PHP-CLD</a> (if available).</p> <p>Full-Text RSS can detect the language of each article processed. This occurs using <a href="http://pear.php.net/package/Text_LanguageDetect">Text_LanguageDetect</a> or <a href="https://github.com/lstrojny/php-cld">PHP-CLD</a> (if available).</p>
<?php <?php
@ -394,7 +396,7 @@ div.chunk {
echo '<p class="highlight"><strong>Text_LanguageDetect</strong> will be used on this server.</p>'; echo '<p class="highlight"><strong>Text_LanguageDetect</strong> will be used on this server.</p>';
} }
?> ?>
-->
<h4>Automatic site config updates</h4> <h4>Automatic site config updates</h4>
<p>Full-Text RSS can be configured to update its site config files (which determine how content should be extracted for certain sites) by downloading the latest set from our GitHub repository. This functionality is not required, and can be done manually. To configure this to occur automatically, you will need zip support enabled in PHP - we make use of the ZipArchive class.</p> <p>Full-Text RSS can be configured to update its site config files (which determine how content should be extracted for certain sites) by downloading the latest set from our GitHub repository. This functionality is not required, and can be done manually. To configure this to occur automatically, you will need zip support enabled in PHP - we make use of the ZipArchive class.</p>
<?php <?php

File diff suppressed because one or more lines are too long

View File

@ -5,10 +5,10 @@
* Uses patterns specified in site config files and auto detection (hNews/PHP Readability) * Uses patterns specified in site config files and auto detection (hNews/PHP Readability)
* to extract content from HTML files. * to extract content from HTML files.
* *
* @version 1.2 * @version 1.3
* @date 2016-02-21 * @date 2017-02-12
* @author Keyvan Minoukadeh * @author Keyvan Minoukadeh
* @copyright 2016 Keyvan Minoukadeh * @copyright 2017 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/ */
@ -43,10 +43,12 @@ class ContentExtractor
protected $success = false; protected $success = false;
protected $nextPageUrl; protected $nextPageUrl;
protected $opengraph = array(); protected $opengraph = array();
protected $twitterCard = array();
public $allowedParsers = array('libxml', 'html5php'); public $allowedParsers = array('libxml', 'html5php');
public $defaultParser = 'libxml'; public $defaultParser = 'libxml';
public $parserOverride = null; public $parserOverride = null;
public $fingerprints = array(); public $fingerprints = array();
public $stripImages = false;
public $readability; public $readability;
public $debug = false; public $debug = false;
public $debugVerbose = false; public $debugVerbose = false;
@ -81,6 +83,7 @@ class ContentExtractor
$this->nextPageUrl = null; $this->nextPageUrl = null;
$this->success = false; $this->success = false;
$this->opengraph = array(); $this->opengraph = array();
$this->twitterCard = array();
} }
public function findHostUsingFingerprints($html) { public function findHostUsingFingerprints($html) {
@ -188,22 +191,6 @@ class ContentExtractor
unset($_count); unset($_count);
} }
// use tidy (if it exists)?
// This fixes problems with some sites which would otherwise
// trouble DOMDocument's HTML parsing. (Although sometimes it
// makes matters worse, which is why you can override it in site config files.)
$tidied = false;
if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) {
$this->debug('Using Tidy');
$tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
if (tidy_clean_repair($tidy)) {
$original_html = $html;
$tidied = true;
$html = $tidy->value;
}
unset($tidy);
}
// load and parse html // load and parse html
if ($this->parserOverride) { if ($this->parserOverride) {
// from querystring: &parser=xxx // from querystring: &parser=xxx
@ -218,6 +205,34 @@ class ContentExtractor
$this->debug("HTML parser $_parser not listed, using ".$this->defaultParser." instead"); $this->debug("HTML parser $_parser not listed, using ".$this->defaultParser." instead");
$_parser = $this->defaultParser; $_parser = $this->defaultParser;
} }
// Full-Text RSS 3.7...
if (class_exists('Layershifter\Gumbo\Parser')) {
$this->debug("Gumbo PHP extension will be used for HTML parsing");
$_parser = 'gumbo'; // fast HTML5 parser
}
// use tidy (if it exists)?
// This fixes problems with some sites which would otherwise
// trouble DOMDocument's HTML parsing. (Although sometimes it
// makes matters worse, which is why you can override it in site config files.)
$tidied = false;
if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) {
// if we're using HTML5 parser and no explicit tidy declaration in site config file
// we'll skip tidy
if (($_parser == 'gumbo' || $_parser == 'html5php') && ($this->config->tidy === null)) {
// No Tidy
} else {
$this->debug('Using Tidy');
$tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8');
if (tidy_clean_repair($tidy)) {
$original_html = $html;
$tidied = true;
$html = $tidy->value;
}
unset($tidy);
}
}
$this->debug("Attempting to parse HTML with $_parser"); $this->debug("Attempting to parse HTML with $_parser");
$this->readability = new Readability($html, $url, $_parser); $this->readability = new Readability($html, $url, $_parser);
@ -321,7 +336,7 @@ class ContentExtractor
} }
} }
// try to open graph properties // try to get open graph elements
$elems = @$xpath->query("//head//meta[@property='og:title' or @property='og:type' or @property='og:url' or @property='og:image' or @property='og:description']", $this->readability->dom); $elems = @$xpath->query("//head//meta[@property='og:title' or @property='og:type' or @property='og:url' or @property='og:image' or @property='og:description']", $this->readability->dom);
// check for matches // check for matches
if ($elems && $elems->length > 0) { if ($elems && $elems->length > 0) {
@ -339,6 +354,25 @@ class ContentExtractor
unset($_prop, $_val); unset($_prop, $_val);
} }
// try to get Twitter Card elements
// TODO: add more, but multiple colons, e.g. twitter:site:id cause problems for RSS validation (namespace). For the others, maybe only return in JSON output
$elems = @$xpath->query("//head//meta[@name='twitter:card' or @name='twitter:site' or @name='twitter:creator' or @name='twitter:description' or @name='twitter:title' or @name='twitter:image']", $this->readability->dom);
// check for matches
if ($elems && $elems->length > 0) {
$this->debug('Extracting Twiter Card elements');
foreach ($elems as $elem) {
if ($elem->hasAttribute('content')) {
$_prop = strtolower($elem->getAttribute('name'));
$_val = $elem->getAttribute('content');
// currently one of each is returned, so we keep the first one
if (!isset($this->twitterCard[$_prop])) {
$this->twitterCard[$_prop] = $_val;
}
}
}
unset($_prop, $_val);
}
// try to get date // try to get date
foreach ($this->config->date as $pattern) { foreach ($this->config->date as $pattern) {
$elems = @$xpath->evaluate($pattern, $this->readability->dom); $elems = @$xpath->evaluate($pattern, $this->readability->dom);
@ -707,6 +741,21 @@ class ContentExtractor
} }
} }
// Find date in Open Graph meta element
// http://ogp.me/#no_vertical
if ($detect_date) {
$elems = @$xpath->query("//meta[@property='article:published_time' and @content]", $this->readability->dom);
if ($elems && $elems->length == 1) {
$this->date = strtotime(trim($elems->item(0)->getAttribute('content')));
if ($this->date) {
$this->debug('Date found (article:published_time): '.date('Y-m-d H:i:s', $this->date));
$detect_date = false;
} else {
$this->date = null;
}
}
}
// Find date in pubdate marked time element // Find date in pubdate marked time element
// For the same reason given above, we only use this // For the same reason given above, we only use this
// if there's exactly one element. // if there's exactly one element.
@ -765,16 +814,29 @@ class ContentExtractor
} }
} }
// prevent self-closing iframes // prevent self-closing iframes
if ($this->body->tagName === 'iframe') { // better to do this or to look for all elements not matching known void elements?
if (!$this->body->hasChildNodes()) { // Will requesting HTML5 output using HTML5-PHP fix this issue?
$this->body->appendChild($this->body->ownerDocument->createTextNode('[embedded content]')); $_dont_self_close = array('iframe', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6');
} foreach ($_dont_self_close as $_tagname) {
} else { if ($this->body->tagName === $_tagname) {
$elems = $this->body->getElementsByTagName('iframe'); if (!$this->body->hasChildNodes()) {
for ($i = $elems->length-1; $i >= 0; $i--) { if ($_tagname === 'iframe') {
$e = $elems->item($i); $this->body->appendChild($this->body->ownerDocument->createTextNode('[embedded content]'));
if (!$e->hasChildNodes()) { } else {
$e->appendChild($this->body->ownerDocument->createTextNode('[embedded content]')); $this->body->appendChild($this->body->ownerDocument->createTextNode(''));
}
}
} else {
$elems = $this->body->getElementsByTagName($_tagname);
for ($i = $elems->length-1; $i >= 0; $i--) {
$e = $elems->item($i);
if (!$e->hasChildNodes()) {
if ($_tagname === 'iframe') {
$e->appendChild($this->body->ownerDocument->createTextNode('[embedded content]'));
} else {
$e->appendChild($this->body->ownerDocument->createTextNode(''));
}
}
} }
} }
} }
@ -782,7 +844,7 @@ class ContentExtractor
// the plugin replaces the src attribute to point to a 1x1 gif and puts the original src // the plugin replaces the src attribute to point to a 1x1 gif and puts the original src
// inside the data-lazy-src attribute. It also places the original image inside a noscript element // inside the data-lazy-src attribute. It also places the original image inside a noscript element
// next to the amended one. // next to the amended one.
$elems = @$xpath->query("//img[@data-lazy-src]", $this->body); $elems = @$xpath->query(".//img[@data-lazy-src]", $this->body);
for ($i = $elems->length-1; $i >= 0; $i--) { for ($i = $elems->length-1; $i >= 0; $i--) {
$e = $elems->item($i); $e = $elems->item($i);
// let's see if we can grab image from noscript // let's see if we can grab image from noscript
@ -800,19 +862,31 @@ class ContentExtractor
// now let's deal with another lazy load technique. Example: // now let's deal with another lazy load technique. Example:
// <img src="data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==" class="lazyload" // <img src="data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==" class="lazyload"
// data-src="http://i68.tinypic.com/2jabu8.jpg" alt="Image and video hosting by TinyPic" border="0" /> // data-src="http://i68.tinypic.com/2jabu8.jpg" alt="Image and video hosting by TinyPic" border="0" />
$elems = @$xpath->query("//img[@data-src and contains(@class, 'lazyload') and contains(@src, 'data:image')]", $this->body); $elems = @$xpath->query(".//img[@data-src and (contains(@src, 'data:image') or contains(@src, '.gif'))]", $this->body);
for ($i = $elems->length-1; $i >= 0; $i--) { for ($i = $elems->length-1; $i >= 0; $i--) {
$e = $elems->item($i); $e = $elems->item($i);
$e->setAttribute('src', $e->getAttribute('data-src')); $e->setAttribute('src', $e->getAttribute('data-src'));
$e->removeAttribute('data-src'); $e->removeAttribute('data-src');
} }
// If there's an og:image, but we have no images in the article, let's place it at the beginning of the article. // Strip images?
if ($this->body->hasChildNodes() && isset($this->opengraph['og:image']) && substr($this->opengraph['og:image'], 0, 4) === 'http') { if ($this->stripImages && $this->body->hasChildNodes()) {
$elems = @$xpath->query("//img", $this->body); $elems = @$xpath->query("//figure | //img | //figcaption", $this->body);
if ($elems->length === 0) { // check for matches
$_new_elem = $this->body->ownerDocument->createDocumentFragment(); if ($elems && $elems->length > 0) {
@$_new_elem->appendXML('<div><img src="'.htmlspecialchars($this->opengraph['og:image']).'" class="ff-og-image-inserted" /></div>'); $this->debug('Stripping images: '.$elems->length.' img/figure/figcaption elements');
$this->body->insertBefore($_new_elem, $this->body->firstChild); for ($i=$elems->length-1; $i >= 0; $i--) {
@$elems->item($i)->parentNode->removeChild($elems->item($i));
}
}
} else {
// If there's an og:image, but we have no images in the article, let's place it at the beginning of the article.
if ($this->body->hasChildNodes() && isset($this->opengraph['og:image']) && substr($this->opengraph['og:image'], 0, 4) === 'http') {
$elems = @$xpath->query(".//img", $this->body);
if ($elems->length === 0) {
$_new_elem = $this->body->ownerDocument->createDocumentFragment();
@$_new_elem->appendXML('<div><img src="'.htmlspecialchars($this->opengraph['og:image']).'" class="ff-og-image-inserted" /></div>');
$this->body->insertBefore($_new_elem, $this->body->firstChild);
}
} }
} }
@ -850,6 +924,10 @@ class ContentExtractor
return $this->opengraph; return $this->opengraph;
} }
/**
 * Get the Twitter Card metadata gathered from the page's <head>.
 *
 * @return array Map of lower-cased meta name (e.g. 'twitter:card') to its content value;
 *               only the first occurrence of each name is kept. Empty if none were found.
 */
public function getTwitterCard() {
return $this->twitterCard;
}
public function isNativeAd() { public function isNativeAd() {
return $this->nativeAd; return $this->nativeAd;
} }

View File

@ -81,7 +81,7 @@ define('JSONP', 3);
*/ */
public function setChannelElementsFromArray($elementArray) public function setChannelElementsFromArray($elementArray)
{ {
if(! is_array($elementArray)) return; if(!is_array($elementArray)) return;
foreach ($elementArray as $elementName => $content) foreach ($elementArray as $elementName => $content)
{ {
$this->setChannelElement($elementName, $content); $this->setChannelElement($elementName, $content);
@ -131,19 +131,33 @@ define('JSONP', 3);
$simplejson->language = null; $simplejson->language = null;
$simplejson->url = null; $simplejson->url = null;
$simplejson->effective_url = null; $simplejson->effective_url = null;
$simplejson->domain = null;
$simplejson->word_count = null;
$simplejson->og_url = null; $simplejson->og_url = null;
$simplejson->og_title = null; $simplejson->og_title = null;
$simplejson->og_description = null; $simplejson->og_description = null;
$simplejson->og_image = null; $simplejson->og_image = null;
$simplejson->og_type = null; $simplejson->og_type = null;
$simplejson->twitter_card = null;
$simplejson->twitter_site = null;
$simplejson->twitter_creator = null;
$simplejson->twitter_image = null;
$simplejson->twitter_title = null;
$simplejson->twitter_description = null;
$simplejson->content = null; $simplejson->content = null;
// actual values // actual values
$simplejson->url = $jsonitem->link; $simplejson->url = $jsonitem->link;
$simplejson->effective_url = $jsonitem->dc_identifier; $simplejson->effective_url = $jsonitem->dc_identifier;
$simplejson->domain = strtolower(@parse_url($simplejson->effective_url, PHP_URL_HOST));
if (substr($simplejson->domain, 0, 4) === 'www.') {
$simplejson->domain = substr($simplejson->domain, 4);
}
if (isset($jsonitem->title)) $simplejson->title = $jsonitem->title; if (isset($jsonitem->title)) $simplejson->title = $jsonitem->title;
if (isset($jsonitem->dc_language)) $simplejson->language = $jsonitem->dc_language; if (isset($jsonitem->dc_language)) $simplejson->language = $jsonitem->dc_language;
if (isset($jsonitem->content_encoded)) { if (isset($jsonitem->content_encoded)) {
$simplejson->content = $jsonitem->content_encoded; $simplejson->content = $jsonitem->content_encoded;
// from http://php.net/manual/en/function.str-word-count.php#107363
$simplejson->word_count = count(preg_split('!\s+!', strip_tags($simplejson->content), -1, PREG_SPLIT_NO_EMPTY));
if (isset($jsonitem->description)) { if (isset($jsonitem->description)) {
$simplejson->excerpt = $jsonitem->description; $simplejson->excerpt = $jsonitem->description;
} }
@ -161,6 +175,12 @@ define('JSONP', 3);
if (isset($jsonitem->og_description)) $simplejson->og_description = $jsonitem->og_description; if (isset($jsonitem->og_description)) $simplejson->og_description = $jsonitem->og_description;
if (isset($jsonitem->og_image)) $simplejson->og_image = $jsonitem->og_image; if (isset($jsonitem->og_image)) $simplejson->og_image = $jsonitem->og_image;
if (isset($jsonitem->og_type)) $simplejson->og_type = $jsonitem->og_type; if (isset($jsonitem->og_type)) $simplejson->og_type = $jsonitem->og_type;
if (isset($jsonitem->twitter_card)) $simplejson->twitter_card = $jsonitem->twitter_card;
if (isset($jsonitem->twitter_site)) $simplejson->twitter_site = $jsonitem->twitter_site;
if (isset($jsonitem->twitter_creator)) $simplejson->twitter_creator = $jsonitem->twitter_creator;
if (isset($jsonitem->twitter_image)) $simplejson->twitter_image = $jsonitem->twitter_image;
if (isset($jsonitem->twitter_title)) $simplejson->twitter_title = $jsonitem->twitter_title;
if (isset($jsonitem->twitter_description)) $simplejson->twitter_description = $jsonitem->twitter_description;
echo json_encode($simplejson); echo json_encode($simplejson);
} }
} }
@ -337,7 +357,8 @@ define('JSONP', 3);
{ {
$out = '<?xml version="1.0" encoding="utf-8"?>'."\n"; $out = '<?xml version="1.0" encoding="utf-8"?>'."\n";
if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL; if ($this->xsl) $out .= '<?xml-stylesheet type="text/xsl" href="'.htmlspecialchars($this->xsl).'"?>' . PHP_EOL;
$out .= '<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:og="http://ogp.me/ns#">' . PHP_EOL; //$out .= '<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:og="http://ogp.me/ns#" xmlns:twitter="https://dev.twitter.com/cards/markup">' . PHP_EOL;
$out .= '<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/">' . PHP_EOL;
echo $out; echo $out;
} }
elseif ($this->version == JSON || $this->version == JSONP) elseif ($this->version == JSON || $this->version == JSONP)
@ -495,6 +516,9 @@ define('JSONP', 3);
foreach ($itemElements as $thisElement) { foreach ($itemElements as $thisElement) {
foreach ($thisElement as $instance) { foreach ($thisElement as $instance) {
if ($this->version == RSS2) { if ($this->version == RSS2) {
// Let's not include twitter and open graph elements in regular RSS output
// These are aimed more at developers, and so JSON is more appropriate
if (preg_match('/^(twitter|og):/i', $instance['name'])) continue;
echo $this->makeNode($instance['name'], $instance['content'], $instance['attributes']); echo $this->makeNode($instance['name'], $instance['content'], $instance['attributes']);
} elseif ($this->version == JSON || $this->version == JSONP) { } elseif ($this->version == JSON || $this->version == JSONP) {
$_json_node = $this->makeNode($instance['name'], $instance['content'], $instance['attributes']); $_json_node = $this->makeNode($instance['name'], $instance['content'], $instance['attributes']);

View File

@ -1,8 +1,8 @@
<?php <?php
/* /*
htmLawed 1.1.20, 9 June 2015 htmLawed 1.1.22, 5 March 2016
OOP code, 9 June 2015 OOP code, 27 February 2016
Copyright Santosh Patnaik Copyright Santosh Patnaik
Dual LGPL v3 and GPL v2+ license Dual LGPL v3 and GPL v2+ license
A PHP Labware internal utility; www.bioinformatics.org/phplabware/internal_utilities/htmLawed A PHP Labware internal utility; www.bioinformatics.org/phplabware/internal_utilities/htmLawed
@ -115,31 +115,39 @@ return $t;
// eof // eof
} }
public static function hl_attrval($t, $p){ public static function hl_attrval($a, $t, $p){
// check attr val against $S // check attr val against $S
$o = 1; $l = strlen($t); static $ma = array('accesskey', 'class', 'rel');
foreach($p as $k=>$v){ $s = in_array($a, $ma) ? ' ' : '';
switch($k){ $r = array();
case 'maxlen':if($l > $v){$o = 0;} $t = !empty($s) ? explode($s, $t) : array($t);
break; case 'minlen': if($l < $v){$o = 0;} foreach($t as $tk=>$tv){
break; case 'maxval': if((float)($t) > $v){$o = 0;} $o = 1; $l = strlen($tv);
break; case 'minval': if((float)($t) < $v){$o = 0;} foreach($p as $k=>$v){
break; case 'match': if(!preg_match($v, $t)){$o = 0;} switch($k){
break; case 'nomatch': if(preg_match($v, $t)){$o = 0;} case 'maxlen': if($l > $v){$o = 0;}
break; case 'oneof': break; case 'minlen': if($l < $v){$o = 0;}
$m = 0; break; case 'maxval': if((float)($tv) > $v){$o = 0;}
foreach(explode('|', $v) as $n){if($t == $n){$m = 1; break;}} break; case 'minval': if((float)($tv) < $v){$o = 0;}
$o = $m; break; case 'match': if(!preg_match($v, $tv)){$o = 0;}
break; case 'noneof': break; case 'nomatch': if(preg_match($v, $tv)){$o = 0;}
$m = 1; break; case 'oneof':
foreach(explode('|', $v) as $n){if($t == $n){$m = 0; break;}} $m = 0;
$o = $m; foreach(explode('|', $v) as $n){if($tv == $n){$m = 1; break;}}
break; default: $o = $m;
break; break; case 'noneof':
$m = 1;
foreach(explode('|', $v) as $n){if($tv == $n){$m = 0; break;}}
$o = $m;
break; default:
break;
}
if(!$o){break;}
} }
if(!$o){break;} if($o){$r[] = $tv;}
} }
return ($o ? $t : (isset($p['default']) ? $p['default'] : 0)); $r = implode($s, $r);
return (isset($r[0]) ? $r : (isset($p['default']) ? $p['default'] : 0));
// eof // eof
} }
@ -526,7 +534,7 @@ foreach($aA as $k=>$v){
} }
} }
} }
if(isset($rl[$k]) && is_array($rl[$k]) && ($v = htmLawed::hl_attrval($v, $rl[$k])) === 0){continue;} if(isset($rl[$k]) && is_array($rl[$k]) && ($v = htmLawed::hl_attrval($k, $v, $rl[$k])) === 0){continue;}
$a[$k] = str_replace('"', '&quot;', $v); $a[$k] = str_replace('"', '&quot;', $v);
} }
} }
@ -628,16 +636,15 @@ if($e == 'u'){$e = 'span'; return 'text-decoration: underline;';}
static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%'); static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%');
if($e == 'font'){ if($e == 'font'){
$a2 = ''; $a2 = '';
if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=(\s*)(\S+)`i', $a, $m)){ while(preg_match('`(^|\s)(color|size)\s*=\s*(\'|")?(.+?)(\\3|\s|$)`i', $a, $m)){
$a2 .= ' font-family: '. str_replace('"', '\'', trim($m[2])). ';'; $a = str_replace($m[0], ' ', $a);
$a2 .= strtolower($m[2]) == 'color' ? (' color: '. str_replace('"', '\'', trim($m[4])). ';') : (isset($fs[($m = trim($m[4]))]) ? ($a2 .= ' font-size: '. str_replace('"', '\'', $fs[$m]). ';') : '');
} }
if(preg_match('`color\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m)){ while(preg_match('`(^|\s)face\s*=\s*(\'|")?([^=]+?)\\2`i', $a, $m) or preg_match('`(^|\s)face\s*=(\s*)(\S+)`i', $a, $m)){
$a2 .= ' color: '. str_replace('"', '\'', trim($m[2])). ';'; $a = str_replace($m[0], ' ', $a);
$a2 .= ' font-family: '. str_replace('"', '\'', trim($m[3])). ';';
} }
if(preg_match('`size\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m) && isset($fs[($m = trim($m[2]))])){ $e = 'span'; return ltrim(str_replace('<', '', $a2));
$a2 .= ' font-size: '. str_replace('"', '\'', $fs[$m]). ';';
}
$e = 'span'; return ltrim($a2);
} }
if($t == 2){$e = 0; return 0;} if($t == 2){$e = 0; return 0;}
return ''; return '';
@ -701,7 +708,7 @@ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array(
public static function hl_version(){ public static function hl_version(){
// rel // rel
return '1.1.20'; return '1.1.22';
// eof // eof
} }

View File

@ -166,9 +166,10 @@ class HTML5
public function parse(\Masterminds\HTML5\Parser\InputStream $input, array $options = array()) public function parse(\Masterminds\HTML5\Parser\InputStream $input, array $options = array())
{ {
$this->errors = array(); $this->errors = array();
$events = new DOMTreeBuilder(false, array_merge($this->getOptions(), $options)); $options = array_merge($this->getOptions(), $options);
$events = new DOMTreeBuilder(false, $options);
$scanner = new Scanner($input); $scanner = new Scanner($input);
$parser = new Tokenizer($scanner, $events); $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML);
$parser->parse(); $parser->parse();
$this->errors = $events->getErrors(); $this->errors = $events->getErrors();
@ -184,9 +185,10 @@ class HTML5
*/ */
public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input, array $options = array()) public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input, array $options = array())
{ {
$events = new DOMTreeBuilder(true, array_merge($this->getOptions(), $options)); $options = array_merge($this->getOptions(), $options);
$events = new DOMTreeBuilder(true, $options);
$scanner = new Scanner($input); $scanner = new Scanner($input);
$parser = new Tokenizer($scanner, $events); $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML);
$parser->parse(); $parser->parse();
$this->errors = $events->getErrors(); $this->errors = $events->getErrors();

View File

@ -24,7 +24,7 @@ class Elements
const KNOWN_ELEMENT = 1; const KNOWN_ELEMENT = 1;
// From section 8.1.2: "script", "style" // From section 8.1.2: "script", "style"
// From 8.2.5.4.7 ("in body" insertion mode): "noembed", "noscript" // From 8.2.5.4.7 ("in body" insertion mode): "noembed"
// From 8.4 "style", "xmp", "iframe", "noembed", "noframes" // From 8.4 "style", "xmp", "iframe", "noembed", "noframes"
/** /**
* Indicates the contained text should be processed as raw text. * Indicates the contained text should be processed as raw text.
@ -79,7 +79,7 @@ class Elements
public static $html5 = array( public static $html5 = array(
"a" => 1, "a" => 1,
"abbr" => 1, "abbr" => 1,
"address" => 89, // NORMAL | VOID_TAG | AUTOCLOSE_P | BLOCK_TAG "address" => 65, // NORMAL | BLOCK_TAG
"area" => 9, // NORMAL | VOID_TAG "area" => 9, // NORMAL | VOID_TAG
"article" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG "article" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"aside" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG "aside" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
@ -144,7 +144,7 @@ class Elements
"meta" => 9, // NORMAL | VOID_TAG "meta" => 9, // NORMAL | VOID_TAG
"meter" => 1, "meter" => 1,
"nav" => 17, // NORMAL | AUTOCLOSE_P, "nav" => 17, // NORMAL | AUTOCLOSE_P,
"noscript" => 67, // NORMAL | TEXT_RAW | BLOCK_TAG "noscript" => 65, // NORMAL | BLOCK_TAG
"object" => 1, "object" => 1,
"ol" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG "ol" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
"optgroup" => 1, "optgroup" => 1,
@ -557,7 +557,7 @@ class Elements
* @param string $name * @param string $name
* The name of the element. * The name of the element.
* *
* @return int The element mask. * @return int|bool The element mask or false if element does not exist.
*/ */
public static function element($name) public static function element($name)
{ {

View File

@ -69,7 +69,7 @@ interface EventHandler
* An array with all of the tag's attributes. * An array with all of the tag's attributes.
* @param boolean $selfClosing * @param boolean $selfClosing
* An indicator of whether or not this tag is self-closing (<foo/>) * An indicator of whether or not this tag is self-closing (<foo/>)
* @return numeric One of the Tokenizer::TEXTMODE_* constants. * @return int One of the Tokenizer::TEXTMODE_* constants.
*/ */
public function startTag($name, $attributes = array(), $selfClosing = false); public function startTag($name, $attributes = array(), $selfClosing = false);

View File

@ -43,6 +43,10 @@ class Tokenizer
protected $textMode = 0; // TEXTMODE_NORMAL; protected $textMode = 0; // TEXTMODE_NORMAL;
protected $untilTag = null; protected $untilTag = null;
const CONFORMANT_XML = 'xml';
const CONFORMANT_HTML = 'html';
protected $mode = self::CONFORMANT_HTML;
const WHITE = "\t\n\f "; const WHITE = "\t\n\f ";
/** /**
@ -57,11 +61,13 @@ class Tokenizer
* @param \Masterminds\HTML5\Parser\EventHandler $eventHandler * @param \Masterminds\HTML5\Parser\EventHandler $eventHandler
* An event handler, initialized and ready to receive * An event handler, initialized and ready to receive
* events. * events.
* @param string $mode
*/ */
public function __construct($scanner, $eventHandler) public function __construct($scanner, $eventHandler, $mode = self::CONFORMANT_HTML)
{ {
$this->scanner = $scanner; $this->scanner = $scanner;
$this->events = $eventHandler; $this->events = $eventHandler;
$this->mode = $mode;
} }
/** /**
@ -299,7 +305,7 @@ class Tokenizer
} }
elseif ($tok == 'D' || $tok == 'd') { // Doctype elseif ($tok == 'D' || $tok == 'd') { // Doctype
return $this->doctype(''); return $this->doctype();
} }
elseif ($tok == '[') { // CDATA section elseif ($tok == '[') { // CDATA section
@ -335,7 +341,8 @@ class Tokenizer
return $this->bogusComment('</'); return $this->bogusComment('</');
} }
$name = strtolower($this->scanner->charsUntil("\n\f \t>")); $name = $this->scanner->charsUntil("\n\f \t>");
$name = $this->mode === self::CONFORMANT_XML ? $name: strtolower($name);
// Trash whitespace. // Trash whitespace.
$this->scanner->whitespace(); $this->scanner->whitespace();
@ -362,7 +369,8 @@ class Tokenizer
} }
// We know this is at least one char. // We know this is at least one char.
$name = strtolower($this->scanner->charsWhile(":_-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")); $name = $this->scanner->charsWhile(":_-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
$name = $this->mode === self::CONFORMANT_XML ? $name : strtolower($name);
$attributes = array(); $attributes = array();
$selfClose = false; $selfClose = false;

View File

@ -76,7 +76,6 @@ class TreeBuildingRules
case 'option': case 'option':
return $this->closeIfCurrentMatches($new, $current, array( return $this->closeIfCurrentMatches($new, $current, array(
'option', 'option',
'optgroup'
)); ));
case 'tr': case 'tr':
return $this->closeIfCurrentMatches($new, $current, array( return $this->closeIfCurrentMatches($new, $current, array(

View File

@ -48,10 +48,10 @@ class UTF8Utils
public static function countChars($string) public static function countChars($string)
{ {
// Get the length for the string we need. // Get the length for the string we need.
if (function_exists('iconv_strlen')) { if (function_exists('mb_strlen')) {
return iconv_strlen($string, 'utf-8');
} elseif (function_exists('mb_strlen')) {
return mb_strlen($string, 'utf-8'); return mb_strlen($string, 'utf-8');
} elseif (function_exists('iconv_strlen')) {
return iconv_strlen($string, 'utf-8');
} elseif (function_exists('utf8_decode')) { } elseif (function_exists('utf8_decode')) {
// MPB: Will this work? Won't certain decodes lead to two chars // MPB: Will this work? Won't certain decodes lead to two chars
// extrapolated out of 2-byte chars? // extrapolated out of 2-byte chars?
@ -94,10 +94,10 @@ class UTF8Utils
// application executing this library so we store the value, change it // application executing this library so we store the value, change it
// to our needs, and then change it back when we are done. This feels // to our needs, and then change it back when we are done. This feels
// a little excessive and it would be great if there was a better way. // a little excessive and it would be great if there was a better way.
$save = ini_get('mbstring.substitute_character'); $save = mb_substitute_character();
ini_set('mbstring.substitute_character', "none"); mb_substitute_character('none');
$data = mb_convert_encoding($data, 'UTF-8', $encoding); $data = mb_convert_encoding($data, 'UTF-8', $encoding);
ini_set('mbstring.substitute_character', $save); mb_substitute_character($save);
} // @todo Get iconv running in at least some environments if that is possible. } // @todo Get iconv running in at least some environments if that is possible.
elseif (function_exists('iconv') && $encoding != 'auto') { elseif (function_exists('iconv') && $encoding != 'auto') {
// fprintf(STDOUT, "iconv found\n"); // fprintf(STDOUT, "iconv found\n");

View File

@ -185,7 +185,9 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
{ {
$this->doctype(); $this->doctype();
if ($dom->documentElement) { if ($dom->documentElement) {
$this->traverser->node($dom->documentElement); foreach ($dom->childNodes as $node) {
$this->traverser->node($node);
}
$this->nl(); $this->nl();
} }
} }
@ -219,7 +221,11 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
$this->openTag($ele); $this->openTag($ele);
if (Elements::isA($name, Elements::TEXT_RAW)) { if (Elements::isA($name, Elements::TEXT_RAW)) {
foreach ($ele->childNodes as $child) { foreach ($ele->childNodes as $child) {
$this->wr($child->data); if ($child instanceof \DOMCharacterData) {
$this->wr($child->data);
} elseif ($child instanceof \DOMElement) {
$this->element($child);
}
} }
} else { } else {
// Handle children. // Handle children.
@ -347,7 +353,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface
// the XML, XMLNS, or XLink NS's should use the canonical // the XML, XMLNS, or XLink NS's should use the canonical
// prefix. It seems that DOM does this for us already, but there // prefix. It seems that DOM does this for us already, but there
// may be exceptions. // may be exceptions.
$name = $node->name; $name = $node->nodeName;
// Special handling for attributes in SVG and MathML. // Special handling for attributes in SVG and MathML.
// Using if/elseif instead of switch because it's faster in PHP. // Using if/elseif instead of switch because it's faster in PHP.

View File

@ -103,7 +103,6 @@ class Traverser
case XML_CDATA_SECTION_NODE: case XML_CDATA_SECTION_NODE:
$this->rules->cdata($node); $this->rules->cdata($node);
break; break;
// FIXME: It appears that the parser doesn't do PI's.
case XML_PI_NODE: case XML_PI_NODE:
$this->rules->processorInstruction($node); $this->rules->processorInstruction($node);
break; break;

View File

@ -9,10 +9,13 @@ But after some initial refactoring work, we began a new parser.
- Composer support - Composer support
- Event-based (SAX-like) parser - Event-based (SAX-like) parser
- DOM tree builder - DOM tree builder
- Interoperability with QueryPath [[in progress](https://github.com/technosophos/querypath/issues/114)] - Interoperability with [QueryPath](https://github.com/technosophos/querypath)
- Runs on **PHP** 5.3.0 or newer and **HHVM** 3.2 or newer - Runs on **PHP** 5.3.0 or newer and **HHVM** 3.2 or newer
[![Build Status](https://travis-ci.org/Masterminds/html5-php.png?branch=master)](https://travis-ci.org/Masterminds/html5-php) [![Latest Stable Version](https://poser.pugx.org/masterminds/html5/v/stable.png)](https://packagist.org/packages/masterminds/html5) [![Coverage Status](https://coveralls.io/repos/Masterminds/html5-php/badge.png?branch=master)](https://coveralls.io/r/Masterminds/html5-php?branch=master) [![Build Status](https://travis-ci.org/Masterminds/html5-php.png?branch=master)](https://travis-ci.org/Masterminds/html5-php)
[![Latest Stable Version](https://poser.pugx.org/masterminds/html5/v/stable.png)](https://packagist.org/packages/masterminds/html5)
[![Code Coverage](https://scrutinizer-ci.com/g/Masterminds/html5-php/badges/coverage.png?b=master)](https://scrutinizer-ci.com/g/Masterminds/html5-php/?branch=master)
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/Masterminds/html5-php/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/Masterminds/html5-php/?branch=master)
## Installation ## Installation

View File

@ -1,4 +1,28 @@
# Release Notes # Release Notes
2.2.2 (2016-10-22)
- #116: In XML mode, tags are case sensitive
- #115: Fix PHP Notice in OutputRules
- #112: fix parsing of options of an optgroup
- #111: Adding test for the address tag
2.2.1 (2016-05-10)
- #109: Fixed issue where address tag could be written without closing tag (thanks sylus)
2.2.0 (2016-04-11)
- #105: Enable composer cache (for CI/CD)
- #100: Use mb_substitute_character instead of ini_set for environments where
ini_set is disabled (e.g., shared hosting)
- #98: Allow link, meta, style tags in noscript tags
- #96: Fixed xml:href on svgs that use the "use" breaking
- #94: Counting UTF8 characters performance improvement
- #93: Use newer version of coveralls package
- #90: Remove duplicate test
- #87: Allow multiple root nodes
2.1.2 (2015-06-07) 2.1.2 (2015-06-07)
- #82: Support for PHP7 - #82: Support for PHP7
- #84: Improved boolean attribute handling - #84: Improved boolean attribute handling

View File

@ -7,11 +7,11 @@
* For environments which do not have these options, it reverts to standard sequential * For environments which do not have these options, it reverts to standard sequential
* requests (using file_get_contents()) * requests (using file_get_contents())
* *
* @version 1.6 * @version 1.7
* @date 2015-06-05 * @date 2016-11-28
* @see http://devel-m6w6.rhcloud.com/mdref/http * @see http://devel-m6w6.rhcloud.com/mdref/http
* @author Keyvan Minoukadeh * @author Keyvan Minoukadeh
* @copyright 2011-2015 Keyvan Minoukadeh * @copyright 2011-2016 Keyvan Minoukadeh
* @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3
*/ */
@ -21,8 +21,8 @@ class HumbleHttpAgent
const METHOD_CURL_MULTI = 2; const METHOD_CURL_MULTI = 2;
const METHOD_FILE_GET_CONTENTS = 4; const METHOD_FILE_GET_CONTENTS = 4;
//const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'; //const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1';
const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2'; const UA_BROWSER = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36';
const UA_PHP = 'PHP/5.5'; const UA_PHP = 'PHP/5.6';
const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1'; const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1';
protected $requests = array(); protected $requests = array();
@ -103,20 +103,26 @@ class HumbleHttpAgent
) )
); );
// HTTP cURL // HTTP cURL
$this->curlOptions = array( if ($this->method === self::METHOD_CURL_MULTI) {
CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], $this->curlOptions = array(
CURLOPT_TIMEOUT => $this->requestOptions['timeout'] CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'],
CURLOPT_TIMEOUT => $this->requestOptions['timeout']
); );
}
// Use proxy? // Use proxy?
if ($this->requestOptions['proxyhost']) { if (isset($this->requestOptions['proxyhost']) && $this->requestOptions['proxyhost']) {
// For file_get_contents (see http://stackoverflow.com/a/1336419/407938) // For file_get_contents (see http://stackoverflow.com/a/1336419/407938)
$this->httpContext['http']['proxy'] = 'tcp://'.$this->requestOptions['proxyhost']; $this->httpContext['http']['proxy'] = 'tcp://'.$this->requestOptions['proxyhost'];
$this->httpContext['http']['request_fulluri'] = true; $this->httpContext['http']['request_fulluri'] = true;
// For cURL (see http://stackoverflow.com/a/9247672/407938) // For cURL (see http://stackoverflow.com/a/9247672/407938)
$this->curlOptions[CURLOPT_PROXY] = $this->requestOptions['proxyhost']; if ($this->method === self::METHOD_CURL_MULTI) {
$this->curlOptions[CURLOPT_PROXY] = $this->requestOptions['proxyhost'];
}
if (isset($this->requestOptions['proxyauth'])) { if (isset($this->requestOptions['proxyauth'])) {
$this->httpContext['http']['header'] .= "Proxy-Authorization: Basic ".base64_encode($this->requestOptions['proxyauth'])."\r\n"; $this->httpContext['http']['header'] .= "Proxy-Authorization: Basic ".base64_encode($this->requestOptions['proxyauth'])."\r\n";
$this->curlOptions[CURLOPT_PROXYUSERPWD] = $this->requestOptions['proxyauth']; if ($this->method === self::METHOD_CURL_MULTI) {
$this->curlOptions[CURLOPT_PROXYUSERPWD] = $this->requestOptions['proxyauth'];
}
} }
} }
} }
@ -842,6 +848,7 @@ class HumbleHttpAgent
} }
protected function getCookies($orig, $req_url) { protected function getCookies($orig, $req_url) {
if (!isset($this->cookieJar[$orig])) return null;
$jar = $this->cookieJar[$orig]; $jar = $this->cookieJar[$orig];
if (!isset($jar)) { if (!isset($jar)) {
return null; return null;

View File

@ -971,7 +971,7 @@ class Text_LanguageDetect
// assume that ascii characters are the most common // assume that ascii characters are the most common
// so try it first for efficiency // so try it first for efficiency
if ($unicode <= $blocks[0][1]) { if ($unicode <= hexdec($blocks[0][1])) {
return $blocks[0]; return $blocks[0];
} }
@ -989,11 +989,11 @@ class Text_LanguageDetect
while ($low <= $high) { while ($low <= $high) {
$mid = floor(($low + $high) / 2); $mid = floor(($low + $high) / 2);
if ($unicode < $blocks[$mid][0]) { if ($unicode < hexdec($blocks[$mid][0])) {
// if it's lower than the lower bound // if it's lower than the lower bound
$high = $mid - 1; $high = $mid - 1;
} elseif ($unicode > $blocks[$mid][1]) { } elseif ($unicode > hexdec($blocks[$mid][1])) {
// if it's higher than the upper bound // if it's higher than the upper bound
$low = $mid + 1; $low = $mid + 1;

View File

@ -102,7 +102,7 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect
* @access private * @access private
* @param string $string string to be parsed * @param string $string string to be parsed
*/ */
function Text_LanguageDetect_Parser($string) { function __construct($string) {
$this->_string = $string; $this->_string = $string;
} }

View File

@ -4,6 +4,7 @@
* Based on readability.js version 1.7.1 (without multi-page support) * Based on readability.js version 1.7.1 (without multi-page support)
* Updated to allow HTML5 parsing with html5lib * Updated to allow HTML5 parsing with html5lib
* Updated with lightClean mode to preserve more images and youtube/vimeo/viddler embeds * Updated with lightClean mode to preserve more images and youtube/vimeo/viddler embeds
* Updated to allow HTML5 parsing with Gumbo PHP
* ------------------------------------------------------ * ------------------------------------------------------
* Original URL: http://lab.arc90.com/experiments/readability/js/readability.js * Original URL: http://lab.arc90.com/experiments/readability/js/readability.js
* Arc90's project URL: http://lab.arc90.com/experiments/readability/ * Arc90's project URL: http://lab.arc90.com/experiments/readability/
@ -12,7 +13,7 @@
* More information: http://fivefilters.org/content-only/ * More information: http://fivefilters.org/content-only/
* License: Apache License, Version 2.0 * License: Apache License, Version 2.0
* Requires: PHP5 * Requires: PHP5
* Date: 2015-06-01 * Date: 2017-02-05
* *
* Differences between the PHP port and the original * Differences between the PHP port and the original
* ------------------------------------------------------ * ------------------------------------------------------
@ -117,17 +118,23 @@ class Readability
$html = preg_replace($this->regexps['replaceBrs'], '</p><p>', $html); $html = preg_replace($this->regexps['replaceBrs'], '</p><p>', $html);
$html = preg_replace($this->regexps['replaceFonts'], '<$1span>', $html); $html = preg_replace($this->regexps['replaceFonts'], '<$1span>', $html);
if (trim($html) == '') $html = '<html></html>'; if (trim($html) == '') $html = '<html></html>';
if ($parser=='html5lib' || $parser=='html5php') { // Check for the Gumbo PHP extension https://github.com/layershifter/gumbo-php
if (version_compare(PHP_VERSION, '5.3.0') >= 0) { if ($parser=='gumbo') {
//use Masterminds\HTML5; // Can we avoid this encoding/deocding step? Test on:
$html5class = 'Masterminds\HTML5'; // http://www.medialens.org/index.php/alerts/alert-archive/2017/837-undermining-democracy-corporate-media-bias-on-jeremy-corbyn-boris-johnson-and-syria.html
$html5 = new $html5class(array('disable_html_ns' => true)); $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8");
$this->dom = $html5->loadHTML($html); $html = mb_convert_encoding($html, "UTF-8", 'HTML-ENTITIES');
//echo $html5->saveHTML($this->dom);exit; $this->dom = @Layershifter\Gumbo\Parser::load($html);
//$xpath = new DOMXPath($this->dom); } elseif ($parser=='html5lib' || $parser=='html5php') {
//$elems = $xpath->query("//a"); //use Masterminds\HTML5;
//print_r($elems);exit; //$html5class = 'Masterminds\HTML5';
} //$html5 = new $html5class(array('disable_html_ns' => true));
$html5 = new Masterminds\HTML5(array('disable_html_ns' => true));
$this->dom = $html5->loadHTML($html);
//echo $html5->saveHTML($this->dom);exit;
//$xpath = new DOMXPath($this->dom);
//$elems = $xpath->query("//a");
//print_r($elems);exit;
} }
if ($this->dom === null) { if ($this->dom === null) {
$this->dom = new DOMDocument(); $this->dom = new DOMDocument();

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,8 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @version 1.4.3
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -50,7 +50,7 @@ define('SIMPLEPIE_NAME', 'SimplePie');
/** /**
* SimplePie Version * SimplePie Version
*/ */
define('SIMPLEPIE_VERSION', '1.3.1'); define('SIMPLEPIE_VERSION', '1.4.3');
/** /**
* SimplePie Build * SimplePie Build
@ -445,6 +445,13 @@ class SimplePie
*/ */
public $feed_url; public $feed_url;
/**
* @var string Original feed URL, or new feed URL iff HTTP 301 Moved Permanently
* @see SimplePie::subscribe_url()
* @access private
*/
public $permanent_url = null;
/** /**
* @var object Instance of SimplePie_File to use as a feed * @var object Instance of SimplePie_File to use as a feed
* @see SimplePie::set_file() * @see SimplePie::set_file()
@ -466,6 +473,13 @@ class SimplePie
*/ */
public $timeout = 10; public $timeout = 10;
/**
* @var array Custom curl options
* @see SimplePie::set_curl_options()
* @access private
*/
public $curl_options = array();
/** /**
* @var bool Forces fsockopen() to be used for remote files instead * @var bool Forces fsockopen() to be used for remote files instead
* of cURL, even if a new enough version is installed * of cURL, even if a new enough version is installed
@ -489,6 +503,14 @@ class SimplePie
*/ */
public $cache = true; public $cache = true;
/**
* @var bool Force SimplePie to fall back to expired cache, if enabled,
* when feed is unavailable.
* @see SimplePie::force_cache_fallback()
* @access private
*/
public $force_cache_fallback = false;
/** /**
* @var int Cache duration (in seconds) * @var int Cache duration (in seconds)
* @see SimplePie::set_cache_duration() * @see SimplePie::set_cache_duration()
@ -594,6 +616,12 @@ class SimplePie
*/ */
public $item_limit = 0; public $item_limit = 0;
/**
* @var bool Stores if last-modified and/or etag headers were sent with the
* request when checking a feed.
*/
public $check_modified = false;
/** /**
* @var array Stores the default attributes to be stripped by strip_attributes(). * @var array Stores the default attributes to be stripped by strip_attributes().
* @see SimplePie::strip_attributes() * @see SimplePie::strip_attributes()
@ -601,6 +629,13 @@ class SimplePie
*/ */
public $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'); public $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
/**
* @var array Stores the default attributes to add to different tags by add_attributes().
* @see SimplePie::add_attributes()
* @access private
*/
public $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'));
/** /**
* @var array Stores the default tags to be stripped by strip_htmltags(). * @var array Stores the default tags to be stripped by strip_htmltags().
* @see SimplePie::strip_htmltags() * @see SimplePie::strip_htmltags()
@ -624,9 +659,9 @@ class SimplePie
*/ */
public function __construct() public function __construct()
{ {
if (version_compare(PHP_VERSION, '5.2', '<')) if (version_compare(PHP_VERSION, '5.3', '<'))
{ {
trigger_error('PHP 4.x, 5.0 and 5.1 are no longer supported. Please upgrade to PHP 5.2 or newer.'); trigger_error('Please upgrade to PHP 5.3 or newer.');
die(); die();
} }
@ -637,7 +672,7 @@ class SimplePie
if (func_num_args() > 0) if (func_num_args() > 0)
{ {
$level = defined('E_USER_DEPRECATED') ? E_USER_DEPRECATED : E_USER_WARNING; $level = defined('E_USER_DEPRECATED') ? E_USER_DEPRECATED : E_USER_WARNING;
trigger_error('Passing parameters to the constructor is no longer supported. Please use set_feed_url(), set_cache_location(), and set_cache_location() directly.', $level); trigger_error('Passing parameters to the constructor is no longer supported. Please use set_feed_url(), set_cache_location(), and set_cache_duration() directly.', $level);
$args = func_get_args(); $args = func_get_args();
switch (count($args)) { switch (count($args)) {
@ -728,6 +763,7 @@ class SimplePie
else else
{ {
$this->feed_url = $this->registry->call('Misc', 'fix_protocol', array($url, 1)); $this->feed_url = $this->registry->call('Misc', 'fix_protocol', array($url, 1));
$this->permanent_url = $this->feed_url;
} }
} }
@ -742,6 +778,7 @@ class SimplePie
if ($file instanceof SimplePie_File) if ($file instanceof SimplePie_File)
{ {
$this->feed_url = $file->url; $this->feed_url = $file->url;
$this->permanent_url = $this->feed_url;
$this->file =& $file; $this->file =& $file;
return true; return true;
} }
@ -780,6 +817,19 @@ class SimplePie
$this->timeout = (int) $timeout; $this->timeout = (int) $timeout;
} }
/**
* Set custom curl options
*
* This allows you to change default curl options
*
* @since 1.0 Beta 3
* @param array $curl_options Curl options to add to default settings
*/
public function set_curl_options(array $curl_options = array())
{
$this->curl_options = $curl_options;
}
/** /**
* Force SimplePie to use fsockopen() instead of cURL * Force SimplePie to use fsockopen() instead of cURL
* *
@ -805,6 +855,21 @@ class SimplePie
$this->cache = (bool) $enable; $this->cache = (bool) $enable;
} }
/**
* Force SimplePie to fall back to expired cache, if enabled, when the
* feed is unavailable.
*
* This tells SimplePie to ignore any file errors and fall back to cache
* instead. This only works if caching is enabled and cached content
* still exists.
* @param bool $enable Force use of cache on fail.
*/
public function force_cache_fallback($enable = false)
{
$this->force_cache_fallback= (bool) $enable;
}
/** /**
* Set the length of time (in seconds) that the contents of a feed will be * Set the length of time (in seconds) that the contents of a feed will be
* cached * cached
@ -1073,6 +1138,7 @@ class SimplePie
$this->strip_comments(false); $this->strip_comments(false);
$this->strip_htmltags(false); $this->strip_htmltags(false);
$this->strip_attributes(false); $this->strip_attributes(false);
$this->add_attributes(false);
$this->set_image_handler(false); $this->set_image_handler(false);
} }
} }
@ -1119,16 +1185,25 @@ class SimplePie
$this->sanitize->strip_attributes($attribs); $this->sanitize->strip_attributes($attribs);
} }
/**
 * Hand a set of attributes to the sanitizer
 *
 * Calling this with the default empty string forwards the value currently
 * held in `$this->add_attributes`; any other argument is forwarded unchanged.
 *
 * @param mixed $attribs Value passed to the sanitizer's add_attributes(), or '' to use the stored value
 */
public function add_attributes($attribs = '')
{
    $this->sanitize->add_attributes($attribs === '' ? $this->add_attributes : $attribs);
}
/** /**
* Set the output encoding * Set the output encoding
* *
* Allows you to override SimplePie's output to match that of your webpage. * Allows you to override SimplePie's output to match that of your webpage.
* This is useful for times when your webpages are not being served as * This is useful for times when your webpages are not being served as
* UTF-8. This setting will be obeyed by {@see handle_content_type()}, and * UTF-8. This setting will be obeyed by {@see handle_content_type()}, and
* is similar to {@see set_input_encoding()}. * is similar to {@see set_input_encoding()}.
* *
* It should be noted, however, that not all character encodings can support * It should be noted, however, that not all character encodings can support
* all characters. If your page is being served as ISO-8859-1 and you try * all characters. If your page is being served as ISO-8859-1 and you try
* to display a Japanese feed, you'll likely see garbled characters. * to display a Japanese feed, you'll likely see garbled characters.
* Because of this, it is highly recommended to ensure that your webpages * Because of this, it is highly recommended to ensure that your webpages
* are served as UTF-8. * are served as UTF-8.
@ -1195,10 +1270,20 @@ class SimplePie
$this->item_limit = (int) $limit; $this->item_limit = (int) $limit;
} }
/**
 * Enable throwing exceptions
 *
 * When disabled, sanitize() records a caught SimplePie_Exception in the
 * error property and returns an empty string instead of rethrowing it.
 *
 * @param boolean $enable Should we throw exceptions, or use the old-style error property?
 */
public function enable_exceptions($enable = true)
{
    // Normalise to a real boolean, matching the documented type and the other on/off setters
    $this->enable_exceptions = (bool) $enable;
}
/** /**
* Initialize the feed object * Initialize the feed object
* *
* This is what makes everything happen. Period. This is where all of the * This is what makes everything happen. Period. This is where all of the
* configuration options get processed, feeds are fetched, cached, and * configuration options get processed, feeds are fetched, cached, and
* parsed, and all of that other good stuff. * parsed, and all of that other good stuff.
* *
@ -1209,6 +1294,7 @@ class SimplePie
// Check absolute bare minimum requirements. // Check absolute bare minimum requirements.
if (!extension_loaded('xml') || !extension_loaded('pcre')) if (!extension_loaded('xml') || !extension_loaded('pcre'))
{ {
$this->error = 'XML or PCRE extensions not loaded!';
return false; return false;
} }
// Then check the xml extension is sane (i.e., libxml 2.7.x issue on PHP < 5.2.9 and libxml 2.7.0 to 2.7.2 on any version) if we don't have xmlreader. // Then check the xml extension is sane (i.e., libxml 2.7.x issue on PHP < 5.2.9 and libxml 2.7.0 to 2.7.2 on any version) if we don't have xmlreader.
@ -1236,7 +1322,7 @@ class SimplePie
// Pass whatever was set with config options over to the sanitizer. // Pass whatever was set with config options over to the sanitizer.
// Pass the classes in for legacy support; new classes should use the registry instead // Pass the classes in for legacy support; new classes should use the registry instead
$this->sanitize->pass_cache_data($this->cache, $this->cache_location, $this->cache_name_function, $this->registry->get_class('Cache')); $this->sanitize->pass_cache_data($this->cache, $this->cache_location, $this->cache_name_function, $this->registry->get_class('Cache'));
$this->sanitize->pass_file_data($this->registry->get_class('File'), $this->timeout, $this->useragent, $this->force_fsockopen); $this->sanitize->pass_file_data($this->registry->get_class('File'), $this->timeout, $this->useragent, $this->force_fsockopen, $this->curl_options);
if (!empty($this->multifeed_url)) if (!empty($this->multifeed_url))
{ {
@ -1265,6 +1351,7 @@ class SimplePie
$this->error = null; $this->error = null;
$this->data = array(); $this->data = array();
$this->check_modified = false;
$this->multifeed_objects = array(); $this->multifeed_objects = array();
$cache = false; $cache = false;
@ -1290,13 +1377,20 @@ class SimplePie
list($headers, $sniffed) = $fetched; list($headers, $sniffed) = $fetched;
} }
// Empty response check
if(empty($this->raw_data)){
$this->error = "A feed could not be found at `$this->feed_url`. Empty body.";
$this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__));
return false;
}
// Set up array of possible encodings // Set up array of possible encodings
$encodings = array(); $encodings = array();
// First check to see if input has been overridden. // First check to see if input has been overridden.
if ($this->input_encoding !== false) if ($this->input_encoding !== false)
{ {
$encodings[] = $this->input_encoding; $encodings[] = strtoupper($this->input_encoding);
} }
$application_types = array('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity'); $application_types = array('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity');
@ -1318,14 +1412,14 @@ class SimplePie
{ {
if (isset($headers['content-type']) && preg_match('/;\x20?charset=([^;]*)/i', $headers['content-type'], $charset)) if (isset($headers['content-type']) && preg_match('/;\x20?charset=([^;]*)/i', $headers['content-type'], $charset))
{ {
$encodings[] = $charset[1]; $encodings[] = strtoupper($charset[1]);
} }
$encodings[] = 'US-ASCII'; $encodings[] = 'US-ASCII';
} }
// Text MIME-type default // Text MIME-type default
elseif (substr($sniffed, 0, 5) === 'text/') elseif (substr($sniffed, 0, 5) === 'text/')
{ {
$encodings[] = 'US-ASCII'; $encodings[] = 'UTF-8';
} }
} }
@ -1347,12 +1441,12 @@ class SimplePie
$parser = $this->registry->create('Parser'); $parser = $this->registry->create('Parser');
// If it's parsed fine // If it's parsed fine
if ($parser->parse($utf8_data, 'UTF-8')) if ($parser->parse($utf8_data, 'UTF-8', $this->permanent_url))
{ {
$this->data = $parser->get_data(); $this->data = $parser->get_data();
if (!($this->get_type() & ~SIMPLEPIE_TYPE_NONE)) if (!($this->get_type() & ~SIMPLEPIE_TYPE_NONE))
{ {
$this->error = "A feed could not be found at $this->feed_url. This does not appear to be a valid RSS or Atom feed."; $this->error = "A feed could not be found at `$this->feed_url`. This does not appear to be a valid RSS or Atom feed.";
$this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__));
return false; return false;
} }
@ -1376,11 +1470,27 @@ class SimplePie
if (isset($parser)) if (isset($parser))
{ {
// We have an error, just set SimplePie_Misc::error to it and quit // We have an error, just set SimplePie_Misc::error to it and quit
$this->error = sprintf('This XML document is invalid, likely due to invalid characters. XML error: %s at line %d, column %d', $parser->get_error_string(), $parser->get_current_line(), $parser->get_current_column()); $this->error = $this->feed_url;
$this->error .= sprintf(' is invalid XML, likely due to invalid characters. XML error: %s at line %d, column %d', $parser->get_error_string(), $parser->get_current_line(), $parser->get_current_column());
} }
else else
{ {
$this->error = 'The data could not be converted to UTF-8. You MUST have either the iconv or mbstring extension installed. Upgrading to PHP 5.x (which includes iconv) is highly recommended.'; $this->error = 'The data could not be converted to UTF-8.';
if (!extension_loaded('mbstring') && !extension_loaded('iconv') && !class_exists('\UConverter')) {
$this->error .= ' You MUST have either the iconv, mbstring or intl (PHP 5.5+) extension installed and enabled.';
} else {
$missingExtensions = array();
if (!extension_loaded('iconv')) {
$missingExtensions[] = 'iconv';
}
if (!extension_loaded('mbstring')) {
$missingExtensions[] = 'mbstring';
}
if (!class_exists('\UConverter')) {
$missingExtensions[] = 'intl (PHP 5.5+)';
}
$this->error .= ' Try installing/enabling the ' . implode(' or ', $missingExtensions) . ' extension.';
}
} }
$this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__));
@ -1436,7 +1546,10 @@ class SimplePie
// Check if the cache has been updated // Check if the cache has been updated
elseif ($cache->mtime() + $this->cache_duration < time()) elseif ($cache->mtime() + $this->cache_duration < time())
{ {
// If we have last-modified and/or etag set // Want to know if we tried to send last-modified and/or etag headers
// when requesting this file. (Note that it's up to the file to
// support this, but we don't always send the headers either.)
$this->check_modified = true;
if (isset($this->data['headers']['last-modified']) || isset($this->data['headers']['etag'])) if (isset($this->data['headers']['last-modified']) || isset($this->data['headers']['etag']))
{ {
$headers = array( $headers = array(
@ -1451,18 +1564,28 @@ class SimplePie
$headers['if-none-match'] = $this->data['headers']['etag']; $headers['if-none-match'] = $this->data['headers']['etag'];
} }
$file = $this->registry->create('File', array($this->feed_url, $this->timeout/10, 5, $headers, $this->useragent, $this->force_fsockopen)); $file = $this->registry->create('File', array($this->feed_url, $this->timeout/10, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options));
if ($file->success) if ($file->success)
{ {
if ($file->status_code === 304) if ($file->status_code === 304)
{ {
// Set raw_data to false here too, to signify that the cache
// is still valid.
$this->raw_data = false;
$cache->touch(); $cache->touch();
return true; return true;
} }
} }
else else
{ {
$this->check_modified = false;
if($this->force_cache_fallback)
{
$cache->touch();
return true;
}
unset($file); unset($file);
} }
} }
@ -1493,7 +1616,7 @@ class SimplePie
$headers = array( $headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
); );
$file = $this->registry->create('File', array($this->feed_url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen)); $file = $this->registry->create('File', array($this->feed_url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options));
} }
} }
// If the file connection has an error, set SimplePie::error to that and quit // If the file connection has an error, set SimplePie::error to that and quit
@ -1510,19 +1633,75 @@ class SimplePie
if (!$locate->is_feed($file)) if (!$locate->is_feed($file))
{ {
// We need to unset this so that if SimplePie::set_file() has been called that object is untouched $copyStatusCode = $file->status_code;
unset($file); $copyContentType = $file->headers['content-type'];
try try
{ {
if (!($file = $locate->find($this->autodiscovery, $this->all_discovered_feeds))) $microformats = false;
if (function_exists('Mf2\parse')) {
// Check for both h-feed and h-entry, as both a feed with no entries
// and a list of entries without an h-feed wrapper are both valid.
$position = 0;
while ($position = strpos($file->body, 'h-feed', $position))
{
$start = $position < 200 ? 0 : $position - 200;
$check = substr($file->body, $start, 400);
if ($microformats = preg_match('/class="[^"]*h-feed/', $check))
{
break;
}
$position += 7;
}
$position = 0;
while ($position = strpos($file->body, 'h-entry', $position))
{
$start = $position < 200 ? 0 : $position - 200;
$check = substr($file->body, $start, 400);
if ($microformats = preg_match('/class="[^"]*h-entry/', $check))
{
break;
}
$position += 7;
}
}
// Now also do feed discovery, but if an h-entry was found don't
// overwrite the current value of file.
$discovered = $locate->find($this->autodiscovery,
$this->all_discovered_feeds);
if ($microformats)
{ {
$this->error = "A feed could not be found at $this->feed_url. A feed with an invalid mime type may fall victim to this error, or " . SIMPLEPIE_NAME . " was unable to auto-discover it.. Use force_feed() if you are certain this URL is a real feed."; if ($hub = $locate->get_rel_link('hub'))
$this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); {
return false; $self = $locate->get_rel_link('self');
$this->store_links($file, $hub, $self);
}
// Push the current file onto all_discovered feeds so the user can
// be shown this as one of the options.
if (isset($this->all_discovered_feeds)) {
$this->all_discovered_feeds[] = $file;
}
}
else
{
if ($discovered)
{
$file = $discovered;
}
else
{
// We need to unset this so that if SimplePie::set_file() has
// been called that object is untouched
unset($file);
$this->error = "A feed could not be found at `$this->feed_url`; the status code is `$copyStatusCode` and content-type is `$copyContentType`";
$this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__));
return false;
}
} }
} }
catch (SimplePie_Exception $e) catch (SimplePie_Exception $e)
{ {
// We need to unset this so that if SimplePie::set_file() has been called that object is untouched
unset($file);
// This is usually because DOMDocument doesn't exist // This is usually because DOMDocument doesn't exist
$this->error = $e->getMessage(); $this->error = $e->getMessage();
$this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, $e->getFile(), $e->getLine())); $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, $e->getFile(), $e->getLine()));
@ -1543,7 +1722,7 @@ class SimplePie
} }
$this->raw_data = $file->body; $this->raw_data = $file->body;
$this->permanent_url = $file->permanent_url;
$headers = $file->headers; $headers = $file->headers;
$sniffer = $this->registry->create('Content_Type_Sniffer', array(&$file)); $sniffer = $this->registry->create('Content_Type_Sniffer', array(&$file));
$sniffed = $sniffer->get_type(); $sniffed = $sniffer->get_type();
@ -1730,25 +1909,43 @@ class SimplePie
/** /**
* Get the URL for the feed * Get the URL for the feed
* *
* May or may not be different from the URL passed to {@see set_feed_url()}, * When the 'permanent' mode is enabled, returns the original feed URL,
* except in the case of an `HTTP 301 Moved Permanently` status response,
* in which case the location of the first redirection is returned.
*
* When the 'permanent' mode is disabled (default),
* may or may not be different from the URL passed to {@see set_feed_url()},
* depending on whether auto-discovery was used. * depending on whether auto-discovery was used.
* *
* @since Preview Release (previously called `get_feed_url()` since SimplePie 0.8.) * @since Preview Release (previously called `get_feed_url()` since SimplePie 0.8.)
* @todo If we have a perm redirect we should return the new URL * @todo Support <itunes:new-feed-url>
* @todo When we make the above change, let's support <itunes:new-feed-url> as well
* @todo Also, |atom:link|@rel=self * @todo Also, |atom:link|@rel=self
* @param bool $permanent Permanent mode to return only the original URL or the first redirection
* iff it is a 301 redirection
* @return string|null * @return string|null
*/ */
public function subscribe_url() public function subscribe_url($permanent = false)
{ {
if ($this->feed_url !== null) if ($permanent)
{ {
return $this->sanitize($this->feed_url, SIMPLEPIE_CONSTRUCT_IRI); if ($this->permanent_url !== null)
{
// sanitize encodes ampersands which are required when used in a url.
return str_replace('&amp;', '&',
$this->sanitize($this->permanent_url,
SIMPLEPIE_CONSTRUCT_IRI));
}
} }
else else
{ {
return null; if ($this->feed_url !== null)
{
return str_replace('&amp;', '&',
$this->sanitize($this->feed_url,
SIMPLEPIE_CONSTRUCT_IRI));
}
} }
return null;
} }
/** /**
@ -1963,7 +2160,21 @@ class SimplePie
*/ */
public function sanitize($data, $type, $base = '') public function sanitize($data, $type, $base = '')
{ {
return $this->sanitize->sanitize($data, $type, $base); try
{
return $this->sanitize->sanitize($data, $type, $base);
}
catch (SimplePie_Exception $e)
{
if (!$this->enable_exceptions)
{
$this->error = $e->getMessage();
$this->registry->call('Misc', 'error', array($this->error, E_USER_WARNING, $e->getFile(), $e->getLine()));
return '';
}
throw $e;
}
} }
/** /**
@ -2014,7 +2225,7 @@ class SimplePie
* Get a category for the feed * Get a category for the feed
* *
* @since Unknown * @since Unknown
* @param int $key The category that you want to return. Remember that arrays begin with 0, not 1 * @param int $key The category that you want to return. Remember that arrays begin with 0, not 1
* @return SimplePie_Category|null * @return SimplePie_Category|null
*/ */
public function get_category($key = 0) public function get_category($key = 0)
@ -2099,7 +2310,7 @@ class SimplePie
* Get an author for the feed * Get an author for the feed
* *
* @since 1.1 * @since 1.1
* @param int $key The author that you want to return. Remember that arrays begin with 0, not 1 * @param int $key The author that you want to return. Remember that arrays begin with 0, not 1
* @return SimplePie_Author|null * @return SimplePie_Author|null
*/ */
public function get_author($key = 0) public function get_author($key = 0)
@ -2197,7 +2408,7 @@ class SimplePie
* Get a contributor for the feed * Get a contributor for the feed
* *
* @since 1.1 * @since 1.1
* @param int $key The contrbutor that you want to return. Remember that arrays begin with 0, not 1 * @param int $key The contributor that you want to return. Remember that arrays begin with 0, not 1
* @return SimplePie_Author|null * @return SimplePie_Author|null
*/ */
public function get_contributor($key = 0) public function get_contributor($key = 0)
@ -2283,7 +2494,7 @@ class SimplePie
* Get a single link for the feed * Get a single link for the feed
* *
* @since 1.0 (previously called `get_feed_link` since Preview Release, `get_feed_permalink()` since 0.8) * @since 1.0 (previously called `get_feed_link` since Preview Release, `get_feed_permalink()` since 0.8)
* @param int $key The link that you want to return. Remember that arrays begin with 0, not 1 * @param int $key The link that you want to return. Remember that arrays begin with 0, not 1
* @param string $rel The relationship of the link to return * @param string $rel The relationship of the link to return
* @return string|null Link URL * @return string|null Link URL
*/ */
@ -2393,6 +2604,12 @@ class SimplePie
{ {
return $this->data['links'][$rel]; return $this->data['links'][$rel];
} }
else if (isset($this->data['headers']['link']) &&
preg_match('/<([^>]+)>; rel='.preg_quote($rel).'/',
$this->data['headers']['link'], $match))
{
return array($match[1]);
}
else else
{ {
return null; return null;
@ -2794,7 +3011,7 @@ class SimplePie
* *
* @see get_item_quantity() * @see get_item_quantity()
* @since Beta 2 * @since Beta 2
* @param int $key The item that you want to return. Remember that arrays begin with 0, not 1 * @param int $key The item that you want to return. Remember that arrays begin with 0, not 1
* @return SimplePie_Item|null * @return SimplePie_Item|null
*/ */
public function get_item($key = 0) public function get_item($key = 0)
@ -2821,7 +3038,7 @@ class SimplePie
* @since Beta 2 * @since Beta 2
* @param int $start Index to start at * @param int $start Index to start at
* @param int $end Number of items to return. 0 for all items after `$start` * @param int $end Number of items to return. 0 for all items after `$start`
* @return array|null List of {@see SimplePie_Item} objects * @return SimplePie_Item[]|null List of {@see SimplePie_Item} objects
*/ */
public function get_items($start = 0, $end = 0) public function get_items($start = 0, $end = 0)
{ {
@ -2830,96 +3047,81 @@ class SimplePie
if (!empty($this->multifeed_objects)) if (!empty($this->multifeed_objects))
{ {
$this->data['items'] = SimplePie::merge_items($this->multifeed_objects, $start, $end, $this->item_limit); $this->data['items'] = SimplePie::merge_items($this->multifeed_objects, $start, $end, $this->item_limit);
if (empty($this->data['items']))
{
return array();
}
return $this->data['items'];
} }
else $this->data['items'] = array();
if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'entry'))
{ {
$this->data['items'] = array(); $keys = array_keys($items);
if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'entry')) foreach ($keys as $key)
{ {
$keys = array_keys($items); $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key]));
foreach ($keys as $key)
{
$this->data['items'][] = $this->registry->create('Item', array($this, $items[$key]));
}
} }
if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'entry')) }
if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'entry'))
{
$keys = array_keys($items);
foreach ($keys as $key)
{ {
$keys = array_keys($items); $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key]));
foreach ($keys as $key)
{
$this->data['items'][] = $this->registry->create('Item', array($this, $items[$key]));
}
} }
if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'item')) }
if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'item'))
{
$keys = array_keys($items);
foreach ($keys as $key)
{ {
$keys = array_keys($items); $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key]));
foreach ($keys as $key)
{
$this->data['items'][] = $this->registry->create('Item', array($this, $items[$key]));
}
} }
if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'item')) }
if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'item'))
{
$keys = array_keys($items);
foreach ($keys as $key)
{ {
$keys = array_keys($items); $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key]));
foreach ($keys as $key)
{
$this->data['items'][] = $this->registry->create('Item', array($this, $items[$key]));
}
} }
if ($items = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'item')) }
if ($items = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'item'))
{
$keys = array_keys($items);
foreach ($keys as $key)
{ {
$keys = array_keys($items); $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key]));
foreach ($keys as $key)
{
$this->data['items'][] = $this->registry->create('Item', array($this, $items[$key]));
}
} }
} }
} }
if (!empty($this->data['items'])) if (empty($this->data['items']))
{ {
// If we want to order it by date, check if all items have a date, and then sort it return array();
if ($this->order_by_date && empty($this->multifeed_objects)) }
{
if (!isset($this->data['ordered_items']))
{
$do_sort = true;
foreach ($this->data['items'] as $item)
{
if (!$item->get_date('U'))
{
$do_sort = false;
break;
}
}
$item = null;
$this->data['ordered_items'] = $this->data['items'];
if ($do_sort)
{
usort($this->data['ordered_items'], array(get_class($this), 'sort_items'));
}
}
$items = $this->data['ordered_items'];
}
else
{
$items = $this->data['items'];
}
// Slice the data as desired if ($this->order_by_date)
if ($end === 0) {
if (!isset($this->data['ordered_items']))
{ {
return array_slice($items, $start); $this->data['ordered_items'] = $this->data['items'];
} usort($this->data['ordered_items'], array(get_class($this), 'sort_items'));
else }
{ $items = $this->data['ordered_items'];
return array_slice($items, $start, $end);
}
} }
else else
{ {
return array(); $items = $this->data['items'];
}
// Slice the data as desired
if ($end === 0)
{
return array_slice($items, $start);
}
else
{
return array_slice($items, $start, $end);
} }
} }
@ -2992,7 +3194,19 @@ class SimplePie
*/ */
public static function sort_items($a, $b) public static function sort_items($a, $b)
{ {
return $a->get_date('U') <= $b->get_date('U'); $a_date = $a->get_date('U');
$b_date = $b->get_date('U');
if ($a_date && $b_date) {
return $a_date > $b_date ? -1 : 1;
}
// Sort items without dates to the top.
if ($a_date) {
return 1;
}
if ($b_date) {
return -1;
}
return 0;
} }
/** /**
@ -3025,20 +3239,7 @@ class SimplePie
} }
} }
$do_sort = true; usort($items, array(get_class($urls[0]), 'sort_items'));
foreach ($items as $item)
{
if (!$item->get_date('U'))
{
$do_sort = false;
break;
}
}
$item = null;
if ($do_sort)
{
usort($items, array(get_class($urls[0]), 'sort_items'));
}
if ($end === 0) if ($end === 0)
{ {
@ -3055,4 +3256,42 @@ class SimplePie
return array(); return array();
} }
} }
/**
 * Store PubSubHubbub links as headers
 *
 * There is no way to find PuSH links in the body of a microformats feed,
 * so they are added to the `link` header when found, to be used later by
 * get_links.
 *
 * Nothing is changed when the header already advertises a hub, or when
 * `$hub` is empty. Otherwise `<hub>; rel=hub` (followed by
 * `<self>; rel=self` when `$self` is non-empty) is appended to the existing
 * header value, separated from it by a comma.
 *
 * @param SimplePie_File $file File whose `headers['link']` entry is updated in place
 * @param string $hub Hub URL
 * @param string $self Self URL
 */
private function store_links(&$file, $hub, $self)
{
    // A hub is already advertised in the header; leave it untouched.
    if (isset($file->headers['link']['hub']) ||
        (isset($file->headers['link']) &&
         preg_match('/rel=hub/', $file->headers['link'])))
    {
        return;
    }

    if (!$hub)
    {
        return;
    }

    if (!isset($file->headers['link']))
    {
        $file->headers['link'] = '';
    }
    elseif ($file->headers['link'] !== '')
    {
        // Append the separator so the links already in the header are kept.
        $file->headers['link'] .= ', ';
    }

    $file->headers['link'] .= '<'.$hub.'>; rel=hub';
    if ($self)
    {
        $file->headers['link'] .= ', <'.$self.'>; rel=self';
    }
}
} }

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -62,8 +61,10 @@ class SimplePie_Cache
* @var array * @var array
*/ */
protected static $handlers = array( protected static $handlers = array(
'mysql' => 'SimplePie_Cache_MySQL', 'mysql' => 'SimplePie_Cache_MySQL',
'memcache' => 'SimplePie_Cache_Memcache', 'memcache' => 'SimplePie_Cache_Memcache',
'memcached' => 'SimplePie_Cache_Memcached',
'redis' => 'SimplePie_Cache_Redis'
); );
/** /**

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -136,11 +135,7 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base
*/ */
public function mtime() public function mtime()
{ {
if (file_exists($this->name)) return @filemtime($this->name);
{
return filemtime($this->name);
}
return false;
} }
/** /**
@ -150,11 +145,7 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base
*/ */
public function touch() public function touch()
{ {
if (file_exists($this->name)) return @touch($this->name);
{
return touch($this->name);
}
return false;
} }
/** /**

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -95,10 +94,8 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base
'prefix' => 'simplepie_', 'prefix' => 'simplepie_',
), ),
); );
$parsed = SimplePie_Cache::parse_URL($location); $this->options = SimplePie_Misc::array_merge_recursive($this->options, SimplePie_Cache::parse_URL($location));
$this->options['host'] = empty($parsed['host']) ? $this->options['host'] : $parsed['host'];
$this->options['port'] = empty($parsed['port']) ? $this->options['port'] : $parsed['port'];
$this->options['extras'] = array_merge($this->options['extras'], $parsed['extras']);
$this->name = $this->options['extras']['prefix'] . md5("$name:$type"); $this->name = $this->options['extras']['prefix'] . md5("$name:$type");
$this->cache = new Memcache(); $this->cache = new Memcache();
@ -147,7 +144,7 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base
if ($data !== false) if ($data !== false)
{ {
// essentially ignore the mtime because Memcache expires on it's own // essentially ignore the mtime because Memcache expires on its own
return time(); return time();
} }
@ -165,7 +162,7 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base
if ($data !== false) if ($data !== false)
{ {
return $this->cache->set($this->name, $data, MEMCACHE_COMPRESSED, (int) $this->duration); return $this->cache->set($this->name, $data, MEMCACHE_COMPRESSED, (int) $this->options['extras']['timeout']);
} }
return false; return false;

View File

@ -0,0 +1,166 @@
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
 * Cache backend that stores SimplePie data in memcached
 *
 * Registered for URLs with the "memcached" protocol
 *
 * For example, `memcached://localhost:11211/?timeout=3600&prefix=sp_` will
 * connect to memcached on `localhost` on port 11211. All keys will be
 * prefixed with `sp_` and data will expire after 3600 seconds
 *
 * Two keys are written per cache entry: the payload itself and a companion
 * `<key>_mtime` entry holding the time of the last write.
 *
 * @package SimplePie
 * @subpackage Caching
 * @author Paul L. McNeely
 * @uses Memcached
 */
class SimplePie_Cache_Memcached implements SimplePie_Cache_Base
{
    /**
     * Memcached client used for every read and write
     * @var Memcached
     */
    protected $cache;

    /**
     * Connection settings (`host`, `port`) and `extras` (`timeout`, `prefix`)
     * @var array
     */
    protected $options;

    /**
     * Key under which the payload is stored (prefix + md5 of "name:type")
     * @var string
     */
    protected $name;

    /**
     * Create a new cache object
     *
     * Built-in defaults are merged with whatever the location string yields,
     * then the server is registered with the Memcached client.
     *
     * @param string $location Location string (from SimplePie::$cache_location)
     * @param string $name Unique ID for the cache
     * @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data
     */
    public function __construct($location, $name, $type)
    {
        $defaults = array(
            'host' => '127.0.0.1',
            'port' => 11211,
            'extras' => array(
                'timeout' => 3600, // one hour
                'prefix' => 'simplepie_',
            ),
        );
        $this->options = SimplePie_Misc::array_merge_recursive($defaults, SimplePie_Cache::parse_URL($location));

        $this->name = $this->options['extras']['prefix'] . md5($name . ':' . $type);

        $client = new Memcached();
        $client->addServer($this->options['host'], (int) $this->options['port']);
        $this->cache = $client;
    }

    /**
     * Save data to the cache
     *
     * The value is serialized before being handed to memcached.
     *
     * @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property
     * @return bool Successfulness
     */
    public function save($data)
    {
        $payload = ($data instanceof SimplePie) ? $data->data : $data;

        return $this->setData(serialize($payload));
    }

    /**
     * Retrieve the data saved to the cache
     *
     * @return array|false Data for SimplePie::$data, or false when the lookup returns false
     */
    public function load()
    {
        $stored = $this->cache->get($this->name);

        return ($stored === false) ? false : unserialize($stored);
    }

    /**
     * Retrieve the last modified time for the cache
     *
     * Reads the companion `_mtime` key and casts whatever comes back to int.
     *
     * @return int Timestamp
     */
    public function mtime()
    {
        return (int) $this->cache->get($this->name . '_mtime');
    }

    /**
     * Set the last modified time to the current time
     *
     * Done by re-storing the current payload, which also rewrites the
     * `_mtime` key.
     *
     * @return bool Success status
     */
    public function touch()
    {
        return $this->setData($this->cache->get($this->name));
    }

    /**
     * Remove the cache
     *
     * Only the payload key is deleted here; the `_mtime` key is not touched.
     *
     * @return bool Success status
     */
    public function unlink()
    {
        return $this->cache->delete($this->name, 0);
    }

    /**
     * Set the last modified time and data to Memcached
     *
     * @param mixed $data Already-serialized payload, or false to do nothing
     * @return bool Success status of the payload write (false when $data is false)
     */
    private function setData($data)
    {
        if ($data === false) {
            return false;
        }

        // Both keys share the configured timeout so they expire together.
        $ttl = (int) $this->options['extras']['timeout'];
        $this->cache->set($this->name . '_mtime', time(), $ttl);

        return $this->cache->set($this->name, $data, $ttl);
    }
}

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -94,9 +93,11 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
'path' => '', 'path' => '',
'extras' => array( 'extras' => array(
'prefix' => '', 'prefix' => '',
'cache_purge_time' => 2592000
), ),
); );
$this->options = array_merge_recursive($this->options, SimplePie_Cache::parse_URL($location));
$this->options = SimplePie_Misc::array_merge_recursive($this->options, SimplePie_Cache::parse_URL($location));
// Path is prefixed with a "/" // Path is prefixed with a "/"
$this->options['dbname'] = substr($this->options['path'], 1); $this->options['dbname'] = substr($this->options['path'], 1);
@ -130,16 +131,20 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
$query = $this->mysql->exec('CREATE TABLE `' . $this->options['extras']['prefix'] . 'cache_data` (`id` TEXT CHARACTER SET utf8 NOT NULL, `items` SMALLINT NOT NULL DEFAULT 0, `data` BLOB NOT NULL, `mtime` INT UNSIGNED NOT NULL, UNIQUE (`id`(125)))'); $query = $this->mysql->exec('CREATE TABLE `' . $this->options['extras']['prefix'] . 'cache_data` (`id` TEXT CHARACTER SET utf8 NOT NULL, `items` SMALLINT NOT NULL DEFAULT 0, `data` BLOB NOT NULL, `mtime` INT UNSIGNED NOT NULL, UNIQUE (`id`(125)))');
if ($query === false) if ($query === false)
{ {
trigger_error("Can't create " . $this->options['extras']['prefix'] . "cache_data table, check permissions", E_USER_WARNING);
$this->mysql = null; $this->mysql = null;
return;
} }
} }
if (!in_array($this->options['extras']['prefix'] . 'items', $db)) if (!in_array($this->options['extras']['prefix'] . 'items', $db))
{ {
$query = $this->mysql->exec('CREATE TABLE `' . $this->options['extras']['prefix'] . 'items` (`feed_id` TEXT CHARACTER SET utf8 NOT NULL, `id` TEXT CHARACTER SET utf8 NOT NULL, `data` TEXT CHARACTER SET utf8 NOT NULL, `posted` INT UNSIGNED NOT NULL, INDEX `feed_id` (`feed_id`(125)))'); $query = $this->mysql->exec('CREATE TABLE `' . $this->options['extras']['prefix'] . 'items` (`feed_id` TEXT CHARACTER SET utf8 NOT NULL, `id` TEXT CHARACTER SET utf8 NOT NULL, `data` MEDIUMBLOB NOT NULL, `posted` INT UNSIGNED NOT NULL, INDEX `feed_id` (`feed_id`(125)))');
if ($query === false) if ($query === false)
{ {
trigger_error("Can't create " . $this->options['extras']['prefix'] . "items table, check permissions", E_USER_WARNING);
$this->mysql = null; $this->mysql = null;
return;
} }
} }
} }
@ -157,6 +162,17 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB
return false; return false;
} }
$query = $this->mysql->prepare('DELETE i, cd FROM `' . $this->options['extras']['prefix'] . 'cache_data` cd, ' .
'`' . $this->options['extras']['prefix'] . 'items` i ' .
'WHERE cd.id = i.feed_id ' .
'AND cd.mtime < (unix_timestamp() - :purge_time)');
$query->bindValue(':purge_time', $this->options['extras']['cache_purge_time']);
if (!$query->execute())
{
return false;
}
if ($data instanceof SimplePie) if ($data instanceof SimplePie)
{ {
$data = clone $data; $data = clone $data;

View File

@ -0,0 +1,166 @@
<?php
/**
* SimplePie Redis Cache Extension
*
* @package SimplePie
* @author Jan Kozak <galvani78@gmail.com>
* @link http://galvani.cz/
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
* @version 0.2.9
*/
/**
 * Caches data to redis
 *
 * Registered for URLs with the "redis" protocol
 *
 * For example, `redis://localhost:6379/` will connect to redis on
 * `localhost` on port 6379. Only the host and port of the location are
 * used by this class; the key prefix and expiry come from the options array
 * given to the constructor (defaults: prefix `rss:simple_primary:`, no expiry).
 *
 * @package SimplePie
 * @subpackage Caching
 * @uses Redis
 */
class SimplePie_Cache_Redis implements SimplePie_Cache_Base
{
    /**
     * Redis instance
     *
     * @var \Redis
     */
    protected $cache;

    /**
     * Options (`prefix`: string prepended to the key, `expire`: TTL in seconds, 0 = never)
     *
     * @var array
     */
    protected $options;

    /**
     * Cache name (prefix + the name given to the constructor)
     *
     * @var string
     */
    protected $name;

    /**
     * Cache Data
     *
     * Not read or written by any method of this class.
     *
     * @var mixed
     */
    protected $data;

    /**
     * Create a new cache object
     *
     * @param string $location Location string (from SimplePie::$cache_location)
     * @param string $name Unique ID for the cache
     * @param array|mixed $options Array with `prefix` and `expire` keys. Any
     *                             non-array value (including null) selects
     *                             the built-in defaults.
     */
    public function __construct($location, $name, $options = null)
    {
        $parsed = SimplePie_Cache::parse_URL($location);
        $redis = new Redis();
        $redis->connect($parsed['host'], $parsed['port']);
        $this->cache = $redis;

        if (is_array($options)) {
            $this->options = $options;
        } else {
            $this->options = array(
                'prefix' => 'rss:simple_primary:',
                'expire' => 0,
            );
        }

        $this->name = $this->options['prefix'] . $name;
    }

    /**
     * Replace the Redis client used by this cache object
     *
     * @param \Redis $cache
     */
    public function setRedisClient(\Redis $cache)
    {
        $this->cache = $cache;
    }

    /**
     * Save data to the cache
     *
     * @param array|SimplePie $data Data to store in the cache. If passed a SimplePie object, only cache the $data property
     * @return bool Successfulness
     */
    public function save($data)
    {
        if ($data instanceof SimplePie) {
            $data = $data->data;
        }

        $response = $this->cache->set($this->name, serialize($data));
        if ($this->options['expire']) {
            $this->cache->expire($this->name, $this->options['expire']);
        }

        return $response;
    }

    /**
     * Retrieve the data saved to the cache
     *
     * @return array|false Data for SimplePie::$data, or false when the key lookup returns false
     */
    public function load()
    {
        $data = $this->cache->get($this->name);
        if ($data !== false) {
            return unserialize($data);
        }

        return false;
    }

    /**
     * Retrieve the last modified time for the cache
     *
     * No separate timestamp is stored: an existing key is reported as
     * modified "now".
     *
     * @return int|false Current timestamp if the key exists, false otherwise
     */
    public function mtime()
    {
        $data = $this->cache->get($this->name);
        if ($data !== false) {
            return time();
        }

        return false;
    }

    /**
     * Set the last modified time to the current time
     *
     * Re-stores the current value and, when an expiry is configured,
     * restarts its TTL.
     *
     * @return bool Success status
     */
    public function touch()
    {
        $data = $this->cache->get($this->name);
        if ($data === false) {
            return false;
        }

        $return = $this->cache->set($this->name, $data);
        if ($this->options['expire']) {
            // Use the configured expiry, as save() does; this class has no
            // `ttl` property.
            return $this->cache->expire($this->name, $this->options['expire']);
        }

        return $return;
    }

    /**
     * Remove the cache
     *
     * Deletes the key outright so that load() and mtime() report a miss
     * afterwards, instead of leaving an empty value stored without expiry.
     *
     * @return bool Success status (false when there was no key to delete)
     */
    public function unlink()
    {
        return (bool) $this->cache->del($this->name);
    }
}

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -256,7 +255,7 @@ class SimplePie_Content_Type_Sniffer
public function feed_or_html() public function feed_or_html()
{ {
$len = strlen($this->file->body); $len = strlen($this->file->body);
$pos = strspn($this->file->body, "\x09\x0A\x0D\x20"); $pos = strspn($this->file->body, "\x09\x0A\x0D\x20\xEF\xBB\xBF");
while ($pos < $len) while ($pos < $len)
{ {

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -169,7 +168,6 @@ class SimplePie_Decode_HTML_Entities
case "\x09": case "\x09":
case "\x0A": case "\x0A":
case "\x0B": case "\x0B":
case "\x0B":
case "\x0C": case "\x0C":
case "\x20": case "\x20":
case "\x3C": case "\x3C":

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -451,7 +450,7 @@ class SimplePie_Enclosure
/** /**
* Get the duration of the enclosure * Get the duration of the enclosure
* *
* @param string $convert Convert seconds into hh:mm:ss * @param bool $convert Convert seconds into hh:mm:ss
* @return string|int|null 'hh:mm:ss' string if `$convert` was specified, otherwise integer (or null if none found) * @return string|int|null 'hh:mm:ss' string if `$convert` was specified, otherwise integer (or null if none found)
*/ */
public function get_duration($convert = false) public function get_duration($convert = false)
@ -942,7 +941,7 @@ class SimplePie_Enclosure
* - `height` (integer): The height of the embedded media. Accepts any * - `height` (integer): The height of the embedded media. Accepts any
* numeric pixel value (such as `360`) or `auto`. Defaults to `auto`, * numeric pixel value (such as `360`) or `auto`. Defaults to `auto`,
* and it is recommended that you use this default. * and it is recommended that you use this default.
* - `loop` (boolean): Do you want the media to loop when its done? * - `loop` (boolean): Do you want the media to loop when it's done?
* Defaults to `false`. * Defaults to `false`.
* - `mediaplayer` (string): The location of the included * - `mediaplayer` (string): The location of the included
* `mediaplayer.swf` file. This allows for the playback of Flash Video * `mediaplayer.swf` file. This allows for the playback of Flash Video

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.4-dev * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -64,8 +63,9 @@ class SimplePie_File
var $redirects = 0; var $redirects = 0;
var $error; var $error;
var $method = SIMPLEPIE_FILE_SOURCE_NONE; var $method = SIMPLEPIE_FILE_SOURCE_NONE;
var $permanent_url;
public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false, $curl_options = array())
{ {
if (class_exists('idna_convert')) if (class_exists('idna_convert'))
{ {
@ -74,6 +74,7 @@ class SimplePie_File
$url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']); $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']);
} }
$this->url = $url; $this->url = $url;
$this->permanent_url = $url;
$this->useragent = $useragent; $this->useragent = $useragent;
if (preg_match('/^http(s)?:\/\//i', $url)) if (preg_match('/^http(s)?:\/\//i', $url))
{ {
@ -102,6 +103,7 @@ class SimplePie_File
curl_setopt($fp, CURLOPT_URL, $url); curl_setopt($fp, CURLOPT_URL, $url);
curl_setopt($fp, CURLOPT_HEADER, 1); curl_setopt($fp, CURLOPT_HEADER, 1);
curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1); curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($fp, CURLOPT_FAILONERROR, 1);
curl_setopt($fp, CURLOPT_TIMEOUT, $timeout); curl_setopt($fp, CURLOPT_TIMEOUT, $timeout);
curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout); curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout);
curl_setopt($fp, CURLOPT_REFERER, $url); curl_setopt($fp, CURLOPT_REFERER, $url);
@ -112,6 +114,9 @@ class SimplePie_File
curl_setopt($fp, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($fp, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($fp, CURLOPT_MAXREDIRS, $redirects); curl_setopt($fp, CURLOPT_MAXREDIRS, $redirects);
} }
foreach ($curl_options as $curl_param => $curl_value) {
curl_setopt($fp, $curl_param, $curl_value);
}
$this->headers = curl_exec($fp); $this->headers = curl_exec($fp);
if (curl_errno($fp) === 23 || curl_errno($fp) === 61) if (curl_errno($fp) === 23 || curl_errno($fp) === 61)
@ -126,7 +131,10 @@ class SimplePie_File
} }
else else
{ {
$info = curl_getinfo($fp); // Use the updated url provided by curl_getinfo after any redirects.
if ($info = curl_getinfo($fp)) {
$this->url = $info['url'];
}
curl_close($fp); curl_close($fp);
$this->headers = explode("\r\n\r\n", $this->headers, $info['redirect_count'] + 1); $this->headers = explode("\r\n\r\n", $this->headers, $info['redirect_count'] + 1);
$this->headers = array_pop($this->headers); $this->headers = array_pop($this->headers);
@ -134,13 +142,16 @@ class SimplePie_File
if ($parser->parse()) if ($parser->parse())
{ {
$this->headers = $parser->headers; $this->headers = $parser->headers;
$this->body = $parser->body; $this->body = trim($parser->body);
$this->status_code = $parser->status_code; $this->status_code = $parser->status_code;
if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects) if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects)
{ {
$this->redirects++; $this->redirects++;
$location = SimplePie_Misc::absolutize_url($this->headers['location'], $url); $location = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
return $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen); $previousStatusCode = $this->status_code;
$this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen);
$this->permanent_url = ($previousStatusCode == 301) ? $location : $url;
return;
} }
} }
} }
@ -222,7 +233,10 @@ class SimplePie_File
{ {
$this->redirects++; $this->redirects++;
$location = SimplePie_Misc::absolutize_url($this->headers['location'], $url); $location = SimplePie_Misc::absolutize_url($this->headers['location'], $url);
return $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen); $previousStatusCode = $this->status_code;
$this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen);
$this->permanent_url = ($previousStatusCode == 301) ? $location : $url;
return;
} }
if (isset($this->headers['content-encoding'])) if (isset($this->headers['content-encoding']))
{ {
@ -239,7 +253,7 @@ class SimplePie_File
} }
else else
{ {
$this->body = $decoder->data; $this->body = trim($decoder->data);
} }
break; break;
@ -282,7 +296,7 @@ class SimplePie_File
else else
{ {
$this->method = SIMPLEPIE_FILE_SOURCE_LOCAL | SIMPLEPIE_FILE_SOURCE_FILE_GET_CONTENTS; $this->method = SIMPLEPIE_FILE_SOURCE_LOCAL | SIMPLEPIE_FILE_SOURCE_FILE_GET_CONTENTS;
if (!$this->body = file_get_contents($url)) if (empty($url) || !($this->body = trim(file_get_contents($url))))
{ {
$this->error = 'file_get_contents could not read the file'; $this->error = 'file_get_contents could not read the file';
$this->success = false; $this->success = false;

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -259,6 +258,15 @@ class SimplePie_IRI
$this->set_iri($iri); $this->set_iri($iri);
} }
/**
 * Destructor: release the memoised parse results.
 *
 * set_iri(), set_path() and set_authority() each keep a function-static
 * cache; calling them with the $clear_cache flag set resets that cache to
 * null and returns without touching the instance.
 */
public function __destruct()
{
	foreach (array('set_iri', 'set_path', 'set_authority') as $setter)
	{
		$this->$setter(null, true);
	}
}
/** /**
* Create a new IRI object by resolving a relative IRI * Create a new IRI object by resolving a relative IRI
* *
@ -768,24 +776,20 @@ class SimplePie_IRI
*/ */
public function is_valid() public function is_valid()
{ {
$isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null; if ($this->ipath === '') return true;
if ($this->ipath !== '' &&
( $isauthority = $this->iuserinfo !== null || $this->ihost !== null ||
$isauthority && ( $this->port !== null;
$this->ipath[0] !== '/' || if ($isauthority && $this->ipath[0] === '/') return true;
substr($this->ipath, 0, 2) === '//'
) || if (!$isauthority && (substr($this->ipath, 0, 2) === '//')) return false;
(
$this->scheme === null && // Relative urls cannot have a colon in the first path segment (and the
!$isauthority && // slashes themselves are not included so skip the first character).
strpos($this->ipath, ':') !== false && if (!$this->scheme && !$isauthority &&
(strpos($this->ipath, '/') === false ? true : strpos($this->ipath, ':') < strpos($this->ipath, '/')) strpos($this->ipath, ':') !== false &&
) strpos($this->ipath, '/', 1) !== false &&
) strpos($this->ipath, ':') < strpos($this->ipath, '/', 1)) return false;
)
{
return false;
}
return true; return true;
} }
@ -797,9 +801,14 @@ class SimplePie_IRI
* @param string $iri * @param string $iri
* @return bool * @return bool
*/ */
public function set_iri($iri) public function set_iri($iri, $clear_cache = false)
{ {
static $cache; static $cache;
if ($clear_cache)
{
$cache = null;
return;
}
if (!$cache) if (!$cache)
{ {
$cache = array(); $cache = array();
@ -879,9 +888,14 @@ class SimplePie_IRI
* @param string $authority * @param string $authority
* @return bool * @return bool
*/ */
public function set_authority($authority) public function set_authority($authority, $clear_cache = false)
{ {
static $cache; static $cache;
if ($clear_cache)
{
$cache = null;
return;
}
if (!$cache) if (!$cache)
$cache = array(); $cache = array();
@ -1049,9 +1063,14 @@ class SimplePie_IRI
* @param string $ipath * @param string $ipath
* @return bool * @return bool
*/ */
public function set_path($ipath) public function set_path($ipath, $clear_cache = false)
{ {
static $cache; static $cache;
if ($clear_cache)
{
$cache = null;
return;
}
if (!$cache) if (!$cache)
{ {
$cache = array(); $cache = array();

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -203,14 +202,13 @@ class SimplePie_Item
* *
* Uses `<atom:id>`, `<guid>`, `<dc:identifier>` or the `about` attribute * Uses `<atom:id>`, `<guid>`, `<dc:identifier>` or the `about` attribute
* for RDF. If none of these are supplied (or `$hash` is true), creates an * for RDF. If none of these are supplied (or `$hash` is true), creates an
* MD5 hash based on the permalink and title. If either of those are not * MD5 hash based on the permalink, title and content.
* supplied, creates a hash based on the full feed data.
* *
* @since Beta 2 * @since Beta 2
* @param boolean $hash Should we force using a hash instead of the supplied ID? * @param boolean $hash Should we force using a hash instead of the supplied ID?
* @return string * @return string
*/ */
public function get_id($hash = false) public function get_id($hash = false, $fn = '')
{ {
if (!$hash) if (!$hash)
{ {
@ -238,23 +236,10 @@ class SimplePie_Item
{ {
return $this->sanitize($this->data['attribs'][SIMPLEPIE_NAMESPACE_RDF]['about'], SIMPLEPIE_CONSTRUCT_TEXT); return $this->sanitize($this->data['attribs'][SIMPLEPIE_NAMESPACE_RDF]['about'], SIMPLEPIE_CONSTRUCT_TEXT);
} }
elseif (($return = $this->get_permalink()) !== null)
{
return $return;
}
elseif (($return = $this->get_title()) !== null)
{
return $return;
}
}
if ($this->get_permalink() !== null || $this->get_title() !== null)
{
return md5($this->get_permalink() . $this->get_title());
}
else
{
return md5(serialize($this->data));
} }
if ($fn === '' || !is_callable($fn)) $fn = 'md5';
return call_user_func($fn,
$this->get_permalink().$this->get_title().$this->get_content());
} }
/** /**
@ -322,41 +307,50 @@ class SimplePie_Item
*/ */
public function get_description($description_only = false) public function get_description($description_only = false)
{ {
if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'summary')) if (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'summary')) &&
($return = $this->sanitize($tags[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($tags[0]['attribs'])), $this->get_base($tags[0]))))
{ {
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); return $return;
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'summary')) elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'summary')) &&
($return = $this->sanitize($tags[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($tags[0]['attribs'])), $this->get_base($tags[0]))))
{ {
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); return $return;
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description')) elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description')) &&
($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($tags[0]))))
{ {
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0])); return $return;
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'description')) elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'description')) &&
($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($tags[0]))))
{ {
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($return[0])); return $return;
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'description')) elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'description')) &&
($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT)))
{ {
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); return $return;
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'description')) elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'description')) &&
($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT)))
{ {
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); return $return;
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'summary')) elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'summary')) &&
($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($tags[0]))))
{ {
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($return[0])); return $return;
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'subtitle')) elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'subtitle')) &&
($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT)))
{ {
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); return $return;
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'description')) elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'description')) &&
($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_HTML)))
{ {
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML); return $return;
} }
elseif (!$description_only) elseif (!$description_only)
@ -385,17 +379,20 @@ class SimplePie_Item
*/ */
public function get_content($content_only = false) public function get_content($content_only = false)
{ {
if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'content')) if (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'content')) &&
($return = $this->sanitize($tags[0]['data'], $this->registry->call('Misc', 'atom_10_content_construct_type', array($tags[0]['attribs'])), $this->get_base($tags[0]))))
{ {
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_content_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); return $return;
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'content')) elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'content')) &&
($return = $this->sanitize($tags[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($tags[0]['attribs'])), $this->get_base($tags[0]))))
{ {
return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); return $return;
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10_MODULES_CONTENT, 'encoded')) elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10_MODULES_CONTENT, 'encoded')) &&
($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($tags[0]))))
{ {
return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($return[0])); return $return;
} }
elseif (!$content_only) elseif (!$content_only)
{ {
@ -407,6 +404,30 @@ class SimplePie_Item
} }
} }
/**
 * Get the media:thumbnail of the item
 *
 * Looks up the first `<media:thumbnail>` element of the item and returns
 * the attribute data stored under its empty-namespace key (the same
 * structure other methods index as `['attribs']['']['url']`). A non-null
 * result is memoised in `$this->data['thumbnail']`.
 *
 * @return array|null Attribute data of the first thumbnail element, or
 *                    null when the item has no thumbnail or the element
 *                    carries no such attributes
 */
public function get_thumbnail()
{
	if (!isset($this->data['thumbnail']))
	{
		$tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'thumbnail');

		// A thumbnail element without attributes has no '' entry under
		// 'attribs'; check first so we yield null without an
		// undefined-index notice.
		if ($tags && isset($tags[0]['attribs']['']))
		{
			$this->data['thumbnail'] = $tags[0]['attribs'][''];
		}
		else
		{
			$this->data['thumbnail'] = null;
		}
	}
	return $this->data['thumbnail'];
}
/** /**
* Get a category for the item * Get a category for the item
* *
@ -433,7 +454,7 @@ class SimplePie_Item
* Uses `<atom:category>`, `<category>` or `<dc:subject>` * Uses `<atom:category>`, `<category>` or `<dc:subject>`
* *
* @since Beta 3 * @since Beta 3
* @return array|null List of {@see SimplePie_Category} objects * @return SimplePie_Category[]|null List of {@see SimplePie_Category} objects
*/ */
public function get_categories() public function get_categories()
{ {
@ -446,15 +467,15 @@ class SimplePie_Item
$label = null; $label = null;
if (isset($category['attribs']['']['term'])) if (isset($category['attribs']['']['term']))
{ {
$term = $this->sanitize($category['attribs']['']['term'], SIMPLEPIE_CONSTRUCT_TEXT); $term = $this->sanitize($category['attribs']['']['term'], SIMPLEPIE_CONSTRUCT_HTML);
} }
if (isset($category['attribs']['']['scheme'])) if (isset($category['attribs']['']['scheme']))
{ {
$scheme = $this->sanitize($category['attribs']['']['scheme'], SIMPLEPIE_CONSTRUCT_TEXT); $scheme = $this->sanitize($category['attribs']['']['scheme'], SIMPLEPIE_CONSTRUCT_HTML);
} }
if (isset($category['attribs']['']['label'])) if (isset($category['attribs']['']['label']))
{ {
$label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_HTML);
} }
$categories[] = $this->registry->create('Category', array($term, $scheme, $label)); $categories[] = $this->registry->create('Category', array($term, $scheme, $label));
} }
@ -462,10 +483,10 @@ class SimplePie_Item
{ {
// This is really the label, but keep this as the term also for BC. // This is really the label, but keep this as the term also for BC.
// Label will also work on retrieving because that falls back to term. // Label will also work on retrieving because that falls back to term.
$term = $this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT); $term = $this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_HTML);
if (isset($category['attribs']['']['domain'])) if (isset($category['attribs']['']['domain']))
{ {
$scheme = $this->sanitize($category['attribs']['']['domain'], SIMPLEPIE_CONSTRUCT_TEXT); $scheme = $this->sanitize($category['attribs']['']['domain'], SIMPLEPIE_CONSTRUCT_HTML);
} }
else else
{ {
@ -475,11 +496,11 @@ class SimplePie_Item
} }
foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category) foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category)
{ {
$categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_HTML), null, null));
} }
foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category) foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category)
{ {
$categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_HTML), null, null));
} }
if (!empty($categories)) if (!empty($categories))
@ -616,7 +637,7 @@ class SimplePie_Item
$email = null; $email = null;
if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'])) if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data']))
{ {
$name = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); $name = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_HTML);
} }
if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data'])) if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data']))
{ {
@ -624,7 +645,7 @@ class SimplePie_Item
} }
if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'])) if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data']))
{ {
$email = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); $email = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_HTML);
} }
if ($name !== null || $email !== null || $uri !== null) if ($name !== null || $email !== null || $uri !== null)
{ {
@ -638,7 +659,7 @@ class SimplePie_Item
$email = null; $email = null;
if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'])) if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data']))
{ {
$name = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); $name = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_HTML);
} }
if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data'])) if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data']))
{ {
@ -646,7 +667,7 @@ class SimplePie_Item
} }
if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'])) if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data']))
{ {
$email = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); $email = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_HTML);
} }
if ($name !== null || $email !== null || $url !== null) if ($name !== null || $email !== null || $url !== null)
{ {
@ -655,19 +676,19 @@ class SimplePie_Item
} }
if ($author = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'author')) if ($author = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'author'))
{ {
$authors[] = $this->registry->create('Author', array(null, null, $this->sanitize($author[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT))); $authors[] = $this->registry->create('Author', array(null, null, $this->sanitize($author[0]['data'], SIMPLEPIE_CONSTRUCT_HTML)));
} }
foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author) foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author)
{ {
$authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_HTML), null, null));
} }
foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author) foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author)
{ {
$authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_HTML), null, null));
} }
foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author) foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author)
{ {
$authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_HTML), null, null));
} }
if (!empty($authors)) if (!empty($authors))
@ -738,6 +759,18 @@ class SimplePie_Item
{ {
$this->data['date']['raw'] = $return[0]['data']; $this->data['date']['raw'] = $return[0]['data'];
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'pubDate'))
{
$this->data['date']['raw'] = $return[0]['data'];
}
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'date'))
{
$this->data['date']['raw'] = $return[0]['data'];
}
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'date'))
{
$this->data['date']['raw'] = $return[0]['data'];
}
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'updated')) elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'updated'))
{ {
$this->data['date']['raw'] = $return[0]['data']; $this->data['date']['raw'] = $return[0]['data'];
@ -754,18 +787,6 @@ class SimplePie_Item
{ {
$this->data['date']['raw'] = $return[0]['data']; $this->data['date']['raw'] = $return[0]['data'];
} }
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'pubDate'))
{
$this->data['date']['raw'] = $return[0]['data'];
}
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'date'))
{
$this->data['date']['raw'] = $return[0]['data'];
}
elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'date'))
{
$this->data['date']['raw'] = $return[0]['data'];
}
if (!empty($this->data['date']['raw'])) if (!empty($this->data['date']['raw']))
{ {
@ -821,7 +842,7 @@ class SimplePie_Item
if (!empty($this->data['updated']['raw'])) if (!empty($this->data['updated']['raw']))
{ {
$parser = $this->registry->call('Parse_Date', 'get'); $parser = $this->registry->call('Parse_Date', 'get');
$this->data['updated']['parsed'] = $parser->parse($this->data['date']['raw']); $this->data['updated']['parsed'] = $parser->parse($this->data['updated']['raw']);
} }
else else
{ {
@ -1080,8 +1101,8 @@ class SimplePie_Item
* *
* @since Beta 2 * @since Beta 2
* @todo Add support for end-user defined sorting of enclosures by type/handler (so we can prefer the faster-loading FLV over MP4). * @todo Add support for end-user defined sorting of enclosures by type/handler (so we can prefer the faster-loading FLV over MP4).
* @todo If an element exists at a level, but it's value is empty, we should fall back to the value from the parent (if it exists). * @todo If an element exists at a level, but its value is empty, we should fall back to the value from the parent (if it exists).
* @return array|null List of SimplePie_Enclosure items * @return SimplePie_Enclosure[]|null List of SimplePie_Enclosure items
*/ */
public function get_enclosures() public function get_enclosures()
{ {
@ -2658,7 +2679,9 @@ class SimplePie_Item
// PLAYER // PLAYER
if (isset($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'])) if (isset($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player']))
{ {
$player = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI); if (isset($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'])) {
$player = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
}
} }
else else
{ {
@ -2733,7 +2756,9 @@ class SimplePie_Item
{ {
foreach ($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) foreach ($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail)
{ {
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI); if (isset($thumbnail['attribs']['']['url'])) {
$thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI);
}
} }
if (is_array($thumbnails)) if (is_array($thumbnails))
{ {

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -121,34 +120,41 @@ class SimplePie_Locator
{ {
if ($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local)) if ($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local))
{ {
return $working; return $working[0];
} }
if ($type & SIMPLEPIE_LOCATOR_LOCAL_BODY && $working = $this->body($this->local)) if ($type & SIMPLEPIE_LOCATOR_LOCAL_BODY && $working = $this->body($this->local))
{ {
return $working; return $working[0];
} }
if ($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere)) if ($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere))
{ {
return $working; return $working[0];
} }
if ($type & SIMPLEPIE_LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere)) if ($type & SIMPLEPIE_LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere))
{ {
return $working; return $working[0];
} }
} }
return null; return null;
} }
public function is_feed($file) public function is_feed($file, $check_html = false)
{ {
if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE)
{ {
$sniffer = $this->registry->create('Content_Type_Sniffer', array($file)); $sniffer = $this->registry->create('Content_Type_Sniffer', array($file));
$sniffed = $sniffer->get_type(); $sniffed = $sniffer->get_type();
if (in_array($sniffed, array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml'))) $mime_types = array('application/rss+xml', 'application/rdf+xml',
'text/rdf', 'application/atom+xml', 'text/xml',
'application/xml', 'application/x-rss+xml');
if ($check_html)
{
$mime_types[] = 'text/html';
}
if (in_array($sniffed, $mime_types))
{ {
return true; return true;
} }
@ -226,7 +232,7 @@ class SimplePie_Locator
} }
if ($link->hasAttribute('href') && $link->hasAttribute('rel')) if ($link->hasAttribute('href') && $link->hasAttribute('rel'))
{ {
$rel = array_unique($this->registry->call('Misc', 'space_seperated_tokens', array(strtolower($link->getAttribute('rel'))))); $rel = array_unique($this->registry->call('Misc', 'space_separated_tokens', array(strtolower($link->getAttribute('rel')))));
$line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1; $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1;
if ($this->base_location < $line) if ($this->base_location < $line)
@ -242,14 +248,14 @@ class SimplePie_Locator
continue; continue;
} }
if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href])) if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('text/html', 'application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href]))
{ {
$this->checked_feeds++; $this->checked_feeds++;
$headers = array( $headers = array(
'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1',
); );
$feed = $this->registry->create('File', array($href, $this->timeout, 5, $headers, $this->useragent)); $feed = $this->registry->create('File', array($href, $this->timeout, 5, $headers, $this->useragent));
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed, true))
{ {
$feeds[$href] = $feed; $feeds[$href] = $feed;
} }
@ -275,9 +281,9 @@ class SimplePie_Locator
{ {
$href = trim($link->getAttribute('href')); $href = trim($link->getAttribute('href'));
$parsed = $this->registry->call('Misc', 'parse_url', array($href)); $parsed = $this->registry->call('Misc', 'parse_url', array($href));
if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme'])) if ($parsed['scheme'] === '' || preg_match('/^(https?|feed)?$/i', $parsed['scheme']))
{ {
if ($this->base_location < $link->getLineNo()) if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo())
{ {
$href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base)); $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base));
} }
@ -312,6 +318,57 @@ class SimplePie_Locator
return null; return null;
} }
/**
 * Find the first link in the document that carries a given rel value
 *
 * Examines every `<a>` and `<link>` element having both `rel` and `href`
 * attributes, in the order returned by the XPath query, and returns the
 * absolutized href of the first one whose rel token list contains `$rel`.
 * Only hrefs with no scheme or an http/https scheme are considered.
 *
 * When the element reports a line number greater than
 * `$this->base_location` the href is resolved against `$this->base`;
 * otherwise it is resolved against `$this->http_base`.
 *
 * @param string $rel Link relation to look for (matched case-insensitively)
 * @return string|null Absolute URL of the first usable match, or null if
 *                     there is none
 * @throws SimplePie_Exception If no DOM is loaded or DOMXpath is unavailable
 */
public function get_rel_link($rel)
{
	if ($this->dom === null)
	{
		throw new SimplePie_Exception('DOMDocument not found, unable to use '.
		                              'locator');
	}
	if (!class_exists('DOMXpath'))
	{
		throw new SimplePie_Exception('DOMXpath not found, unable to use '.
		                              'get_rel_link');
	}

	// The rel attribute is lowercased below, so normalise the needle too.
	$wanted = strtolower((string) $rel);

	$xpath = new DOMXpath($this->dom);
	$query = '//a[@rel and @href] | //link[@rel and @href]';
	foreach ($xpath->query($query) as $link)
	{
		// rel is a whitespace-separated token list; use the shared
		// tokenizer so tabs, newlines and repeated spaces are handled.
		$rel_values = $this->registry->call('Misc', 'space_separated_tokens', array(strtolower($link->getAttribute('rel'))));
		if (!in_array($wanted, $rel_values, true))
		{
			continue;
		}

		$href = trim($link->getAttribute('href'));
		$parsed = $this->registry->call('Misc', 'parse_url', array($href));
		if ($parsed['scheme'] !== '' && !preg_match('/^https?$/i', $parsed['scheme']))
		{
			continue;
		}

		if (method_exists($link, 'getLineNo') &&
		    $this->base_location < $link->getLineNo())
		{
			$base = $this->base;
		}
		else
		{
			$base = $this->http_base;
		}

		$href = $this->registry->call('Misc', 'absolutize_url', array($href, $base));

		// An href that cannot be resolved only disqualifies this link;
		// keep looking instead of abandoning the whole search.
		if ($href !== false)
		{
			return $href;
		}
	}
	return null;
}
public function extension(&$array) public function extension(&$array)
{ {
foreach ($array as $key => $value) foreach ($array as $key => $value)
@ -330,7 +387,7 @@ class SimplePie_Locator
$feed = $this->registry->create('File', array($value, $this->timeout, 5, $headers, $this->useragent)); $feed = $this->registry->create('File', array($value, $this->timeout, 5, $headers, $this->useragent));
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{ {
return $feed; return array($feed);
} }
else else
{ {
@ -358,7 +415,7 @@ class SimplePie_Locator
$feed = $this->registry->create('File', array($value, $this->timeout, 5, null, $this->useragent)); $feed = $this->registry->create('File', array($value, $this->timeout, 5, null, $this->useragent));
if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed))
{ {
return $feed; return array($feed);
} }
else else
{ {

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -124,7 +123,7 @@ class SimplePie_Misc
{ {
$attribs[$j][2] = $attribs[$j][1]; $attribs[$j][2] = $attribs[$j][1];
} }
$return[$i]['attribs'][strtolower($attribs[$j][1])]['data'] = SimplePie_Misc::entities_decode(end($attribs[$j]), 'UTF-8'); $return[$i]['attribs'][strtolower($attribs[$j][1])]['data'] = SimplePie_Misc::entities_decode(end($attribs[$j]));
} }
} }
} }
@ -138,7 +137,7 @@ class SimplePie_Misc
foreach ($element['attribs'] as $key => $value) foreach ($element['attribs'] as $key => $value)
{ {
$key = strtolower($key); $key = strtolower($key);
$full .= " $key=\"" . htmlspecialchars($value['data']) . '"'; $full .= " $key=\"" . htmlspecialchars($value['data'], ENT_COMPAT, 'UTF-8') . '"';
} }
if ($element['self_closing']) if ($element['self_closing'])
{ {
@ -224,6 +223,23 @@ class SimplePie_Misc
} }
} }
/**
 * Recursively merge $array2 into $array1 and return the result.
 *
 * Values from $array2 win. When both sides hold an array under the same
 * key the two arrays are merged recursively; in every other case (key
 * missing from $array1, or either side not an array) the value from
 * $array2 replaces whatever $array1 had.
 *
 * @param array $array1 Base array
 * @param array $array2 Array whose entries override/extend $array1
 * @return array The merged array
 */
public static function array_merge_recursive($array1, $array2)
{
	foreach ($array2 as $key => $value)
	{
		// Only recurse when there really is an array to merge into;
		// otherwise we would read an undefined index or try to index a scalar.
		if (is_array($value) && isset($array1[$key]) && is_array($array1[$key]))
		{
			$array1[$key] = SimplePie_Misc::array_merge_recursive($array1[$key], $value);
		}
		else
		{
			$array1[$key] = $value;
		}
	}

	return $array1;
}
public static function parse_url($url) public static function parse_url($url)
{ {
$iri = new SimplePie_IRI($url); $iri = new SimplePie_IRI($url);
@ -317,11 +333,16 @@ class SimplePie_Misc
{ {
return $return; return $return;
} }
// This is last, as behaviour of this varies with OS userland and PHP version // This is third, as behaviour of this varies with OS userland and PHP version
elseif (function_exists('iconv') && ($return = SimplePie_Misc::change_encoding_iconv($data, $input, $output))) elseif (function_exists('iconv') && ($return = SimplePie_Misc::change_encoding_iconv($data, $input, $output)))
{ {
return $return; return $return;
} }
// This is last, as behaviour of this varies with OS userland and PHP version
elseif (class_exists('\UConverter') && ($return = SimplePie_Misc::change_encoding_uconverter($data, $input, $output)))
{
return $return;
}
// If we can't do anything, just fail // If we can't do anything, just fail
else else
{ {
@ -372,6 +393,17 @@ class SimplePie_Misc
return @iconv($input, $output, $data); return @iconv($input, $output, $data);
} }
/**
 * Convert a string between encodings using UConverter.
 *
 * Note the argument order: UConverter::transcode() takes the target
 * encoding before the source encoding.
 *
 * @param string $data Raw data in the $input encoding
 * @param string $input Encoding of $data
 * @param string $output Encoding to convert to
 * @return string|false Converted string, or false on failure
 */
protected static function change_encoding_uconverter($data, $input, $output)
{
	// Errors are suppressed; the transcode() result is returned as-is
	$converted = @\UConverter::transcode($data, $output, $input);

	return $converted;
}
/** /**
* Normalize an encoding name * Normalize an encoding name
* *
@ -1926,7 +1958,7 @@ class SimplePie_Misc
return (bool) preg_match('/^([A-Za-z0-9\-._~\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}!$&\'()*+,;=@]|(%[0-9ABCDEF]{2}))+$/u', $string); return (bool) preg_match('/^([A-Za-z0-9\-._~\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}!$&\'()*+,;=@]|(%[0-9ABCDEF]{2}))+$/u', $string);
} }
public static function space_seperated_tokens($string) public static function space_separated_tokens($string)
{ {
$space_characters = "\x20\x09\x0A\x0B\x0C\x0D"; $space_characters = "\x20\x09\x0A\x0B\x0C\x0D";
$string_length = strlen($string); $string_length = strlen($string);

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -173,7 +172,7 @@ class SimplePie_Parse_Date
'aug' => 8, 'aug' => 8,
'august' => 8, 'august' => 8,
'sep' => 9, 'sep' => 9,
'september' => 8, 'september' => 9,
'oct' => 10, 'oct' => 10,
'october' => 10, 'october' => 10,
'nov' => 11, 'nov' => 11,
@ -331,6 +330,7 @@ class SimplePie_Parse_Date
'CCT' => 23400, 'CCT' => 23400,
'CDT' => -18000, 'CDT' => -18000,
'CEDT' => 7200, 'CEDT' => 7200,
'CEST' => 7200,
'CET' => 3600, 'CET' => 3600,
'CGST' => -7200, 'CGST' => -7200,
'CGT' => -10800, 'CGT' => -10800,
@ -630,7 +630,7 @@ class SimplePie_Parse_Date
/** /**
* Parse a superset of W3C-DTF (allows hyphens and colons to be omitted, as * Parse a superset of W3C-DTF (allows hyphens and colons to be omitted, as
* well as allowing any of upper or lower case "T", horizontal tabs, or * well as allowing any of upper or lower case "T", horizontal tabs, or
* spaces to be used as the time seperator (including more than one)) * spaces to be used as the time separator (including more than one))
* *
* @access protected * @access protected
* @return int Timestamp * @return int Timestamp
@ -690,7 +690,7 @@ class SimplePie_Parse_Date
} }
// Convert the number of seconds to an integer, taking decimals into account // Convert the number of seconds to an integer, taking decimals into account
$second = round($match[6] + $match[7] / pow(10, strlen($match[7]))); $second = round((int)$match[6] + (int)$match[7] / pow(10, strlen($match[7])));
return gmmktime($match[4], $match[5], $second, $match[2], $match[3], $match[1]) - $timezone; return gmmktime($match[4], $match[5], $second, $match[2], $match[3], $match[1]) - $timezone;
} }
@ -720,7 +720,7 @@ class SimplePie_Parse_Date
{ {
$output .= substr($string, $position, $pos - $position); $output .= substr($string, $position, $pos - $position);
$position = $pos + 1; $position = $pos + 1;
if ($string[$pos - 1] !== '\\') if ($pos === 0 || $string[$pos - 1] !== '\\')
{ {
$depth++; $depth++;
while ($depth && $position < $length) while ($depth && $position < $length)

File diff suppressed because one or more lines are too long

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -113,7 +112,7 @@ class SimplePie_Registry
*/ */
public function register($type, $class, $legacy = false) public function register($type, $class, $legacy = false)
{ {
if (!is_subclass_of($class, $this->default[$type])) if (!@is_subclass_of($class, $this->default[$type]))
{ {
return false; return false;
} }

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue
@ -61,7 +60,8 @@ class SimplePie_Sanitize
var $image_handler = ''; var $image_handler = '';
var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'); var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
var $encode_instead_of_strip = false; var $encode_instead_of_strip = false;
var $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'); var $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
var $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'));
var $strip_comments = false; var $strip_comments = false;
var $output_encoding = 'UTF-8'; var $output_encoding = 'UTF-8';
var $enable_cache = true; var $enable_cache = true;
@ -160,7 +160,7 @@ class SimplePie_Sanitize
$this->encode_instead_of_strip = (bool) $encode; $this->encode_instead_of_strip = (bool) $encode;
} }
public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc')) public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
{ {
if ($attribs) if ($attribs)
{ {
@ -179,6 +179,25 @@ class SimplePie_Sanitize
} }
} }
/**
 * Set the attributes that the sanitizer adds to elements.
 *
 * An array is stored unchanged; the default maps a tag name to an array
 * of attribute => value pairs. Any other non-empty value is split on
 * commas. An empty value turns the feature off by storing false.
 *
 * @param array|string|false $attribs Attributes to add, or an empty value to disable
 */
public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')))
{
	if (!$attribs)
	{
		$this->add_attributes = false;
		return;
	}

	$this->add_attributes = is_array($attribs) ? $attribs : explode(',', $attribs);
}
public function strip_comments($strip = false) public function strip_comments($strip = false)
{ {
$this->strip_comments = (bool) $strip; $this->strip_comments = (bool) $strip;
@ -247,18 +266,24 @@ class SimplePie_Sanitize
if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
{ {
if (!class_exists('DOMDocument'))
{
throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer');
}
$document = new DOMDocument(); $document = new DOMDocument();
$document->encoding = 'UTF-8'; $document->encoding = 'UTF-8';
$data = $this->preprocess($data, $type); $data = $this->preprocess($data, $type);
set_error_handler(array('SimplePie_Misc', 'silence_errors')); set_error_handler(array('SimplePie_Misc', 'silence_errors'));
$document->loadHTML($data); $document->loadHTML($data);
restore_error_handler(); restore_error_handler();
$xpath = new DOMXPath($document);
// Strip comments // Strip comments
if ($this->strip_comments) if ($this->strip_comments)
{ {
$xpath = new DOMXPath($document);
$comments = $xpath->query('//comment()'); $comments = $xpath->query('//comment()');
foreach ($comments as $comment) foreach ($comments as $comment)
@ -274,7 +299,7 @@ class SimplePie_Sanitize
{ {
foreach ($this->strip_htmltags as $tag) foreach ($this->strip_htmltags as $tag)
{ {
$this->strip_tag($tag, $document, $type); $this->strip_tag($tag, $document, $xpath, $type);
} }
} }
@ -282,7 +307,15 @@ class SimplePie_Sanitize
{ {
foreach ($this->strip_attributes as $attrib) foreach ($this->strip_attributes as $attrib)
{ {
$this->strip_attr($attrib, $document); $this->strip_attr($attrib, $xpath);
}
}
if ($this->add_attributes)
{
foreach ($this->add_attributes as $tag => $valuePairs)
{
$this->add_attr($tag, $valuePairs, $document);
} }
} }
@ -310,7 +343,7 @@ class SimplePie_Sanitize
} }
else else
{ {
$file = $this->registry->create('File', array($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen)); $file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen));
$headers = $file->headers; $headers = $file->headers;
if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
@ -329,19 +362,17 @@ class SimplePie_Sanitize
} }
} }
// Remove the DOCTYPE // Get content node
// Seems to cause segfaulting if we don't do this $div = $document->getElementsByTagName('body')->item(0)->firstChild;
if ($document->firstChild instanceof DOMDocumentType)
{
$document->removeChild($document->firstChild);
}
// Move everything from the body to the root
$real_body = $document->getElementsByTagName('body')->item(0)->childNodes->item(0);
$document->replaceChild($real_body, $document->firstChild);
// Finally, convert to a HTML string // Finally, convert to a HTML string
$data = trim($document->saveHTML()); if (version_compare(PHP_VERSION, '5.3.6', '>='))
{
$data = trim($document->saveHTML($div));
}
else
{
$data = trim($document->saveXML($div));
}
if ($this->remove_div) if ($this->remove_div)
{ {
@ -379,6 +410,7 @@ class SimplePie_Sanitize
protected function preprocess($html, $type) protected function preprocess($html, $type)
{ {
$ret = ''; $ret = '';
$html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);
if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML) if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
{ {
// Atom XHTML constructs are wrapped with a div by default // Atom XHTML constructs are wrapped with a div by default
@ -451,9 +483,8 @@ class SimplePie_Sanitize
} }
} }
protected function strip_tag($tag, $document, $type) protected function strip_tag($tag, $document, $xpath, $type)
{ {
$xpath = new DOMXPath($document);
$elements = $xpath->query('body//' . $tag); $elements = $xpath->query('body//' . $tag);
if ($this->encode_instead_of_strip) if ($this->encode_instead_of_strip)
{ {
@ -536,9 +567,8 @@ class SimplePie_Sanitize
} }
} }
protected function strip_attr($attrib, $document) protected function strip_attr($attrib, $xpath)
{ {
$xpath = new DOMXPath($document);
$elements = $xpath->query('//*[@' . $attrib . ']'); $elements = $xpath->query('//*[@' . $attrib . ']');
foreach ($elements as $element) foreach ($elements as $element)
@ -546,4 +576,16 @@ class SimplePie_Sanitize
$element->removeAttribute($attrib); $element->removeAttribute($attrib);
} }
} }
/**
 * Set the given attributes on every element with the given tag name.
 *
 * @param string $tag Tag name passed to getElementsByTagName()
 * @param array $valuePairs Attribute name => attribute value
 * @param DOMDocument $document Document whose elements are modified in place
 */
protected function add_attr($tag, $valuePairs, $document)
{
	foreach ($document->getElementsByTagName($tag) as $node)
	{
		foreach ($valuePairs as $name => $content)
		{
			$node->setAttribute($name, $content);
		}
	}
}
} }

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -5,7 +5,7 @@
* A PHP-Based RSS and Atom Feed Framework. * A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution. * Takes the hard work out of managing a complete RSS/Atom solution.
* *
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without modification, are * Redistribution and use in source and binary forms, with or without modification, are
@ -33,8 +33,7 @@
* POSSIBILITY OF SUCH DAMAGE. * POSSIBILITY OF SUCH DAMAGE.
* *
* @package SimplePie * @package SimplePie
* @version 1.3.1 * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman * @author Ryan Parman
* @author Geoffrey Sneddon * @author Geoffrey Sneddon
* @author Ryan McCue * @author Ryan McCue

View File

@ -1,10 +1,10 @@
<?php <?php
// Full-Text RSS: Create Full-Text Feeds // Full-Text RSS: Create Full-Text Feeds
// Author: Keyvan Minoukadeh // Author: Keyvan Minoukadeh
// Copyright (c) 2015 Keyvan Minoukadeh // Copyright (c) 2017 Keyvan Minoukadeh
// License: AGPLv3 // License: AGPLv3
// Version: 3.6 // Version: 3.7
// Date: 2016-02-17 // Date: 2017-02-12
// More info: http://fivefilters.org/content-only/ // More info: http://fivefilters.org/content-only/
// Help: http://help.fivefilters.org // Help: http://help.fivefilters.org
@ -272,7 +272,7 @@ if (file_exists('custom_init.php')) require 'custom_init.php';
/////////////////////////////////////////////// ///////////////////////////////////////////////
// Check URL against list of blacklisted URLs // Check URL against list of blacklisted URLs
/////////////////////////////////////////////// ///////////////////////////////////////////////
if (!url_allowed($url)) die('URL blocked'); if (!url_allowed($url)) die($options->blocked_message);
/////////////////////////////////////////////// ///////////////////////////////////////////////
// Max entries // Max entries
@ -302,12 +302,20 @@ if (isset($_REQUEST['links']) && in_array($_REQUEST['links'], array('preserve',
$links = 'preserve'; $links = 'preserve';
} }
///////////////////////////////////////////////
// Image handling
///////////////////////////////////////////////
// Images are kept unless the request explicitly asks for removal
// (images=0 or images=remove).
$images = !(isset($_REQUEST['images']) && in_array($_REQUEST['images'], array('0', 'remove')));
/////////////////////////////////////////////// ///////////////////////////////////////////////
// Favour item titles in feed? // Favour item titles in feed?
/////////////////////////////////////////////// ///////////////////////////////////////////////
$favour_feed_titles = true; $favour_feed_titles = true;
if ($options->favour_feed_titles == 'user') { if ($options->favour_feed_titles == 'user') {
$favour_feed_titles = !isset($_REQUEST['use_extracted_title']); $favour_feed_titles = (!isset($_REQUEST['use_extracted_title']) || $_REQUEST['use_extracted_title'] === '0');
} else { } else {
$favour_feed_titles = $options->favour_feed_titles; $favour_feed_titles = $options->favour_feed_titles;
} }
@ -317,7 +325,7 @@ if ($options->favour_feed_titles == 'user') {
/////////////////////////////////////////////// ///////////////////////////////////////////////
$favour_effective_url = false; $favour_effective_url = false;
if ($options->favour_effective_url == 'user') { if ($options->favour_effective_url == 'user') {
$favour_effective_url = isset($_REQUEST['use_effective_url']); $favour_effective_url = (isset($_REQUEST['use_effective_url']) && $_REQUEST['use_effective_url'] !== '0');
} else { } else {
$favour_effective_url = $options->favour_effective_url; $favour_effective_url = $options->favour_effective_url;
} }
@ -333,6 +341,17 @@ if ($options->content === 'user') {
} }
} }
///////////////////////////////////////////////
// HTML5 output?
///////////////////////////////////////////////
if ($options->html5_output === 'user') {
	// When the choice is left to the user, HTML5 output is enabled only
	// for requests carrying content=html5.
	$options->html5_output = (isset($_REQUEST['content']) && $_REQUEST['content'] === 'html5');
}
/////////////////////////////////////////////// ///////////////////////////////////////////////
// Include summaries in output? // Include summaries in output?
/////////////////////////////////////////////// ///////////////////////////////////////////////
@ -367,7 +386,7 @@ if ($options->detect_language === 'user') {
$detect_language = $options->detect_language; $detect_language = $options->detect_language;
} }
$use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= 0); $use_cld = extension_loaded('cld');
///////////////////////////////////// /////////////////////////////////////
// Check for valid format // Check for valid format
@ -468,8 +487,7 @@ if (isset($_REQUEST['inputhtml']) && _FF_FTR_MODE == 'simple') {
////////////////////////////////// //////////////////////////////////
if ($options->caching) { if ($options->caching) {
debug('Caching is enabled...'); debug('Caching is enabled...');
$cache_id = md5($max.$url.(int)$valid_key.$accept.$links.(int)$favour_feed_titles.(int)$options->content.(int)$options->summary. $cache_id = md5($max.$url.(int)$valid_key.$accept.$links.$images.(int)$favour_feed_titles.(int)$options->content.(int)$options->html5_output.(int)$options->summary.(int)$xss_filter.(int)$favour_effective_url.(int)$exclude_on_fail.$format.$detect_language.$parser.$user_submitted_config._FF_FTR_MODE);
(int)$xss_filter.(int)$favour_effective_url.(int)$exclude_on_fail.$format.$detect_language.$parser.$user_submitted_config._FF_FTR_MODE);
$check_cache = true; $check_cache = true;
if ($options->apc && $options->smart_cache) { if ($options->apc && $options->smart_cache) {
apc_add("cache.$cache_id", 0, $options->cache_time*60); apc_add("cache.$cache_id", 0, $options->cache_time*60);
@ -554,6 +572,7 @@ SiteConfig::use_apc($options->apc);
$extractor->fingerprints = $options->fingerprints; $extractor->fingerprints = $options->fingerprints;
$extractor->allowedParsers = $options->allowed_parsers; $extractor->allowedParsers = $options->allowed_parsers;
$extractor->parserOverride = $parser; $extractor->parserOverride = $parser;
if (!$images) $extractor->stripImages = true;
if ($options->user_submitted_config && $user_submitted_config) { if ($options->user_submitted_config && $user_submitted_config) {
$extractor->setUserSubmittedConfig($user_submitted_config); $extractor->setUserSubmittedConfig($user_submitted_config);
} }
@ -792,7 +811,7 @@ foreach ($items as $key => $item) {
// if user has asked to see parsed HTML, show it and exit. // if user has asked to see parsed HTML, show it and exit.
if ($debug_show_parsed_html) { if ($debug_show_parsed_html) {
debug("Here's the full HTML after it's been parsed by Full-Text RSS:"); debug("Here's the full HTML after it's been parsed by Full-Text RSS:");
die($readability->dom->saveXML($readability->dom->documentElement)); die(make_html($readability->dom->documentElement));
} }
// is this a native ad? // is this a native ad?
if ($extract_result && $extractor->isNativeAd()) { if ($extract_result && $extractor->isNativeAd()) {
@ -801,6 +820,8 @@ foreach ($items as $key => $item) {
continue; // skip this feed item entry continue; // skip this feed item entry
} }
} }
$base_url = get_base_url($readability->dom);
if (!$base_url) $base_url = $effective_url;
$content_block = ($extract_result) ? $extractor->getContent() : null; $content_block = ($extract_result) ? $extractor->getContent() : null;
$extracted_title = ($extract_result) ? $extractor->getTitle() : ''; $extracted_title = ($extract_result) ? $extractor->getTitle() : '';
// Deal with multi-page articles // Deal with multi-page articles
@ -814,8 +835,8 @@ foreach ($items as $key => $item) {
while ($next_page_url = $extractor->getNextPageUrl()) { while ($next_page_url = $extractor->getNextPageUrl()) {
debug('--------'); debug('--------');
debug('Processing next page: '.$next_page_url); debug('Processing next page: '.$next_page_url);
// If we've got URL, resolve against $url // If we've got URL, resolve against $base_url
if ($next_page_url = make_absolute_str($effective_url, $next_page_url)) { if ($next_page_url = make_absolute_str($base_url, $next_page_url)) {
// check it's not what we have already! // check it's not what we have already!
if (!in_array($next_page_url, $multi_page_urls)) { if (!in_array($next_page_url, $multi_page_urls)) {
// it's not, so let's attempt to fetch it // it's not, so let's attempt to fetch it
@ -870,19 +891,24 @@ foreach ($items as $key => $item) {
if ($do_content_extraction) { if ($do_content_extraction) {
// if we failed to extract content... // if we failed to extract content...
if (!$extract_result) { if (!$extract_result) {
if ($exclude_on_fail) { if ($exclude_on_fail && (_FF_FTR_MODE != 'simple')) {
debug('Failed to extract, so skipping (due to exclude on fail parameter)'); debug('Failed to extract, so skipping (due to exclude on fail parameter)');
continue; // skip this and move to next item continue; // skip this and move to next item
} }
//TODO: get text sample for language detection if (_FF_FTR_MODE === 'simple') {
$html = $options->error_message; $html = '';
// keep the original item description } else {
$html .= $item->get_description(); //TODO: get text sample for language detection
$html = $options->error_message;
// keep the original item description
$html .= $item->get_description();
}
} else { } else {
$readability->clean($content_block, 'select'); $readability->clean($content_block, 'select');
if ($options->rewrite_relative_urls) { if ($options->rewrite_relative_urls) {
$base_url = get_base_url($readability->dom); // we've got $base_url already above
if (!$base_url) $base_url = $effective_url; //$base_url = get_base_url($readability->dom);
//if (!$base_url) $base_url = $effective_url;
// rewrite URLs // rewrite URLs
make_absolute($base_url, $content_block); make_absolute($base_url, $content_block);
} }
@ -908,20 +934,32 @@ foreach ($items as $key => $item) {
// convert content block to HTML string // convert content block to HTML string
// Need to preserve things like body: //img[@id='feature'] // Need to preserve things like body: //img[@id='feature']
if (in_array(strtolower($content_block->tagName), array('div', 'article', 'section', 'header', 'footer', 'li', 'td'))) { if (in_array(strtolower($content_block->tagName), array('div', 'article', 'section', 'header', 'footer', 'li', 'td'))) {
$html = $content_block->innerHTML; //$html = $content_block->innerHTML;
$html = make_html($content_block, true); // true = innerHTML
//} elseif (in_array(strtolower($content_block->tagName), array('td', 'li'))) { //} elseif (in_array(strtolower($content_block->tagName), array('td', 'li'))) {
// $html = '<div>'.$content_block->innerHTML.'</div>'; // $html = '<div>'.$content_block->innerHTML.'</div>';
} else { } else {
$html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML //$html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML
$html = make_html($content_block); // outerHTML
} }
//unset($content_block); //unset($content_block);
// post-processing cleanup // post-processing cleanup
$html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html); $html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html);
if ($links == 'remove') { if ($links == 'remove') {
$html = preg_replace('!</?a[^>]*>!', '', $html); $html = preg_replace('!<a\s+[^>]*>!', '', $html);
$html = preg_replace('!</a>!', '', $html);
} }
// get text sample for language detection // get text sample for language detection
$text_sample = strip_tags(substr($html, 0, 500)); $_og = $extractor->getOpenGraph();
$text_sample = '';
if (isset($_og['og:title'])) {
$text_sample .= $_og['og:title'];
}
if (isset($_og['og:description'])) {
$text_sample .= ' '.$_og['og:description'];
}
$text_sample .= mb_substr($content_block->textContent, 0, 3000);
unset($_og);
$html = make_substitutions($options->message_to_prepend).$html; $html = make_substitutions($options->message_to_prepend).$html;
$html .= make_substitutions($options->message_to_append); $html .= make_substitutions($options->message_to_append);
} }
@ -1007,10 +1045,17 @@ foreach ($items as $key => $item) {
// add open graph // add open graph
if ($opengraph = $extractor->getOpenGraph()) { if ($opengraph = $extractor->getOpenGraph()) {
foreach ($opengraph as $og_prop => $og_val) { foreach ($opengraph as $_prop => $_val) {
$newitem->addElement($og_prop, $og_val); $newitem->addElement($_prop, $_val);
} }
} }
// add Twitter Card
if ($twitterCard = $extractor->getTwitterCard()) {
foreach ($twitterCard as $_prop => $_val) {
$newitem->addElement($_prop, $_val);
}
}
unset($_prop, $_val);
// add language // add language
if ($detect_language) { if ($detect_language) {
@ -1184,6 +1229,7 @@ function get_self_url() {
if (isset($_GET['accept'])) $self .= '&accept='.urlencode($_GET['accept']); if (isset($_GET['accept'])) $self .= '&accept='.urlencode($_GET['accept']);
if (isset($_GET['max'])) $self .= '&max='.(int)$_GET['max']; if (isset($_GET['max'])) $self .= '&max='.(int)$_GET['max'];
if (isset($_GET['links'])) $self .= '&links='.urlencode($_GET['links']); if (isset($_GET['links'])) $self .= '&links='.urlencode($_GET['links']);
if (isset($_GET['images'])) $self .= '&images='.urlencode($_GET['images']);
if (isset($_GET['exc'])) $self .= '&exc='.urlencode($_GET['exc']); if (isset($_GET['exc'])) $self .= '&exc='.urlencode($_GET['exc']);
if (isset($_GET['format'])) $self .= '&format='.urlencode($_GET['format']); if (isset($_GET['format'])) $self .= '&format='.urlencode($_GET['format']);
if (isset($_GET['callback'])) $self .= '&callback='.urlencode($_GET['callback']); if (isset($_GET['callback'])) $self .= '&callback='.urlencode($_GET['callback']);
@ -1409,6 +1455,32 @@ function make_absolute_str($base, $url) {
return false; return false;
} }
} }
/**
 * Serialise a DOM node to a markup string.
 *
 * The global $options->html5_output flag selects the serialiser: when it is
 * truthy a Masterminds\HTML5 instance is used, otherwise the node is written
 * out with DOMDocument::saveXML() (outer markup) or read from the node's
 * innerHTML property (inner markup).
 *
 * @param DOMNode $dom   node to serialise
 * @param bool    $inner false (default) = include the node's own tag (outerHTML),
 *                       true = only the node's children (innerHTML)
 * @return string serialised markup
 */
function make_html($dom, $inner=false) {
	global $options;
	// one HTML5 serialiser is created on first use and kept for later calls
	static $html5 = null;

	// XML serialisation path
	if (!$options->html5_output) {
		if ($inner) {
			// NOTE(review): innerHTML is not a native DOMElement property -
			// presumably supplied by a DOMElement subclass registered elsewhere; confirm.
			return $dom->innerHTML;
		}
		return $dom->ownerDocument->saveXML($dom);
	}

	// HTML5 serialisation path
	if ($html5 === null) {
		$html5 = new Masterminds\HTML5(array('disable_html_ns' => true));
	}
	if (!$inner) {
		return $html5->saveHTML($dom);
	}
	// inner markup = each child serialised in turn and concatenated
	if (!$dom->hasChildNodes()) {
		return '';
	}
	$markup = '';
	foreach ($dom->childNodes as $node) {
		$markup .= $html5->saveHTML($node);
	}
	return $markup;
}
// returns single page response, or false if not found // returns single page response, or false if not found
function get_single_page($item, $html, $url) { function get_single_page($item, $html, $url) {
global $http, $extractor; global $http, $extractor;
@ -1457,8 +1529,10 @@ function get_single_page($item, $html, $url) {
} }
} }
} }
// If we've got URL, resolve against $url $base_url = get_base_url($readability->dom);
if (isset($single_page_url) && ($single_page_url = make_absolute_str($url, $single_page_url))) { if (!$base_url) $base_url = $url;
// If we've got URL, resolve against $base_url
if (isset($single_page_url) && ($single_page_url = make_absolute_str($base_url, $single_page_url))) {
// check it's not what we have already! // check it's not what we have already!
if ($single_page_url != $url) { if ($single_page_url != $url) {
// it's not, so let's try to fetch it... // it's not, so let's try to fetch it...

View File

@ -1,7 +1,7 @@
Full-Text RSS site config files Full-Text RSS site config files
================ ================
[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If there are no rules found, it tries to detect the content block automatically. [Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If no rules are found, it tries to detect the content block automatically.
This repository contains the site-specific extraction rules we rely on in Full-Text RSS. This repository contains the site-specific extraction rules we rely on in Full-Text RSS.

View File

@ -1,9 +1,6 @@
# Puppet file intended to install server components for self-hosted FiveFilters.org web services # Puppet file intended to install server components for FiveFilters.org web services
# This file is intended for base images of: # This file is intended for base images of:
# Ubuntu 15.10 # Ubuntu 16.04
# Please see here for more information on how to use this:
# http://help.fivefilters.org/customer/en/portal/articles/1143210-hosting
Exec { path => "/bin:/usr/bin:/usr/local/bin" } Exec { path => "/bin:/usr/bin:/usr/local/bin" }
@ -31,6 +28,10 @@ class init {
APT::Periodic::Unattended-Upgrade "1";', APT::Periodic::Unattended-Upgrade "1";',
require => Package["unattended-upgrades"] require => Package["unattended-upgrades"]
} }
#exec { "configure-unattended-upgrades":
# require => Package["unattended-upgrades"],
# command => "sudo dpkg-reconfigure unattended-upgrades",
#}
} }
# make sure apt-update run before package # make sure apt-update run before package
@ -57,6 +58,11 @@ class apache {
notify => Exec["restart-apache"] notify => Exec["restart-apache"]
} }
exec { "enable-prefork":
require => Package["apache2"],
command => "sudo a2dismod mpm_event && sudo a2enmod mpm_prefork",
}
file { "/etc/apache2/sites-available/fivefilters.conf": file { "/etc/apache2/sites-available/fivefilters.conf":
ensure => present, ensure => present,
content => "<VirtualHost *:80> content => "<VirtualHost *:80>
@ -104,24 +110,34 @@ class apache {
} }
class php { class php {
package { "php5": ensure => latest } package { "php7.0": ensure => latest }
package { "libapache2-mod-php5": ensure => latest } #package { "php-apc": ensure => latest }
package { "php5-cli": ensure => latest } package { "libapache2-mod-php7.0": ensure => latest }
package { "php5-tidy": ensure => latest } package { "php7.0-cli": ensure => latest }
package { "php5-curl": ensure => latest } package { "php7.0-tidy": ensure => latest }
package { "libcurl4-gnutls-dev": ensure => latest } package { "php7.0-curl": ensure => latest }
#package { "libcurl4-gnutls-dev": ensure => latest }
package { "libcurl4-openssl-dev": ensure => latest }
package { "libpcre3-dev": ensure => latest } package { "libpcre3-dev": ensure => latest }
package { "make": ensure=>latest } package { "make": ensure=>latest }
package { "php-pear": ensure => latest } package { "php-pear": ensure => latest }
package { "php5-dev": ensure => latest } package { "php7.0-dev": ensure => latest }
package { "php5-intl": ensure => latest } package { "php7.0-intl": ensure => latest }
package { "php5-gd": ensure => latest } package { "php7.0-gd": ensure => latest }
package { "php5-imagick": ensure => latest } package { "php7.0-mbstring": ensure => latest }
package { "php5-json": ensure => latest } package { "php-imagick": ensure => latest }
package { "php7.0-json": ensure => latest }
#package { "php-http": ensure => latest } #package { "php-http": ensure => latest }
package { "php5-raphf": ensure => latest } package { "php-raphf": ensure => latest }
package { "php5-propro": ensure => latest } package { "php-propro": ensure => latest }
file { "/etc/php5/mods-available/fivefilters-php.ini": package { "php7.0-zip": ensure => latest }
# for gumbo-php
package { "libgumbo1": ensure => latest }
package { "libgumbo-dev": ensure => latest }
package { "libxml2": ensure => latest }
package { "libxml2-dev": ensure => latest }
file { "/etc/php/7.0/mods-available/fivefilters-php.ini":
ensure => present, ensure => present,
content => "engine = On content => "engine = On
expose_php = Off expose_php = Off
@ -134,17 +150,17 @@ class php {
default_socket_timeout = 120 default_socket_timeout = 120
file_uploads = Off file_uploads = Off
date.timezoe = 'UTC'", date.timezoe = 'UTC'",
require => Package["php5"], require => Package["php7.0"],
before => Exec["enable-fivefilters-php"], before => Exec["enable-fivefilters-php"],
} }
exec { "enable-fivefilters-php": exec { "enable-fivefilters-php":
command => "sudo php5enmod fivefilters-php", command => "sudo phpenmod fivefilters-php",
} }
} }
class php_pecl_http { class php_pecl_http {
# Important: this file needs to be in place before we install the HTTP extension # Important: this file needs to be in place before we install the HTTP extension
file { "/etc/php5/mods-available/http.ini": file { "/etc/php/7.0/mods-available/http.ini":
ensure => present, ensure => present,
#owner => root, group => root, mode => 444, #owner => root, group => root, mode => 444,
content => "; priority=25 content => "; priority=25
@ -156,7 +172,7 @@ extension=http.so",
} }
exec { "enable-http": exec { "enable-http":
command => "sudo php5enmod http", command => "sudo phpenmod http",
require => Class["php"], require => Class["php"],
} }
@ -171,10 +187,9 @@ extension=http.so",
} }
exec { "install-http-pecl": exec { "install-http-pecl":
command => "pecl install https://pecl.php.net/get/pecl_http-2.5.5.tgz", # For some reason this command doesn't return a success code, even though
#command => "sudo pecl install pecl_http", # it appears to succeed. So we use || /bin/true
# the above is now version 3.0 - requires PHP7 command => "sudo pecl install channel://pecl.php.net/pecl_http-3.1.0.tgz || /bin/true",
#command => "pecl install http://pecl.php.net/get/pecl_http-1.7.6.tgz",
#creates => "/tmp/needed/directory", #creates => "/tmp/needed/directory",
require => Exec["enable-http"] require => Exec["enable-http"]
} }
@ -182,12 +197,12 @@ extension=http.so",
class php_pecl_apcu { class php_pecl_apcu {
exec { "install-apcu-pecl": exec { "install-apcu-pecl":
command => "sudo pecl install channel://pecl.php.net/APCu-4.0.10", command => "sudo pecl install channel://pecl.php.net/APCu-5.1.8",
#creates => "/tmp/needed/directory", #creates => "/tmp/needed/directory",
require => Class["php"] require => Class["php"]
} }
file { "/etc/php5/mods-available/apcu.ini": file { "/etc/php/7.0/mods-available/apcu.ini":
ensure => present, ensure => present,
#owner => root, group => root, mode => 444, #owner => root, group => root, mode => 444,
content => "extension=apcu.so", content => "extension=apcu.so",
@ -195,63 +210,66 @@ class php_pecl_apcu {
before => Exec["enable-apcu"] before => Exec["enable-apcu"]
} }
exec { "enable-apcu": exec { "enable-apcu":
command => "sudo php5enmod apcu", command => "sudo phpenmod apcu",
notify => Exec["restart-apache"], notify => Exec["restart-apache"],
} }
} }
class php_cld { class php_gumbo {
# see https://github.com/lstrojny/php-cld # see https://github.com/layershifter/gumbo-php
package { "git": ensure => latest } package { "git": ensure => latest }
package { "build-essential": ensure => latest } package { "build-essential": ensure => latest }
file { "/tmp/cld": file { "/tmp/gumbo":
ensure => absent, ensure => absent,
before => Exec["download-cld"], before => Exec["download-gumbo"],
recurse => true, recurse => true,
force => true force => true
} }
exec { "download-cld": exec { "download-gumbo":
command => "git clone git://github.com/lstrojny/php-cld.git /tmp/cld", command => "git clone git://github.com/layershifter/gumbo-php.git /tmp/gumbo",
require => [Package["git"], Class["php"]], require => [Package["git"], Class["php"]]
before => Exec["build-cld"]
} }
exec { "checkout-cld-version": exec { "install-gumbo-extension":
# recent version does not work, so we switch to an older one command => "phpize && ./configure && make && sudo make install",
command => "git reset --hard fd5aa5721b01bfe547ff6674fa0daa9c3b791ca3", cwd => "/tmp/gumbo",
cwd => "/tmp/cld",
require => Exec["download-cld"],
before => Exec["build-cld"]
}
exec { "build-cld":
command => "./build.sh",
#new cld:command => "sh compile_libs.sh",
cwd => "/tmp/cld/vendor/libcld",
require => Package["build-essential"],
provider => "shell"
}
exec { "install-cld-extension":
command => "phpize && ./configure --with-libcld-dir=/tmp/cld/vendor/libcld && make && sudo make install",
cwd => "/tmp/cld",
provider => "shell", provider => "shell",
require => Exec["build-cld"] require => Exec["download-gumbo"]
} }
file { "/etc/php5/mods-available/cld.ini": file { "/etc/php/7.0/mods-available/gumbo.ini":
ensure => present, ensure => present,
#owner => root, group => root, mode => 444, #owner => root, group => root, mode => 444,
content => "extension=cld.so", content => "extension=gumbo.so",
require => Exec["install-cld-extension"], require => Exec["install-gumbo-extension"],
before => Exec["enable-cld"], before => Exec["enable-gumbo"]
} }
exec { "enable-cld": exec { "enable-gumbo":
command => "sudo php5enmod cld", command => "sudo phpenmod gumbo",
notify => Exec["restart-apache"],
require => Exec["install-gumbo-extension"]
}
}
class php_pecl_apc_bc {
exec { "install-apc-bc-pecl":
command => "sudo pecl install channel://pecl.php.net/apcu_bc-1.0.3",
#creates => "/tmp/needed/directory",
require => Class["php_pecl_apcu"]
}
file { "/etc/php/7.0/mods-available/z_apc_bc.ini":
ensure => present,
#owner => root, group => root, mode => 444,
content => "extension=apc.so",
require => Exec["install-apc-bc-pecl"],
before => Exec["enable-apc-bc"]
}
exec { "enable-apc-bc":
command => "sudo phpenmod z_apc_bc",
notify => Exec["restart-apache"], notify => Exec["restart-apache"],
} }
} }
@ -261,12 +279,17 @@ class final {
command => "echo 'vm.swappiness = 10' >> /etc/sysctl.conf && sudo sysctl -p", command => "echo 'vm.swappiness = 10' >> /etc/sysctl.conf && sudo sysctl -p",
provider => "shell" provider => "shell"
} }
exec { "enable-php":
command => "sudo a2enmod php7.0 && sudo service apache2 restart",
provider => "shell"
}
} }
include init include init
include apache include apache
include php include php
include php_pecl_apcu include php_pecl_apcu
include php_cld include php_pecl_apc_bc
include php_pecl_http include php_pecl_http
include php_gumbo
include final include final