diff --git a/changelog.txt b/changelog.txt index 85bf771..9918041 100644 --- a/changelog.txt +++ b/changelog.txt @@ -2,6 +2,26 @@ FiveFilters.org: Full-Text RSS http://fivefilters.org/content-only/ CHANGELOG ------------------------------------ +3.7 (2017-02-12) + - Request HTML5 output using HTML5-PHP - new config option $options->html5_output and new request parameter &content=html5 + - Improve support for lazy-loading images + - Feed preview now displays RTL content correctly (added dir='auto' to feed.xsl) + - New request parameter images=0 to remove all images from extracted content + - Open Graph and Twitter card metadata now returned in JSON output (no longer in RSS output) + - Metadata now returned in extract.php even if article extraction fails + - Additional data returned in extract.php for developers: 'domain', 'word_count' + - HTML5-PHP library updated + - SimplePie library updated (fixes PHP 7.1 issue) + - New VPS Puppet script (ubuntu-16.04.pp) - installs PHP 7 and Gumbo PHP extension for faster HTML5 parsing + - Bug fix: Language detection now works correctly with PHP 7 + - Bug fix: Take base href URL into account when following next_page/single_page links (thanks Lukas!) 
+ - Bug fix: VPS Puppet script installs new version of PECL HTTP extension that fixes problem when requesting punycode encoded domains + - Site config files updated for better extraction + - Compatibility test file updated (will tell you if Gumbo PHP will be used) + - Tidy won't be used to repair HTML if using an HTML5 parser (unless explicitly requested in site config file - tidy: yes) + - New config option $options->blocked_message - set what a user will see when a URL is blocked by Full-Text RSS + - Other fixes/improvements + 3.6 (2016-02-21) - Insert og:image (if we find one) at the top of the article when no images have been extracted - Additional lazy image load handling - helps preserve more images designed for JS-enabled browsers diff --git a/config.php b/config.php index a5d60b3..06f6ec2 100644 --- a/config.php +++ b/config.php @@ -59,6 +59,20 @@ $options->max_entries = 10; // from the output. $options->content = 'user'; +// HTML5 output +// ---------------------- +// By default, Full-Text RSS uses libxml to convert the parsed DOM tree back into HTML. +// If this is enabled, we'll use HTML5-PHP to produce the HTML. This will be a little +// slower, but might produce better results, adhering to the HTML5 spec. +// +// Note: in a future release we might make HTML5 output the default. +// +// Possible values... +// HTML5 (slower): true +// libxml (faster): false +// libxml unless user overrides (&content=html5): 'user' (default) +$options->html5_output = 'user'; + // Excerpts // ---------------------- // By default Full-Text RSS does not include excerpts in the output. @@ -242,6 +256,16 @@ $options->allowed_urls = array(); // Note: for feeds, this option applies to both feed URLs and item URLs within those feeds. $options->blocked_urls = array(); +// Blocked message +// ----------------------- +// If a request is blocked outright because of the two rules above, this is the message +// that is shown. 
Please note: +// * If the input URL is a feed and it's not blocked, feed items that are blocked will +// be skipped, and this message will not be shown. +// * If the input URL itself is blocked (feed or not), we will output this message instead +// of producing a feed. +$options->blocked_message = 'URL blocked'; + // Key holder(s) only? // ---------------------- // Set this to true if you want to restrict access only to @@ -313,7 +337,7 @@ $options->xss_filter = 'user'; // Use effective URL in place of item URL // ---------------------- // When we extract content for feed items, we often end up at a different URL than the -// one in the original feed. This is often a result of URL shorteners being used or +// one in the original feed. This is often a result of URL shorteners or // tracking services being used by the feed publisher. We include the final // (effective) URL we reached to get the content inside the dc:identifier field. // If you enable this, we'll also use this URL in place of the original item URL @@ -500,7 +524,7 @@ $options->cache_cleanup = 100; /// DO NOT CHANGE ANYTHING BELOW THIS /////////// ///////////////////////////////////////////////// -if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.6'); +if (!defined('_FF_FTR_VERSION')) define('_FF_FTR_VERSION', '3.7'); if (basename(__FILE__) == 'config.php') { if (file_exists(dirname(__FILE__).'/custom_config.php')) { diff --git a/css/feed.xsl b/css/feed.xsl index 89fd7a0..77b10b7 100644 --- a/css/feed.xsl +++ b/css/feed.xsl @@ -10,13 +10,13 @@ -
+

(full-text feed)

You are viewing an auto-generated full-text RSS feed. RSS feeds allow you to stay up to date with the latest news and features you want from websites.
Subscribe to this feed.

Below is the latest content available from this feed.

-
+
    diff --git a/extract.php b/extract.php index bed72da..b7a3dab 100644 --- a/extract.php +++ b/extract.php @@ -50,8 +50,12 @@ $_POST['accept'] = 'html'; $_POST['format'] = 'json'; // Enable excerpts $_POST['summary'] = '1'; +// Guess language if it's not already marked up +$_POST['lang'] = '2'; // Don't produce result if extraction fails -$_POST['exc'] = '1'; +// Additional meta elements might still be useful (e.g. og/twitter elements), +// so we're commenting this out from FTR 3.7 +//$_POST['exc'] = '1'; // Enable XSS filtering (unless explicitly disabled) if (isset($_POST['xss']) && $_POST['xss'] !== '0') { $_POST['xss'] = '1'; diff --git a/ftr_compatibility_test.php b/ftr_compatibility_test.php index 43c361f..98ac060 100644 --- a/ftr_compatibility_test.php +++ b/ftr_compatibility_test.php @@ -16,7 +16,7 @@ SimplePie.org. We have kept most of their checks intact as we use SimplePie in o http://github.com/simplepie/simplepie/tree/master/compatibility_test/ */ -$app_name = 'Full-Text RSS 3.6'; +$app_name = 'Full-Text RSS 3.7'; // Full-Text RSS is not yet compatible with HHVM, that's why we check for it with HHVM_VERSION. //$php_ok = (function_exists('version_compare') && version_compare(phpversion(), '5.2.0', '>=') && !defined('HHVM_VERSION')); @@ -31,6 +31,7 @@ $curl_ok = function_exists('curl_exec'); $parallel_ok = ((extension_loaded('http') && class_exists('http\Client\Request')) || ($curl_ok && function_exists('curl_multi_init'))); $allow_url_fopen_ok = (bool)ini_get('allow_url_fopen'); $filter_ok = extension_loaded('filter'); +$gumbo_ok = class_exists('Layershifter\Gumbo\Parser'); if (extension_loaded('xmlreader')) { $xml_ok = true; @@ -376,15 +377,16 @@ div.chunk { ?>

    HTML parser

    -

    Full-Text RSS uses the fast libxml parser (the default PHP parser) but it can also make use of HTML5-PHP (an HTML5 parser written in PHP) if your version of PHP supports it. The latter might produce better results for some sites, especially if Tidy is not available on your server, however, it is slower than libxml.

    +

    Full-Text RSS uses the fast libxml parser (the default PHP parser) but it will automatically make use of Gumbo (a fast HTML5 parser) if the Gumbo PHP extension is installed. Alternatively, HTML5-PHP (an HTML5 parser written in PHP) can be used by passing &parser=html5php as a parameter. The latter might produce better results than libxml for some sites, but is a little slower.

    = 0) { - echo '

    HTML5-PHP can be used on this server.

    '; + if ($gumbo_ok) { + echo '

    Gumbo PHP will be used on this server.

    '; } else { - echo '

    You need at least PHP 5.3 to be able to use HTML5-PHP.

    '; + echo '

    libxml will be used by default, unless HTML5 parsing is requested.

    '; } ?> +

    Automatic site config updates

    Full-Text RSS can be configured to update its site config files (which determine how content should be extracted for certain sites) by downloading the latest set from our GitHub repository. This functionality is not required, and can be done manually. To configure this to occur automatically, you will need zip support enabled in PHP - we make use of the ZipArchive class.

    content - 0, 1 (default) - If set to 0, the extracted content will not be included in the output. + 0, 1 (default), html5 + If set to 0, the extracted content will not be included in the output. If set to html5, we'll output HTML5. @@ -280,6 +280,12 @@ if (!defined('_FF_FTR_INDEX')) { preserve (default), footnotes, remove Links can either be preserved, made into footnotes, or removed. None of these options affect the link text, only the hyperlink itself. + + + images + 1 (default), 0 + If set to 0, images and associated elements (img, figure, figcaption) will be removed from the output. + xss @@ -313,7 +319,7 @@ if (!defined('_FF_FTR_INDEX')) { parser html5php, libxml - The default parser is libxml as it's the fastest. HTML5-PHP is an HTML5 parser implemented in PHP. It's slower than libxml, but can often produce better results. You can request HTML5-PHP be used as the parser in a site-specific config file (to ensure it gets used for all URLs for that site), or explicitly via this request parameter. + The default parser is libxml as it's the fastest. HTML5-PHP is an HTML5 parser implemented in PHP. It's slower than libxml, but can often produce better results. You can request HTML5-PHP be used as the parser in a site-specific config file (to ensure it gets used for all URLs for that site), or explicitly via this request parameter. Note: if the Gumbo PHP extension is available, that will be used regardless of this parameter or site config file directives. @@ -333,9 +339,11 @@ if (!defined('_FF_FTR_INDEX')) {

    Response (example)

    -

    Simple JSON output containing extracted article title, content, and more. It was produced from the following input URL: http://chomsky.info/articles/20131105.htm

    +

    Simple JSON output containing extracted article title, content, and more. It was produced from the following input URL: http://www.truthdig.com/report/print/make_america_ungovernable_20170205

    - {
        "title": "De-Americanizing the World",
        "excerpt": "During the latest episode of the Washington farce that has astonish…",
        "date": null,
        "author": "Noam Chomsky",
        "language": "en",
        "url": "http://chomsky.info/articles/20131105.htm",
        "effective_url": "http://chomsky.info/articles/20131105.htm",
        "content": "<p>During the latest episode of the Washington farce that has aston…"
    }
    + + {
        "title": "Make America Ungovernable",
        "excerpt": "By Chris Hedges Mr. Fish / Truthdig Donald Trump’s regime is rapidl…",
        "date": "2017-02-05T23:34:57+00:00",
        "author": null,
        "language": "en",
        "url": "http://www.truthdig.com/report/item/make_america_ungovernable_20170…",
        "effective_url": "http://www.truthdig.com/report/print/make_america_ungovernable_2017…",
        "domain": "truthdig.com",
        "word_count": 2284,
        "og_url": "http://www.truthdig.com/report/print/make_america_ungovernable_2017…",
        "og_title": "Make America Ungovernable: Chris Hedges",
        "og_description": "The window to overthrow the Trump regime is rapidly closing. We mus…",
        "og_image": null,
        "og_type": "article",
        "twitter_card": null,
        "twitter_site": "@truthdig",
        "twitter_creator": "@truthdig",
        "twitter_image": null,
        "twitter_title": "Make America Ungovernable | Truthdig: Drilling Beneath the Headline…",
        "twitter_description": "The window to overthrow the Trump regime is rapidly closing. We mus…",
        "content": "<h4 class=\"date\">Posted on Feb 5</h4>…"
    }
    +

    Note: For brevity the output above is truncated.


    @@ -378,12 +386,12 @@ if (!defined('_FF_FTR_INDEX')) { summary 0 (default), 1 If set to 1, an excerpt will be included for each item in the output. - + content - 0, 1 (default) - If set to 0, the extracted content will not be included in the output. + 0, 1 (default), html5 + If set to 0, the extracted content will not be included in the output. If set to html5, we'll output HTML5. @@ -392,6 +400,12 @@ if (!defined('_FF_FTR_INDEX')) { Links can either be preserved, made into footnotes, or removed. None of these options affect the link text, only the hyperlink itself. + + images + 1 (default), 0 + If set to 0, images and associated elements (img, figure, figcaption) will be removed from the output. + + exc 0 (default), 1 @@ -448,7 +462,7 @@ if (!defined('_FF_FTR_INDEX')) { parser html5php, libxml - The default parser is libxml as it's the fastest. HTML5-PHP is an HTML5 parser implemented in PHP. It's slower than libxml, but can often produce better results. You can request HTML5-PHP be used as the parser in a site-specific config file (to ensure it gets used for all URLs for that site), or explicitly via this request parameter. + The default parser is libxml as it's the fastest. HTML5-PHP is an HTML5 parser implemented in PHP. It's slower than libxml, but can often produce better results. You can request HTML5-PHP be used as the parser in a site-specific config file (to ensure it gets used for all URLs for that site), or explicitly via this request parameter. Note: if the Gumbo PHP extension is available, that will be used regardless of this parameter or site config file directives. @@ -480,9 +494,15 @@ if (!defined('_FF_FTR_INDEX')) { use_extracted_title - [no value] - By default, if the input URL points to a feed, item titles in the generated feed will not be changed - we assume item titles in feeds are not truncated. If you'd like them to be replaced with titles Full-Text RSS extracts, use this parameter in the request (the value does not matter). 
To enable/disable this for all feeds, see the config file - specifically $options->favour_feed_titles - + 0 (default), 1 + By default, if the input URL points to a feed, item titles in the generated feed will not be changed - we assume item titles in feeds are not truncated. If you'd like them to be replaced with titles Full-Text RSS extracts, use this parameter in the request. To enable/disable this for all feeds, see the config file - specifically $options->favour_feed_titles + + + + use_effective_url + 0 (default), 1 + When we extract content for feed items, we often end up at a different URL than the one in the original feed. This is often a result of URL shorteners or tracking services being used by the feed publisher. We include the final (effective) URL we reached to get the content inside the dc:identifier field. If you enable this, we'll also use this URL in place of the original item URL in the new feed we produce. To enable/disable this for all feeds, see the config file - specifically $options->favour_effective_url + max @@ -496,7 +516,7 @@ if (!defined('_FF_FTR_INDEX')) {

    Response (example)

    JSON output produced for the BBC feed http://feeds.bbci.co.uk/news/rss.xml. You can also request regular RSS.

    - {
        "rss": {
            "@attributes": {
                "version": "2.0"
            }
    ,
            "channel": {
                "title": "BBC News - Home",
                "link": "http://www.bbc.co.uk/news/#sa-ns_mchannel=rss&amp;ns_source=PublicR…",
                "description": "The latest stories from the Home section of the BBC News web site.",
                "ttl": 15,
                "image": {
                    "title": "BBC News - Home",
                    "link": "http://www.bbc.co.uk/news/#sa-ns_mchannel=rss&amp;ns_source=PublicR…",
                    "url": "http://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif"
                }
    ,
                "item": [
                    {
                        "title": "Russia's Putin visits annexed Crimea",
                        "link": "http://www.bbc.co.uk/news/world-europe-27344029#sa-ns_mchannel=rss&…",
                        "guid": "http://www.bbc.co.uk/news/world-europe-27344029#sa-ns_mchannel=rss&…",
                        "description": "President Putin: \"[Crimeans have] proved their loyalty to a histor…",
                        "content_encoded": "<!-- Adding hypertab -->&#13;\n&#13;\n&#13;\n<!-- end of hypertab -…",
                        "pubDate": "Fri, 09 May 2014 15:02:04 +0000",
                        "dc_language": "en-gb",
                        "dc_format": "text/html",
                        "dc_identifier": "http://www.bbc.co.uk/news/world-europe-27344029",
                        "media_thumbnail": [
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74751000/jpg/_74751301_ycst2i…"
                                }

                            }
    ,
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74751000/jpg/_74751302_ycst2i…"
                                }

                            }

                        ]

                    }
    ,
                    {
                        "title": "Harris 'assaulted daughter's friend'",
                        "link": "http://www.bbc.co.uk/news/uk-27340134#sa-ns_mchannel=rss&ns_source=…",
                        "guid": "http://www.bbc.co.uk/news/uk-27340134#sa-ns_mchannel=rss&amp;ns_sou…",
                        "description": "Rolf Harris arrives at court flanked by his wife and daughter Rolf …",
                        "content_encoded": "<!-- Embedding the video player -->&#13;\n<!-- This is the embedd…",
                        "pubDate": "Fri, 09 May 2014 15:21:52 +0000",
                        "dc_language": "en-gb",
                        "dc_format": "text/html",
                        "dc_identifier": "http://www.bbc.co.uk/news/uk-27340134",
                        "media_thumbnail": [
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74740000/jpg/_74740642_hi0221…"
                                }

                            }
    ,
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74740000/jpg/_74740643_hi0221…"
                                }

                            }

                        ]

                    }
    ,
                    {
                        "title": "Nigeria 'ignored' school warning",
                        "link": "http://www.bbc.co.uk/news/world-africa-27344863#sa-ns_mchannel=rss&…",
                        "guid": "http://www.bbc.co.uk/news/world-africa-27344863#sa-ns_mchannel=rss&…",
                        "description": "Nigeria's military had advance warning of the attack on a school at…",
                        "content_encoded": "<div class=\"caption full-width\">&#13;\n <img src=\"http://news.b…",
                        "pubDate": "Fri, 09 May 2014 15:48:34 +0000",
                        "dc_language": "en-gb",
                        "dc_format": "text/html",
                        "dc_identifier": "http://www.bbc.co.uk/news/world-africa-27344863",
                        "media_thumbnail": [
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74749000/jpg/_74749855_747495…"
                                }

                            }
    ,
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74749000/jpg/_74749856_747495…"
                                }

                            }

                        ]

                    }

                ]

            }

        }

    }
    + {
        "rss": {
            "@attributes": {
                "version": "2.0"
            }
    ,
            "channel": {
                "title": "BBC News - Home",
                "link": "http://www.bbc.co.uk/news/#sa-ns_mchannel=rss&amp;ns_source=PublicR…",
                "description": "The latest stories from the Home section of the BBC News web site.",
                "ttl": 15,
                "image": {
                    "title": "BBC News - Home",
                    "link": "http://www.bbc.co.uk/news/#sa-ns_mchannel=rss&amp;ns_source=PublicR…",
                    "url": "http://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif"
                }
    ,
                "item": [
                    {
                        "title": "Russia's Putin visits annexed Crimea",
                        "link": "http://www.bbc.co.uk/news/world-europe-27344029#sa-ns_mchannel=rss&…",
                        "guid": "http://www.bbc.co.uk/news/world-europe-27344029#sa-ns_mchannel=rss&…",
                        "description": "President Putin: \"[Crimeans have] proved their loyalty to a histor…",
                        "content_encoded": "<!-- Adding hypertab -->&#13;\n&#13;\n&#13;\n<!-- end of hypertab -…",
                        "pubDate": "Fri, 09 May 2014 15:02:04 +0000",
                        "dc_language": "en-gb",
                        "dc_format": "text/html",
                        "dc_identifier": "http://www.bbc.co.uk/news/world-europe-27344029",
                        "media_thumbnail": [
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74751000/jpg/_74751301_ycst2i…"
                                }

                            }
    ,
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74751000/jpg/_74751302_ycst2i…"
                                }

                            }

                        ]

                    }
    ,
                    {
                        "title": "Harris 'assaulted daughter's friend'",
                        "link": "http://www.bbc.co.uk/news/uk-27340134#sa-ns_mchannel=rss&ns_source=…",
                        "guid": "http://www.bbc.co.uk/news/uk-27340134#sa-ns_mchannel=rss&amp;ns_sou…",
                        "description": "Rolf Harris arrives at court flanked by his wife and daughter Rolf …",
                        "content_encoded": "<!-- Embedding the video player -->&#13;\n<!-- This is the embedd…",
                        "pubDate": "Fri, 09 May 2014 15:21:52 +0000",
                        "dc_language": "en-gb",
                        "dc_format": "text/html",
                        "dc_identifier": "http://www.bbc.co.uk/news/uk-27340134",
                        "media_thumbnail": [
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74740000/jpg/_74740642_hi0221…"
                                }

                            }
    ,
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74740000/jpg/_74740643_hi0221…"
                                }

                            }

                        ]

                    }
    ,
                    {
                        "title": "Nigeria 'ignored' school warning",
                        "link": "http://www.bbc.co.uk/news/world-africa-27344863#sa-ns_mchannel=rss&…",
                        "guid": "http://www.bbc.co.uk/news/world-africa-27344863#sa-ns_mchannel=rss&…",
                        "description": "Nigeria's military had advance warning of the attack on a school at…",
                        "content_encoded": "<div class=\"caption full-width\">&#13;\n <img src=\"http://news.b…",
                        "pubDate": "Fri, 09 May 2014 15:48:34 +0000",
                        "dc_language": "en-gb",
                        "dc_format": "text/html",
                        "dc_identifier": "http://www.bbc.co.uk/news/world-africa-27344863",
                        "media_thumbnail": [
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74749000/jpg/_74749855_747495…"
                                }

                            }
    ,
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74749000/jpg/_74749856_747495…"
                                }

                            }

                        ]

                    }

                ]

            }

        }

    }

    Note: For brevity the output above is truncated.


    diff --git a/libraries/content-extractor/ContentExtractor.php b/libraries/content-extractor/ContentExtractor.php index 4ce954a..2519f2e 100644 --- a/libraries/content-extractor/ContentExtractor.php +++ b/libraries/content-extractor/ContentExtractor.php @@ -5,10 +5,10 @@ * Uses patterns specified in site config files and auto detection (hNews/PHP Readability) * to extract content from HTML files. * - * @version 1.2 - * @date 2016-02-21 + * @version 1.3 + * @date 2017-02-12 * @author Keyvan Minoukadeh - * @copyright 2016 Keyvan Minoukadeh + * @copyright 2017 Keyvan Minoukadeh * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 */ @@ -43,10 +43,12 @@ class ContentExtractor protected $success = false; protected $nextPageUrl; protected $opengraph = array(); + protected $twitterCard = array(); public $allowedParsers = array('libxml', 'html5php'); public $defaultParser = 'libxml'; public $parserOverride = null; public $fingerprints = array(); + public $stripImages = false; public $readability; public $debug = false; public $debugVerbose = false; @@ -81,6 +83,7 @@ class ContentExtractor $this->nextPageUrl = null; $this->success = false; $this->opengraph = array(); + $this->twitterCard = array(); } public function findHostUsingFingerprints($html) { @@ -188,22 +191,6 @@ class ContentExtractor unset($_count); } - // use tidy (if it exists)? - // This fixes problems with some sites which would otherwise - // trouble DOMDocument's HTML parsing. (Although sometimes it - // makes matters worse, which is why you can override it in site config files.) 
- $tidied = false; - if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) { - $this->debug('Using Tidy'); - $tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8'); - if (tidy_clean_repair($tidy)) { - $original_html = $html; - $tidied = true; - $html = $tidy->value; - } - unset($tidy); - } - // load and parse html if ($this->parserOverride) { // from querystring: &parser=xxx @@ -218,6 +205,34 @@ class ContentExtractor $this->debug("HTML parser $_parser not listed, using ".$this->defaultParser." instead"); $_parser = $this->defaultParser; } + // Full-Text RSS 3.7... + if (class_exists('Layershifter\Gumbo\Parser')) { + $this->debug("Gumbo PHP extension will be used for HTML parsing"); + $_parser = 'gumbo'; // fast HTML5 parser + } + + // use tidy (if it exists)? + // This fixes problems with some sites which would otherwise + // trouble DOMDocument's HTML parsing. (Although sometimes it + // makes matters worse, which is why you can override it in site config files.) 
+ $tidied = false; + if ($this->config->tidy() && function_exists('tidy_parse_string') && $smart_tidy) { + // if we're using HTML5 parser and no explicit tidy declaration in site config file + // we'll skip tidy + if (($_parser == 'gumbo' || $_parser == 'html5php') && ($this->config->tidy === null)) { + // No Tidy + } else { + $this->debug('Using Tidy'); + $tidy = tidy_parse_string($html, self::$tidy_config, 'UTF8'); + if (tidy_clean_repair($tidy)) { + $original_html = $html; + $tidied = true; + $html = $tidy->value; + } + unset($tidy); + } + } + $this->debug("Attempting to parse HTML with $_parser"); $this->readability = new Readability($html, $url, $_parser); @@ -321,7 +336,7 @@ class ContentExtractor } } - // try to open graph properties + // try to get open graph elements $elems = @$xpath->query("//head//meta[@property='og:title' or @property='og:type' or @property='og:url' or @property='og:image' or @property='og:description']", $this->readability->dom); // check for matches if ($elems && $elems->length > 0) { @@ -339,6 +354,25 @@ class ContentExtractor unset($_prop, $_val); } + // try to get Twitter Card elements + // TODO: add more, but multiple colons, e.g. twitter:site:id cause problems for RSS validation (namespace). 
For the others, maybe only return in JSON output + $elems = @$xpath->query("//head//meta[@name='twitter:card' or @name='twitter:site' or @name='twitter:creator' or @name='twitter:description' or @name='twitter:title' or @name='twitter:image']", $this->readability->dom); + // check for matches + if ($elems && $elems->length > 0) { + $this->debug('Extracting Twiter Card elements'); + foreach ($elems as $elem) { + if ($elem->hasAttribute('content')) { + $_prop = strtolower($elem->getAttribute('name')); + $_val = $elem->getAttribute('content'); + // currently one of each is returned, so we keep the first one + if (!isset($this->twitterCard[$_prop])) { + $this->twitterCard[$_prop] = $_val; + } + } + } + unset($_prop, $_val); + } + // try to get date foreach ($this->config->date as $pattern) { $elems = @$xpath->evaluate($pattern, $this->readability->dom); @@ -707,6 +741,21 @@ class ContentExtractor } } + // Find date in Open Graph meta element + // http://ogp.me/#no_vertical + if ($detect_date) { + $elems = @$xpath->query("//meta[@property='article:published_time' and @content]", $this->readability->dom); + if ($elems && $elems->length == 1) { + $this->date = strtotime(trim($elems->item(0)->getAttribute('content'))); + if ($this->date) { + $this->debug('Date found (article:published_time): '.date('Y-m-d H:i:s', $this->date)); + $detect_date = false; + } else { + $this->date = null; + } + } + } + // Find date in pubdate marked time element // For the same reason given above, we only use this // if there's exactly one element. 
@@ -765,16 +814,29 @@ class ContentExtractor } } // prevent self-closing iframes - if ($this->body->tagName === 'iframe') { - if (!$this->body->hasChildNodes()) { - $this->body->appendChild($this->body->ownerDocument->createTextNode('[embedded content]')); - } - } else { - $elems = $this->body->getElementsByTagName('iframe'); - for ($i = $elems->length-1; $i >= 0; $i--) { - $e = $elems->item($i); - if (!$e->hasChildNodes()) { - $e->appendChild($this->body->ownerDocument->createTextNode('[embedded content]')); + // better to do this or to look for all elements not matching known void elements? + // Will requesting HTML5 output using HTML5-PHP fix this issue? + $_dont_self_close = array('iframe', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'); + foreach ($_dont_self_close as $_tagname) { + if ($this->body->tagName === $_tagname) { + if (!$this->body->hasChildNodes()) { + if ($_tagname === 'iframe') { + $this->body->appendChild($this->body->ownerDocument->createTextNode('[embedded content]')); + } else { + $this->body->appendChild($this->body->ownerDocument->createTextNode('')); + } + } + } else { + $elems = $this->body->getElementsByTagName($_tagname); + for ($i = $elems->length-1; $i >= 0; $i--) { + $e = $elems->item($i); + if (!$e->hasChildNodes()) { + if ($_tagname === 'iframe') { + $e->appendChild($this->body->ownerDocument->createTextNode('[embedded content]')); + } else { + $e->appendChild($this->body->ownerDocument->createTextNode('')); + } + } } } } @@ -782,7 +844,7 @@ class ContentExtractor // the plugin replaces the src attribute to point to a 1x1 gif and puts the original src // inside the data-lazy-src attribute. It also places the original image inside a noscript element // next to the amended one. 
- $elems = @$xpath->query("//img[@data-lazy-src]", $this->body); + $elems = @$xpath->query(".//img[@data-lazy-src]", $this->body); for ($i = $elems->length-1; $i >= 0; $i--) { $e = $elems->item($i); // let's see if we can grab image from noscript @@ -800,19 +862,31 @@ class ContentExtractor // now let's deal with another lazy load technique. Example: // Image and video hosting by TinyPic - $elems = @$xpath->query("//img[@data-src and contains(@class, 'lazyload') and contains(@src, 'data:image')]", $this->body); + $elems = @$xpath->query(".//img[@data-src and (contains(@src, 'data:image') or contains(@src, '.gif'))]", $this->body); for ($i = $elems->length-1; $i >= 0; $i--) { $e = $elems->item($i); $e->setAttribute('src', $e->getAttribute('data-src')); $e->removeAttribute('data-src'); } - // If there's an og:image, but we have no images in the article, let's place it at the beginning of the article. - if ($this->body->hasChildNodes() && isset($this->opengraph['og:image']) && substr($this->opengraph['og:image'], 0, 4) === 'http') { - $elems = @$xpath->query("//img", $this->body); - if ($elems->length === 0) { - $_new_elem = $this->body->ownerDocument->createDocumentFragment(); - @$_new_elem->appendXML('
    '); - $this->body->insertBefore($_new_elem, $this->body->firstChild); + // Strip images? + if ($this->stripImages && $this->body->hasChildNodes()) { + $elems = @$xpath->query("//figure | //img | //figcaption", $this->body); + // check for matches + if ($elems && $elems->length > 0) { + $this->debug('Stripping images: '.$elems->length.' img/figure/figcaption elements'); + for ($i=$elems->length-1; $i >= 0; $i--) { + @$elems->item($i)->parentNode->removeChild($elems->item($i)); + } + } + } else { + // If there's an og:image, but we have no images in the article, let's place it at the beginning of the article. + if ($this->body->hasChildNodes() && isset($this->opengraph['og:image']) && substr($this->opengraph['og:image'], 0, 4) === 'http') { + $elems = @$xpath->query(".//img", $this->body); + if ($elems->length === 0) { + $_new_elem = $this->body->ownerDocument->createDocumentFragment(); + @$_new_elem->appendXML('
    '); + $this->body->insertBefore($_new_elem, $this->body->firstChild); + } } } @@ -850,6 +924,10 @@ class ContentExtractor return $this->opengraph; } + public function getTwitterCard() { + return $this->twitterCard; + } + public function isNativeAd() { return $this->nativeAd; } diff --git a/libraries/feedwriter/FeedWriter.php b/libraries/feedwriter/FeedWriter.php index c5f8c42..7061b02 100644 --- a/libraries/feedwriter/FeedWriter.php +++ b/libraries/feedwriter/FeedWriter.php @@ -81,7 +81,7 @@ define('JSONP', 3); */ public function setChannelElementsFromArray($elementArray) { - if(! is_array($elementArray)) return; + if(!is_array($elementArray)) return; foreach ($elementArray as $elementName => $content) { $this->setChannelElement($elementName, $content); @@ -131,19 +131,33 @@ define('JSONP', 3); $simplejson->language = null; $simplejson->url = null; $simplejson->effective_url = null; + $simplejson->domain = null; + $simplejson->word_count = null; $simplejson->og_url = null; $simplejson->og_title = null; $simplejson->og_description = null; $simplejson->og_image = null; $simplejson->og_type = null; + $simplejson->twitter_card = null; + $simplejson->twitter_site = null; + $simplejson->twitter_creator = null; + $simplejson->twitter_image = null; + $simplejson->twitter_title = null; + $simplejson->twitter_description = null; $simplejson->content = null; // actual values $simplejson->url = $jsonitem->link; $simplejson->effective_url = $jsonitem->dc_identifier; + $simplejson->domain = strtolower(@parse_url($simplejson->effective_url, PHP_URL_HOST)); + if (substr($simplejson->domain, 0, 4) === 'www.') { + $simplejson->domain = substr($simplejson->domain, 4); + } if (isset($jsonitem->title)) $simplejson->title = $jsonitem->title; if (isset($jsonitem->dc_language)) $simplejson->language = $jsonitem->dc_language; if (isset($jsonitem->content_encoded)) { $simplejson->content = $jsonitem->content_encoded; + // from http://php.net/manual/en/function.str-word-count.php#107363 
+ $simplejson->word_count = count(preg_split('!\s+!', strip_tags($simplejson->content), -1, PREG_SPLIT_NO_EMPTY)); if (isset($jsonitem->description)) { $simplejson->excerpt = $jsonitem->description; } @@ -161,6 +175,12 @@ define('JSONP', 3); if (isset($jsonitem->og_description)) $simplejson->og_description = $jsonitem->og_description; if (isset($jsonitem->og_image)) $simplejson->og_image = $jsonitem->og_image; if (isset($jsonitem->og_type)) $simplejson->og_type = $jsonitem->og_type; + if (isset($jsonitem->twitter_card)) $simplejson->twitter_card = $jsonitem->twitter_card; + if (isset($jsonitem->twitter_site)) $simplejson->twitter_site = $jsonitem->twitter_site; + if (isset($jsonitem->twitter_creator)) $simplejson->twitter_creator = $jsonitem->twitter_creator; + if (isset($jsonitem->twitter_image)) $simplejson->twitter_image = $jsonitem->twitter_image; + if (isset($jsonitem->twitter_title)) $simplejson->twitter_title = $jsonitem->twitter_title; + if (isset($jsonitem->twitter_description)) $simplejson->twitter_description = $jsonitem->twitter_description; echo json_encode($simplejson); } } @@ -337,7 +357,8 @@ define('JSONP', 3); { $out = ''."\n"; if ($this->xsl) $out .= 'xsl).'"?>' . PHP_EOL; - $out .= '' . PHP_EOL; + //$out .= '' . PHP_EOL; + $out .= '' . 
PHP_EOL; echo $out; } elseif ($this->version == JSON || $this->version == JSONP) @@ -495,6 +516,9 @@ define('JSONP', 3); foreach ($itemElements as $thisElement) { foreach ($thisElement as $instance) { if ($this->version == RSS2) { + // Let's not include twitter and open graph elements in regular RSS output + // These are aimed more at developers, and so JSON is more appropriate + if (preg_match('/^(twitter|og):/i', $instance['name'])) continue; echo $this->makeNode($instance['name'], $instance['content'], $instance['attributes']); } elseif ($this->version == JSON || $this->version == JSONP) { $_json_node = $this->makeNode($instance['name'], $instance['content'], $instance['attributes']); diff --git a/libraries/htmLawed/htmLawed.php b/libraries/htmLawed/htmLawed.php index 35215c1..7895000 100644 --- a/libraries/htmLawed/htmLawed.php +++ b/libraries/htmLawed/htmLawed.php @@ -1,8 +1,8 @@ $v){ - switch($k){ - case 'maxlen':if($l > $v){$o = 0;} - break; case 'minlen': if($l < $v){$o = 0;} - break; case 'maxval': if((float)($t) > $v){$o = 0;} - break; case 'minval': if((float)($t) < $v){$o = 0;} - break; case 'match': if(!preg_match($v, $t)){$o = 0;} - break; case 'nomatch': if(preg_match($v, $t)){$o = 0;} - break; case 'oneof': - $m = 0; - foreach(explode('|', $v) as $n){if($t == $n){$m = 1; break;}} - $o = $m; - break; case 'noneof': - $m = 1; - foreach(explode('|', $v) as $n){if($t == $n){$m = 0; break;}} - $o = $m; - break; default: - break; +static $ma = array('accesskey', 'class', 'rel'); +$s = in_array($a, $ma) ? ' ' : ''; +$r = array(); +$t = !empty($s) ? 
explode($s, $t) : array($t); +foreach($t as $tk=>$tv){ + $o = 1; $l = strlen($tv); + foreach($p as $k=>$v){ + switch($k){ + case 'maxlen': if($l > $v){$o = 0;} + break; case 'minlen': if($l < $v){$o = 0;} + break; case 'maxval': if((float)($tv) > $v){$o = 0;} + break; case 'minval': if((float)($tv) < $v){$o = 0;} + break; case 'match': if(!preg_match($v, $tv)){$o = 0;} + break; case 'nomatch': if(preg_match($v, $tv)){$o = 0;} + break; case 'oneof': + $m = 0; + foreach(explode('|', $v) as $n){if($tv == $n){$m = 1; break;}} + $o = $m; + break; case 'noneof': + $m = 1; + foreach(explode('|', $v) as $n){if($tv == $n){$m = 0; break;}} + $o = $m; + break; default: + break; + } + if(!$o){break;} } - if(!$o){break;} + if($o){$r[] = $tv;} } -return ($o ? $t : (isset($p['default']) ? $p['default'] : 0)); +$r = implode($s, $r); +return (isset($r[0]) ? $r : (isset($p['default']) ? $p['default'] : 0)); // eof } @@ -526,7 +534,7 @@ foreach($aA as $k=>$v){ } } } - if(isset($rl[$k]) && is_array($rl[$k]) && ($v = htmLawed::hl_attrval($v, $rl[$k])) === 0){continue;} + if(isset($rl[$k]) && is_array($rl[$k]) && ($v = htmLawed::hl_attrval($k, $v, $rl[$k])) === 0){continue;} $a[$k] = str_replace('"', '"', $v); } } @@ -628,16 +636,15 @@ if($e == 'u'){$e = 'span'; return 'text-decoration: underline;';} static $fs = array('0'=>'xx-small', '1'=>'xx-small', '2'=>'small', '3'=>'medium', '4'=>'large', '5'=>'x-large', '6'=>'xx-large', '7'=>'300%', '-1'=>'smaller', '-2'=>'60%', '+1'=>'larger', '+2'=>'150%', '+3'=>'200%', '+4'=>'300%'); if($e == 'font'){ $a2 = ''; - if(preg_match('`face\s*=\s*(\'|")([^=]+?)\\1`i', $a, $m) or preg_match('`face\s*=(\s*)(\S+)`i', $a, $m)){ - $a2 .= ' font-family: '. str_replace('"', '\'', trim($m[2])). ';'; + while(preg_match('`(^|\s)(color|size)\s*=\s*(\'|")?(.+?)(\\3|\s|$)`i', $a, $m)){ + $a = str_replace($m[0], ' ', $a); + $a2 .= strtolower($m[2]) == 'color' ? (' color: '. str_replace('"', '\'', trim($m[4])). ';') : (isset($fs[($m = trim($m[4]))]) ? 
($a2 .= ' font-size: '. str_replace('"', '\'', $fs[$m]). ';') : ''); } - if(preg_match('`color\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m)){ - $a2 .= ' color: '. str_replace('"', '\'', trim($m[2])). ';'; + while(preg_match('`(^|\s)face\s*=\s*(\'|")?([^=]+?)\\2`i', $a, $m) or preg_match('`(^|\s)face\s*=(\s*)(\S+)`i', $a, $m)){ + $a = str_replace($m[0], ' ', $a); + $a2 .= ' font-family: '. str_replace('"', '\'', trim($m[3])). ';'; } - if(preg_match('`size\s*=\s*(\'|")?(.+?)(\\1|\s|$)`i', $a, $m) && isset($fs[($m = trim($m[2]))])){ - $a2 .= ' font-size: '. str_replace('"', '\'', $fs[$m]). ';'; - } - $e = 'span'; return ltrim($a2); + $e = 'span'; return ltrim(str_replace('<', '', $a2)); } if($t == 2){$e = 0; return 0;} return ''; @@ -701,7 +708,7 @@ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array( public static function hl_version(){ // rel -return '1.1.20'; +return '1.1.22'; // eof } diff --git a/libraries/html5php/HTML5.php b/libraries/html5php/HTML5.php index 1c46c2b..990e8f4 100644 --- a/libraries/html5php/HTML5.php +++ b/libraries/html5php/HTML5.php @@ -166,9 +166,10 @@ class HTML5 public function parse(\Masterminds\HTML5\Parser\InputStream $input, array $options = array()) { $this->errors = array(); - $events = new DOMTreeBuilder(false, array_merge($this->getOptions(), $options)); + $options = array_merge($this->getOptions(), $options); + $events = new DOMTreeBuilder(false, $options); $scanner = new Scanner($input); - $parser = new Tokenizer($scanner, $events); + $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? 
Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML); $parser->parse(); $this->errors = $events->getErrors(); @@ -184,9 +185,10 @@ class HTML5 */ public function parseFragment(\Masterminds\HTML5\Parser\InputStream $input, array $options = array()) { - $events = new DOMTreeBuilder(true, array_merge($this->getOptions(), $options)); + $options = array_merge($this->getOptions(), $options); + $events = new DOMTreeBuilder(true, $options); $scanner = new Scanner($input); - $parser = new Tokenizer($scanner, $events); + $parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML: Tokenizer::CONFORMANT_HTML); $parser->parse(); $this->errors = $events->getErrors(); diff --git a/libraries/html5php/HTML5/Elements.php b/libraries/html5php/HTML5/Elements.php index 6cf72aa..0e880e7 100644 --- a/libraries/html5php/HTML5/Elements.php +++ b/libraries/html5php/HTML5/Elements.php @@ -24,7 +24,7 @@ class Elements const KNOWN_ELEMENT = 1; // From section 8.1.2: "script", "style" - // From 8.2.5.4.7 ("in body" insertion mode): "noembed", "noscript" + // From 8.2.5.4.7 ("in body" insertion mode): "noembed" // From 8.4 "style", "xmp", "iframe", "noembed", "noframes" /** * Indicates the contained text should be processed as raw text. @@ -79,7 +79,7 @@ class Elements public static $html5 = array( "a" => 1, "abbr" => 1, - "address" => 89, // NORMAL | VOID_TAG | AUTOCLOSE_P | BLOCK_TAG + "address" => 65, // NORMAL | BLOCK_TAG "area" => 9, // NORMAL | VOID_TAG "article" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG "aside" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG @@ -144,7 +144,7 @@ class Elements "meta" => 9, // NORMAL | VOID_TAG "meter" => 1, "nav" => 17, // NORMAL | AUTOCLOSE_P, - "noscript" => 67, // NORMAL | TEXT_RAW | BLOCK_TAG + "noscript" => 65, // NORMAL | BLOCK_TAG "object" => 1, "ol" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG "optgroup" => 1, @@ -557,7 +557,7 @@ class Elements * @param string $name * The name of the element. 
* - * @return int The element mask. + * @return int|bool The element mask or false if element does not exist. */ public static function element($name) { diff --git a/libraries/html5php/HTML5/Parser/EventHandler.php b/libraries/html5php/HTML5/Parser/EventHandler.php index 2d55347..3da71a3 100644 --- a/libraries/html5php/HTML5/Parser/EventHandler.php +++ b/libraries/html5php/HTML5/Parser/EventHandler.php @@ -69,7 +69,7 @@ interface EventHandler * An array with all of the tag's attributes. * @param boolean $selfClosing * An indicator of whether or not this tag is self-closing () - * @return numeric One of the Tokenizer::TEXTMODE_* constants. + * @return int One of the Tokenizer::TEXTMODE_* constants. */ public function startTag($name, $attributes = array(), $selfClosing = false); diff --git a/libraries/html5php/HTML5/Parser/Tokenizer.php b/libraries/html5php/HTML5/Parser/Tokenizer.php index a779191..02b2aff 100644 --- a/libraries/html5php/HTML5/Parser/Tokenizer.php +++ b/libraries/html5php/HTML5/Parser/Tokenizer.php @@ -43,6 +43,10 @@ class Tokenizer protected $textMode = 0; // TEXTMODE_NORMAL; protected $untilTag = null; + const CONFORMANT_XML = 'xml'; + const CONFORMANT_HTML = 'html'; + protected $mode = self::CONFORMANT_HTML; + const WHITE = "\t\n\f "; /** @@ -57,11 +61,13 @@ class Tokenizer * @param \Masterminds\HTML5\Parser\EventHandler $eventHandler * An event handler, initialized and ready to receive * events. 
+ * @param string $mode */ - public function __construct($scanner, $eventHandler) + public function __construct($scanner, $eventHandler, $mode = self::CONFORMANT_HTML) { $this->scanner = $scanner; $this->events = $eventHandler; + $this->mode = $mode; } /** @@ -299,7 +305,7 @@ class Tokenizer } elseif ($tok == 'D' || $tok == 'd') { // Doctype - return $this->doctype(''); + return $this->doctype(); } elseif ($tok == '[') { // CDATA section @@ -335,7 +341,8 @@ class Tokenizer return $this->bogusComment('scanner->charsUntil("\n\f \t>")); + $name = $this->scanner->charsUntil("\n\f \t>"); + $name = $this->mode === self::CONFORMANT_XML ? $name: strtolower($name); // Trash whitespace. $this->scanner->whitespace(); @@ -362,7 +369,8 @@ class Tokenizer } // We know this is at least one char. - $name = strtolower($this->scanner->charsWhile(":_-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")); + $name = $this->scanner->charsWhile(":_-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); + $name = $this->mode === self::CONFORMANT_XML ? $name : strtolower($name); $attributes = array(); $selfClose = false; diff --git a/libraries/html5php/HTML5/Parser/TreeBuildingRules.php b/libraries/html5php/HTML5/Parser/TreeBuildingRules.php index 2af3c66..6236208 100644 --- a/libraries/html5php/HTML5/Parser/TreeBuildingRules.php +++ b/libraries/html5php/HTML5/Parser/TreeBuildingRules.php @@ -76,7 +76,6 @@ class TreeBuildingRules case 'option': return $this->closeIfCurrentMatches($new, $current, array( 'option', - 'optgroup' )); case 'tr': return $this->closeIfCurrentMatches($new, $current, array( diff --git a/libraries/html5php/HTML5/Parser/UTF8Utils.php b/libraries/html5php/HTML5/Parser/UTF8Utils.php index d319252..44affb6 100644 --- a/libraries/html5php/HTML5/Parser/UTF8Utils.php +++ b/libraries/html5php/HTML5/Parser/UTF8Utils.php @@ -48,10 +48,10 @@ class UTF8Utils public static function countChars($string) { // Get the length for the string we need. 
- if (function_exists('iconv_strlen')) { - return iconv_strlen($string, 'utf-8'); - } elseif (function_exists('mb_strlen')) { + if (function_exists('mb_strlen')) { return mb_strlen($string, 'utf-8'); + } elseif (function_exists('iconv_strlen')) { + return iconv_strlen($string, 'utf-8'); } elseif (function_exists('utf8_decode')) { // MPB: Will this work? Won't certain decodes lead to two chars // extrapolated out of 2-byte chars? @@ -94,10 +94,10 @@ class UTF8Utils // application executing this library so we store the value, change it // to our needs, and then change it back when we are done. This feels // a little excessive and it would be great if there was a better way. - $save = ini_get('mbstring.substitute_character'); - ini_set('mbstring.substitute_character', "none"); + $save = mb_substitute_character(); + mb_substitute_character('none'); $data = mb_convert_encoding($data, 'UTF-8', $encoding); - ini_set('mbstring.substitute_character', $save); + mb_substitute_character($save); } // @todo Get iconv running in at least some environments if that is possible. 
elseif (function_exists('iconv') && $encoding != 'auto') { // fprintf(STDOUT, "iconv found\n"); diff --git a/libraries/html5php/HTML5/Serializer/OutputRules.php b/libraries/html5php/HTML5/Serializer/OutputRules.php index 4ad74f6..a22683c 100644 --- a/libraries/html5php/HTML5/Serializer/OutputRules.php +++ b/libraries/html5php/HTML5/Serializer/OutputRules.php @@ -185,7 +185,9 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface { $this->doctype(); if ($dom->documentElement) { - $this->traverser->node($dom->documentElement); + foreach ($dom->childNodes as $node) { + $this->traverser->node($node); + } $this->nl(); } } @@ -219,7 +221,11 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface $this->openTag($ele); if (Elements::isA($name, Elements::TEXT_RAW)) { foreach ($ele->childNodes as $child) { - $this->wr($child->data); + if ($child instanceof \DOMCharacterData) { + $this->wr($child->data); + } elseif ($child instanceof \DOMElement) { + $this->element($child); + } } } else { // Handle children. @@ -347,7 +353,7 @@ class OutputRules implements \Masterminds\HTML5\Serializer\RulesInterface // the XML, XMLNS, or XLink NS's should use the canonical // prefix. It seems that DOM does this for us already, but there // may be exceptions. - $name = $node->name; + $name = $node->nodeName; // Special handling for attributes in SVG and MathML. // Using if/elseif instead of switch because it's faster in PHP. diff --git a/libraries/html5php/HTML5/Serializer/Traverser.php b/libraries/html5php/HTML5/Serializer/Traverser.php index 9c700da..e95dc36 100644 --- a/libraries/html5php/HTML5/Serializer/Traverser.php +++ b/libraries/html5php/HTML5/Serializer/Traverser.php @@ -103,7 +103,6 @@ class Traverser case XML_CDATA_SECTION_NODE: $this->rules->cdata($node); break; - // FIXME: It appears that the parser doesn't do PI's. 
case XML_PI_NODE: $this->rules->processorInstruction($node); break; diff --git a/libraries/html5php/README.md b/libraries/html5php/README.md index bbe003f..505a85f 100644 --- a/libraries/html5php/README.md +++ b/libraries/html5php/README.md @@ -9,10 +9,13 @@ But after some initial refactoring work, we began a new parser. - Composer support - Event-based (SAX-like) parser - DOM tree builder -- Interoperability with QueryPath [[in progress](https://github.com/technosophos/querypath/issues/114)] +- Interoperability with [QueryPath](https://github.com/technosophos/querypath) - Runs on **PHP** 5.3.0 or newer and **HHVM** 3.2 or newer -[![Build Status](https://travis-ci.org/Masterminds/html5-php.png?branch=master)](https://travis-ci.org/Masterminds/html5-php) [![Latest Stable Version](https://poser.pugx.org/masterminds/html5/v/stable.png)](https://packagist.org/packages/masterminds/html5) [![Coverage Status](https://coveralls.io/repos/Masterminds/html5-php/badge.png?branch=master)](https://coveralls.io/r/Masterminds/html5-php?branch=master) +[![Build Status](https://travis-ci.org/Masterminds/html5-php.png?branch=master)](https://travis-ci.org/Masterminds/html5-php) +[![Latest Stable Version](https://poser.pugx.org/masterminds/html5/v/stable.png)](https://packagist.org/packages/masterminds/html5) +[![Code Coverage](https://scrutinizer-ci.com/g/Masterminds/html5-php/badges/coverage.png?b=master)](https://scrutinizer-ci.com/g/Masterminds/html5-php/?branch=master) +[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/Masterminds/html5-php/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/Masterminds/html5-php/?branch=master) ## Installation @@ -36,7 +39,7 @@ install. ## Basic Usage -HTML5-PHP has a high-level API and a low-level API. +HTML5-PHP has a high-level API and a low-level API. 
Here is how you use the high-level `HTML5` library API: @@ -144,14 +147,14 @@ The serializer is broken into three parts: - The `OutputRules` contain the rules to turn DOM elements into strings. The rules are an implementation of the interface `RulesInterface` allowing for -different rule sets to be used. +different rule sets to be used. - The `Traverser`, which is a special-purpose tree walker. It visits each node node in the tree and uses the `OutputRules` to transform the node into a string. - `HTML5` manages the `Traverser` and stores the resultant data in the correct place. -The serializer (`save()`, `saveHTML()`) follows the +The serializer (`save()`, `saveHTML()`) follows the [section 8.9 of the HTML 5.0 spec](http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#serializing-html-fragments). So tags are serialized according to these rules: @@ -166,8 +169,8 @@ issues known issues that are not presently on the roadmap: - Namespaces: HTML5 only [supports a selected list of namespaces](http://www.w3.org/TR/html5/infrastructure.html#namespaces) and they do not operate in the same way as XML namespaces. A `:` has no special - meaning. - By default the parser does not support XML style namespaces via `:`; + meaning. + By default the parser does not support XML style namespaces via `:`; to enable the XML namespaces see the [XML Namespaces section](#xml-namespaces) - Scripts: This parser does not contain a JavaScript or a CSS interpreter. While one may be supplied, not all features will be @@ -184,13 +187,13 @@ issues known issues that are not presently on the roadmap: * Per the spec, many legacy tags are admitted and correctly handled, even though they are technically not part of HTML5. - Attribute names and values: Due to the implementation details of the - PHP implementation of DOM, attribute names that do not follow the + PHP implementation of DOM, attribute names that do not follow the XML 1.0 standard are not inserted into the DOM. 
(Effectively, they are ignored.) If you've got a clever fix for this, jump in! - Processor Instructions: The HTML5 spec does not allow processor instructions. We do. Since this is a server-side library, we think this is useful. And that means, dear reader, that in some cases you - can parse the HTML from a mixed PHP/HTML document. This, however, + can parse the HTML from a mixed PHP/HTML document. This, however, is an incidental feature, not a core feature. - HTML manifests: Unsupported. - PLAINTEXT: Unsupported. diff --git a/libraries/html5php/RELEASE.md b/libraries/html5php/RELEASE.md index e3d70d3..56d5fa1 100644 --- a/libraries/html5php/RELEASE.md +++ b/libraries/html5php/RELEASE.md @@ -1,7 +1,31 @@ # Release Notes + +2.2.2 (2016-10-22) + +- #116: In XML mode, tags are case sensitive +- #115: Fix PHP Notice in OutputRules +- #112: fix parsing of options of an optgroup +- #111: Adding test for the address tag + +2.2.1 (2016-05-10) + +- #109: Fixed issue where address tag could be written without closing tag (thanks sylus) + +2.2.0 (2016-04-11) + +- #105: Enable composer cache (for CI/CD) +- #100: Use mb_substitute_character instead of ini_set for environments where + ini_set is disabled (e.g., shared hosting) +- #98: Allow link, meta, style tags in noscript tags +- #96: Fixed xml:href on svgs that use the "use" breaking +- #94: Counting UTF8 characters performance improvement +- #93: Use newer version of coveralls package +- #90: Remove duplicate test +- #87: Allow multiple root nodes + 2.1.2 (2015-06-07) - #82: Support for PHP7 -- #84: Improved boolean attribute handling +- #84: Improved boolean attribute handling 2.1.1 (2015-03-23) - #78: Fixes bug where unmatched entity like string drops everything after &.
diff --git a/libraries/humble-http-agent/HumbleHttpAgent.php b/libraries/humble-http-agent/HumbleHttpAgent.php index 4f3b83d..605a6ad 100644 --- a/libraries/humble-http-agent/HumbleHttpAgent.php +++ b/libraries/humble-http-agent/HumbleHttpAgent.php @@ -7,11 +7,11 @@ * For environments which do not have these options, it reverts to standard sequential * requests (using file_get_contents()) * - * @version 1.6 - * @date 2015-06-05 + * @version 1.7 + * @date 2016-11-28 * @see http://devel-m6w6.rhcloud.com/mdref/http * @author Keyvan Minoukadeh - * @copyright 2011-2015 Keyvan Minoukadeh + * @copyright 2011-2016 Keyvan Minoukadeh * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 */ @@ -21,8 +21,8 @@ class HumbleHttpAgent const METHOD_CURL_MULTI = 2; const METHOD_FILE_GET_CONTENTS = 4; //const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1'; - const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.92 Safari/535.2'; - const UA_PHP = 'PHP/5.5'; + const UA_BROWSER = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36'; + const UA_PHP = 'PHP/5.6'; const REF_GOOGLE = 'http://www.google.co.uk/url?sa=t&source=web&cd=1'; protected $requests = array(); @@ -103,20 +103,26 @@ class HumbleHttpAgent ) ); // HTTP cURL - $this->curlOptions = array( - CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], - CURLOPT_TIMEOUT => $this->requestOptions['timeout'] + if ($this->method === self::METHOD_CURL_MULTI) { + $this->curlOptions = array( + CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], + CURLOPT_TIMEOUT => $this->requestOptions['timeout'] ); + } // Use proxy? 
- if ($this->requestOptions['proxyhost']) { + if (isset($this->requestOptions['proxyhost']) && $this->requestOptions['proxyhost']) { // For file_get_contents (see http://stackoverflow.com/a/1336419/407938) $this->httpContext['http']['proxy'] = 'tcp://'.$this->requestOptions['proxyhost']; $this->httpContext['http']['request_fulluri'] = true; // For cURL (see http://stackoverflow.com/a/9247672/407938) - $this->curlOptions[CURLOPT_PROXY] = $this->requestOptions['proxyhost']; + if ($this->method === self::METHOD_CURL_MULTI) { + $this->curlOptions[CURLOPT_PROXY] = $this->requestOptions['proxyhost']; + } if (isset($this->requestOptions['proxyauth'])) { $this->httpContext['http']['header'] .= "Proxy-Authorization: Basic ".base64_encode($this->requestOptions['proxyauth'])."\r\n"; - $this->curlOptions[CURLOPT_PROXYUSERPWD] = $this->requestOptions['proxyauth']; + if ($this->method === self::METHOD_CURL_MULTI) { + $this->curlOptions[CURLOPT_PROXYUSERPWD] = $this->requestOptions['proxyauth']; + } } } } @@ -842,6 +848,7 @@ class HumbleHttpAgent } protected function getCookies($orig, $req_url) { + if (!isset($this->cookieJar[$orig])) return null; $jar = $this->cookieJar[$orig]; if (!isset($jar)) { return null; diff --git a/libraries/language-detect/LanguageDetect.php b/libraries/language-detect/LanguageDetect.php index 382d869..a6922fa 100644 --- a/libraries/language-detect/LanguageDetect.php +++ b/libraries/language-detect/LanguageDetect.php @@ -971,7 +971,7 @@ class Text_LanguageDetect // assume that ascii characters are the most common // so try it first for efficiency - if ($unicode <= $blocks[0][1]) { + if ($unicode <= hexdec($blocks[0][1])) { return $blocks[0]; } @@ -989,11 +989,11 @@ class Text_LanguageDetect while ($low <= $high) { $mid = floor(($low + $high) / 2); - if ($unicode < $blocks[$mid][0]) { + if ($unicode < hexdec($blocks[$mid][0])) { // if it's lower than the lower bound $high = $mid - 1; - } elseif ($unicode > $blocks[$mid][1]) { + } elseif ($unicode > 
hexdec($blocks[$mid][1])) { // if it's higher than the upper bound $low = $mid + 1; diff --git a/libraries/language-detect/LanguageDetect/Parser.php b/libraries/language-detect/LanguageDetect/Parser.php index fb0e1e2..e859218 100644 --- a/libraries/language-detect/LanguageDetect/Parser.php +++ b/libraries/language-detect/LanguageDetect/Parser.php @@ -102,7 +102,7 @@ class Text_LanguageDetect_Parser extends Text_LanguageDetect * @access private * @param string $string string to be parsed */ - function Text_LanguageDetect_Parser($string) { + function __construct($string) { $this->_string = $string; } diff --git a/libraries/readability/Readability.php b/libraries/readability/Readability.php index 8c38e3c..8a3fb73 100644 --- a/libraries/readability/Readability.php +++ b/libraries/readability/Readability.php @@ -4,6 +4,7 @@ * Based on readability.js version 1.7.1 (without multi-page support) * Updated to allow HTML5 parsing with html5lib * Updated with lightClean mode to preserve more images and youtube/vimeo/viddler embeds +* Updated to allow HTML5 parsing with Gumbo PHP * ------------------------------------------------------ * Original URL: http://lab.arc90.com/experiments/readability/js/readability.js * Arc90's project URL: http://lab.arc90.com/experiments/readability/ @@ -12,7 +13,7 @@ * More information: http://fivefilters.org/content-only/ * License: Apache License, Version 2.0 * Requires: PHP5 -* Date: 2015-06-01 +* Date: 2017-02-05 * * Differences between the PHP port and the original * ------------------------------------------------------ @@ -117,17 +118,23 @@ class Readability $html = preg_replace($this->regexps['replaceBrs'], '

    ', $html); $html = preg_replace($this->regexps['replaceFonts'], '<$1span>', $html); if (trim($html) == '') $html = ''; - if ($parser=='html5lib' || $parser=='html5php') { - if (version_compare(PHP_VERSION, '5.3.0') >= 0) { - //use Masterminds\HTML5; - $html5class = 'Masterminds\HTML5'; - $html5 = new $html5class(array('disable_html_ns' => true)); - $this->dom = $html5->loadHTML($html); - //echo $html5->saveHTML($this->dom);exit; - //$xpath = new DOMXPath($this->dom); - //$elems = $xpath->query("//a"); - //print_r($elems);exit; - } + // Check for the Gumbo PHP extension https://github.com/layershifter/gumbo-php + if ($parser=='gumbo') { + // Can we avoid this encoding/decoding step? Test on: + // http://www.medialens.org/index.php/alerts/alert-archive/2017/837-undermining-democracy-corporate-media-bias-on-jeremy-corbyn-boris-johnson-and-syria.html + $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"); + $html = mb_convert_encoding($html, "UTF-8", 'HTML-ENTITIES'); + $this->dom = @Layershifter\Gumbo\Parser::load($html); + } elseif ($parser=='html5lib' || $parser=='html5php') { + //use Masterminds\HTML5; + //$html5class = 'Masterminds\HTML5'; + //$html5 = new $html5class(array('disable_html_ns' => true)); + $html5 = new Masterminds\HTML5(array('disable_html_ns' => true)); + $this->dom = $html5->loadHTML($html); + //echo $html5->saveHTML($this->dom);exit; + //$xpath = new DOMXPath($this->dom); + //$elems = $xpath->query("//a"); + //print_r($elems);exit; } if ($this->dom === null) { $this->dom = new DOMDocument(); diff --git a/libraries/simplepie/autoloader.php b/libraries/simplepie/autoloader.php index c16a8f8..fd7690d 100644 --- a/libraries/simplepie/autoloader.php +++ b/libraries/simplepie/autoloader.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution.
* - * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie.php b/libraries/simplepie/library/SimplePie.php index b33c635..63ab10b 100644 --- a/libraries/simplepie/library/SimplePie.php +++ b/libraries/simplepie/library/SimplePie.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,8 @@ * POSSIBILITY OF SUCH DAMAGE. 
* * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @version 1.4.3 + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -50,7 +50,7 @@ define('SIMPLEPIE_NAME', 'SimplePie'); /** * SimplePie Version */ -define('SIMPLEPIE_VERSION', '1.3.1'); +define('SIMPLEPIE_VERSION', '1.4.3'); /** * SimplePie Build @@ -445,6 +445,13 @@ class SimplePie */ public $feed_url; + /** + * @var string Original feed URL, or new feed URL iff HTTP 301 Moved Permanently + * @see SimplePie::subscribe_url() + * @access private + */ + public $permanent_url = null; + /** * @var object Instance of SimplePie_File to use as a feed * @see SimplePie::set_file() @@ -466,6 +473,13 @@ class SimplePie */ public $timeout = 10; + /** + * @var array Custom curl options + * @see SimplePie::set_curl_options() + * @access private + */ + public $curl_options = array(); + /** * @var bool Forces fsockopen() to be used for remote files instead * of cURL, even if a new enough version is installed @@ -489,6 +503,14 @@ class SimplePie */ public $cache = true; + /** + * @var bool Force SimplePie to fallback to expired cache, if enabled, + * when feed is unavailable. + * @see SimplePie::force_cache_fallback() + * @access private + */ + public $force_cache_fallback = false; + /** * @var int Cache duration (in seconds) * @see SimplePie::set_cache_duration() @@ -594,6 +616,12 @@ class SimplePie */ public $item_limit = 0; + /** + * @var bool Stores if last-modified and/or etag headers were sent with the + * request when checking a feed. + */ + public $check_modified = false; + /** * @var array Stores the default attributes to be stripped by strip_attributes(). 
* @see SimplePie::strip_attributes() @@ -601,6 +629,13 @@ class SimplePie */ public $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'); + /** + * @var array Stores the default attributes to add to different tags by add_attributes(). + * @see SimplePie::add_attributes() + * @access private + */ + public $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')); + /** * @var array Stores the default tags to be stripped by strip_htmltags(). * @see SimplePie::strip_htmltags() @@ -624,9 +659,9 @@ class SimplePie */ public function __construct() { - if (version_compare(PHP_VERSION, '5.2', '<')) + if (version_compare(PHP_VERSION, '5.3', '<')) { - trigger_error('PHP 4.x, 5.0 and 5.1 are no longer supported. Please upgrade to PHP 5.2 or newer.'); + trigger_error('Please upgrade to PHP 5.3 or newer.'); die(); } @@ -637,7 +672,7 @@ class SimplePie if (func_num_args() > 0) { $level = defined('E_USER_DEPRECATED') ? E_USER_DEPRECATED : E_USER_WARNING; - trigger_error('Passing parameters to the constructor is no longer supported. Please use set_feed_url(), set_cache_location(), and set_cache_location() directly.', $level); + trigger_error('Passing parameters to the constructor is no longer supported. 
Please use set_feed_url(), set_cache_location(), and set_cache_duration() directly.', $level); $args = func_get_args(); switch (count($args)) { @@ -728,6 +763,7 @@ class SimplePie else { $this->feed_url = $this->registry->call('Misc', 'fix_protocol', array($url, 1)); + $this->permanent_url = $this->feed_url; } } @@ -742,6 +778,7 @@ class SimplePie if ($file instanceof SimplePie_File) { $this->feed_url = $file->url; + $this->permanent_url = $this->feed_url; $this->file =& $file; return true; } @@ -780,6 +817,19 @@ class SimplePie $this->timeout = (int) $timeout; } + /** + * Set custom curl options + * + * This allows you to change default curl options + * + * @since 1.0 Beta 3 + * @param array $curl_options Curl options to add to default settings + */ + public function set_curl_options(array $curl_options = array()) + { + $this->curl_options = $curl_options; + } + /** * Force SimplePie to use fsockopen() instead of cURL * @@ -805,6 +855,21 @@ class SimplePie $this->cache = (bool) $enable; } + /** + * SimplePie to continue to fall back to expired cache, if enabled, when + * feed is unavailable. + * + * This tells SimplePie to ignore any file errors and fall back to cache + * instead. This only works if caching is enabled and cached content + * still exists. + + * @param bool $enable Force use of cache on fail. 
+ */ + public function force_cache_fallback($enable = false) + { + $this->force_cache_fallback= (bool) $enable; + } + /** * Set the length of time (in seconds) that the contents of a feed will be * cached @@ -1073,6 +1138,7 @@ class SimplePie $this->strip_comments(false); $this->strip_htmltags(false); $this->strip_attributes(false); + $this->add_attributes(false); $this->set_image_handler(false); } } @@ -1119,16 +1185,25 @@ class SimplePie $this->sanitize->strip_attributes($attribs); } + public function add_attributes($attribs = '') + { + if ($attribs === '') + { + $attribs = $this->add_attributes; + } + $this->sanitize->add_attributes($attribs); + } + /** * Set the output encoding * * Allows you to override SimplePie's output to match that of your webpage. * This is useful for times when your webpages are not being served as - * UTF-8. This setting will be obeyed by {@see handle_content_type()}, and + * UTF-8. This setting will be obeyed by {@see handle_content_type()}, and * is similar to {@see set_input_encoding()}. * * It should be noted, however, that not all character encodings can support - * all characters. If your page is being served as ISO-8859-1 and you try + * all characters. If your page is being served as ISO-8859-1 and you try * to display a Japanese feed, you'll likely see garbled characters. * Because of this, it is highly recommended to ensure that your webpages * are served as UTF-8. @@ -1195,10 +1270,20 @@ class SimplePie $this->item_limit = (int) $limit; } + /** + * Enable throwing exceptions + * + * @param boolean $enable Should we throw exceptions, or use the old-style error property? + */ + public function enable_exceptions($enable = true) + { + $this->enable_exceptions = $enable; + } + /** * Initialize the feed object * - * This is what makes everything happen. Period. This is where all of the + * This is what makes everything happen. Period. 
This is where all of the * configuration options get processed, feeds are fetched, cached, and * parsed, and all of that other good stuff. * @@ -1209,6 +1294,7 @@ class SimplePie // Check absolute bare minimum requirements. if (!extension_loaded('xml') || !extension_loaded('pcre')) { + $this->error = 'XML or PCRE extensions not loaded!'; return false; } // Then check the xml extension is sane (i.e., libxml 2.7.x issue on PHP < 5.2.9 and libxml 2.7.0 to 2.7.2 on any version) if we don't have xmlreader. @@ -1236,7 +1322,7 @@ class SimplePie // Pass whatever was set with config options over to the sanitizer. // Pass the classes in for legacy support; new classes should use the registry instead $this->sanitize->pass_cache_data($this->cache, $this->cache_location, $this->cache_name_function, $this->registry->get_class('Cache')); - $this->sanitize->pass_file_data($this->registry->get_class('File'), $this->timeout, $this->useragent, $this->force_fsockopen); + $this->sanitize->pass_file_data($this->registry->get_class('File'), $this->timeout, $this->useragent, $this->force_fsockopen, $this->curl_options); if (!empty($this->multifeed_url)) { @@ -1265,6 +1351,7 @@ class SimplePie $this->error = null; $this->data = array(); + $this->check_modified = false; $this->multifeed_objects = array(); $cache = false; @@ -1289,6 +1376,13 @@ class SimplePie list($headers, $sniffed) = $fetched; } + + // Empty response check + if(empty($this->raw_data)){ + $this->error = "A feed could not be found at `$this->feed_url`. Empty body."; + $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); + return false; + } // Set up array of possible encodings $encodings = array(); @@ -1296,7 +1390,7 @@ class SimplePie // First check to see if input has been overridden. 
if ($this->input_encoding !== false) { - $encodings[] = $this->input_encoding; + $encodings[] = strtoupper($this->input_encoding); } $application_types = array('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity'); @@ -1318,14 +1412,14 @@ class SimplePie { if (isset($headers['content-type']) && preg_match('/;\x20?charset=([^;]*)/i', $headers['content-type'], $charset)) { - $encodings[] = $charset[1]; + $encodings[] = strtoupper($charset[1]); } $encodings[] = 'US-ASCII'; } // Text MIME-type default elseif (substr($sniffed, 0, 5) === 'text/') { - $encodings[] = 'US-ASCII'; + $encodings[] = 'UTF-8'; } } @@ -1347,12 +1441,12 @@ class SimplePie $parser = $this->registry->create('Parser'); // If it's parsed fine - if ($parser->parse($utf8_data, 'UTF-8')) + if ($parser->parse($utf8_data, 'UTF-8', $this->permanent_url)) { $this->data = $parser->get_data(); if (!($this->get_type() & ~SIMPLEPIE_TYPE_NONE)) { - $this->error = "A feed could not be found at $this->feed_url. This does not appear to be a valid RSS or Atom feed."; + $this->error = "A feed could not be found at `$this->feed_url`. This does not appear to be a valid RSS or Atom feed."; $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); return false; } @@ -1376,11 +1470,27 @@ class SimplePie if (isset($parser)) { // We have an error, just set SimplePie_Misc::error to it and quit - $this->error = sprintf('This XML document is invalid, likely due to invalid characters. XML error: %s at line %d, column %d', $parser->get_error_string(), $parser->get_current_line(), $parser->get_current_column()); + $this->error = $this->feed_url; + $this->error .= sprintf(' is invalid XML, likely due to invalid characters. XML error: %s at line %d, column %d', $parser->get_error_string(), $parser->get_current_line(), $parser->get_current_column()); } else { - $this->error = 'The data could not be converted to UTF-8. 
You MUST have either the iconv or mbstring extension installed. Upgrading to PHP 5.x (which includes iconv) is highly recommended.'; + $this->error = 'The data could not be converted to UTF-8.'; + if (!extension_loaded('mbstring') && !extension_loaded('iconv') && !class_exists('\UConverter')) { + $this->error .= ' You MUST have either the iconv, mbstring or intl (PHP 5.5+) extension installed and enabled.'; + } else { + $missingExtensions = array(); + if (!extension_loaded('iconv')) { + $missingExtensions[] = 'iconv'; + } + if (!extension_loaded('mbstring')) { + $missingExtensions[] = 'mbstring'; + } + if (!class_exists('\UConverter')) { + $missingExtensions[] = 'intl (PHP 5.5+)'; + } + $this->error .= ' Try installing/enabling the ' . implode(' or ', $missingExtensions) . ' extension.'; + } } $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); @@ -1436,7 +1546,10 @@ class SimplePie // Check if the cache has been updated elseif ($cache->mtime() + $this->cache_duration < time()) { - // If we have last-modified and/or etag set + // Want to know if we tried to send last-modified and/or etag headers + // when requesting this file. (Note that it's up to the file to + // support this, but we don't always send the headers either.) + $this->check_modified = true; if (isset($this->data['headers']['last-modified']) || isset($this->data['headers']['etag'])) { $headers = array( @@ -1451,18 +1564,28 @@ class SimplePie $headers['if-none-match'] = $this->data['headers']['etag']; } - $file = $this->registry->create('File', array($this->feed_url, $this->timeout/10, 5, $headers, $this->useragent, $this->force_fsockopen)); + $file = $this->registry->create('File', array($this->feed_url, $this->timeout/10, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options)); if ($file->success) { if ($file->status_code === 304) { + // Set raw_data to false here too, to signify that the cache + // is still valid. 
+ $this->raw_data = false; $cache->touch(); return true; } } else { + $this->check_modified = false; + if($this->force_cache_fallback) + { + $cache->touch(); + return true; + } + unset($file); } } @@ -1493,7 +1616,7 @@ class SimplePie $headers = array( 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', ); - $file = $this->registry->create('File', array($this->feed_url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen)); + $file = $this->registry->create('File', array($this->feed_url, $this->timeout, 5, $headers, $this->useragent, $this->force_fsockopen, $this->curl_options)); } } // If the file connection has an error, set SimplePie::error to that and quit @@ -1510,19 +1633,75 @@ class SimplePie if (!$locate->is_feed($file)) { - // We need to unset this so that if SimplePie::set_file() has been called that object is untouched - unset($file); + $copyStatusCode = $file->status_code; + $copyContentType = $file->headers['content-type']; try { - if (!($file = $locate->find($this->autodiscovery, $this->all_discovered_feeds))) + $microformats = false; + if (function_exists('Mf2\parse')) { + // Check for both h-feed and h-entry, as both a feed with no entries + // and a list of entries without an h-feed wrapper are both valid. + $position = 0; + while ($position = strpos($file->body, 'h-feed', $position)) + { + $start = $position < 200 ? 0 : $position - 200; + $check = substr($file->body, $start, 400); + if ($microformats = preg_match('/class="[^"]*h-feed/', $check)) + { + break; + } + $position += 7; + } + $position = 0; + while ($position = strpos($file->body, 'h-entry', $position)) + { + $start = $position < 200 ? 
0 : $position - 200; + $check = substr($file->body, $start, 400); + if ($microformats = preg_match('/class="[^"]*h-entry/', $check)) + { + break; + } + $position += 7; + } + } + // Now also do feed discovery, but if an h-entry was found don't + // overwrite the current value of file. + $discovered = $locate->find($this->autodiscovery, + $this->all_discovered_feeds); + if ($microformats) { - $this->error = "A feed could not be found at $this->feed_url. A feed with an invalid mime type may fall victim to this error, or " . SIMPLEPIE_NAME . " was unable to auto-discover it.. Use force_feed() if you are certain this URL is a real feed."; - $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); - return false; + if ($hub = $locate->get_rel_link('hub')) + { + $self = $locate->get_rel_link('self'); + $this->store_links($file, $hub, $self); + } + // Push the current file onto all_discovered feeds so the user can + // be shown this as one of the options. + if (isset($this->all_discovered_feeds)) { + $this->all_discovered_feeds[] = $file; + } + } + else + { + if ($discovered) + { + $file = $discovered; + } + else + { + // We need to unset this so that if SimplePie::set_file() has + // been called that object is untouched + unset($file); + $this->error = "A feed could not be found at `$this->feed_url`; the status code is `$copyStatusCode` and content-type is `$copyContentType`"; + $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, __FILE__, __LINE__)); + return false; + } } } catch (SimplePie_Exception $e) { + // We need to unset this so that if SimplePie::set_file() has been called that object is untouched + unset($file); // This is usually because DOMDocument doesn't exist $this->error = $e->getMessage(); $this->registry->call('Misc', 'error', array($this->error, E_USER_NOTICE, $e->getFile(), $e->getLine())); @@ -1543,7 +1722,7 @@ class SimplePie } $this->raw_data = $file->body; - + $this->permanent_url = 
$file->permanent_url; $headers = $file->headers; $sniffer = $this->registry->create('Content_Type_Sniffer', array(&$file)); $sniffed = $sniffer->get_type(); @@ -1729,26 +1908,44 @@ class SimplePie /** * Get the URL for the feed + * + * When the 'permanent' mode is enabled, returns the original feed URL, + * except in the case of an `HTTP 301 Moved Permanently` status response, + * in which case the location of the first redirection is returned. * - * May or may not be different from the URL passed to {@see set_feed_url()}, + * When the 'permanent' mode is disabled (default), + * may or may not be different from the URL passed to {@see set_feed_url()}, * depending on whether auto-discovery was used. * * @since Preview Release (previously called `get_feed_url()` since SimplePie 0.8.) - * @todo If we have a perm redirect we should return the new URL - * @todo When we make the above change, let's support <itunes:new-feed-url> as well + * @todo Support <itunes:new-feed-url> * @todo Also, |atom:link|@rel=self + * @param bool $permanent Permanent mode to return only the original URL or the first redirection + * iff it is a 301 redirection * @return string|null */ - public function subscribe_url() + public function subscribe_url($permanent = false) { - if ($this->feed_url !== null) + if ($permanent) { - return $this->sanitize($this->feed_url, SIMPLEPIE_CONSTRUCT_IRI); + if ($this->permanent_url !== null) + { + // sanitize encodes ampersands which are required when used in a url.
+ return str_replace('&amp;', '&', + $this->sanitize($this->permanent_url, + SIMPLEPIE_CONSTRUCT_IRI)); + } } else { - return null; + if ($this->feed_url !== null) + { + return str_replace('&amp;', '&', + $this->sanitize($this->feed_url, + SIMPLEPIE_CONSTRUCT_IRI)); + } } + return null; } /** @@ -1963,7 +2160,21 @@ class SimplePie */ public function sanitize($data, $type, $base = '') { - return $this->sanitize->sanitize($data, $type, $base); + try + { + return $this->sanitize->sanitize($data, $type, $base); + } + catch (SimplePie_Exception $e) + { + if (!$this->enable_exceptions) + { + $this->error = $e->getMessage(); + $this->registry->call('Misc', 'error', array($this->error, E_USER_WARNING, $e->getFile(), $e->getLine())); + return ''; + } + + throw $e; + } } /** @@ -2014,7 +2225,7 @@ class SimplePie * Get a category for the feed * * @since Unknown - * @param int $key The category that you want to return. Remember that arrays begin with 0, not 1 + * @param int $key The category that you want to return. Remember that arrays begin with 0, not 1 * @return SimplePie_Category|null */ public function get_category($key = 0) @@ -2099,7 +2310,7 @@ class SimplePie * Get an author for the feed * * @since 1.1 - * @param int $key The author that you want to return. Remember that arrays begin with 0, not 1 + * @param int $key The author that you want to return. Remember that arrays begin with 0, not 1 * @return SimplePie_Author|null */ public function get_author($key = 0) @@ -2197,7 +2408,7 @@ class SimplePie * Get a contributor for the feed * * @since 1.1 - * @param int $key The contrbutor that you want to return. Remember that arrays begin with 0, not 1 + * @param int $key The contrbutor that you want to return.
Remember that arrays begin with 0, not 1 * @return SimplePie_Author|null */ public function get_contributor($key = 0) @@ -2283,7 +2494,7 @@ class SimplePie * Get a single link for the feed * * @since 1.0 (previously called `get_feed_link` since Preview Release, `get_feed_permalink()` since 0.8) - * @param int $key The link that you want to return. Remember that arrays begin with 0, not 1 + * @param int $key The link that you want to return. Remember that arrays begin with 0, not 1 * @param string $rel The relationship of the link to return * @return string|null Link URL */ @@ -2393,6 +2604,12 @@ class SimplePie { return $this->data['links'][$rel]; } + else if (isset($this->data['headers']['link']) && + preg_match('/<([^>]+)>; rel='.preg_quote($rel).'/', + $this->data['headers']['link'], $match)) + { + return array($match[1]); + } else { return null; @@ -2794,7 +3011,7 @@ class SimplePie * * @see get_item_quantity() * @since Beta 2 - * @param int $key The item that you want to return. Remember that arrays begin with 0, not 1 + * @param int $key The item that you want to return. Remember that arrays begin with 0, not 1 * @return SimplePie_Item|null */ public function get_item($key = 0) @@ -2821,7 +3038,7 @@ class SimplePie * @since Beta 2 * @param int $start Index to start at * @param int $end Number of items to return. 
0 for all items after `$start` - * @return array|null List of {@see SimplePie_Item} objects + * @return SimplePie_Item[]|null List of {@see SimplePie_Item} objects */ public function get_items($start = 0, $end = 0) { @@ -2830,96 +3047,81 @@ class SimplePie if (!empty($this->multifeed_objects)) { $this->data['items'] = SimplePie::merge_items($this->multifeed_objects, $start, $end, $this->item_limit); + if (empty($this->data['items'])) + { + return array(); + } + return $this->data['items']; } - else + $this->data['items'] = array(); + if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'entry')) { - $this->data['items'] = array(); - if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'entry')) + $keys = array_keys($items); + foreach ($keys as $key) { - $keys = array_keys($items); - foreach ($keys as $key) - { - $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); - } + $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); } - if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'entry')) + } + if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'entry')) + { + $keys = array_keys($items); + foreach ($keys as $key) { - $keys = array_keys($items); - foreach ($keys as $key) - { - $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); - } + $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); } - if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'item')) + } + if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'item')) + { + $keys = array_keys($items); + foreach ($keys as $key) { - $keys = array_keys($items); - foreach ($keys as $key) - { - $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); - } + $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); } - if ($items = 
$this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'item')) + } + if ($items = $this->get_feed_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'item')) + { + $keys = array_keys($items); + foreach ($keys as $key) { - $keys = array_keys($items); - foreach ($keys as $key) - { - $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); - } + $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); } - if ($items = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'item')) + } + if ($items = $this->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'item')) + { + $keys = array_keys($items); + foreach ($keys as $key) { - $keys = array_keys($items); - foreach ($keys as $key) - { - $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); - } + $this->data['items'][] = $this->registry->create('Item', array($this, $items[$key])); } } } - if (!empty($this->data['items'])) + if (empty($this->data['items'])) { - // If we want to order it by date, check if all items have a date, and then sort it - if ($this->order_by_date && empty($this->multifeed_objects)) - { - if (!isset($this->data['ordered_items'])) - { - $do_sort = true; - foreach ($this->data['items'] as $item) - { - if (!$item->get_date('U')) - { - $do_sort = false; - break; - } - } - $item = null; - $this->data['ordered_items'] = $this->data['items']; - if ($do_sort) - { - usort($this->data['ordered_items'], array(get_class($this), 'sort_items')); - } - } - $items = $this->data['ordered_items']; - } - else - { - $items = $this->data['items']; - } + return array(); + } - // Slice the data as desired - if ($end === 0) + if ($this->order_by_date) + { + if (!isset($this->data['ordered_items'])) { - return array_slice($items, $start); - } - else - { - return array_slice($items, $start, $end); - } + $this->data['ordered_items'] = $this->data['items']; + usort($this->data['ordered_items'], array(get_class($this), 'sort_items')); + } + $items = 
$this->data['ordered_items']; } else { - return array(); + $items = $this->data['items']; + } + // Slice the data as desired + if ($end === 0) + { + return array_slice($items, $start); + } + else + { + return array_slice($items, $start, $end); } } @@ -2992,7 +3194,19 @@ class SimplePie */ public static function sort_items($a, $b) { - return $a->get_date('U') <= $b->get_date('U'); + $a_date = $a->get_date('U'); + $b_date = $b->get_date('U'); + if ($a_date && $b_date) { + return $a_date > $b_date ? -1 : 1; + } + // Sort items without dates to the top. + if ($a_date) { + return 1; + } + if ($b_date) { + return -1; + } + return 0; } /** @@ -3025,20 +3239,7 @@ class SimplePie } } - $do_sort = true; - foreach ($items as $item) - { - if (!$item->get_date('U')) - { - $do_sort = false; - break; - } - } - $item = null; - if ($do_sort) - { - usort($items, array(get_class($urls[0]), 'sort_items')); - } + usort($items, array(get_class($urls[0]), 'sort_items')); if ($end === 0) { @@ -3055,4 +3256,42 @@ class SimplePie return array(); } } + + /** + * Store PubSubHubbub links as headers + * + * There is no way to find PuSH links in the body of a microformats feed, + * so they are added to the headers when found, to be used later by get_links. 
+ * @param SimplePie_File $file + * @param string $hub + * @param string $self + */ + private function store_links(&$file, $hub, $self) { + if (isset($file->headers['link']['hub']) || + (isset($file->headers['link']) && + preg_match('/rel=hub/', $file->headers['link']))) + { + return; + } + + if ($hub) + { + if (isset($file->headers['link'])) + { + if ($file->headers['link'] !== '') + { + $file->headers['link'] .= ', '; + } + } + else + { + $file->headers['link'] = ''; + } + $file->headers['link'] .= '<'.$hub.'>; rel=hub'; + if ($self) + { + $file->headers['link'] .= ', <'.$self.'>; rel=self'; + } + } + } } diff --git a/libraries/simplepie/library/SimplePie/Author.php b/libraries/simplepie/library/SimplePie/Author.php index bbf3812..e6768ff 100644 --- a/libraries/simplepie/library/SimplePie/Author.php +++ b/libraries/simplepie/library/SimplePie/Author.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution.
* - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -62,8 +61,10 @@ class SimplePie_Cache * @var array */ protected static $handlers = array( - 'mysql' => 'SimplePie_Cache_MySQL', - 'memcache' => 'SimplePie_Cache_Memcache', + 'mysql' => 'SimplePie_Cache_MySQL', + 'memcache' => 'SimplePie_Cache_Memcache', + 'memcached' => 'SimplePie_Cache_Memcached', + 'redis' => 'SimplePie_Cache_Redis' ); /** diff --git a/libraries/simplepie/library/SimplePie/Cache/Base.php b/libraries/simplepie/library/SimplePie/Cache/Base.php index 937e346..333fb05 100644 --- a/libraries/simplepie/library/SimplePie/Cache/Base.php +++ b/libraries/simplepie/library/SimplePie/Cache/Base.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
* * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/Cache/DB.php b/libraries/simplepie/library/SimplePie/Cache/DB.php index ac509ae..7e8f775 100644 --- a/libraries/simplepie/library/SimplePie/Cache/DB.php +++ b/libraries/simplepie/library/SimplePie/Cache/DB.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/Cache/File.php b/libraries/simplepie/library/SimplePie/Cache/File.php index 5797b3a..6ba6c5f 100644 --- a/libraries/simplepie/library/SimplePie/Cache/File.php +++ b/libraries/simplepie/library/SimplePie/Cache/File.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
* * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -136,11 +135,7 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base */ public function mtime() { - if (file_exists($this->name)) - { - return filemtime($this->name); - } - return false; + return @filemtime($this->name); } /** @@ -150,11 +145,7 @@ class SimplePie_Cache_File implements SimplePie_Cache_Base */ public function touch() { - if (file_exists($this->name)) - { - return touch($this->name); - } - return false; + return @touch($this->name); } /** diff --git a/libraries/simplepie/library/SimplePie/Cache/Memcache.php b/libraries/simplepie/library/SimplePie/Cache/Memcache.php index fd44780..5190eef 100644 --- a/libraries/simplepie/library/SimplePie/Cache/Memcache.php +++ b/libraries/simplepie/library/SimplePie/Cache/Memcache.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -95,10 +94,8 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base 'prefix' => 'simplepie_', ), ); - $parsed = SimplePie_Cache::parse_URL($location); - $this->options['host'] = empty($parsed['host']) ? 
$this->options['host'] : $parsed['host']; - $this->options['port'] = empty($parsed['port']) ? $this->options['port'] : $parsed['port']; - $this->options['extras'] = array_merge($this->options['extras'], $parsed['extras']); + $this->options = SimplePie_Misc::array_merge_recursive($this->options, SimplePie_Cache::parse_URL($location)); + $this->name = $this->options['extras']['prefix'] . md5("$name:$type"); $this->cache = new Memcache(); @@ -147,7 +144,7 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base if ($data !== false) { - // essentially ignore the mtime because Memcache expires on it's own + // essentially ignore the mtime because Memcache expires on its own return time(); } @@ -165,7 +162,7 @@ class SimplePie_Cache_Memcache implements SimplePie_Cache_Base if ($data !== false) { - return $this->cache->set($this->name, $data, MEMCACHE_COMPRESSED, (int) $this->duration); + return $this->cache->set($this->name, $data, MEMCACHE_COMPRESSED, (int) $this->options['extras']['timeout']); } return false; diff --git a/libraries/simplepie/library/SimplePie/Cache/Memcached.php b/libraries/simplepie/library/SimplePie/Cache/Memcached.php new file mode 100644 index 0000000..1f73b38 --- /dev/null +++ b/libraries/simplepie/library/SimplePie/Cache/Memcached.php @@ -0,0 +1,166 @@ +options = array( + 'host' => '127.0.0.1', + 'port' => 11211, + 'extras' => array( + 'timeout' => 3600, // one hour + 'prefix' => 'simplepie_', + ), + ); + $this->options = SimplePie_Misc::array_merge_recursive($this->options, SimplePie_Cache::parse_URL($location)); + + $this->name = $this->options['extras']['prefix'] . md5("$name:$type"); + + $this->cache = new Memcached(); + $this->cache->addServer($this->options['host'], (int)$this->options['port']); + } + + /** + * Save data to the cache + * @param array|SimplePie $data Data to store in the cache. 
If passed a SimplePie object, only cache the $data property + * @return bool Successfulness + */ + public function save($data) { + if ($data instanceof SimplePie) { + $data = $data->data; + } + + return $this->setData(serialize($data)); + } + + /** + * Retrieve the data saved to the cache + * @return array Data for SimplePie::$data + */ + public function load() { + $data = $this->cache->get($this->name); + + if ($data !== false) { + return unserialize($data); + } + return false; + } + + /** + * Retrieve the last modified time for the cache + * @return int Timestamp + */ + public function mtime() { + $data = $this->cache->get($this->name . '_mtime'); + return (int) $data; + } + + /** + * Set the last modified time to the current time + * @return bool Success status + */ + public function touch() { + $data = $this->cache->get($this->name); + return $this->setData($data); + } + + /** + * Remove the cache + * @return bool Success status + */ + public function unlink() { + return $this->cache->delete($this->name, 0); + } + + /** + * Set the last modified time and data to Memcached + * @return bool Success status + */ + private function setData($data) { + + if ($data !== false) { + $this->cache->set($this->name . '_mtime', time(), (int)$this->options['extras']['timeout']); + return $this->cache->set($this->name, $data, (int)$this->options['extras']['timeout']); + } + + return false; + } +} diff --git a/libraries/simplepie/library/SimplePie/Cache/MySQL.php b/libraries/simplepie/library/SimplePie/Cache/MySQL.php index d53ebc1..8686b6c 100644 --- a/libraries/simplepie/library/SimplePie/Cache/MySQL.php +++ b/libraries/simplepie/library/SimplePie/Cache/MySQL.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. 
* - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -94,9 +93,11 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB 'path' => '', 'extras' => array( 'prefix' => '', + 'cache_purge_time' => 2592000 ), ); - $this->options = array_merge_recursive($this->options, SimplePie_Cache::parse_URL($location)); + + $this->options = SimplePie_Misc::array_merge_recursive($this->options, SimplePie_Cache::parse_URL($location)); // Path is prefixed with a "/" $this->options['dbname'] = substr($this->options['path'], 1); @@ -130,16 +131,20 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB $query = $this->mysql->exec('CREATE TABLE `' . $this->options['extras']['prefix'] . 'cache_data` (`id` TEXT CHARACTER SET utf8 NOT NULL, `items` SMALLINT NOT NULL DEFAULT 0, `data` BLOB NOT NULL, `mtime` INT UNSIGNED NOT NULL, UNIQUE (`id`(125)))'); if ($query === false) { + trigger_error("Can't create " . $this->options['extras']['prefix'] . "cache_data table, check permissions", E_USER_WARNING); $this->mysql = null; + return; } } if (!in_array($this->options['extras']['prefix'] . 'items', $db)) { - $query = $this->mysql->exec('CREATE TABLE `' . $this->options['extras']['prefix'] . 'items` (`feed_id` TEXT CHARACTER SET utf8 NOT NULL, `id` TEXT CHARACTER SET utf8 NOT NULL, `data` TEXT CHARACTER SET utf8 NOT NULL, `posted` INT UNSIGNED NOT NULL, INDEX `feed_id` (`feed_id`(125)))'); + $query = $this->mysql->exec('CREATE TABLE `' . 
$this->options['extras']['prefix'] . 'items` (`feed_id` TEXT CHARACTER SET utf8 NOT NULL, `id` TEXT CHARACTER SET utf8 NOT NULL, `data` MEDIUMBLOB NOT NULL, `posted` INT UNSIGNED NOT NULL, INDEX `feed_id` (`feed_id`(125)))'); if ($query === false) { + trigger_error("Can't create " . $this->options['extras']['prefix'] . "items table, check permissions", E_USER_WARNING); $this->mysql = null; + return; } } } @@ -157,6 +162,17 @@ class SimplePie_Cache_MySQL extends SimplePie_Cache_DB return false; } + $query = $this->mysql->prepare('DELETE i, cd FROM `' . $this->options['extras']['prefix'] . 'cache_data` cd, ' . + '`' . $this->options['extras']['prefix'] . 'items` i ' . + 'WHERE cd.id = i.feed_id ' . + 'AND cd.mtime < (unix_timestamp() - :purge_time)'); + $query->bindValue(':purge_time', $this->options['extras']['cache_purge_time']); + + if (!$query->execute()) + { + return false; + } + if ($data instanceof SimplePie) { $data = clone $data; diff --git a/libraries/simplepie/library/SimplePie/Cache/Redis.php b/libraries/simplepie/library/SimplePie/Cache/Redis.php new file mode 100644 index 0000000..04d72c7 --- /dev/null +++ b/libraries/simplepie/library/SimplePie/Cache/Redis.php @@ -0,0 +1,166 @@ + + * @link http://galvani.cz/ + * @license http://www.opensource.org/licenses/bsd-license.php BSD License + * @version 0.2.9 + */ + + +/** + * Caches data to redis + * + * Registered for URLs with the "redis" protocol + * + * For example, `redis://localhost:6379/?timeout=3600&prefix=sp_&dbIndex=0` will + * connect to redis on `localhost` on port 6379. 
All tables will be + * prefixed with `simple_primary-` and data will expire after 3600 seconds + * + * @package SimplePie + * @subpackage Caching + * @uses Redis + */ +class SimplePie_Cache_Redis implements SimplePie_Cache_Base { + /** + * Redis instance + * + * @var \Redis + */ + protected $cache; + + /** + * Options + * + * @var array + */ + protected $options; + + /** + * Cache name + * + * @var string + */ + protected $name; + + /** + * Cache Data + * + * @var type + */ + protected $data; + + /** + * Create a new cache object + * + * @param string $location Location string (from SimplePie::$cache_location) + * @param string $name Unique ID for the cache + * @param string $type Either TYPE_FEED for SimplePie data, or TYPE_IMAGE for image data + */ + public function __construct($location, $name, $options = null) { + //$this->cache = \flow\simple\cache\Redis::getRedisClientInstance(); + $parsed = SimplePie_Cache::parse_URL($location); + $redis = new Redis(); + $redis->connect($parsed['host'], $parsed['port']); + $this->cache = $redis; + + if (!is_null($options) && is_array($options)) { + $this->options = $options; + } else { + $this->options = array ( + 'prefix' => 'rss:simple_primary:', + 'expire' => 0, + ); + } + + $this->name = $this->options['prefix'] . $name; + } + + /** + * @param \Redis $cache + */ + public function setRedisClient(\Redis $cache) { + $this->cache = $cache; + } + + /** + * Save data to the cache + * + * @param array|SimplePie $data Data to store in the cache. 
If passed a SimplePie object, only cache the $data property + * @return bool Successfulness + */ + public function save($data) { + if ($data instanceof SimplePie) { + $data = $data->data; + } + $response = $this->cache->set($this->name, serialize($data)); + if ($this->options['expire']) { + $this->cache->expire($this->name, $this->options['expire']); + } + + return $response; + } + + /** + * Retrieve the data saved to the cache + * + * @return array Data for SimplePie::$data + */ + public function load() { + $data = $this->cache->get($this->name); + + if ($data !== false) { + return unserialize($data); + } + return false; + } + + /** + * Retrieve the last modified time for the cache + * + * @return int Timestamp + */ + public function mtime() { + + $data = $this->cache->get($this->name); + + if ($data !== false) { + return time(); + } + + return false; + } + + /** + * Set the last modified time to the current time + * + * @return bool Success status + */ + public function touch() { + + $data = $this->cache->get($this->name); + + if ($data !== false) { + $return = $this->cache->set($this->name, $data); + if ($this->options['expire']) { + return $this->cache->expire($this->name, $this->ttl); + } + return $return; + } + + return false; + } + + /** + * Remove the cache + * + * @return bool Success status + */ + public function unlink() { + return $this->cache->set($this->name, null); + } + +} diff --git a/libraries/simplepie/library/SimplePie/Caption.php b/libraries/simplepie/library/SimplePie/Caption.php index 52922c5..abf07de 100644 --- a/libraries/simplepie/library/SimplePie/Caption.php +++ b/libraries/simplepie/library/SimplePie/Caption.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/Category.php b/libraries/simplepie/library/SimplePie/Category.php index ad0407b..92d511e 100644 --- a/libraries/simplepie/library/SimplePie/Category.php +++ b/libraries/simplepie/library/SimplePie/Category.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/Content/Type/Sniffer.php b/libraries/simplepie/library/SimplePie/Content/Type/Sniffer.php index 20d053d..ff35de6 100644 --- a/libraries/simplepie/library/SimplePie/Content/Type/Sniffer.php +++ b/libraries/simplepie/library/SimplePie/Content/Type/Sniffer.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -256,7 +255,7 @@ class SimplePie_Content_Type_Sniffer public function feed_or_html() { $len = strlen($this->file->body); - $pos = strspn($this->file->body, "\x09\x0A\x0D\x20"); + $pos = strspn($this->file->body, "\x09\x0A\x0D\x20\xEF\xBB\xBF"); while ($pos < $len) { diff --git a/libraries/simplepie/library/SimplePie/Copyright.php b/libraries/simplepie/library/SimplePie/Copyright.php index 57c535a..3f3d07d 100644 --- a/libraries/simplepie/library/SimplePie/Copyright.php +++ b/libraries/simplepie/library/SimplePie/Copyright.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/Core.php b/libraries/simplepie/library/SimplePie/Core.php index 46d9966..c856ba3 100644 --- a/libraries/simplepie/library/SimplePie/Core.php +++ b/libraries/simplepie/library/SimplePie/Core.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. 
* Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/Credit.php b/libraries/simplepie/library/SimplePie/Credit.php index d3a3442..9bad9ef 100644 --- a/libraries/simplepie/library/SimplePie/Credit.php +++ b/libraries/simplepie/library/SimplePie/Credit.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/Decode/HTML/Entities.php b/libraries/simplepie/library/SimplePie/Decode/HTML/Entities.php index 069e8d8..de3f2cb 100644 --- a/libraries/simplepie/library/SimplePie/Decode/HTML/Entities.php +++ b/libraries/simplepie/library/SimplePie/Decode/HTML/Entities.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. 
* Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -169,7 +168,6 @@ class SimplePie_Decode_HTML_Entities case "\x09": case "\x0A": case "\x0B": - case "\x0B": case "\x0C": case "\x20": case "\x3C": diff --git a/libraries/simplepie/library/SimplePie/Enclosure.php b/libraries/simplepie/library/SimplePie/Enclosure.php index 5567437..15060e1 100644 --- a/libraries/simplepie/library/SimplePie/Enclosure.php +++ b/libraries/simplepie/library/SimplePie/Enclosure.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
* * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -451,7 +450,7 @@ class SimplePie_Enclosure /** * Get the duration of the enclosure * - * @param string $convert Convert seconds into hh:mm:ss + * @param bool $convert Convert seconds into hh:mm:ss * @return string|int|null 'hh:mm:ss' string if `$convert` was specified, otherwise integer (or null if none found) */ public function get_duration($convert = false) @@ -942,7 +941,7 @@ class SimplePie_Enclosure * - `height` (integer): The height of the embedded media. Accepts any * numeric pixel value (such as `360`) or `auto`. Defaults to `auto`, * and it is recommended that you use this default. - * - `loop` (boolean): Do you want the media to loop when its done? + * - `loop` (boolean): Do you want the media to loop when it's done? * Defaults to `false`. * - `mediaplayer` (string): The location of the included * `mediaplayer.swf` file. This allows for the playback of Flash Video diff --git a/libraries/simplepie/library/SimplePie/Exception.php b/libraries/simplepie/library/SimplePie/Exception.php index 73e104d..53c015e 100644 --- a/libraries/simplepie/library/SimplePie/Exception.php +++ b/libraries/simplepie/library/SimplePie/Exception.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
* * @package SimplePie - * @version 1.4-dev - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/File.php b/libraries/simplepie/library/SimplePie/File.php index b7d1a2a..e670e05 100644 --- a/libraries/simplepie/library/SimplePie/File.php +++ b/libraries/simplepie/library/SimplePie/File.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -64,8 +63,9 @@ class SimplePie_File var $redirects = 0; var $error; var $method = SIMPLEPIE_FILE_SOURCE_NONE; + var $permanent_url; - public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false) + public function __construct($url, $timeout = 10, $redirects = 5, $headers = null, $useragent = null, $force_fsockopen = false, $curl_options = array()) { if (class_exists('idna_convert')) { @@ -74,6 +74,7 @@ class SimplePie_File $url = SimplePie_Misc::compress_parse_url($parsed['scheme'], $idn->encode($parsed['authority']), $parsed['path'], $parsed['query'], $parsed['fragment']); } $this->url = $url; + $this->permanent_url = $url; $this->useragent = $useragent; if (preg_match('/^http(s)?:\/\//i', $url)) { @@ -102,6 +103,7 
@@ class SimplePie_File curl_setopt($fp, CURLOPT_URL, $url); curl_setopt($fp, CURLOPT_HEADER, 1); curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1); + curl_setopt($fp, CURLOPT_FAILONERROR, 1); curl_setopt($fp, CURLOPT_TIMEOUT, $timeout); curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout); curl_setopt($fp, CURLOPT_REFERER, $url); @@ -112,6 +114,9 @@ class SimplePie_File curl_setopt($fp, CURLOPT_FOLLOWLOCATION, 1); curl_setopt($fp, CURLOPT_MAXREDIRS, $redirects); } + foreach ($curl_options as $curl_param => $curl_value) { + curl_setopt($fp, $curl_param, $curl_value); + } $this->headers = curl_exec($fp); if (curl_errno($fp) === 23 || curl_errno($fp) === 61) @@ -126,7 +131,10 @@ class SimplePie_File } else { - $info = curl_getinfo($fp); + // Use the updated url provided by curl_getinfo after any redirects. + if ($info = curl_getinfo($fp)) { + $this->url = $info['url']; + } curl_close($fp); $this->headers = explode("\r\n\r\n", $this->headers, $info['redirect_count'] + 1); $this->headers = array_pop($this->headers); @@ -134,13 +142,16 @@ class SimplePie_File if ($parser->parse()) { $this->headers = $parser->headers; - $this->body = $parser->body; + $this->body = trim($parser->body); $this->status_code = $parser->status_code; if ((in_array($this->status_code, array(300, 301, 302, 303, 307)) || $this->status_code > 307 && $this->status_code < 400) && isset($this->headers['location']) && $this->redirects < $redirects) { $this->redirects++; $location = SimplePie_Misc::absolutize_url($this->headers['location'], $url); - return $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen); + $previousStatusCode = $this->status_code; + $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen); + $this->permanent_url = ($previousStatusCode == 301) ? 
$location : $url; + return; } } } @@ -222,7 +233,10 @@ class SimplePie_File { $this->redirects++; $location = SimplePie_Misc::absolutize_url($this->headers['location'], $url); - return $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen); + $previousStatusCode = $this->status_code; + $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen); + $this->permanent_url = ($previousStatusCode == 301) ? $location : $url; + return; } if (isset($this->headers['content-encoding'])) { @@ -239,7 +253,7 @@ class SimplePie_File } else { - $this->body = $decoder->data; + $this->body = trim($decoder->data); } break; @@ -282,7 +296,7 @@ class SimplePie_File else { $this->method = SIMPLEPIE_FILE_SOURCE_LOCAL | SIMPLEPIE_FILE_SOURCE_FILE_GET_CONTENTS; - if (!$this->body = file_get_contents($url)) + if (empty($url) || !($this->body = trim(file_get_contents($url)))) { $this->error = 'file_get_contents could not read the file'; $this->success = false; diff --git a/libraries/simplepie/library/SimplePie/HTTP/Parser.php b/libraries/simplepie/library/SimplePie/HTTP/Parser.php index bff2222..63ae1e0 100644 --- a/libraries/simplepie/library/SimplePie/HTTP/Parser.php +++ b/libraries/simplepie/library/SimplePie/HTTP/Parser.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
* * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/IRI.php b/libraries/simplepie/library/SimplePie/IRI.php index d3198c0..2b3fbaf 100644 --- a/libraries/simplepie/library/SimplePie/IRI.php +++ b/libraries/simplepie/library/SimplePie/IRI.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -259,6 +258,15 @@ class SimplePie_IRI $this->set_iri($iri); } + /** + * Clean up + */ + public function __destruct() { + $this->set_iri(null, true); + $this->set_path(null, true); + $this->set_authority(null, true); + } + /** * Create a new IRI object by resolving a relative IRI * @@ -768,24 +776,20 @@ class SimplePie_IRI */ public function is_valid() { - $isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null; - if ($this->ipath !== '' && - ( - $isauthority && ( - $this->ipath[0] !== '/' || - substr($this->ipath, 0, 2) === '//' - ) || - ( - $this->scheme === null && - !$isauthority && - strpos($this->ipath, ':') !== false && - (strpos($this->ipath, '/') === false ? 
true : strpos($this->ipath, ':') < strpos($this->ipath, '/')) - ) - ) - ) - { - return false; - } + if ($this->ipath === '') return true; + + $isauthority = $this->iuserinfo !== null || $this->ihost !== null || + $this->port !== null; + if ($isauthority && $this->ipath[0] === '/') return true; + + if (!$isauthority && (substr($this->ipath, 0, 2) === '//')) return false; + + // Relative urls cannot have a colon in the first path segment (and the + // slashes themselves are not included so skip the first character). + if (!$this->scheme && !$isauthority && + strpos($this->ipath, ':') !== false && + strpos($this->ipath, '/', 1) !== false && + strpos($this->ipath, ':') < strpos($this->ipath, '/', 1)) return false; return true; } @@ -797,9 +801,14 @@ class SimplePie_IRI * @param string $iri * @return bool */ - public function set_iri($iri) + public function set_iri($iri, $clear_cache = false) { static $cache; + if ($clear_cache) + { + $cache = null; + return; + } if (!$cache) { $cache = array(); @@ -879,9 +888,14 @@ class SimplePie_IRI * @param string $authority * @return bool */ - public function set_authority($authority) + public function set_authority($authority, $clear_cache = false) { static $cache; + if ($clear_cache) + { + $cache = null; + return; + } if (!$cache) $cache = array(); @@ -1049,9 +1063,14 @@ class SimplePie_IRI * @param string $ipath * @return bool */ - public function set_path($ipath) + public function set_path($ipath, $clear_cache = false) { static $cache; + if ($clear_cache) + { + $cache = null; + return; + } if (!$cache) { $cache = array(); diff --git a/libraries/simplepie/library/SimplePie/Item.php b/libraries/simplepie/library/SimplePie/Item.php index a77574b..3979b8f 100644 --- a/libraries/simplepie/library/SimplePie/Item.php +++ b/libraries/simplepie/library/SimplePie/Item.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. 
* - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -203,14 +202,13 @@ class SimplePie_Item * * Uses ``, ``, `` or the `about` attribute * for RDF. If none of these are supplied (or `$hash` is true), creates an - * MD5 hash based on the permalink and title. If either of those are not - * supplied, creates a hash based on the full feed data. + * MD5 hash based on the permalink, title and content. * * @since Beta 2 * @param boolean $hash Should we force using a hash instead of the supplied ID? * @return string */ - public function get_id($hash = false) + public function get_id($hash = false, $fn = '') { if (!$hash) { @@ -238,23 +236,10 @@ class SimplePie_Item { return $this->sanitize($this->data['attribs'][SIMPLEPIE_NAMESPACE_RDF]['about'], SIMPLEPIE_CONSTRUCT_TEXT); } - elseif (($return = $this->get_permalink()) !== null) - { - return $return; - } - elseif (($return = $this->get_title()) !== null) - { - return $return; - } - } - if ($this->get_permalink() !== null || $this->get_title() !== null) - { - return md5($this->get_permalink() . 
$this->get_title()); - } - else - { - return md5(serialize($this->data)); } + if ($fn === '' || !is_callable($fn)) $fn = 'md5'; + return call_user_func($fn, + $this->get_permalink().$this->get_title().$this->get_content()); } /** @@ -322,41 +307,50 @@ class SimplePie_Item */ public function get_description($description_only = false) { - if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'summary')) + if (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'summary')) && + ($return = $this->sanitize($tags[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($tags[0]['attribs'])), $this->get_base($tags[0])))) { - return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); + return $return; } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'summary')) + elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'summary')) && + ($return = $this->sanitize($tags[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($tags[0]['attribs'])), $this->get_base($tags[0])))) { - return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); + return $return; } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description')) + elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10, 'description')) && + ($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($tags[0])))) { - return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_MAYBE_HTML, $this->get_base($return[0])); + return $return; } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'description')) + elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'description')) && + ($return = $this->sanitize($tags[0]['data'], 
SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($tags[0])))) { - return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($return[0])); + return $return; } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'description')) + elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'description')) && + ($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT))) { - return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); + return $return; } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'description')) + elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'description')) && + ($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT))) { - return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); + return $return; } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'summary')) + elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'summary')) && + ($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($tags[0])))) { - return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($return[0])); + return $return; } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'subtitle')) + elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'subtitle')) && + ($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT))) { - return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); + return $return; } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'description')) + elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_090, 'description')) && + ($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_HTML))) { - return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML); + return $return; } elseif (!$description_only) @@ -385,17 +379,20 @@ class SimplePie_Item */ 
public function get_content($content_only = false) { - if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'content')) + if (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'content')) && + ($return = $this->sanitize($tags[0]['data'], $this->registry->call('Misc', 'atom_10_content_construct_type', array($tags[0]['attribs'])), $this->get_base($tags[0])))) { - return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_10_content_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); + return $return; } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'content')) + elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_03, 'content')) && + ($return = $this->sanitize($tags[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($tags[0]['attribs'])), $this->get_base($tags[0])))) { - return $this->sanitize($return[0]['data'], $this->registry->call('Misc', 'atom_03_construct_type', array($return[0]['attribs'])), $this->get_base($return[0])); + return $return; } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10_MODULES_CONTENT, 'encoded')) + elseif (($tags = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_10_MODULES_CONTENT, 'encoded')) && + ($return = $this->sanitize($tags[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($tags[0])))) { - return $this->sanitize($return[0]['data'], SIMPLEPIE_CONSTRUCT_HTML, $this->get_base($return[0])); + return $return; } elseif (!$content_only) { @@ -406,6 +403,30 @@ class SimplePie_Item return null; } } + + /** + * Get the media:thumbnail of the item + * + * Uses `` + * + * + * @return array|null + */ + public function get_thumbnail() + { + if (!isset($this->data['thumbnail'])) + { + if ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_MEDIARSS, 'thumbnail')) + { + $this->data['thumbnail'] = $return[0]['attribs']['']; + } + else + { + $this->data['thumbnail'] = null; + } + } + return 
$this->data['thumbnail']; + } /** * Get a category for the item @@ -433,7 +454,7 @@ class SimplePie_Item * Uses ``, `` or `` * * @since Beta 3 - * @return array|null List of {@see SimplePie_Category} objects + * @return SimplePie_Category[]|null List of {@see SimplePie_Category} objects */ public function get_categories() { @@ -446,15 +467,15 @@ class SimplePie_Item $label = null; if (isset($category['attribs']['']['term'])) { - $term = $this->sanitize($category['attribs']['']['term'], SIMPLEPIE_CONSTRUCT_TEXT); + $term = $this->sanitize($category['attribs']['']['term'], SIMPLEPIE_CONSTRUCT_HTML); } if (isset($category['attribs']['']['scheme'])) { - $scheme = $this->sanitize($category['attribs']['']['scheme'], SIMPLEPIE_CONSTRUCT_TEXT); + $scheme = $this->sanitize($category['attribs']['']['scheme'], SIMPLEPIE_CONSTRUCT_HTML); } if (isset($category['attribs']['']['label'])) { - $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_TEXT); + $label = $this->sanitize($category['attribs']['']['label'], SIMPLEPIE_CONSTRUCT_HTML); } $categories[] = $this->registry->create('Category', array($term, $scheme, $label)); } @@ -462,10 +483,10 @@ class SimplePie_Item { // This is really the label, but keep this as the term also for BC. // Label will also work on retrieving because that falls back to term. 
- $term = $this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT); + $term = $this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_HTML); if (isset($category['attribs']['']['domain'])) { - $scheme = $this->sanitize($category['attribs']['']['domain'], SIMPLEPIE_CONSTRUCT_TEXT); + $scheme = $this->sanitize($category['attribs']['']['domain'], SIMPLEPIE_CONSTRUCT_HTML); } else { @@ -475,11 +496,11 @@ class SimplePie_Item } foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'subject') as $category) { - $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); + $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_HTML), null, null)); } foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'subject') as $category) { - $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); + $categories[] = $this->registry->create('Category', array($this->sanitize($category['data'], SIMPLEPIE_CONSTRUCT_HTML), null, null)); } if (!empty($categories)) @@ -616,7 +637,7 @@ class SimplePie_Item $email = null; if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'])) { - $name = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); + $name = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_HTML); } if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['uri'][0]['data'])) { @@ -624,7 +645,7 @@ class SimplePie_Item } if (isset($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'])) { - $email = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); + $email = $this->sanitize($author['child'][SIMPLEPIE_NAMESPACE_ATOM_10]['email'][0]['data'], 
SIMPLEPIE_CONSTRUCT_HTML); } if ($name !== null || $email !== null || $uri !== null) { @@ -638,7 +659,7 @@ class SimplePie_Item $email = null; if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'])) { - $name = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); + $name = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['name'][0]['data'], SIMPLEPIE_CONSTRUCT_HTML); } if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['url'][0]['data'])) { @@ -646,7 +667,7 @@ class SimplePie_Item } if (isset($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'])) { - $email = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_TEXT); + $email = $this->sanitize($author[0]['child'][SIMPLEPIE_NAMESPACE_ATOM_03]['email'][0]['data'], SIMPLEPIE_CONSTRUCT_HTML); } if ($name !== null || $email !== null || $url !== null) { @@ -655,19 +676,19 @@ class SimplePie_Item } if ($author = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'author')) { - $authors[] = $this->registry->create('Author', array(null, null, $this->sanitize($author[0]['data'], SIMPLEPIE_CONSTRUCT_TEXT))); + $authors[] = $this->registry->create('Author', array(null, null, $this->sanitize($author[0]['data'], SIMPLEPIE_CONSTRUCT_HTML))); } foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'creator') as $author) { - $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); + $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_HTML), null, null)); } foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'creator') as $author) { - $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); + $authors[] = $this->registry->create('Author', 
array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_HTML), null, null)); } foreach ((array) $this->get_item_tags(SIMPLEPIE_NAMESPACE_ITUNES, 'author') as $author) { - $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_TEXT), null, null)); + $authors[] = $this->registry->create('Author', array($this->sanitize($author['data'], SIMPLEPIE_CONSTRUCT_HTML), null, null)); } if (!empty($authors)) @@ -738,6 +759,18 @@ class SimplePie_Item { $this->data['date']['raw'] = $return[0]['data']; } + elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'pubDate')) + { + $this->data['date']['raw'] = $return[0]['data']; + } + elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'date')) + { + $this->data['date']['raw'] = $return[0]['data']; + } + elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'date')) + { + $this->data['date']['raw'] = $return[0]['data']; + } elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_ATOM_10, 'updated')) { $this->data['date']['raw'] = $return[0]['data']; @@ -754,18 +787,6 @@ class SimplePie_Item { $this->data['date']['raw'] = $return[0]['data']; } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'pubDate')) - { - $this->data['date']['raw'] = $return[0]['data']; - } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_11, 'date')) - { - $this->data['date']['raw'] = $return[0]['data']; - } - elseif ($return = $this->get_item_tags(SIMPLEPIE_NAMESPACE_DC_10, 'date')) - { - $this->data['date']['raw'] = $return[0]['data']; - } if (!empty($this->data['date']['raw'])) { @@ -821,7 +842,7 @@ class SimplePie_Item if (!empty($this->data['updated']['raw'])) { $parser = $this->registry->call('Parse_Date', 'get'); - $this->data['updated']['parsed'] = $parser->parse($this->data['date']['raw']); + $this->data['updated']['parsed'] = $parser->parse($this->data['updated']['raw']); } else { @@ -1080,8 +1101,8 @@ class SimplePie_Item * 
* @since Beta 2 * @todo Add support for end-user defined sorting of enclosures by type/handler (so we can prefer the faster-loading FLV over MP4). - * @todo If an element exists at a level, but it's value is empty, we should fall back to the value from the parent (if it exists). - * @return array|null List of SimplePie_Enclosure items + * @todo If an element exists at a level, but its value is empty, we should fall back to the value from the parent (if it exists). + * @return SimplePie_Enclosure[]|null List of SimplePie_Enclosure items */ public function get_enclosures() { @@ -2658,7 +2679,9 @@ class SimplePie_Item // PLAYER if (isset($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'])) { - $player = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI); + if (isset($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'])) { + $player = $this->sanitize($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['player'][0]['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI); + } } else { @@ -2733,7 +2756,9 @@ class SimplePie_Item { foreach ($content['child'][SIMPLEPIE_NAMESPACE_MEDIARSS]['thumbnail'] as $thumbnail) { - $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI); + if (isset($thumbnail['attribs']['']['url'])) { + $thumbnails[] = $this->sanitize($thumbnail['attribs']['']['url'], SIMPLEPIE_CONSTRUCT_IRI); + } } if (is_array($thumbnails)) { diff --git a/libraries/simplepie/library/SimplePie/Locator.php b/libraries/simplepie/library/SimplePie/Locator.php index 57e910c..bc314c2 100644 --- a/libraries/simplepie/library/SimplePie/Locator.php +++ b/libraries/simplepie/library/SimplePie/Locator.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. 
* - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -121,34 +120,41 @@ class SimplePie_Locator { if ($type & SIMPLEPIE_LOCATOR_LOCAL_EXTENSION && $working = $this->extension($this->local)) { - return $working; + return $working[0]; } if ($type & SIMPLEPIE_LOCATOR_LOCAL_BODY && $working = $this->body($this->local)) { - return $working; + return $working[0]; } if ($type & SIMPLEPIE_LOCATOR_REMOTE_EXTENSION && $working = $this->extension($this->elsewhere)) { - return $working; + return $working[0]; } if ($type & SIMPLEPIE_LOCATOR_REMOTE_BODY && $working = $this->body($this->elsewhere)) { - return $working; + return $working[0]; } } return null; } - public function is_feed($file) + public function is_feed($file, $check_html = false) { if ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) { $sniffer = $this->registry->create('Content_Type_Sniffer', array($file)); $sniffed = $sniffer->get_type(); - if (in_array($sniffed, array('application/rss+xml', 'application/rdf+xml', 'text/rdf', 'application/atom+xml', 'text/xml', 'application/xml'))) + $mime_types = array('application/rss+xml', 'application/rdf+xml', + 'text/rdf', 'application/atom+xml', 'text/xml', + 'application/xml', 'application/x-rss+xml'); + if ($check_html) + { + $mime_types[] = 'text/html'; + } + if (in_array($sniffed, $mime_types)) { return true; } @@ -226,7 +232,7 @@ class SimplePie_Locator } if ($link->hasAttribute('href') && $link->hasAttribute('rel')) { - $rel = 
array_unique($this->registry->call('Misc', 'space_seperated_tokens', array(strtolower($link->getAttribute('rel'))))); + $rel = array_unique($this->registry->call('Misc', 'space_separated_tokens', array(strtolower($link->getAttribute('rel'))))); $line = method_exists($link, 'getLineNo') ? $link->getLineNo() : 1; if ($this->base_location < $line) @@ -242,14 +248,14 @@ class SimplePie_Locator continue; } - if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href])) + if (!in_array($href, $done) && in_array('feed', $rel) || (in_array('alternate', $rel) && !in_array('stylesheet', $rel) && $link->hasAttribute('type') && in_array(strtolower($this->registry->call('Misc', 'parse_mime', array($link->getAttribute('type')))), array('text/html', 'application/rss+xml', 'application/atom+xml'))) && !isset($feeds[$href])) { $this->checked_feeds++; $headers = array( 'Accept' => 'application/atom+xml, application/rss+xml, application/rdf+xml;q=0.9, application/xml;q=0.8, text/xml;q=0.8, text/html;q=0.7, unknown/unknown;q=0.1, application/unknown;q=0.1, */*;q=0.1', ); $feed = $this->registry->create('File', array($href, $this->timeout, 5, $headers, $this->useragent)); - if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) + if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed, true)) { $feeds[$href] = $feed; } @@ -275,9 +281,9 @@ class SimplePie_Locator { $href = trim($link->getAttribute('href')); $parsed = $this->registry->call('Misc', 
'parse_url', array($href)); - if ($parsed['scheme'] === '' || preg_match('/^(http(s)|feed)?$/i', $parsed['scheme'])) + if ($parsed['scheme'] === '' || preg_match('/^(https?|feed)?$/i', $parsed['scheme'])) { - if ($this->base_location < $link->getLineNo()) + if (method_exists($link, 'getLineNo') && $this->base_location < $link->getLineNo()) { $href = $this->registry->call('Misc', 'absolutize_url', array(trim($link->getAttribute('href')), $this->base)); } @@ -312,6 +318,57 @@ class SimplePie_Locator return null; } + public function get_rel_link($rel) + { + if ($this->dom === null) + { + throw new SimplePie_Exception('DOMDocument not found, unable to use '. + 'locator'); + } + if (!class_exists('DOMXpath')) + { + throw new SimplePie_Exception('DOMXpath not found, unable to use '. + 'get_rel_link'); + } + + $xpath = new DOMXpath($this->dom); + $query = '//a[@rel and @href] | //link[@rel and @href]'; + foreach ($xpath->query($query) as $link) + { + $href = trim($link->getAttribute('href')); + $parsed = $this->registry->call('Misc', 'parse_url', array($href)); + if ($parsed['scheme'] === '' || + preg_match('/^https?$/i', $parsed['scheme'])) + { + if (method_exists($link, 'getLineNo') && + $this->base_location < $link->getLineNo()) + { + $href = + $this->registry->call('Misc', 'absolutize_url', + array(trim($link->getAttribute('href')), + $this->base)); + } + else + { + $href = + $this->registry->call('Misc', 'absolutize_url', + array(trim($link->getAttribute('href')), + $this->http_base)); + } + if ($href === false) + { + return null; + } + $rel_values = explode(' ', strtolower($link->getAttribute('rel'))); + if (in_array($rel, $rel_values)) + { + return $href; + } + } + } + return null; + } + public function extension(&$array) { foreach ($array as $key => $value) @@ -330,7 +387,7 @@ class SimplePie_Locator $feed = $this->registry->create('File', array($value, $this->timeout, 5, $headers, $this->useragent)); if ($feed->success && ($feed->method & 
SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) { - return $feed; + return array($feed); } else { @@ -358,7 +415,7 @@ class SimplePie_Locator $feed = $this->registry->create('File', array($value, $this->timeout, 5, null, $this->useragent)); if ($feed->success && ($feed->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($feed->status_code === 200 || $feed->status_code > 206 && $feed->status_code < 300)) && $this->is_feed($feed)) { - return $feed; + return array($feed); } else { diff --git a/libraries/simplepie/library/SimplePie/Misc.php b/libraries/simplepie/library/SimplePie/Misc.php index 5d7367f..2e3107e 100644 --- a/libraries/simplepie/library/SimplePie/Misc.php +++ b/libraries/simplepie/library/SimplePie/Misc.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
* * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -124,7 +123,7 @@ class SimplePie_Misc { $attribs[$j][2] = $attribs[$j][1]; } - $return[$i]['attribs'][strtolower($attribs[$j][1])]['data'] = SimplePie_Misc::entities_decode(end($attribs[$j]), 'UTF-8'); + $return[$i]['attribs'][strtolower($attribs[$j][1])]['data'] = SimplePie_Misc::entities_decode(end($attribs[$j])); } } } @@ -138,7 +137,7 @@ class SimplePie_Misc foreach ($element['attribs'] as $key => $value) { $key = strtolower($key); - $full .= " $key=\"" . htmlspecialchars($value['data']) . '"'; + $full .= " $key=\"" . htmlspecialchars($value['data'], ENT_COMPAT, 'UTF-8') . '"'; } if ($element['self_closing']) { @@ -224,6 +223,23 @@ class SimplePie_Misc } } + public static function array_merge_recursive($array1, $array2) + { + foreach ($array2 as $key => $value) + { + if (is_array($value)) + { + $array1[$key] = SimplePie_Misc::array_merge_recursive($array1[$key], $value); + } + else + { + $array1[$key] = $value; + } + } + + return $array1; + } + public static function parse_url($url) { $iri = new SimplePie_IRI($url); @@ -317,11 +333,16 @@ class SimplePie_Misc { return $return; } - // This is last, as behaviour of this varies with OS userland and PHP version + // This is third, as behaviour of this varies with OS userland and PHP version elseif (function_exists('iconv') && ($return = SimplePie_Misc::change_encoding_iconv($data, $input, $output))) { return $return; } + // This is last, as behaviour of this varies with OS userland and PHP version + elseif (class_exists('\UConverter') && ($return = SimplePie_Misc::change_encoding_uconverter($data, $input, $output))) + { + return $return; + } // If we can't do anything, just fail else { @@ -372,6 +393,17 @@ class SimplePie_Misc return @iconv($input, $output, $data); } + 
/** + * @param string $data + * @param string $input + * @param string $output + * @return string|false + */ + protected static function change_encoding_uconverter($data, $input, $output) + { + return @\UConverter::transcode($data, $output, $input); + } + /** * Normalize an encoding name * @@ -1926,7 +1958,7 @@ class SimplePie_Misc return (bool) preg_match('/^([A-Za-z0-9\-._~\x{A0}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFEF}\x{10000}-\x{1FFFD}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}\x{40000}-\x{4FFFD}\x{50000}-\x{5FFFD}\x{60000}-\x{6FFFD}\x{70000}-\x{7FFFD}\x{80000}-\x{8FFFD}\x{90000}-\x{9FFFD}\x{A0000}-\x{AFFFD}\x{B0000}-\x{BFFFD}\x{C0000}-\x{CFFFD}\x{D0000}-\x{DFFFD}\x{E1000}-\x{EFFFD}!$&\'()*+,;=@]|(%[0-9ABCDEF]{2}))+$/u', $string); } - public static function space_seperated_tokens($string) + public static function space_separated_tokens($string) { $space_characters = "\x20\x09\x0A\x0B\x0C\x0D"; $string_length = strlen($string); diff --git a/libraries/simplepie/library/SimplePie/Net/IPv6.php b/libraries/simplepie/library/SimplePie/Net/IPv6.php index da80d8a..47658af 100644 --- a/libraries/simplepie/library/SimplePie/Net/IPv6.php +++ b/libraries/simplepie/library/SimplePie/Net/IPv6.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
* * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/Parse/Date.php b/libraries/simplepie/library/SimplePie/Parse/Date.php index d51f500..1f21566 100644 --- a/libraries/simplepie/library/SimplePie/Parse/Date.php +++ b/libraries/simplepie/library/SimplePie/Parse/Date.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -173,7 +172,7 @@ class SimplePie_Parse_Date 'aug' => 8, 'august' => 8, 'sep' => 9, - 'september' => 8, + 'september' => 9, 'oct' => 10, 'october' => 10, 'nov' => 11, @@ -331,6 +330,7 @@ class SimplePie_Parse_Date 'CCT' => 23400, 'CDT' => -18000, 'CEDT' => 7200, + 'CEST' => 7200, 'CET' => 3600, 'CGST' => -7200, 'CGT' => -10800, @@ -630,7 +630,7 @@ class SimplePie_Parse_Date /** * Parse a superset of W3C-DTF (allows hyphens and colons to be omitted, as * well as allowing any of upper or lower case "T", horizontal tabs, or - * spaces to be used as the time seperator (including more than one)) + * spaces to be used as the time separator (including more than one)) * * @access protected * @return int Timestamp @@ -690,7 +690,7 @@ class SimplePie_Parse_Date } // 
Convert the number of seconds to an integer, taking decimals into account - $second = round($match[6] + $match[7] / pow(10, strlen($match[7]))); + $second = round((int)$match[6] + (int)$match[7] / pow(10, strlen($match[7]))); return gmmktime($match[4], $match[5], $second, $match[2], $match[3], $match[1]) - $timezone; } @@ -720,7 +720,7 @@ class SimplePie_Parse_Date { $output .= substr($string, $position, $pos - $position); $position = $pos + 1; - if ($string[$pos - 1] !== '\\') + if ($pos === 0 || $string[$pos - 1] !== '\\') { $depth++; while ($depth && $position < $length) diff --git a/libraries/simplepie/library/SimplePie/Parser.php b/libraries/simplepie/library/SimplePie/Parser.php index d698552..17139ab 100644 --- a/libraries/simplepie/library/SimplePie/Parser.php +++ b/libraries/simplepie/library/SimplePie/Parser.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -75,8 +74,31 @@ class SimplePie_Parser $this->registry = $registry; } - public function parse(&$data, $encoding) + public function parse(&$data, $encoding, $url = '') { + if (function_exists('Mf2\parse')) { + // Check for both h-feed and h-entry, as both a feed with no entries + // and a list of entries without an h-feed wrapper are both valid. 
+ $position = 0; + while ($position = strpos($data, 'h-feed', $position)) { + $start = $position < 200 ? 0 : $position - 200; + $check = substr($data, $start, 400); + if (preg_match('/class="[^"]*h-feed/', $check)) { + return $this->parse_microformats($data, $url); + } + $position += 7; + } + $position = 0; + while ($position = strpos($data, 'h-entry', $position)) { + $start = $position < 200 ? 0 : $position - 200; + $check = substr($data, $start, 400); + if (preg_match('/class="[^"]*h-entry/', $check)) { + return $this->parse_microformats($data, $url); + } + $position += 7; + } + } + // Use UTF-8 if we get passed US-ASCII, as every US-ASCII character is a UTF-8 character if (strtoupper($encoding) === 'US-ASCII') { @@ -120,7 +142,7 @@ class SimplePie_Parser if ($declaration->parse()) { $data = substr($data, $pos + 2); - $data = 'version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' . $data; + $data = 'version . '" encoding="' . $encoding . '" standalone="' . (($declaration->standalone) ? 'yes' : 'no') . '"?>' ."\n". $this->declare_html_entities() . $data; } else { @@ -404,4 +426,231 @@ class SimplePie_Parser } return $cache[$string]; } -} + + private function parse_hcard($data, $category = false) { + $name = ''; + $link = ''; + // Check if h-card is set and pass that information on in the link. + if (isset($data['type']) && in_array('h-card', $data['type'])) { + if (isset($data['properties']['name'][0])) { + $name = $data['properties']['name'][0]; + } + if (isset($data['properties']['url'][0])) { + $link = $data['properties']['url'][0]; + if ($name === '') { + $name = $link; + } + else { + // can't have commas in categories. + $name = str_replace(',', '', $name); + } + $person_tag = $category ? '' : ''; + return ''.$person_tag.$name.''; + } + } + return isset($data['value']) ? 
$data['value'] : ''; + } + + private function parse_microformats(&$data, $url) { + $feed_title = ''; + $feed_author = NULL; + $author_cache = array(); + $items = array(); + $entries = array(); + $mf = Mf2\parse($data, $url); + // First look for an h-feed. + $h_feed = array(); + foreach ($mf['items'] as $mf_item) { + if (in_array('h-feed', $mf_item['type'])) { + $h_feed = $mf_item; + break; + } + // Also look for an h-feed in the children of each top level item. + if (!isset($mf_item['children'][0]['type'])) continue; + if (in_array('h-feed', $mf_item['children'][0]['type'])) { + $h_feed = $mf_item['children'][0]; + // In this case the parent of the h-feed may be an h-card, so use it as + // the feed_author. + if (in_array('h-card', $mf_item['type'])) $feed_author = $mf_item; + break; + } + } + if (isset($h_feed['children'])) { + $entries = $h_feed['children']; + // Also set the feed title and store author from the h-feed if available. + if (isset($mf['items'][0]['properties']['name'][0])) { + $feed_title = $mf['items'][0]['properties']['name'][0]; + } + if (isset($mf['items'][0]['properties']['author'][0])) { + $feed_author = $mf['items'][0]['properties']['author'][0]; + } + } + else { + $entries = $mf['items']; + } + for ($i = 0; $i < count($entries); $i++) { + $entry = $entries[$i]; + if (in_array('h-entry', $entry['type'])) { + $item = array(); + $title = ''; + $description = ''; + if (isset($entry['properties']['url'][0])) { + $link = $entry['properties']['url'][0]; + if (isset($link['value'])) $link = $link['value']; + $item['link'] = array(array('data' => $link)); + } + if (isset($entry['properties']['uid'][0])) { + $guid = $entry['properties']['uid'][0]; + if (isset($guid['value'])) $guid = $guid['value']; + $item['guid'] = array(array('data' => $guid)); + } + if (isset($entry['properties']['name'][0])) { + $title = $entry['properties']['name'][0]; + if (isset($title['value'])) $title = $title['value']; + $item['title'] = array(array('data' => $title)); + } 
+ if (isset($entry['properties']['author'][0]) || isset($feed_author)) { + // author is a special case, it can be plain text or an h-card array. + // If it's plain text it can also be a url that should be followed to + // get the actual h-card. + $author = isset($entry['properties']['author'][0]) ? + $entry['properties']['author'][0] : $feed_author; + if (!is_string($author)) { + $author = $this->parse_hcard($author); + } + else if (strpos($author, 'http') === 0) { + if (isset($author_cache[$author])) { + $author = $author_cache[$author]; + } + else { + $mf = Mf2\fetch($author); + foreach ($mf['items'] as $hcard) { + // Only interested in an h-card by itself in this case. + if (!in_array('h-card', $hcard['type'])) { + continue; + } + // It must have a url property matching what we fetched. + if (!isset($hcard['properties']['url']) || + !(in_array($author, $hcard['properties']['url']))) { + continue; + } + // Save parse_hcard the trouble of finding the correct url. + $hcard['properties']['url'][0] = $author; + // Cache this h-card for the next h-entry to check. + $author_cache[$author] = $this->parse_hcard($hcard); + $author = $author_cache[$author]; + break; + } + } + } + $item['author'] = array(array('data' => $author)); + } + if (isset($entry['properties']['photo'][0])) { + // If a photo is also in content, don't need to add it again here. + $content = ''; + if (isset($entry['properties']['content'][0]['html'])) { + $content = $entry['properties']['content'][0]['html']; + } + $photo_list = array(); + for ($j = 0; $j < count($entry['properties']['photo']); $j++) { + $photo = $entry['properties']['photo'][$j]; + if (strpos($content, $photo) === false) { + $photo_list[] = $photo; + } + } + // When there's more than one photo show the first and use a lightbox. + $count = count($photo_list); + if ($count > 1) { + $description = '

    '; + for ($j = 0; $j < $count; $j++) { + $hidden = $j === 0 ? '' : 'class="hidden" '; + $description .= ''. + ''; + } + $description .= '
    '.$count.' photos

    '; + } + else if ($count == 1) { + $description = '

    '; + } + } + if (isset($entry['properties']['content'][0]['html'])) { + // e-content['value'] is the same as p-name when they are on the same + // element. Use this to replace title with a strip_tags version so + // that alt text from images is not included in the title. + if ($entry['properties']['content'][0]['value'] === $title) { + $title = strip_tags($entry['properties']['content'][0]['html']); + $item['title'] = array(array('data' => $title)); + } + $description .= $entry['properties']['content'][0]['html']; + if (isset($entry['properties']['in-reply-to'][0]['value'])) { + $in_reply_to = $entry['properties']['in-reply-to'][0]['value']; + $description .= '

    '. + ''.$in_reply_to.'

    '; + } + $item['description'] = array(array('data' => $description)); + } + if (isset($entry['properties']['category'])) { + $category_csv = ''; + // Categories can also contain h-cards. + foreach ($entry['properties']['category'] as $category) { + if ($category_csv !== '') $category_csv .= ', '; + if (is_string($category)) { + // Can't have commas in categories. + $category_csv .= str_replace(',', '', $category); + } + else { + $category_csv .= $this->parse_hcard($category, true); + } + } + $item['category'] = array(array('data' => $category_csv)); + } + if (isset($entry['properties']['published'][0])) { + $timestamp = strtotime($entry['properties']['published'][0]); + $pub_date = date('F j Y g:ia', $timestamp).' GMT'; + $item['pubDate'] = array(array('data' => $pub_date)); + } + // The title and description are set to the empty string to represent + // a deleted item (which also makes it an invalid rss item). + if (isset($entry['properties']['deleted'][0])) { + $item['title'] = array(array('data' => '')); + $item['description'] = array(array('data' => '')); + } + $items[] = array('child' => array('' => $item)); + } + } + // Mimic RSS data format when storing microformats. + $link = array(array('data' => $url)); + $image = ''; + if (!is_string($feed_author) && + isset($feed_author['properties']['photo'][0])) { + $image = array(array('child' => array('' => array('url' => + array(array('data' => $feed_author['properties']['photo'][0])))))); + } + // Use the a name given for the h-feed, or get the title from the html. + if ($feed_title !== '') { + $feed_title = array(array('data' => htmlspecialchars($feed_title))); + } + else if ($position = strpos($data, '')) { + $start = $position < 200 ? 
0 : $position - 200; + $check = substr($data, $start, 400); + $matches = array(); + if (preg_match('/<title>(.+)<\/title>/', $check, $matches)) { + $feed_title = array(array('data' => htmlspecialchars($matches[1]))); + } + } + $channel = array('channel' => array(array('child' => array('' => + array('link' => $link, 'image' => $image, 'title' => $feed_title, + 'item' => $items))))); + $rss = array(array('attribs' => array('' => array('version' => '2.0')), + 'child' => array('' => $channel))); + $this->data = array('child' => array('' => array('rss' => $rss))); + return true; + } + + private function declare_html_entities() { + // This is required because the RSS specification says that entity-encoded + // html is allowed, but the xml specification says they must be declared. + return '<!DOCTYPE html [ <!ENTITY nbsp " "> <!ENTITY iexcl "¡"> <!ENTITY cent "¢"> <!ENTITY pound "£"> <!ENTITY curren "¤"> <!ENTITY yen "¥"> <!ENTITY brvbar "¦"> <!ENTITY sect "§"> <!ENTITY uml "¨"> <!ENTITY copy "©"> <!ENTITY ordf "ª"> <!ENTITY laquo "«"> <!ENTITY not "¬"> <!ENTITY shy "­"> <!ENTITY reg "®"> <!ENTITY macr "¯"> <!ENTITY deg "°"> <!ENTITY plusmn "±"> <!ENTITY sup2 "²"> <!ENTITY sup3 "³"> <!ENTITY acute "´"> <!ENTITY micro "µ"> <!ENTITY para "¶"> <!ENTITY middot "·"> <!ENTITY cedil "¸"> <!ENTITY sup1 "¹"> <!ENTITY ordm "º"> <!ENTITY raquo "»"> <!ENTITY frac14 "¼"> <!ENTITY frac12 "½"> <!ENTITY frac34 "¾"> <!ENTITY iquest "¿"> <!ENTITY Agrave "À"> <!ENTITY Aacute "Á"> <!ENTITY Acirc "Â"> <!ENTITY Atilde "Ã"> <!ENTITY Auml "Ä"> <!ENTITY Aring "Å"> <!ENTITY AElig "Æ"> <!ENTITY Ccedil "Ç"> <!ENTITY Egrave "È"> <!ENTITY Eacute "É"> <!ENTITY Ecirc "Ê"> <!ENTITY Euml "Ë"> <!ENTITY Igrave "Ì"> <!ENTITY Iacute "Í"> <!ENTITY Icirc "Î"> <!ENTITY Iuml "Ï"> <!ENTITY ETH "Ð"> <!ENTITY Ntilde "Ñ"> <!ENTITY Ograve "Ò"> <!ENTITY Oacute "Ó"> <!ENTITY Ocirc "Ô"> <!ENTITY Otilde "Õ"> <!ENTITY Ouml "Ö"> <!ENTITY times "×"> <!ENTITY Oslash "Ø"> <!ENTITY Ugrave "Ù"> <!ENTITY Uacute "Ú"> <!ENTITY 
Ucirc "Û"> <!ENTITY Uuml "Ü"> <!ENTITY Yacute "Ý"> <!ENTITY THORN "Þ"> <!ENTITY szlig "ß"> <!ENTITY agrave "à"> <!ENTITY aacute "á"> <!ENTITY acirc "â"> <!ENTITY atilde "ã"> <!ENTITY auml "ä"> <!ENTITY aring "å"> <!ENTITY aelig "æ"> <!ENTITY ccedil "ç"> <!ENTITY egrave "è"> <!ENTITY eacute "é"> <!ENTITY ecirc "ê"> <!ENTITY euml "ë"> <!ENTITY igrave "ì"> <!ENTITY iacute "í"> <!ENTITY icirc "î"> <!ENTITY iuml "ï"> <!ENTITY eth "ð"> <!ENTITY ntilde "ñ"> <!ENTITY ograve "ò"> <!ENTITY oacute "ó"> <!ENTITY ocirc "ô"> <!ENTITY otilde "õ"> <!ENTITY ouml "ö"> <!ENTITY divide "÷"> <!ENTITY oslash "ø"> <!ENTITY ugrave "ù"> <!ENTITY uacute "ú"> <!ENTITY ucirc "û"> <!ENTITY uuml "ü"> <!ENTITY yacute "ý"> <!ENTITY thorn "þ"> <!ENTITY yuml "ÿ"> <!ENTITY OElig "Œ"> <!ENTITY oelig "œ"> <!ENTITY Scaron "Š"> <!ENTITY scaron "š"> <!ENTITY Yuml "Ÿ"> <!ENTITY fnof "ƒ"> <!ENTITY circ "ˆ"> <!ENTITY tilde "˜"> <!ENTITY Alpha "Α"> <!ENTITY Beta "Β"> <!ENTITY Gamma "Γ"> <!ENTITY Epsilon "Ε"> <!ENTITY Zeta "Ζ"> <!ENTITY Eta "Η"> <!ENTITY Theta "Θ"> <!ENTITY Iota "Ι"> <!ENTITY Kappa "Κ"> <!ENTITY Lambda "Λ"> <!ENTITY Mu "Μ"> <!ENTITY Nu "Ν"> <!ENTITY Xi "Ξ"> <!ENTITY Omicron "Ο"> <!ENTITY Pi "Π"> <!ENTITY Rho "Ρ"> <!ENTITY Sigma "Σ"> <!ENTITY Tau "Τ"> <!ENTITY Upsilon "Υ"> <!ENTITY Phi "Φ"> <!ENTITY Chi "Χ"> <!ENTITY Psi "Ψ"> <!ENTITY Omega "Ω"> <!ENTITY alpha "α"> <!ENTITY beta "β"> <!ENTITY gamma "γ"> <!ENTITY delta "δ"> <!ENTITY epsilon "ε"> <!ENTITY zeta "ζ"> <!ENTITY eta "η"> <!ENTITY theta "θ"> <!ENTITY iota "ι"> <!ENTITY kappa "κ"> <!ENTITY lambda "λ"> <!ENTITY mu "μ"> <!ENTITY nu "ν"> <!ENTITY xi "ξ"> <!ENTITY omicron "ο"> <!ENTITY pi "π"> <!ENTITY rho "ρ"> <!ENTITY sigmaf "ς"> <!ENTITY sigma "σ"> <!ENTITY tau "τ"> <!ENTITY upsilon "υ"> <!ENTITY phi "φ"> <!ENTITY chi "χ"> <!ENTITY psi "ψ"> <!ENTITY omega "ω"> <!ENTITY thetasym "ϑ"> <!ENTITY upsih "ϒ"> <!ENTITY piv "ϖ"> <!ENTITY ensp " "> <!ENTITY emsp " "> <!ENTITY thinsp " "> <!ENTITY zwnj "‌"> <!ENTITY zwj "‍"> <!ENTITY lrm "‎"> 
<!ENTITY rlm "‏"> <!ENTITY ndash "–"> <!ENTITY mdash "—"> <!ENTITY lsquo "‘"> <!ENTITY rsquo "’"> <!ENTITY sbquo "‚"> <!ENTITY ldquo "“"> <!ENTITY rdquo "”"> <!ENTITY bdquo "„"> <!ENTITY dagger "†"> <!ENTITY Dagger "‡"> <!ENTITY bull "•"> <!ENTITY hellip "…"> <!ENTITY permil "‰"> <!ENTITY prime "′"> <!ENTITY Prime "″"> <!ENTITY lsaquo "‹"> <!ENTITY rsaquo "›"> <!ENTITY oline "‾"> <!ENTITY frasl "⁄"> <!ENTITY euro "€"> <!ENTITY image "ℑ"> <!ENTITY weierp "℘"> <!ENTITY real "ℜ"> <!ENTITY trade "™"> <!ENTITY alefsym "ℵ"> <!ENTITY larr "←"> <!ENTITY uarr "↑"> <!ENTITY rarr "→"> <!ENTITY darr "↓"> <!ENTITY harr "↔"> <!ENTITY crarr "↵"> <!ENTITY lArr "⇐"> <!ENTITY uArr "⇑"> <!ENTITY rArr "⇒"> <!ENTITY dArr "⇓"> <!ENTITY hArr "⇔"> <!ENTITY forall "∀"> <!ENTITY part "∂"> <!ENTITY exist "∃"> <!ENTITY empty "∅"> <!ENTITY nabla "∇"> <!ENTITY isin "∈"> <!ENTITY notin "∉"> <!ENTITY ni "∋"> <!ENTITY prod "∏"> <!ENTITY sum "∑"> <!ENTITY minus "−"> <!ENTITY lowast "∗"> <!ENTITY radic "√"> <!ENTITY prop "∝"> <!ENTITY infin "∞"> <!ENTITY ang "∠"> <!ENTITY and "∧"> <!ENTITY or "∨"> <!ENTITY cap "∩"> <!ENTITY cup "∪"> <!ENTITY int "∫"> <!ENTITY there4 "∴"> <!ENTITY sim "∼"> <!ENTITY cong "≅"> <!ENTITY asymp "≈"> <!ENTITY ne "≠"> <!ENTITY equiv "≡"> <!ENTITY le "≤"> <!ENTITY ge "≥"> <!ENTITY sub "⊂"> <!ENTITY sup "⊃"> <!ENTITY nsub "⊄"> <!ENTITY sube "⊆"> <!ENTITY supe "⊇"> <!ENTITY oplus "⊕"> <!ENTITY otimes "⊗"> <!ENTITY perp "⊥"> <!ENTITY sdot "⋅"> <!ENTITY lceil "⌈"> <!ENTITY rceil "⌉"> <!ENTITY lfloor "⌊"> <!ENTITY rfloor "⌋"> <!ENTITY lang "〈"> <!ENTITY rang "〉"> <!ENTITY loz "◊"> <!ENTITY spades "♠"> <!ENTITY clubs "♣"> <!ENTITY hearts "♥"> <!ENTITY diams "♦"> ]>'; + } +} \ No newline at end of file diff --git a/libraries/simplepie/library/SimplePie/Rating.php b/libraries/simplepie/library/SimplePie/Rating.php index 8689e5d..eaf5708 100644 --- a/libraries/simplepie/library/SimplePie/Rating.php +++ b/libraries/simplepie/library/SimplePie/Rating.php @@ -5,7 +5,7 @@ * A PHP-Based 
RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/Registry.php b/libraries/simplepie/library/SimplePie/Registry.php index 1072cde..e0909bb 100644 --- a/libraries/simplepie/library/SimplePie/Registry.php +++ b/libraries/simplepie/library/SimplePie/Registry.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
* * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -113,7 +112,7 @@ class SimplePie_Registry */ public function register($type, $class, $legacy = false) { - if (!is_subclass_of($class, $this->default[$type])) + if (!@is_subclass_of($class, $this->default[$type])) { return false; } @@ -222,4 +221,4 @@ class SimplePie_Registry $result = call_user_func_array(array($class, $method), $parameters); return $result; } -} \ No newline at end of file +} diff --git a/libraries/simplepie/library/SimplePie/Restriction.php b/libraries/simplepie/library/SimplePie/Restriction.php index 4ba371b..001a5cd 100644 --- a/libraries/simplepie/library/SimplePie/Restriction.php +++ b/libraries/simplepie/library/SimplePie/Restriction.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/Sanitize.php b/libraries/simplepie/library/SimplePie/Sanitize.php index 6810cc4..5a11721 100644 --- a/libraries/simplepie/library/SimplePie/Sanitize.php +++ b/libraries/simplepie/library/SimplePie/Sanitize.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. 
* Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue @@ -61,7 +60,8 @@ class SimplePie_Sanitize var $image_handler = ''; var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'); var $encode_instead_of_strip = false; - var $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'); + var $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'); + var $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')); var $strip_comments = false; var $output_encoding = 'UTF-8'; var $enable_cache = true; @@ -160,7 +160,7 @@ class SimplePie_Sanitize $this->encode_instead_of_strip = (bool) $encode; } - public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc')) + public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 
'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc')) { if ($attribs) { @@ -179,6 +179,25 @@ class SimplePie_Sanitize } } + public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'))) + { + if ($attribs) + { + if (is_array($attribs)) + { + $this->add_attributes = $attribs; + } + else + { + $this->add_attributes = explode(',', $attribs); + } + } + else + { + $this->add_attributes = false; + } + } + public function strip_comments($strip = false) { $this->strip_comments = (bool) $strip; @@ -247,18 +266,24 @@ class SimplePie_Sanitize if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) { + if (!class_exists('DOMDocument')) + { + throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer'); + } $document = new DOMDocument(); $document->encoding = 'UTF-8'; + $data = $this->preprocess($data, $type); set_error_handler(array('SimplePie_Misc', 'silence_errors')); $document->loadHTML($data); restore_error_handler(); + $xpath = new DOMXPath($document); + // Strip comments if ($this->strip_comments) { - $xpath = new DOMXPath($document); $comments = $xpath->query('//comment()'); foreach ($comments as $comment) @@ -274,7 +299,7 @@ class SimplePie_Sanitize { foreach ($this->strip_htmltags as $tag) { - $this->strip_tag($tag, $document, $type); + $this->strip_tag($tag, $document, $xpath, $type); } } @@ -282,7 +307,15 @@ class SimplePie_Sanitize { foreach ($this->strip_attributes as $attrib) { - $this->strip_attr($attrib, $document); + $this->strip_attr($attrib, $xpath); + } + } + + if ($this->add_attributes) + { + foreach ($this->add_attributes as $tag => $valuePairs) + { + $this->add_attr($tag, $valuePairs, $document); } } @@ -310,7 +343,7 @@ class SimplePie_Sanitize } else { - $file = $this->registry->create('File', array($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => 
$_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen)); + $file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen)); $headers = $file->headers; if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) @@ -329,19 +362,17 @@ class SimplePie_Sanitize } } - // Remove the DOCTYPE - // Seems to cause segfaulting if we don't do this - if ($document->firstChild instanceof DOMDocumentType) - { - $document->removeChild($document->firstChild); - } - - // Move everything from the body to the root - $real_body = $document->getElementsByTagName('body')->item(0)->childNodes->item(0); - $document->replaceChild($real_body, $document->firstChild); - + // Get content node + $div = $document->getElementsByTagName('body')->item(0)->firstChild; // Finally, convert to a HTML string - $data = trim($document->saveHTML()); + if (version_compare(PHP_VERSION, '5.3.6', '>=')) + { + $data = trim($document->saveHTML($div)); + } + else + { + $data = trim($document->saveXML($div)); + } if ($this->remove_div) { @@ -379,6 +410,7 @@ class SimplePie_Sanitize protected function preprocess($html, $type) { $ret = ''; + $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html); if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML) { // Atom XHTML constructs are wrapped with a div by default @@ -451,9 +483,8 @@ class SimplePie_Sanitize } } - protected function strip_tag($tag, $document, $type) + protected function strip_tag($tag, $document, $xpath, $type) { - $xpath = new DOMXPath($document); $elements = $xpath->query('body//' . 
$tag); if ($this->encode_instead_of_strip) { @@ -536,9 +567,8 @@ class SimplePie_Sanitize } } - protected function strip_attr($attrib, $document) + protected function strip_attr($attrib, $xpath) { - $xpath = new DOMXPath($document); $elements = $xpath->query('//*[@' . $attrib . ']'); foreach ($elements as $element) @@ -546,4 +576,16 @@ class SimplePie_Sanitize $element->removeAttribute($attrib); } } + + protected function add_attr($tag, $valuePairs, $document) + { + $elements = $document->getElementsByTagName($tag); + foreach ($elements as $element) + { + foreach ($valuePairs as $attrib => $value) + { + $element->setAttribute($attrib, $value); + } + } + } } diff --git a/libraries/simplepie/library/SimplePie/Source.php b/libraries/simplepie/library/SimplePie/Source.php index 51d8e6c..1a66a39 100644 --- a/libraries/simplepie/library/SimplePie/Source.php +++ b/libraries/simplepie/library/SimplePie/Source.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. 
* * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/XML/Declaration/Parser.php b/libraries/simplepie/library/SimplePie/XML/Declaration/Parser.php index aec19f1..99e7516 100644 --- a/libraries/simplepie/library/SimplePie/XML/Declaration/Parser.php +++ b/libraries/simplepie/library/SimplePie/XML/Declaration/Parser.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/libraries/simplepie/library/SimplePie/gzdecode.php b/libraries/simplepie/library/SimplePie/gzdecode.php index 52e024e..0e8bc8f 100644 --- a/libraries/simplepie/library/SimplePie/gzdecode.php +++ b/libraries/simplepie/library/SimplePie/gzdecode.php @@ -5,7 +5,7 @@ * A PHP-Based RSS and Atom Feed Framework. * Takes the hard work out of managing a complete RSS/Atom solution. * - * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors + * Copyright (c) 2004-2016, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without modification, are @@ -33,8 +33,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package SimplePie - * @version 1.3.1 - * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue + * @copyright 2004-2016 Ryan Parman, Geoffrey Sneddon, Ryan McCue * @author Ryan Parman * @author Geoffrey Sneddon * @author Ryan McCue diff --git a/makefulltextfeed.php b/makefulltextfeed.php index b19ec24..ebaa01d 100644 --- a/makefulltextfeed.php +++ b/makefulltextfeed.php @@ -1,10 +1,10 @@ <?php // Full-Text RSS: Create Full-Text Feeds // Author: Keyvan Minoukadeh -// Copyright (c) 2015 Keyvan Minoukadeh +// Copyright (c) 2017 Keyvan Minoukadeh // License: AGPLv3 -// Version: 3.6 -// Date: 2016-02-17 +// Version: 3.7 +// Date: 2017-02-12 // More info: http://fivefilters.org/content-only/ // Help: http://help.fivefilters.org @@ -272,7 +272,7 @@ if (file_exists('custom_init.php')) require 'custom_init.php'; /////////////////////////////////////////////// // Check URL against list of blacklisted URLs /////////////////////////////////////////////// -if (!url_allowed($url)) die('URL blocked'); +if (!url_allowed($url)) die($options->blocked_message); /////////////////////////////////////////////// // Max entries @@ -302,12 +302,20 @@ if (isset($_REQUEST['links']) && in_array($_REQUEST['links'], array('preserve', $links = 'preserve'; } +/////////////////////////////////////////////// +// Image handling +/////////////////////////////////////////////// +$images = true; +if (isset($_REQUEST['images']) && in_array($_REQUEST['images'], array('0', 'remove'))) { + $images = false; +} + /////////////////////////////////////////////// // Favour item titles in feed? 
/////////////////////////////////////////////// $favour_feed_titles = true; if ($options->favour_feed_titles == 'user') { - $favour_feed_titles = !isset($_REQUEST['use_extracted_title']); + $favour_feed_titles = (!isset($_REQUEST['use_extracted_title']) || $_REQUEST['use_extracted_title'] === '0'); } else { $favour_feed_titles = $options->favour_feed_titles; } @@ -317,7 +325,7 @@ if ($options->favour_feed_titles == 'user') { /////////////////////////////////////////////// $favour_effective_url = false; if ($options->favour_effective_url == 'user') { - $favour_effective_url = isset($_REQUEST['use_effective_url']); + $favour_effective_url = (isset($_REQUEST['use_effective_url']) && $_REQUEST['use_effective_url'] !== '0'); } else { $favour_effective_url = $options->favour_effective_url; } @@ -333,6 +341,17 @@ if ($options->content === 'user') { } } +/////////////////////////////////////////////// +// HTML5 output? +/////////////////////////////////////////////// +if ($options->html5_output === 'user') { + if (isset($_REQUEST['content']) && $_REQUEST['content'] === 'html5') { + $options->html5_output = true; + } else { + $options->html5_output = false; + } +} + /////////////////////////////////////////////// // Include summaries in output? /////////////////////////////////////////////// @@ -367,7 +386,7 @@ if ($options->detect_language === 'user') { $detect_language = $options->detect_language; } -$use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= 0); +$use_cld = extension_loaded('cld'); ///////////////////////////////////// // Check for valid format @@ -468,8 +487,7 @@ if (isset($_REQUEST['inputhtml']) && _FF_FTR_MODE == 'simple') { ////////////////////////////////// if ($options->caching) { debug('Caching is enabled...'); - $cache_id = md5($max.$url.(int)$valid_key.$accept.$links.(int)$favour_feed_titles.(int)$options->content.(int)$options->summary. 
- (int)$xss_filter.(int)$favour_effective_url.(int)$exclude_on_fail.$format.$detect_language.$parser.$user_submitted_config._FF_FTR_MODE); + $cache_id = md5($max.$url.(int)$valid_key.$accept.$links.$images.(int)$favour_feed_titles.(int)$options->content.(int)$options->html5_output.(int)$options->summary.(int)$xss_filter.(int)$favour_effective_url.(int)$exclude_on_fail.$format.$detect_language.$parser.$user_submitted_config._FF_FTR_MODE); $check_cache = true; if ($options->apc && $options->smart_cache) { apc_add("cache.$cache_id", 0, $options->cache_time*60); @@ -554,6 +572,7 @@ SiteConfig::use_apc($options->apc); $extractor->fingerprints = $options->fingerprints; $extractor->allowedParsers = $options->allowed_parsers; $extractor->parserOverride = $parser; +if (!$images) $extractor->stripImages = true; if ($options->user_submitted_config && $user_submitted_config) { $extractor->setUserSubmittedConfig($user_submitted_config); } @@ -633,7 +652,7 @@ if ($accept === 'html' || !$result) { public function get_enclosure($key=0, $prefer=null) { return null; } public function get_enclosures() { return null; } public function get_categories() { return null; } - public function get_item_tags($namespace='', $tag='') { return null; } + public function get_item_tags($namespace='', $tag='') { return null; } } $feed = new DummySingleItemFeed($url); } @@ -792,7 +811,7 @@ foreach ($items as $key => $item) { // if user has asked to see parsed HTML, show it and exit. if ($debug_show_parsed_html) { debug("Here's the full HTML after it's been parsed by Full-Text RSS:"); - die($readability->dom->saveXML($readability->dom->documentElement)); + die(make_html($readability->dom->documentElement)); } // is this a native ad? 
if ($extract_result && $extractor->isNativeAd()) { @@ -801,6 +820,8 @@ foreach ($items as $key => $item) { continue; // skip this feed item entry } } + $base_url = get_base_url($readability->dom); + if (!$base_url) $base_url = $effective_url; $content_block = ($extract_result) ? $extractor->getContent() : null; $extracted_title = ($extract_result) ? $extractor->getTitle() : ''; // Deal with multi-page articles @@ -814,8 +835,8 @@ foreach ($items as $key => $item) { while ($next_page_url = $extractor->getNextPageUrl()) { debug('--------'); debug('Processing next page: '.$next_page_url); - // If we've got URL, resolve against $url - if ($next_page_url = make_absolute_str($effective_url, $next_page_url)) { + // If we've got URL, resolve against $base_url + if ($next_page_url = make_absolute_str($base_url, $next_page_url)) { // check it's not what we have already! if (!in_array($next_page_url, $multi_page_urls)) { // it's not, so let's attempt to fetch it @@ -870,19 +891,24 @@ foreach ($items as $key => $item) { if ($do_content_extraction) { // if we failed to extract content... 
if (!$extract_result) { - if ($exclude_on_fail) { + if ($exclude_on_fail && (_FF_FTR_MODE != 'simple')) { debug('Failed to extract, so skipping (due to exclude on fail parameter)'); continue; // skip this and move to next item } - //TODO: get text sample for language detection - $html = $options->error_message; - // keep the original item description - $html .= $item->get_description(); + if (_FF_FTR_MODE === 'simple') { + $html = ''; + } else { + //TODO: get text sample for language detection + $html = $options->error_message; + // keep the original item description + $html .= $item->get_description(); + } } else { $readability->clean($content_block, 'select'); if ($options->rewrite_relative_urls) { - $base_url = get_base_url($readability->dom); - if (!$base_url) $base_url = $effective_url; + // we've got $base_url already above + //$base_url = get_base_url($readability->dom); + //if (!$base_url) $base_url = $effective_url; // rewrite URLs make_absolute($base_url, $content_block); } @@ -908,20 +934,32 @@ foreach ($items as $key => $item) { // convert content block to HTML string // Need to preserve things like body: //img[@id='feature'] if (in_array(strtolower($content_block->tagName), array('div', 'article', 'section', 'header', 'footer', 'li', 'td'))) { - $html = $content_block->innerHTML; + //$html = $content_block->innerHTML; + $html = make_html($content_block, true); // true = innerHTML //} elseif (in_array(strtolower($content_block->tagName), array('td', 'li'))) { // $html = '<div>'.$content_block->innerHTML.'</div>'; } else { - $html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML + //$html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML + $html = make_html($content_block); // outerHTML } //unset($content_block); // post-processing cleanup $html = preg_replace('!<p>[\s\h\v]*</p>!u', '', $html); if ($links == 'remove') { - $html = preg_replace('!</?a[^>]*>!', '', $html); + $html = 
preg_replace('!<a\s+[^>]*>!', '', $html); + $html = preg_replace('!</a>!', '', $html); } // get text sample for language detection - $text_sample = strip_tags(substr($html, 0, 500)); + $_og = $extractor->getOpenGraph(); + $text_sample = ''; + if (isset($_og['og:title'])) { + $text_sample .= $_og['og:title']; + } + if (isset($_og['og:description'])) { + $text_sample .= ' '.$_og['og:description']; + } + $text_sample .= mb_substr($content_block->textContent, 0, 3000); + unset($_og); $html = make_substitutions($options->message_to_prepend).$html; $html .= make_substitutions($options->message_to_append); } @@ -1007,10 +1045,17 @@ foreach ($items as $key => $item) { // add open graph if ($opengraph = $extractor->getOpenGraph()) { - foreach ($opengraph as $og_prop => $og_val) { - $newitem->addElement($og_prop, $og_val); + foreach ($opengraph as $_prop => $_val) { + $newitem->addElement($_prop, $_val); } } + // add Twitter Card + if ($twitterCard = $extractor->getTwitterCard()) { + foreach ($twitterCard as $_prop => $_val) { + $newitem->addElement($_prop, $_val); + } + } + unset($_prop, $_val); // add language if ($detect_language) { @@ -1184,6 +1229,7 @@ function get_self_url() { if (isset($_GET['accept'])) $self .= '&accept='.urlencode($_GET['accept']); if (isset($_GET['max'])) $self .= '&max='.(int)$_GET['max']; if (isset($_GET['links'])) $self .= '&links='.urlencode($_GET['links']); + if (isset($_GET['images'])) $self .= '&images='.urlencode($_GET['images']); if (isset($_GET['exc'])) $self .= '&exc='.urlencode($_GET['exc']); if (isset($_GET['format'])) $self .= '&format='.urlencode($_GET['format']); if (isset($_GET['callback'])) $self .= '&callback='.urlencode($_GET['callback']); @@ -1409,6 +1455,32 @@ function make_absolute_str($base, $url) { return false; } } +function make_html($dom, $inner=false) { + global $options; + static $html5 = null; + if ($options->html5_output) { + if ($html5 === null) { + $html5 = new Masterminds\HTML5(array('disable_html_ns' => true)); + 
} + if (!$inner) { + return $html5->saveHTML($dom); + } else { + $_inner = ''; + if ($dom->hasChildNodes()) { + foreach ($dom->childNodes as $child) { + $_inner .= $html5->saveHTML($child); + } + } + return $_inner; + } + } else { + if (!$inner) { + return $dom->ownerDocument->saveXML($dom); + } else { + return $dom->innerHTML; + } + } +} // returns single page response, or false if not found function get_single_page($item, $html, $url) { global $http, $extractor; @@ -1457,8 +1529,10 @@ function get_single_page($item, $html, $url) { } } } - // If we've got URL, resolve against $url - if (isset($single_page_url) && ($single_page_url = make_absolute_str($url, $single_page_url))) { + $base_url = get_base_url($readability->dom); + if (!$base_url) $base_url = $url; + // If we've got URL, resolve against $base_url + if (isset($single_page_url) && ($single_page_url = make_absolute_str($base_url, $single_page_url))) { // check it's not what we have already! if ($single_page_url != $url) { // it's not, so let's try to fetch it... diff --git a/site_config/standard/README.md b/site_config/standard/README.md index bf2766c..ab5b12d 100644 --- a/site_config/standard/README.md +++ b/site_config/standard/README.md @@ -1,7 +1,7 @@ Full-Text RSS site config files ================ -[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If there are no rules found, it tries to detect the content block automatically. +[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If there are no rules are found, it tries to detect the content block automatically. 
This repository contains the site-specific extraction rules we rely on in Full-Text RSS. diff --git a/ubuntu-15.10.pp b/ubuntu-16.04.pp similarity index 58% rename from ubuntu-15.10.pp rename to ubuntu-16.04.pp index af357f1..85da648 100644 --- a/ubuntu-15.10.pp +++ b/ubuntu-16.04.pp @@ -1,9 +1,6 @@ -# Puppet file intended to install server componenets for self-hosted FiveFilters.org web services +# Puppet file intended to install server componenets for FiveFilters.org web services # This file is intended for base images of: -# Ubuntu 15.10 - -# Please see here for more information on how to use this: -# http://help.fivefilters.org/customer/en/portal/articles/1143210-hosting +# Ubuntu 16.04 Exec { path => "/bin:/usr/bin:/usr/local/bin" } @@ -31,6 +28,10 @@ class init { APT::Periodic::Unattended-Upgrade "1";', require => Package["unattended-upgrades"] } + #exec { "configure-unattended-upgrades": + # require => Package["unattended-upgrades"], + # command => "sudo dpkg-reconfigure unattended-upgrades", + #} } # make sure apt-update run before package @@ -56,6 +57,11 @@ class apache { require => Package["apache2"], notify => Exec["restart-apache"] } + + exec { "enable-prefork": + require => Package["apache2"], + command => "sudo a2dismod mpm_event && sudo a2enmod mpm_prefork", + } file { "/etc/apache2/sites-available/fivefilters.conf": ensure => present, @@ -104,24 +110,34 @@ class apache { } class php { - package { "php5": ensure => latest } - package { "libapache2-mod-php5": ensure => latest } - package { "php5-cli": ensure => latest } - package { "php5-tidy": ensure => latest } - package { "php5-curl": ensure => latest } - package { "libcurl4-gnutls-dev": ensure => latest } + package { "php7.0": ensure => latest } + #package { "php-apc": ensure => latest } + package { "libapache2-mod-php7.0": ensure => latest } + package { "php7.0-cli": ensure => latest } + package { "php7.0-tidy": ensure => latest } + package { "php7.0-curl": ensure => latest } + #package { 
"libcurl4-gnutls-dev": ensure => latest } + package { "libcurl4-openssl-dev": ensure => latest } package { "libpcre3-dev": ensure => latest } package { "make": ensure=>latest } package { "php-pear": ensure => latest } - package { "php5-dev": ensure => latest } - package { "php5-intl": ensure => latest } - package { "php5-gd": ensure => latest } - package { "php5-imagick": ensure => latest } - package { "php5-json": ensure => latest } + package { "php7.0-dev": ensure => latest } + package { "php7.0-intl": ensure => latest } + package { "php7.0-gd": ensure => latest } + package { "php7.0-mbstring": ensure => latest } + package { "php-imagick": ensure => latest } + package { "php7.0-json": ensure => latest } #package { "php-http": ensure => latest } - package { "php5-raphf": ensure => latest } - package { "php5-propro": ensure => latest } - file { "/etc/php5/mods-available/fivefilters-php.ini": + package { "php-raphf": ensure => latest } + package { "php-propro": ensure => latest } + package { "php7.0-zip": ensure => latest } + # for gumbo-php + package { "libgumbo1": ensure => latest } + package { "libgumbo-dev": ensure => latest } + package { "libxml2": ensure => latest } + package { "libxml2-dev": ensure => latest } + + file { "/etc/php/7.0/mods-available/fivefilters-php.ini": ensure => present, content => "engine = On expose_php = Off @@ -134,17 +150,17 @@ class php { default_socket_timeout = 120 file_uploads = Off date.timezoe = 'UTC'", - require => Package["php5"], + require => Package["php7.0"], before => Exec["enable-fivefilters-php"], } exec { "enable-fivefilters-php": - command => "sudo php5enmod fivefilters-php", + command => "sudo phpenmod fivefilters-php", } } class php_pecl_http { # Important: this file needs to be in place before we install the HTTP extension - file { "/etc/php5/mods-available/http.ini": + file { "/etc/php/7.0/mods-available/http.ini": ensure => present, #owner => root, group => root, mode => 444, content => "; priority=25 @@ -156,7 
+172,7 @@ extension=http.so", } exec { "enable-http": - command => "sudo php5enmod http", + command => "sudo phpenmod http", require => Class["php"], } @@ -171,10 +187,9 @@ extension=http.so", } exec { "install-http-pecl": - command => "pecl install https://pecl.php.net/get/pecl_http-2.5.5.tgz", - #command => "sudo pecl install pecl_http", - # the above is now version 3.0 - requires PHP7 - #command => "pecl install http://pecl.php.net/get/pecl_http-1.7.6.tgz", + # For some reason this command doesn't return a success code, even though + # it appears to succeed. So we use || /bin/true + command => "sudo pecl install channel://pecl.php.net/pecl_http-3.1.0.tgz || /bin/true", #creates => "/tmp/needed/directory", require => Exec["enable-http"] } @@ -182,12 +197,12 @@ extension=http.so", class php_pecl_apcu { exec { "install-apcu-pecl": - command => "sudo pecl install channel://pecl.php.net/APCu-4.0.10", + command => "sudo pecl install channel://pecl.php.net/APCu-5.1.8", #creates => "/tmp/needed/directory", require => Class["php"] } - file { "/etc/php5/mods-available/apcu.ini": + file { "/etc/php/7.0/mods-available/apcu.ini": ensure => present, #owner => root, group => root, mode => 444, content => "extension=apcu.so", @@ -195,63 +210,66 @@ class php_pecl_apcu { before => Exec["enable-apcu"] } exec { "enable-apcu": - command => "sudo php5enmod apcu", + command => "sudo phpenmod apcu", notify => Exec["restart-apache"], } } -class php_cld { - # see https://github.com/lstrojny/php-cld +class php_gumbo { + # see https://github.com/layershifter/gumbo-php package { "git": ensure => latest } - package { "build-essential": ensure => latest } - file { "/tmp/cld": + file { "/tmp/gumbo": ensure => absent, - before => Exec["download-cld"], + before => Exec["download-gumbo"], recurse => true, force => true } - exec { "download-cld": - command => "git clone git://github.com/lstrojny/php-cld.git /tmp/cld", - require => [Package["git"], Class["php"]], - before => Exec["build-cld"] + 
exec { "download-gumbo": + command => "git clone git://github.com/layershifter/gumbo-php.git /tmp/gumbo", + require => [Package["git"], Class["php"]] } - exec { "checkout-cld-version": - # recent version does not work, so we switch to an older one - command => "git reset --hard fd5aa5721b01bfe547ff6674fa0daa9c3b791ca3", - cwd => "/tmp/cld", - require => Exec["download-cld"], - before => Exec["build-cld"] - } - - exec { "build-cld": - command => "./build.sh", - #new cld:command => "sh compile_libs.sh", - cwd => "/tmp/cld/vendor/libcld", - require => Package["build-essential"], - provider => "shell" - } - - exec { "install-cld-extension": - command => "phpize && ./configure --with-libcld-dir=/tmp/cld/vendor/libcld && make && sudo make install", - cwd => "/tmp/cld", + exec { "install-gumbo-extension": + command => "phpize && ./configure && make && sudo make install", + cwd => "/tmp/gumbo", provider => "shell", - require => Exec["build-cld"] + require => Exec["download-gumbo"] } - file { "/etc/php5/mods-available/cld.ini": + file { "/etc/php/7.0/mods-available/gumbo.ini": ensure => present, #owner => root, group => root, mode => 444, - content => "extension=cld.so", - require => Exec["install-cld-extension"], - before => Exec["enable-cld"], + content => "extension=gumbo.so", + require => Exec["install-gumbo-extension"], + before => Exec["enable-gumbo"] } - exec { "enable-cld": - command => "sudo php5enmod cld", + exec { "enable-gumbo": + command => "sudo phpenmod gumbo", + notify => Exec["restart-apache"], + require => Exec["install-gumbo-extension"] + } +} + +class php_pecl_apc_bc { + exec { "install-apc-bc-pecl": + command => "sudo pecl install channel://pecl.php.net/apcu_bc-1.0.3", + #creates => "/tmp/needed/directory", + require => Class["php_pecl_apcu"] + } + + file { "/etc/php/7.0/mods-available/z_apc_bc.ini": + ensure => present, + #owner => root, group => root, mode => 444, + content => "extension=apc.so", + require => Exec["install-apc-bc-pecl"], + before => 
Exec["enable-apc-bc"] + } + exec { "enable-apc-bc": + command => "sudo phpenmod z_apc_bc", notify => Exec["restart-apache"], } } @@ -261,12 +279,17 @@ class final { command => "echo 'vm.swappiness = 10' >> /etc/sysctl.conf && sudo sysctl -p", provider => "shell" } + exec { "enable-php": + command => "sudo a2enmod php7.0 && sudo service apache2 restart", + provider => "shell" + } } include init include apache include php include php_pecl_apcu -include php_cld +include php_pecl_apc_bc include php_pecl_http +include php_gumbo include final \ No newline at end of file