diff --git a/admin/apc.php b/admin/apc.php index 45896c2..f244d27 100644 --- a/admin/apc.php +++ b/admin/apc.php @@ -22,7 +22,7 @@ */ -$VERSION='$Id: apc.php 307048 2011-01-03 23:53:17Z kalle $'; +$VERSION='$Id$'; ////////// READ OPTIONAL CONFIGURATION FILE //////////// if (file_exists("apc.conf.php")) include("apc.conf.php"); @@ -35,9 +35,10 @@ $admin_page = 'apc'; require_once('../config.php'); require_once('require_login.php'); require_once('template.php'); -if (!isset($_REQUEST['IMG'])) tpl_header('APC'); +if (!isset($_REQUEST['IMG'])) tpl_header('APCu'); ////////// BEGIN OF DEFAULT CONFIG AREA /////////////////////////////////////////////////////////// + defaults('USE_AUTHENTICATION',0); // Use (internal) authentication - best choice if // no other authentication is available // If set to 0: @@ -46,8 +47,8 @@ defaults('USE_AUTHENTICATION',0); // Use (internal) authentication - best choi // If set to 1: // You need to change ADMIN_PASSWORD to make // this work! -//defaults('ADMIN_USERNAME','admin'); // Admin Username -//defaults('ADMIN_PASSWORD',''); // Admin Password - CHANGE THIS TO ENABLE!!! +defaults('ADMIN_USERNAME','apc'); // Admin Username +defaults('ADMIN_PASSWORD','password'); // Admin Password - CHANGE THIS TO ENABLE!!! 
// (beckerr) I'm using a clear text password here, because I've no good idea how to let // users generate a md5 or crypt password in a easy way to fill it in above @@ -79,10 +80,8 @@ if (isset($_SERVER['SERVER_ADDR'])) { // operation constants define('OB_HOST_STATS',1); -define('OB_SYS_CACHE',2); -define('OB_USER_CACHE',3); -define('OB_SYS_CACHE_DIR',4); -define('OB_VERSION_CHECK',9); +define('OB_USER_CACHE',2); +define('OB_VERSION_CHECK',3); // check validity of input variables $vardom=array( @@ -99,12 +98,9 @@ $vardom=array( 'SORT1' => '/^[AHSMCDTZ]$/', // first sort key 'SORT2' => '/^[DA]$/', // second sort key 'AGGR' => '/^\d+$/', // aggregation by dir level - 'SEARCH' => '~^[a-zA-Z0-1/_.-]*$~' // aggregation by dir level + 'SEARCH' => '~^[a-zA-Z0-9/_.-]*$~' // aggregation by dir level ); -// default cache mode -$cache_mode='opcode'; - // cache scope $scope_list=array( 'A' => 'cache_list', @@ -183,28 +179,24 @@ EOB; } } } - -// select cache mode -if ($AUTHENTICATED && $MYREQUEST['OB'] == OB_USER_CACHE) { - $cache_mode='user'; -} + // clear cache if ($AUTHENTICATED && isset($MYREQUEST['CC']) && $MYREQUEST['CC']) { - apc_clear_cache($cache_mode); + apcu_clear_cache(); } if ($AUTHENTICATED && !empty($MYREQUEST['DU'])) { - apc_delete($MYREQUEST['DU']); + apcu_delete($MYREQUEST['DU']); } -if(!function_exists('apc_cache_info') || !($cache=@apc_cache_info($cache_mode))) { +if(!function_exists('apcu_cache_info')) { echo "No cache info available. 
APC does not appear to be running."; exit; } -$cache_user = apc_cache_info('user', 1); -$mem=apc_sma_info(); -if(!$cache['num_hits']) { $cache['num_hits']=1; $time++; } // Avoid division by 0 errors on a cache clear +$cache = apcu_cache_info(); + +$mem=apcu_sma_info(); // don't cache this page // @@ -390,13 +382,13 @@ if (isset($MYREQUEST['IMG'])) text_arc($image,$x,$y,$size,$angle[0]*360,$angle[1]*360,$col_black,bsize($s*($angle[1]-$angle[0]))); } break; - + case 2: - $s=$cache['num_hits']+$cache['num_misses']; - $a=$cache['num_hits']; + $s=$cache['nhits']+$cache['nmisses']; + $a=$cache['nhits']; - fill_box($image, 30,$size,50,-$a*($size-21)/$s,$col_black,$col_green,sprintf("%.1f%%",$cache['num_hits']*100/$s)); - fill_box($image,130,$size,50,-max(4,($s-$a)*($size-21)/$s),$col_black,$col_red,sprintf("%.1f%%",$cache['num_misses']*100/$s)); + fill_box($image, 30,$size,50,$s ? (-$a*($size-21)/$s) : 0,$col_black,$col_green,sprintf("%.1f%%",$s ? $cache['nhits']*100/$s : 0)); + fill_box($image,130,$size,50,$s ? -max(4,($s-$a)*($size-21)/$s) : 0,$col_black,$col_red,sprintf("%.1f%%",$s ? $cache['nmisses']*100/$s : 0)); break; case 3: @@ -439,15 +431,16 @@ if (isset($MYREQUEST['IMG'])) } } break; - case 4: - $s=$cache['num_hits']+$cache['num_misses']; - $a=$cache['num_hits']; - - fill_box($image, 30,$size,50,-$a*($size-21)/$s,$col_black,$col_green,sprintf("%.1f%%",$cache['num_hits']*100/$s)); - fill_box($image,130,$size,50,-max(4,($s-$a)*($size-21)/$s),$col_black,$col_red,sprintf("%.1f%%",$cache['num_misses']*100/$s)); + + case 4: + $s=$cache['nhits']+$cache['nmisses']; + $a=$cache['nhits']; + + fill_box($image, 30,$size,50,$s ? -$a*($size-21)/$s : 0,$col_black,$col_green,sprintf("%.1f%%", $s ? $cache['nhits']*100/$s : 0)); + fill_box($image,130,$size,50,$s ? -max(4,($s-$a)*($size-21)/$s) : 0,$col_black,$col_red,sprintf("%.1f%%", $s ? 
$cache['nmisses']*100/$s : 0)); break; - } + header("Content-type: image/png"); imagepng($image); exit; @@ -524,7 +517,7 @@ function block_sort($array1, $array2) /* -APC INFO <?php echo $host ?> +APCu INFO <?php echo $host ?> */ ?> +
diff --git a/extract.php b/extract.php new file mode 100644 index 0000000..273319b --- /dev/null +++ b/extract.php @@ -0,0 +1,64 @@ +. +*/ + +// Usage +// ----- +// Request this file passing it a web page URL in the querystring: extract.php?url=example.org +// You can use GET and POST requests. +// You'll get a simple JSON response: +/* +HTTP/1.0 200 OK +{ + "title": "Blowing Smoke with Boxing's Big Voice", + "content"

Content here

More content

", + "author": "Rafi Kohan", + "excerpt": "Short extract from the beginning of the article.", + "language": "en", + "url": "http://example.org/article.html", + "effective_url": "http://example.org/article.html", + "date": "2014-05-10" +} +*/ + +define('_FF_FTR_MODE', 'simple'); + +// Don't process URL as feed +$_POST['html'] = '1'; +// JSON output only +$_POST['format'] = 'json'; +// Enable excerpts +$_POST['summary'] = '1'; +// Don't produce result if extraction fails +$_POST['exc'] = '1'; +// Enable XSS filtering (unless explicitly disabled) +if (isset($_POST['xss']) && $_POST['xss'] !== '0') { + $_POST['xss'] = '1'; +} elseif (isset($_GET['xss']) && $_GET['xss'] !== '0') { + $_GET['xss'] = '1'; +} else { + $_POST['xss'] = '1'; +} + +require 'makefulltextfeed.php'; \ No newline at end of file diff --git a/ftr_compatibility_test.php b/ftr_compatibility_test.php index bb19148..59a1a6c 100644 --- a/ftr_compatibility_test.php +++ b/ftr_compatibility_test.php @@ -16,9 +16,10 @@ SimplePie.org. We have kept most of their checks intact as we use SimplePie in o http://github.com/simplepie/simplepie/tree/master/compatibility_test/ */ -$app_name = 'Full-Text RSS 3.2'; +$app_name = 'Full-Text RSS 3.3'; -$php_ok = (function_exists('version_compare') && version_compare(phpversion(), '5.2.0', '>=')); +// Full-Text RSS is not yet compatible with HHVM, that's why we check for it with HHVM_VERSION. +$php_ok = (function_exists('version_compare') && version_compare(phpversion(), '5.2.0', '>=') && !defined('HHVM_VERSION')); $pcre_ok = extension_loaded('pcre'); $zlib_ok = extension_loaded('zlib'); $mbstring_ok = extension_loaded('mbstring'); @@ -295,7 +296,7 @@ div.chunk {
  • Tidy: You have Tidy support installed. No problems here.
  • -
  • Tidy: The Tidy extension is not available. should still work with most feeds/articles, but you may experience problems with some.
  • +
  • Tidy: The Tidy extension is not available. should still work with most feeds/articles, but you may experience problems with some. For problem feeds we recommend you use the HTML5 parser.
  • @@ -362,7 +363,7 @@ div.chunk { ?>

    will be used on this server.

    -

    Alternative PHP Cache (APC)

    +

    Alternative PHP Cache (APC/APCu)

    Full-Text RSS can make use of APC's memory cache to store site config files (when requested for the first time). This is not required, but if available it may improve performance slightly by reducing disk access.

    +

    HTML parser

    +

    Full-Text RSS uses the fast libxml parser (the default PHP parser) but it can also make use of HTML5-PHP (an HTML5 parser written in PHP) if your version of PHP supports it. The latter might produce better results for some sites, especially if Tidy is not available on your server, however, it is slower than libxml.

    + = 0) { + echo '

    HTML5-PHP can be used on this server.

    '; + } else { + echo '

    You need at least PHP 5.3 to be able to use HTML5-PHP.

    '; + } + ?> +

    Language detection

    Full-Text RSS can detect the language of each article processed. This occurs using Text_LanguageDetect or PHP-CLD (if available).

    @@ -148,6 +174,7 @@ if (!defined('_FF_FTR_INDEX')) { @@ -196,6 +223,301 @@ if (!defined('_FF_FTR_INDEX')) {

    Thank you!

    Thanks for downloading and setting up Full-Text RSS. This software is developed and maintained by FiveFilters.org. If you find it useful, but have not purchased this from us, please consider supporting us by purchasing from FiveFilters.org.

    +
    + + + +
    + +

    Request and Response

    + +

    The details on this page are mainly intended for developers who'd like to use Full-Text RSS for article extraction and feed conversion. + News enthusiasts who simply want to subscribe to a full-text feed in their news reading application can safely ignore the details here and use the form above.

    + +

    This page describes the two endpoints offered by Full-Text RSS: Article Extraction and Feed Conversion. If you've restricted access to Full-Text RSS, the final section on API keys will tell you how to pass your key along in the request.

    + +
    +

    1. Article Extraction

    +

    To extract article content from a web page and get a simple JSON response, use the following endpoint:

    + + +

    Request Parameters

    + +

    When making HTTP requests, you can pass the following parameters to extract.php in a GET or POST request.

    +

    Note: for many of these parameters, the configuration file will ultimately determine if and how they can be used.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ParameterValueDescription
    urlstring (URL)This is the only required parameter. It should be the URL to a standard HTML page. You can omit the 'http://' prefix if you like.
    inputhtmlstring (HTML)If you already have the HTML, you can pass it here. We will not make any HTTP requests for the content if this parameter is used. Note: The input HTML should be UTF-8 encoded. And you will still need to give us the URL associated with the content (the URL may determine how the content is extracted, if we have extraction rules associated with it).
    content0, 1 (default)If set to 0, the extracted content will not be included in the output.
    linkspreserve (default), footnotes, removeLinks can either be preserved, made into footnotes, or removed. None of these options affect the link text, only the hyperlink itself.
    xss0, 1 (default)

    Use this to enable/disable XSS filtering. It is enabled by default, but if your application/framework/CMS already filters HTML for XSS vulnerabilities, you can disable XSS filtering here.

    +

    If enabled, we'll pass retrieved HTML content through htmLawed (safe flag on and style attributes denied). Note: when enabled this will remove certain elements you may want to preserve, such as iframes.

    lang0, 1 (default), 2, 3

    Language detection. If you'd like Full-Text RSS to find the language of the articles it processes, you can use one of the following values:

    +
    +
    0
    Ignore language
    +
    1
    Use article metadata (e.g. HTML lang attribute) (Default value)
    +
    2
    As above, but guess the language if it's not specified.
    +
    3
    Always guess the language, whether it's specified or not.
    +
    +
    debug[no value], rawhtml, parsedhtml

    If this parameter is present, Full-Text RSS will output the steps it is taking behind the scenes to help you debug problems.

    +

    If the parameter value is rawhtml, Full-Text RSS will output the HTTP response (headers and body) of the first response after redirects.

    +

    If the parameter value is parsedhtml, Full-Text RSS will output the reconstructed HTML (after its own parsing). This version is what the extraction rules are applied to, and it may differ from the original (rawhtml) output. If your extraction rules are not picking out any elements, this will likely help identify the problem.

    +

    Note: Full-Text RSS will stop execution after HTML output if one of the last two parameter values is passed. Otherwise it will continue showing debug output until the end.

    parserhtml5php, libxmlThe default parser is libxml as it's the fastest. HTML5-PHP is an HTML5 parser implemented in PHP. It's slower than libxml, but can often produce better results. You can request HTML5-PHP be used as the parser in a site-specific config file (to ensure it gets used for all URLs for that site), or explicitly via this request parameter.
    proxy0, 1, string (proxy name)This parameter has no effect if proxy servers have not been entered in the config file. If they have been entered and enabled, you can pass the following values: 0 to disable proxy use (uses direct connection). 1 for default proxy behaviour (whatever is set in the config), or a string to identify a specific proxy server (has to match the name given to the proxy in the config file).
    + + +

    Response (example)

    +

    Simple JSON output containing extracted article title, content, and more. It was produced from the following input URL: http://chomsky.info/articles/20131105.htm

    + + {
        "title": "De-Americanizing the World",
        "excerpt": "During the latest episode of the Washington farce that has astonish…",
        "date": null,
        "author": "Noam Chomsky",
        "language": "en",
        "url": "http://chomsky.info/articles/20131105.htm",
        "effective_url": "http://chomsky.info/articles/20131105.htm",
        "content": "<p>During the latest episode of the Washington farce that has aston…"
    }
    +

    Note: For brevity the output above is truncated.

    + +
    +

    2. Feed Conversion

    +

    To transform a partial feed to a full-text feed, pass the URL (encoded) in the querystring to the following URL:

    + + +

    All the parameters in the form at the top of this page can be passed in this way. Examine the URL in the address bar after you click 'Create Feed' to see the values.

    + +

    Request Parameters

    + +

    When making HTTP requests, you can pass the following parameters to makefulltextfeed.php in a GET request. Most of these parameters have default values suitable for news enthusiasts who simply want to subscribe to a full-text feed in their news reading application. If that's what you're doing, you can safely ignore the details here. For developers, or others who need more control over the output produced by Full-Text RSS, this section should give you an idea of what you can do.

    +

    We do not provide form fields for all of these parameters, but you can modify the URL in your browser after clicking 'Create Feed' to use them.

    +

    Note: for many of these parameters, the configuration file will ultimately determine if and how they can be used.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    ParameterValueDescription
    urlstring (URL)This is the only required parameter. It should be the URL to a partial feed or a standard HTML page. You can omit the 'http://' prefix if you like.
    formatrss (default), jsonThe default Full-Text RSS output is RSS. The only other valid output format is JSON. To get JSON output, pass format=json in the querystring. Exclude it from the URL (or set it to ‘rss’) if you’d like RSS.
    summary0 (default), 1If set to 1, an excerpt will be included for each item in the output.
    content0, 1 (default)If set to 0, the extracted content will not be included in the output.
    linkspreserve (default), footnotes, removeLinks can either be preserved, made into footnotes, or removed. None of these options affect the link text, only the hyperlink itself.
    exc0 (default), 1If Full-Text RSS fails to extract the article body, the generated feed item will include a message saying extraction failed followed by the original item description (if present in the original feed). You can ask Full-Text RSS to remove such items from the generated feed completely by passing 1 in this parameter.
    html0 (default), 1

    Treat input source as HTML (or parse-as-html-first mode). To enable, pass html=1 in the querystring. If enabled, Full-Text RSS will not attempt to parse the response as a feed. This increases performance slightly and should be used if you know that the URL is not a feed.

    + +

    Note: If excluded, or set to 0, Full-Text RSS first tries to parse the server's response as a feed, and only if it fails to parse as a feed will it revert to HTML parsing. In the default parse-as-feed-first mode, Full-Text RSS will identify itself as PHP first and only if a valid feed is returned will it identify itself as a browser in subsequent requests to fetch the feed items. In parse-as-html-first mode, Full-Text RSS will identify itself as a browser from the very first request.

    xss0 (default), 1

    Use this to enable XSS filtering. We have not enabled this by default because we assume the majority of our users do not display the HTML retrieved by Full-Text RSS in a web page without further processing. If you subscribe to our generated feeds in your news reader application, it should, if it's good software, already filter the resulting HTML for XSS attacks, making it redundant for Full-Text RSS to do the same. Similarly with frameworks/CMSs which display feed content - the content should be treated like any other user-submitted content.

    + +

    If you are writing an application yourself which is processing feeds generated by Full-Text RSS, you can either filter the HTML yourself to remove potential XSS attacks or enable this option. This might be useful if you are processing our generated feeds with JavaScript on the client side - although there's client side xss filtering available too.

    + +

    If enabled, we'll pass retrieved HTML content through htmLawed (safe flag on and style attributes denied). Note: if enabled this will also remove certain elements you may want to preserve, such as iframes.

    callbackstringThis is for JSONP use. If you're requesting JSON output, you can also specify a callback function (Javascript client-side function) to receive the Full-Text RSS JSON output.
    lang0, 1 (default), 2, 3

    Language detection. If you'd like Full-Text RSS to find the language of the articles it processes, you can use one of the following values:

    +
    +
    0
    Ignore language
    +
    1
    Use article metadata (e.g. HTML lang attribute) or feed metadata. (Default value)
    +
    2
    As above, but guess the language if it's not specified.
    +
    3
    Always guess the language, whether it's specified or not.
    +
    +

    If language detection is enabled and a match is found, the language code will be returned in the <dc:language> element inside the <item> element.

    +
    debug[no value], rawhtml, parsedhtml

    If this parameter is present, Full-Text RSS will output the steps it is taking behind the scenes to help you debug problems.

    +

    If the parameter value is rawhtml, Full-Text RSS will output the HTTP response (headers and body) of the first response after redirects.

    +

    If the parameter value is parsedhtml, Full-Text RSS will output the reconstructed HTML (after its own parsing). This version is what the extraction rules are applied to, and it may differ from the original (rawhtml) output. If your extraction rules are not picking out any elements, this will likely help identify the problem.

    +

    Note: Full-Text RSS will stop execution after HTML output if one of the last two parameter values is passed. Otherwise it will continue showing debug output until the end.

    parserhtml5php, libxmlThe default parser is libxml as it's the fastest. HTML5-PHP is an HTML5 parser implemented in PHP. It's slower than libxml, but can often produce better results. You can request HTML5-PHP be used as the parser in a site-specific config file (to ensure it gets used for all URLs for that site), or explicitly via this request parameter.
    proxy0, 1, string (proxy name)This parameter has no effect if proxy servers have not been entered in the config file. If they have been entered and enabled, you can pass the following values: 0 to disable proxy use (uses direct connection). 1 for default proxy behaviour (whatever is set in the config), or a string to identify a specific proxy server (has to match the name given to the proxy in the config file).
    + +

    Feed-only parameters — These parameters only apply to web feeds. They have no effect when the input URL points to a web page.

    + + + + + + + + + + + + + + + + + + + + + + + + +
    ParameterValueDescription
    use_extracted_title[no value]By default, if the input URL points to a feed, item titles in the generated feed will not be changed - we assume item titles in feeds are not truncated. If you'd like them to be replaced with titles Full-Text RSS extracts, use this parameter in the request (the value does not matter). To enable/disable this for all feeds, see the config file - specifically $options->favour_feed_titles
    maxnumberThe maximum number of feed items to process. (The default and upper limit will be found in the configuration file.)
    + +

    Response (example)

    +

    JSON output produced for the BBC feed http://feeds.bbci.co.uk/news/rss.xml. You can also request regular RSS.

    + + {
        "rss": {
            "@attributes": {
                "version": "2.0"
            }
    ,
            "channel": {
                "title": "BBC News - Home",
                "link": "http://www.bbc.co.uk/news/#sa-ns_mchannel=rss&amp;ns_source=PublicR…",
                "description": "The latest stories from the Home section of the BBC News web site.",
                "ttl": 15,
                "image": {
                    "title": "BBC News - Home",
                    "link": "http://www.bbc.co.uk/news/#sa-ns_mchannel=rss&amp;ns_source=PublicR…",
                    "url": "http://news.bbcimg.co.uk/nol/shared/img/bbc_news_120x60.gif"
                }
    ,
                "item": [
                    {
                        "title": "Russia's Putin visits annexed Crimea",
                        "link": "http://www.bbc.co.uk/news/world-europe-27344029#sa-ns_mchannel=rss&…",
                        "guid": "http://www.bbc.co.uk/news/world-europe-27344029#sa-ns_mchannel=rss&…",
                        "description": "President Putin: \"[Crimeans have] proved their loyalty to a histor…",
                        "content_encoded": "<!-- Adding hypertab -->&#13;\n&#13;\n&#13;\n<!-- end of hypertab -…",
                        "pubDate": "Fri, 09 May 2014 15:02:04 +0000",
                        "dc_language": "en-gb",
                        "dc_format": "text/html",
                        "dc_identifier": "http://www.bbc.co.uk/news/world-europe-27344029",
                        "media_thumbnail": [
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74751000/jpg/_74751301_ycst2i…"
                                }

                            }
    ,
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74751000/jpg/_74751302_ycst2i…"
                                }

                            }

                        ]

                    }
    ,
                    {
                        "title": "Harris 'assaulted daughter's friend'",
                        "link": "http://www.bbc.co.uk/news/uk-27340134#sa-ns_mchannel=rss&ns_source=…",
                        "guid": "http://www.bbc.co.uk/news/uk-27340134#sa-ns_mchannel=rss&amp;ns_sou…",
                        "description": "Rolf Harris arrives at court flanked by his wife and daughter Rolf …",
                        "content_encoded": "<!-- Embedding the video player -->&#13;\n<!-- This is the embedd…",
                        "pubDate": "Fri, 09 May 2014 15:21:52 +0000",
                        "dc_language": "en-gb",
                        "dc_format": "text/html",
                        "dc_identifier": "http://www.bbc.co.uk/news/uk-27340134",
                        "media_thumbnail": [
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74740000/jpg/_74740642_hi0221…"
                                }

                            }
    ,
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74740000/jpg/_74740643_hi0221…"
                                }

                            }

                        ]

                    }
    ,
                    {
                        "title": "Nigeria 'ignored' school warning",
                        "link": "http://www.bbc.co.uk/news/world-africa-27344863#sa-ns_mchannel=rss&…",
                        "guid": "http://www.bbc.co.uk/news/world-africa-27344863#sa-ns_mchannel=rss&…",
                        "description": "Nigeria's military had advance warning of the attack on a school at…",
                        "content_encoded": "<div class=\"caption full-width\">&#13;\n <img src=\"http://news.b…",
                        "pubDate": "Fri, 09 May 2014 15:48:34 +0000",
                        "dc_language": "en-gb",
                        "dc_format": "text/html",
                        "dc_identifier": "http://www.bbc.co.uk/news/world-africa-27344863",
                        "media_thumbnail": [
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74749000/jpg/_74749855_747495…"
                                }

                            }
    ,
                            {
                                "@attributes": {
                                    "url": "http://news.bbcimg.co.uk/media/images/74749000/jpg/_74749856_747495…"
                                }

                            }

                        ]

                    }

                ]

            }

        }

    }
    +

    Note: For brevity the output above is truncated.

    + +
    +

    API Keys

    +

    To restrict access to your copy of Full-Text RSS, you can specify API keys in the config file.

    +

    Note: Full-text feeds produced by Full-Text RSS are intended to be publicly accessible to work with feed readers. As such, the API key should not appear in the final URL for feeds.

    + + + + + + + + + + + + + + + + + + + + + + + +
    ParameterValueDescription
    keystring or number

    This parameter has two functions.

    If you're calling Full-Text RSS programmatically, it's better to use this parameter to provide the API key index number together with the hash parameter (see below) so that the actual API key does not get sent in the HTTP request.

    If you pass the actual API key in this parameter, the hash parameter is not required. If you pass the actual API key to makefulltextfeed.php, Full-Text RSS will find the index number and generate the hash value automatically and redirect to a new URL to hide the API key. If you'd like to link to a generated feed publicly while protecting your API key, make sure you copy and paste the URL that results after the redirect.

    If you've configured Full-Text RSS to require a key, an invalid key will result in an error message.

    hashstringA SHA-1 hash value of the API key (actual key, not index number) and the URL supplied in the url parameter, concatenated. This parameter must be passed along with the API key's index number using the key parameter (see above). In PHP, for example: $hash = sha1($api_key.$url);
    + + +
    @@ -224,14 +546,6 @@ if (!defined('_FF_FTR_INDEX')) {

    If you're not the owner of this site (i.e. you're not hosting this yourself), you do not have to rely on an external service if you don't want to. You can download your own copy of Full-Text RSS under the AGPL license.

    -

    URL Construction

    -

    To extract content from a web page or to transform an existing partial feed to full text, pass the URL (encoded) in the querystring to the following URL:

    - - -

    All the parameters in the form above can be passed in this way. Examine the URL in the address bar after you click 'Create Feed' to see the values.

    -

    Software Components

    Full-Text RSS is written in PHP and relies on the following primary components:

    Depending on your configuration, these secondary components may also be used:

    diff --git a/libraries/content-extractor/ContentExtractor.php b/libraries/content-extractor/ContentExtractor.php index 21e693e..77224c9 100644 --- a/libraries/content-extractor/ContentExtractor.php +++ b/libraries/content-extractor/ContentExtractor.php @@ -5,10 +5,10 @@ * Uses patterns specified in site config files and auto detection (hNews/PHP Readability) * to extract content from HTML files. * - * @version 1.0 - * @date 2013-02-05 + * @version 1.1 + * @date 2014-03-28 * @author Keyvan Minoukadeh - * @copyright 2013 Keyvan Minoukadeh + * @copyright 2014 Keyvan Minoukadeh * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 */ @@ -40,7 +40,9 @@ class ContentExtractor protected $body; protected $success = false; protected $nextPageUrl; - public $allowedParsers = array('libxml', 'html5lib'); + public $allowedParsers = array('libxml', 'html5php'); + public $defaultParser = 'libxml'; + public $parserOverride = null; public $fingerprints = array(); public $readability; public $debug = false; @@ -184,10 +186,18 @@ class ContentExtractor } // load and parse html - $_parser = $this->config->parser(); + if ($this->parserOverride) { + // from querystring: &parser=xxx + $_parser = $this->parserOverride; + } else { + // from site config file: parser: xxx + $_parser = $this->config->parser(); + } + // for backword compatibility... + if ($_parser == 'html5lib') $_parser = 'html5php'; if (!in_array($_parser, $this->allowedParsers)) { - $this->debug("HTML parser $_parser not listed, using libxml instead"); - $_parser = 'libxml'; + $this->debug("HTML parser $_parser not listed, using ".$this->defaultParser." instead"); + $_parser = $this->defaultParser; } $this->debug("Attempting to parse HTML with $_parser"); $this->readability = new Readability($html, $url, $_parser); @@ -310,7 +320,9 @@ class ContentExtractor if ($elems && $elems->length > 0) { $this->debug('Stripping '.$elems->length.' 
elements (strip)'); for ($i=$elems->length-1; $i >= 0; $i--) { - $elems->item($i)->parentNode->removeChild($elems->item($i)); + if ($elems->item($i)->parentNode) { + $elems->item($i)->parentNode->removeChild($elems->item($i)); + } } } } @@ -456,7 +468,7 @@ class ContentExtractor if ($detect_date) { // check for time element with pubdate attribute - $elems = @$xpath->query(".//time[@pubdate] | .//abbr[contains(concat(' ',normalize-space(@class),' '),' published ')]", $hentry); + $elems = @$xpath->query(".//time[@pubdate or @pubDate] | .//abbr[contains(concat(' ',normalize-space(@class),' '),' published ')]", $hentry); if ($elems && $elems->length > 0) { $this->date = strtotime(trim($elems->item(0)->textContent)); // remove date from document @@ -572,6 +584,55 @@ class ContentExtractor $detect_body = false; } } + + // check for elements marked with itemprop="articleBody" (from Schema.org) + if ($detect_body) { + $elems = @$xpath->query("//*[@itemprop='articleBody']", $this->readability->dom); + if ($elems && $elems->length > 0) { + $this->debug('body found (Schema.org itemprop="articleBody")'); + if ($elems->length == 1) { + // what if it's empty? (content placed outside an empty itemprop='articleBody' element) + $e = $elems->item(0); + if (($e->tagName == 'img') || (trim($e->textContent) != '')) { + $this->body = $elems->item(0); + // prune (clean up elements that may not be content) + if ($this->config->prune()) { + $this->debug('Pruning content'); + $this->readability->prepArticle($this->body); + } + $detect_body = false; + } else { + $this->debug('Schema.org: skipping itemprop="articleBody" - appears not to contain content'); + } + unset($e); + } else { + $this->body = $this->readability->dom->createElement('div'); + $this->debug($elems->length.' 
itemprop="articleBody" elems found'); + foreach ($elems as $elem) { + if (!isset($elem->parentNode)) continue; + $isDescendant = false; + foreach ($this->body->childNodes as $parent) { + if ($this->isDescendant($parent, $elem)) { + $isDescendant = true; + break; + } + } + if ($isDescendant) { + $this->debug('Element is child of another body element, skipping.'); + } else { + // prune (clean up elements that may not be content) + if ($this->config->prune()) { + $this->debug('Pruning content'); + $this->readability->prepArticle($elem); + } + $this->debug('Element added to body'); + $this->body->appendChild($elem); + } + } + $detect_body = false; + } + } + } // Find author in rel="author" marked element // We only use this if there's exactly one. @@ -594,7 +655,7 @@ class ContentExtractor // For the same reason given above, we only use this // if there's exactly one element. if ($detect_date) { - $elems = @$xpath->query("//time[@pubdate]", $this->readability->dom); + $elems = @$xpath->query("//time[@pubdate or @pubDate]", $this->readability->dom); if ($elems && $elems->length == 1) { $this->date = strtotime(trim($elems->item(0)->textContent)); // remove date from document diff --git a/libraries/content-extractor/SiteConfig.php b/libraries/content-extractor/SiteConfig.php index 1f6a760..8675ee2 100644 --- a/libraries/content-extractor/SiteConfig.php +++ b/libraries/content-extractor/SiteConfig.php @@ -159,6 +159,7 @@ class SiteConfig $key = strtolower($key); if (substr($key, 0, 4) == 'www.') $key = substr($key, 4); if ($config->cache_key) $key = $config->cache_key; + $key .= '.'.self::get_key_suffix(); self::$config_cache[$key] = $config; if (self::$apc && $use_apc) { self::debug("Adding site config to APC cache with key sc.$key"); @@ -169,6 +170,7 @@ class SiteConfig public static function is_cached($key) { $key = strtolower($key); + $key .= '.'.self::get_key_suffix(); if (substr($key, 0, 4) == 'www.') $key = substr($key, 4); if (array_key_exists($key, 
self::$config_cache)) { return true; @@ -198,6 +200,16 @@ class SiteConfig } } + // This is used to make sure that when a different primary folder is chosen + // The key for the cached result includes that folder choice. + // Otherwise, a subsequent request choosing a different folder + // could return the wrong cached config. + public static function get_key_suffix() { + $key_suffix = basename(self::$config_path); + if ($key_suffix === 'custom') $key_suffix = ''; + return $key_suffix; + } + // returns SiteConfig instance if an appropriate one is found, false otherwise // if $exact_host_match is true, we will not look for wildcard config matches // by default if host is 'test.example.org' we will look for and load '.example.org.txt' if it exists @@ -216,13 +228,20 @@ class SiteConfig } } + // Which primary folder should we look inside? + // If it's not the default ('custom'), we need + // a key suffix to distinguish site config fules + // held in this folder from those in other folders. + $key_suffix = self::get_key_suffix(); + // look for site config file in primary folder self::debug(". looking for site config for $host in primary folder"); foreach ($try as $h) { - if (array_key_exists($h, self::$config_cache)) { + $h_key = "$h.$key_suffix"; + if (array_key_exists($h_key, self::$config_cache)) { self::debug("... site config for $h already loaded in this request"); - return self::$config_cache[$h]; - } elseif (self::$apc && ($sconfig = apc_fetch("sc.$h"))) { + return self::$config_cache[$h_key]; + } elseif (self::$apc && ($sconfig = apc_fetch("sc.$h_key"))) { self::debug("... 
site config for $h in APC cache"); return $sconfig; } elseif (file_exists(self::$config_path."/$h.txt")) { diff --git a/libraries/feedwriter/FeedWriter.php b/libraries/feedwriter/FeedWriter.php index 0cd1ea0..f9ea03b 100644 --- a/libraries/feedwriter/FeedWriter.php +++ b/libraries/feedwriter/FeedWriter.php @@ -6,7 +6,7 @@ define('JSONP', 3, true); /** * Univarsel Feed Writer class * - * Genarate RSS2 or JSON (original: RSS 1.0, RSS2.0 and ATOM Feed) + * Generate RSS2 or JSON (original: RSS 1.0, RSS2.0 and ATOM Feed) * * Modified for FiveFilters.org's Full-Text RSS project * to allow for inclusion of hubs, JSON output. @@ -26,6 +26,7 @@ define('JSONP', 3, true); private $CDATAEncoding = array(); // The tag names which have to encoded as CDATA private $xsl = null; // stylesheet to render RSS (used by Chrome) private $json = null; // JSON object + private $simplejson = false; private $version = null; @@ -52,6 +53,10 @@ define('JSONP', 3, true); // Start # public functions --------------------------------------------- + public function enableSimpleJson($enable=true) { + $this->simplejson = $enable; + } + /** * Set a channel element * @access public @@ -82,12 +87,12 @@ define('JSONP', 3, true); } /** - * Genarate the actual RSS/JSON file + * Generate the actual RSS/JSON file * * @access public * @return void */ - public function genarateFeed() + public function generateFeed() { if ($this->version == RSS2) { header('Content-type: text/xml; charset=UTF-8'); @@ -106,7 +111,46 @@ define('JSONP', 3, true); $this->printItems(); $this->printTale(); if ($this->version == JSON || $this->version == JSONP) { - echo json_encode($this->json); + if (!$this->simplejson) { + echo json_encode($this->json); + } else { + $simplejson = new stdClass(); + if (is_array($this->json->rss['channel']->item)) { + // get first item + $jsonitem = $this->json->rss['channel']->item[0]; + } else { + $jsonitem = $this->json->rss['channel']->item; + } + // defaults + $simplejson->title = null; + 
$simplejson->excerpt = null; + $simplejson->date = null; + $simplejson->author = null; + $simplejson->language = null; + $simplejson->url = null; + $simplejson->effective_url = null; + $simplejson->content = null; + // actual values + $simplejson->url = $jsonitem->link; + $simplejson->effective_url = $jsonitem->dc_identifier; + if (isset($jsonitem->title)) $simplejson->title = $jsonitem->title; + if (isset($jsonitem->dc_language)) $simplejson->language = $jsonitem->dc_language; + if (isset($jsonitem->content_encoded)) { + $simplejson->content = $jsonitem->content_encoded; + if (isset($jsonitem->description)) { + $simplejson->excerpt = $jsonitem->description; + } + } else { + $simplejson->content = $jsonitem->description; + } + if (isset($jsonitem->dc_creator)) { + $simplejson->author = $jsonitem->dc_creator; + } + if (isset($jsonitem->pubDate)) { + $simplejson->date = gmdate(DATE_ATOM, strtotime($jsonitem->pubDate)); + } + echo json_encode($simplejson); + } } } @@ -175,7 +219,19 @@ define('JSONP', 3, true); public function setXsl($xsl) { $this->xsl = $xsl; - } + } + + /** + * Set TTL + * + * @access public + * @param int time to live (minutes) + * @return void + */ + public function setTtl($ttl) + { + $this->setChannelElement('ttl', (int)$ttl); + } /** * Set self URL @@ -196,10 +252,9 @@ define('JSONP', 3, true); * @param srting value of 'description' channel tag * @return void */ - public function setDescription($desciption) - { - $tag = ($this->version == ATOM)? 
'subtitle' : 'description'; - $this->setChannelElement($tag, $desciption); + public function setDescription($description) + { + $this->setChannelElement('description', $description); } /** @@ -404,9 +459,9 @@ define('JSONP', 3, true); echo $this->endItem(); if ($this->version == JSON || $this->version == JSONP) { if (count($this->items) > 1) { - $this->json->rss['channel']->item[] = $json_item; + $this->json->rss['channel']->item[] = (object)$json_item; } else { - $this->json->rss['channel']->item = $json_item; + $this->json->rss['channel']->item = (object)$json_item; } } } diff --git a/libraries/htmLawed/htmLawed.php b/libraries/htmLawed/htmLawed.php index 9a62aca..032ef79 100644 --- a/libraries/htmLawed/htmLawed.php +++ b/libraries/htmLawed/htmLawed.php @@ -1,8 +1,8 @@ (`.|[^"])*)"/sme', 'substr(str_replace(array(";", "|", "~", " ", ",", "/", "(", ")", \'`"\'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", "\""), "$0"), 1, -1)', trim($t))); +$t = str_replace(array("\t", "\r", "\n", ' '), '', preg_replace_callback('/"(?>(`.|[^"])*)"/sm', create_function('$m', 'return substr(str_replace(array(";", "|", "~", " ", ",", "/", "(", ")", \'`"\'), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07", "\x08", "\""), $m[0]), 1, -1);'), trim($t))); for($i = count(($t = explode(';', $t))); --$i>=0;){ $w = $t[$i]; if(empty($w) or ($e = strpos($w, '=')) === false or !strlen(($a = substr($w, $e+1)))){continue;} @@ -647,7 +647,7 @@ return ''; public static function hl_tidy($t, $w, $p){ // Tidy/compact HTM if(strpos(' pre,script,textarea', "$p,")){return $t;} -$t = str_replace(' ]*(?)\s+`', '`\s+`', '`(<\w[^>]*(?) `'), array(' $1', ' ', '$1'), preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)()`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). 
$m[4];'), $t))); +$t = preg_replace('`\s+`', ' ', preg_replace_callback(array('`(<(!\[CDATA\[))(.+?)(\]\]>)`sm', '`(<(!--))(.+?)(-->)`sm', '`(<(pre|script|textarea)[^>]*?>)(.+?)()`sm'), create_function('$m', 'return $m[1]. str_replace(array("<", ">", "\n", "\r", "\t", " "), array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), $m[3]). $m[4];'), $t)); if(($w = strtolower($w)) == -1){ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array('<', '>', "\n", "\r", "\t", ' '), $t); } @@ -655,7 +655,7 @@ $s = strpos(" $w", 't') ? "\t" : ' '; $s = preg_match('`\d`', $w, $m) ? str_repeat($s, $m[0]) : str_repeat($s, ($s == "\t" ? 1 : 2)); $N = preg_match('`[ts]([1-9])`', $w, $m) ? $m[1] : 0; $a = array('br'=>1); -$b = array('button'=>1, 'input'=>1, 'option'=>1); +$b = array('button'=>1, 'input'=>1, 'option'=>1, 'param'=>1); $c = array('caption'=>1, 'dd'=>1, 'dt'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'isindex'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'object'=>1, 'p'=>1, 'pre'=>1, 'td'=>1, 'textarea'=>1, 'th'=>1); $d = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'colgroup'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'fieldset'=>1, 'form'=>1, 'hr'=>1, 'iframe'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1); $T = explode('<', $t); @@ -677,20 +677,20 @@ while($X){ else{++$N; ob_end_clean(); continue 2;} } else{echo "\n", str_repeat($s, $n), "$e\n", str_repeat($s, ($x != 1 ? ++$n : $n));} - echo ltrim($r); continue; + echo $r; continue; } $f = "\n". 
str_repeat($s, $n); if(isset($c[$y])){ - if(!$x){echo $e, $f, ltrim($r);} + if(!$x){echo $e, $f, $r;} else{echo $f, $e, $r;} }elseif(isset($b[$y])){echo $f, $e, $r; - }elseif(isset($a[$y])){echo $e, $f, ltrim($r); - }elseif(!$y){echo $f, $e, $f, ltrim($r); + }elseif(isset($a[$y])){echo $e, $f, $r; + }elseif(!$y){echo $f, $e, $f, $r; }else{echo $e, $r;} } $X = 0; } -$t = preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents()); +$t = str_replace(array("\n ", " \n"), "\n", preg_replace('`[\n]\s*?[\n]+`', "\n", ob_get_contents())); ob_end_clean(); if(($l = strpos(" $w", 'r') ? (strpos(" $w", 'n') ? "\r\n" : "\r") : 0)){ $t = str_replace("\n", $l, $t); @@ -701,7 +701,7 @@ return str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05", "\x07"), array( public static function hl_version(){ // rel -return '1.1.14'; +return '1.1.17'; // eof } diff --git a/libraries/html5/Data.php b/libraries/html5/Data.php deleted file mode 100644 index 497345f..0000000 --- a/libraries/html5/Data.php +++ /dev/null @@ -1,114 +0,0 @@ - 0xFFFD, // REPLACEMENT CHARACTER - 0x0D => 0x000A, // LINE FEED (LF) - 0x80 => 0x20AC, // EURO SIGN ('€') - 0x81 => 0x0081, // - 0x82 => 0x201A, // SINGLE LOW-9 QUOTATION MARK ('‚') - 0x83 => 0x0192, // LATIN SMALL LETTER F WITH HOOK ('ƒ') - 0x84 => 0x201E, // DOUBLE LOW-9 QUOTATION MARK ('„') - 0x85 => 0x2026, // HORIZONTAL ELLIPSIS ('…') - 0x86 => 0x2020, // DAGGER ('†') - 0x87 => 0x2021, // DOUBLE DAGGER ('‡') - 0x88 => 0x02C6, // MODIFIER LETTER CIRCUMFLEX ACCENT ('ˆ') - 0x89 => 0x2030, // PER MILLE SIGN ('‰') - 0x8A => 0x0160, // LATIN CAPITAL LETTER S WITH CARON ('Š') - 0x8B => 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK ('‹') - 0x8C => 0x0152, // LATIN CAPITAL LIGATURE OE ('Œ') - 0x8D => 0x008D, // - 0x8E => 0x017D, // LATIN CAPITAL LETTER Z WITH CARON ('Ž') - 0x8F => 0x008F, // - 0x90 => 0x0090, // - 0x91 => 0x2018, // LEFT SINGLE QUOTATION MARK ('‘') - 0x92 => 0x2019, // RIGHT SINGLE QUOTATION MARK ('’') - 0x93 => 0x201C, // LEFT DOUBLE 
QUOTATION MARK ('“') - 0x94 => 0x201D, // RIGHT DOUBLE QUOTATION MARK ('”') - 0x95 => 0x2022, // BULLET ('•') - 0x96 => 0x2013, // EN DASH ('–') - 0x97 => 0x2014, // EM DASH ('—') - 0x98 => 0x02DC, // SMALL TILDE ('˜') - 0x99 => 0x2122, // TRADE MARK SIGN ('™') - 0x9A => 0x0161, // LATIN SMALL LETTER S WITH CARON ('š') - 0x9B => 0x203A, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK ('›') - 0x9C => 0x0153, // LATIN SMALL LIGATURE OE ('œ') - 0x9D => 0x009D, // - 0x9E => 0x017E, // LATIN SMALL LETTER Z WITH CARON ('ž') - 0x9F => 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS ('Ÿ') - ); - - protected static $namedCharacterReferences; - - protected static $namedCharacterReferenceMaxLength; - - /** - * Returns the "real" Unicode codepoint of a malformed character - * reference. - */ - public static function getRealCodepoint($ref) { - if (!isset(self::$realCodepointTable[$ref])) return false; - else return self::$realCodepointTable[$ref]; - } - - public static function getNamedCharacterReferences() { - if (!self::$namedCharacterReferences) { - self::$namedCharacterReferences = unserialize( - file_get_contents(dirname(__FILE__) . '/named-character-references.ser')); - } - return self::$namedCharacterReferences; - } - - /** - * Converts a Unicode codepoint to sequence of UTF-8 bytes. - * @note Shamelessly stolen from HTML Purifier, which is also - * shamelessly stolen from Feyd (which is in public domain). 
- */ - public static function utf8chr($code) { - /* We don't care: we live dangerously - * if($code > 0x10FFFF or $code < 0x0 or - ($code >= 0xD800 and $code <= 0xDFFF) ) { - // bits are set outside the "valid" range as defined - // by UNICODE 4.1.0 - return "\xEF\xBF\xBD"; - }*/ - - $x = $y = $z = $w = 0; - if ($code < 0x80) { - // regular ASCII character - $x = $code; - } else { - // set up bits for UTF-8 - $x = ($code & 0x3F) | 0x80; - if ($code < 0x800) { - $y = (($code & 0x7FF) >> 6) | 0xC0; - } else { - $y = (($code & 0xFC0) >> 6) | 0x80; - if($code < 0x10000) { - $z = (($code >> 12) & 0x0F) | 0xE0; - } else { - $z = (($code >> 12) & 0x3F) | 0x80; - $w = (($code >> 18) & 0x07) | 0xF0; - } - } - } - // set up the actual character - $ret = ''; - if($w) $ret .= chr($w); - if($z) $ret .= chr($z); - if($y) $ret .= chr($y); - $ret .= chr($x); - - return $ret; - } - -} diff --git a/libraries/html5/InputStream.php b/libraries/html5/InputStream.php deleted file mode 100644 index f98b427..0000000 --- a/libraries/html5/InputStream.php +++ /dev/null @@ -1,284 +0,0 @@ - - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -*/ - -// Some conventions: -// /* */ indicates verbatim text from the HTML 5 specification -// // indicates regular comments - -class HTML5_InputStream { - /** - * The string data we're parsing. - */ - private $data; - - /** - * The current integer byte position we are in $data - */ - private $char; - - /** - * Length of $data; when $char === $data, we are at the end-of-file. - */ - private $EOF; - - /** - * Parse errors. - */ - public $errors = array(); - - /** - * @param $data Data to parse - */ - public function __construct($data) { - - /* Given an encoding, the bytes in the input stream must be - converted to Unicode characters for the tokeniser, as - described by the rules for that encoding, except that the - leading U+FEFF BYTE ORDER MARK character, if any, must not - be stripped by the encoding layer (it is stripped by the rule below). - - Bytes or sequences of bytes in the original byte stream that - could not be converted to Unicode characters must be converted - to U+FFFD REPLACEMENT CHARACTER code points. */ - - // XXX currently assuming input data is UTF-8; once we - // build encoding detection this will no longer be the case - // - // We previously had an mbstring implementation here, but that - // implementation is heavily non-conforming, so it's been - // omitted. - if (extension_loaded('iconv')) { - // non-conforming - $data = @iconv('UTF-8', 'UTF-8//IGNORE', $data); - } else { - // we can make a conforming native implementation - throw new Exception('Not implemented, please install mbstring or iconv'); - } - - /* One leading U+FEFF BYTE ORDER MARK character must be - ignored if any are present. 
*/ - if (substr($data, 0, 3) === "\xEF\xBB\xBF") { - $data = substr($data, 3); - } - - /* All U+0000 NULL characters in the input must be replaced - by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such - characters is a parse error. */ - for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i++) { - $this->errors[] = array( - 'type' => HTML5_Tokenizer::PARSEERROR, - 'data' => 'null-character' - ); - } - /* U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED - (LF) characters are treated specially. Any CR characters - that are followed by LF characters must be removed, and any - CR characters not followed by LF characters must be converted - to LF characters. Thus, newlines in HTML DOMs are represented - by LF characters, and there are never any CR characters in the - input to the tokenization stage. */ - $data = str_replace( - array( - "\0", - "\r\n", - "\r" - ), - array( - "\xEF\xBF\xBD", - "\n", - "\n" - ), - $data - ); - - /* Any occurrences of any characters in the ranges U+0001 to - U+0008, U+000B, U+000E to U+001F, U+007F to U+009F, - U+D800 to U+DFFF , U+FDD0 to U+FDEF, and - characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF, - U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE, - U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF, - U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE, - U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and - U+10FFFF are parse errors. (These are all control characters - or permanently undefined Unicode characters.) */ - // Check PCRE is loaded. 
- if (extension_loaded('pcre')) { - $count = preg_match_all( - '/(?: - [\x01-\x08\x0B\x0E-\x1F\x7F] # U+0001 to U+0008, U+000B, U+000E to U+001F and U+007F - | - \xC2[\x80-\x9F] # U+0080 to U+009F - | - \xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF]) # U+D800 to U+DFFFF - | - \xEF\xB7[\x90-\xAF] # U+FDD0 to U+FDEF - | - \xEF\xBF[\xBE\xBF] # U+FFFE and U+FFFF - | - [\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16}) - )/x', - $data, - $matches - ); - for ($i = 0; $i < $count; $i++) { - $this->errors[] = array( - 'type' => HTML5_Tokenizer::PARSEERROR, - 'data' => 'invalid-codepoint' - ); - } - } else { - // XXX: Need non-PCRE impl, probably using substr_count - } - - $this->data = $data; - $this->char = 0; - $this->EOF = strlen($data); - } - - /** - * Returns the current line that the tokenizer is at. - */ - public function getCurrentLine() { - // Check the string isn't empty - if($this->EOF) { - // Add one to $this->char because we want the number for the next - // byte to be processed. - return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1; - } else { - // If the string is empty, we are on the first line (sorta). - return 1; - } - } - - /** - * Returns the current column of the current line that the tokenizer is at. - */ - public function getColumnOffset() { - // strrpos is weird, and the offset needs to be negative for what we - // want (i.e., the last \n before $this->char). This needs to not have - // one (to make it point to the next character, the one we want the - // position of) added to it because strrpos's behaviour includes the - // final offset byte. - $lastLine = strrpos($this->data, "\n", $this->char - 1 - strlen($this->data)); - - // However, for here we want the length up until the next byte to be - // processed, so add one to the current byte ($this->char). 
- if($lastLine !== false) { - $findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine); - } else { - $findLengthOf = substr($this->data, 0, $this->char); - } - - // Get the length for the string we need. - if(extension_loaded('iconv')) { - return iconv_strlen($findLengthOf, 'utf-8'); - } elseif(extension_loaded('mbstring')) { - return mb_strlen($findLengthOf, 'utf-8'); - } elseif(extension_loaded('xml')) { - return strlen(utf8_decode($findLengthOf)); - } else { - $count = count_chars($findLengthOf); - // 0x80 = 0x7F - 0 + 1 (one added to get inclusive range) - // 0x33 = 0xF4 - 0x2C + 1 (one added to get inclusive range) - return array_sum(array_slice($count, 0, 0x80)) + - array_sum(array_slice($count, 0xC2, 0x33)); - } - } - - /** - * Retrieve the currently consume character. - * @note This performs bounds checking - */ - public function char() { - return ($this->char++ < $this->EOF) - ? $this->data[$this->char - 1] - : false; - } - - /** - * Get all characters until EOF. - * @note This performs bounds checking - */ - public function remainingChars() { - if($this->char < $this->EOF) { - $data = substr($this->data, $this->char); - $this->char = $this->EOF; - return $data; - } else { - return false; - } - } - - /** - * Matches as far as possible until we reach a certain set of bytes - * and returns the matched substring. - * @param $bytes Bytes to match. - */ - public function charsUntil($bytes, $max = null) { - if ($this->char < $this->EOF) { - if ($max === 0 || $max) { - $len = strcspn($this->data, $bytes, $this->char, $max); - } else { - $len = strcspn($this->data, $bytes, $this->char); - } - $string = (string) substr($this->data, $this->char, $len); - $this->char += $len; - return $string; - } else { - return false; - } - } - - /** - * Matches as far as possible with a certain set of bytes - * and returns the matched substring. - * @param $bytes Bytes to match. 
- */ - public function charsWhile($bytes, $max = null) { - if ($this->char < $this->EOF) { - if ($max === 0 || $max) { - $len = strspn($this->data, $bytes, $this->char, $max); - } else { - $len = strspn($this->data, $bytes, $this->char); - } - $string = (string) substr($this->data, $this->char, $len); - $this->char += $len; - return $string; - } else { - return false; - } - } - - /** - * Unconsume one character. - */ - public function unget() { - if ($this->char <= $this->EOF) { - $this->char--; - } - } -} diff --git a/libraries/html5/Parser.php b/libraries/html5/Parser.php deleted file mode 100644 index 5f9ca56..0000000 --- a/libraries/html5/Parser.php +++ /dev/null @@ -1,36 +0,0 @@ -parse(); - return $tokenizer->save(); - } - /** - * Parses an HTML fragment. - * @param $text HTML text to parse - * @param $context String name of context element to pretend parsing is in. - * @param $builder Custom builder implementation - * @return Parsed HTML as DOMDocument - */ - static public function parseFragment($text, $context = null, $builder = null) { - $tokenizer = new HTML5_Tokenizer($text, $builder); - $tokenizer->parseFragment($context); - return $tokenizer->save(); - } -} diff --git a/libraries/html5/Tokenizer.php b/libraries/html5/Tokenizer.php deleted file mode 100644 index 0af0716..0000000 --- a/libraries/html5/Tokenizer.php +++ /dev/null @@ -1,2422 +0,0 @@ - -Copyright 2008 Edward Z. 
Yang -Copyright 2009 Geoffrey Sneddon - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -*/ - -// Some conventions: -// /* */ indicates verbatim text from the HTML 5 specification -// // indicates regular comments - -// all flags are in hyphenated form - -class HTML5_Tokenizer { - /** - * Points to an InputStream object. - */ - protected $stream; - - /** - * Tree builder that the tokenizer emits token to. - */ - private $tree; - - /** - * Current content model we are parsing as. - */ - protected $content_model; - - /** - * Current token that is being built, but not yet emitted. Also - * is the last token emitted, if applicable. - */ - protected $token; - - // These are constants describing the content model - const PCDATA = 0; - const RCDATA = 1; - const CDATA = 2; - const PLAINTEXT = 3; - - // These are constants describing tokens - // XXX should probably be moved somewhere else, probably the - // HTML5 class. 
- const DOCTYPE = 0; - const STARTTAG = 1; - const ENDTAG = 2; - const COMMENT = 3; - const CHARACTER = 4; - const SPACECHARACTER = 5; - const EOF = 6; - const PARSEERROR = 7; - - // These are constants representing bunches of characters. - const ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'; - const UPPER_ALPHA = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'; - const LOWER_ALPHA = 'abcdefghijklmnopqrstuvwxyz'; - const DIGIT = '0123456789'; - const HEX = '0123456789ABCDEFabcdef'; - const WHITESPACE = "\t\n\x0c "; - - /** - * @param $data Data to parse - */ - public function __construct($data, $builder = null) { - $this->stream = new HTML5_InputStream($data); - if (!$builder) $this->tree = new HTML5_TreeBuilder; - else $this->tree = $builder; - $this->content_model = self::PCDATA; - } - - public function parseFragment($context = null) { - $this->tree->setupContext($context); - if ($this->tree->content_model) { - $this->content_model = $this->tree->content_model; - $this->tree->content_model = null; - } - $this->parse(); - } - - // XXX maybe convert this into an iterator? regardless, this function - // and the save function should go into a Parser facade of some sort - /** - * Performs the actual parsing of the document. - */ - public function parse() { - // Current state - $state = 'data'; - // This is used to avoid having to have look-behind in the data state. - $lastFourChars = ''; - /** - * Escape flag as specified by the HTML5 specification: "used to - * control the behavior of the tokeniser. It is either true or - * false, and initially must be set to the false state." - */ - $escape = false; - //echo "\n\n"; - while($state !== null) { - - /*echo $state . 
' '; - switch ($this->content_model) { - case self::PCDATA: echo 'PCDATA'; break; - case self::RCDATA: echo 'RCDATA'; break; - case self::CDATA: echo 'CDATA'; break; - case self::PLAINTEXT: echo 'PLAINTEXT'; break; - } - if ($escape) echo " escape"; - echo "\n";*/ - - switch($state) { - case 'data': - - /* Consume the next input character */ - $char = $this->stream->char(); - $lastFourChars .= $char; - if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4); - - // see below for meaning - $hyp_cond = - !$escape && - ( - $this->content_model === self::RCDATA || - $this->content_model === self::CDATA - ); - $amp_cond = - !$escape && - ( - $this->content_model === self::PCDATA || - $this->content_model === self::RCDATA - ); - $lt_cond = - $this->content_model === self::PCDATA || - ( - ( - $this->content_model === self::RCDATA || - $this->content_model === self::CDATA - ) && - !$escape - ); - $gt_cond = - $escape && - ( - $this->content_model === self::RCDATA || - $this->content_model === self::CDATA - ); - - if($char === '&' && $amp_cond) { - /* U+0026 AMPERSAND (&) - When the content model flag is set to one of the PCDATA or RCDATA - states and the escape flag is false: switch to the - character reference data state. Otherwise: treat it as per - the "anything else" entry below. */ - $state = 'character reference data'; - - } elseif( - $char === '-' && - $hyp_cond && - $lastFourChars === '' - ) { - /* If the content model flag is set to either the RCDATA state or - the CDATA state, and the escape flag is true, and the last three - characters in the input stream including this one are U+002D - HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"), - set the escape flag to false. */ - $escape = false; - - /* In any case, emit the input character as a character token. - Stay in the data state. */ - $this->emitToken(array( - 'type' => self::CHARACTER, - 'data' => '>' - )); - // We do the "any case" part as part of "anything else". 
- - } elseif($char === false) { - /* EOF - Emit an end-of-file token. */ - $state = null; - $this->tree->emitToken(array( - 'type' => self::EOF - )); - - } elseif($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - // Directly after emitting a token you switch back to the "data - // state". At that point spaceCharacters are important so they are - // emitted separately. - $chars = $this->stream->charsWhile(self::WHITESPACE); - $this->emitToken(array( - 'type' => self::SPACECHARACTER, - 'data' => $char . $chars - )); - $lastFourChars .= $chars; - if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4); - - } else { - /* Anything else - THIS IS AN OPTIMIZATION: Get as many character that - otherwise would also be treated as a character token and emit it - as a single character token. Stay in the data state. */ - - $mask = ''; - if ($hyp_cond) $mask .= '-'; - if ($amp_cond) $mask .= '&'; - if ($lt_cond) $mask .= '<'; - if ($gt_cond) $mask .= '>'; - - if ($mask === '') { - $chars = $this->stream->remainingChars(); - } else { - $chars = $this->stream->charsUntil($mask); - } - - $this->emitToken(array( - 'type' => self::CHARACTER, - 'data' => $char . $chars - )); - - $lastFourChars .= $chars; - if (strlen($lastFourChars) > 4) $lastFourChars = substr($lastFourChars, -4); - - $state = 'data'; - } - break; - - case 'character reference data': - /* (This cannot happen if the content model flag - is set to the CDATA state.) */ - - /* Attempt to consume a character reference, with no - additional allowed character. */ - $entity = $this->consumeCharacterReference(); - - /* If nothing is returned, emit a U+0026 AMPERSAND - character token. Otherwise, emit the character token that - was returned. */ - // This is all done when consuming the character reference. - $this->emitToken(array( - 'type' => self::CHARACTER, - 'data' => $entity - )); - - /* Finally, switch to the data state. 
*/ - $state = 'data'; - break; - - case 'tag open': - $char = $this->stream->char(); - - switch($this->content_model) { - case self::RCDATA: - case self::CDATA: - /* Consume the next input character. If it is a - U+002F SOLIDUS (/) character, switch to the close - tag open state. Otherwise, emit a U+003C LESS-THAN - SIGN character token and reconsume the current input - character in the data state. */ - // We consumed above. - - if($char === '/') { - $state = 'close tag open'; - - } else { - $this->emitToken(array( - 'type' => self::CHARACTER, - 'data' => '<' - )); - - $this->stream->unget(); - - $state = 'data'; - } - break; - - case self::PCDATA: - /* If the content model flag is set to the PCDATA state - Consume the next input character: */ - // We consumed above. - - if($char === '!') { - /* U+0021 EXCLAMATION MARK (!) - Switch to the markup declaration open state. */ - $state = 'markup declaration open'; - - } elseif($char === '/') { - /* U+002F SOLIDUS (/) - Switch to the close tag open state. */ - $state = 'close tag open'; - - } elseif('A' <= $char && $char <= 'Z') { - /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z - Create a new start tag token, set its tag name to the lowercase - version of the input character (add 0x0020 to the character's code - point), then switch to the tag name state. (Don't emit the token - yet; further details will be filled in before it is emitted.) */ - $this->token = array( - 'name' => strtolower($char), - 'type' => self::STARTTAG, - 'attr' => array() - ); - - $state = 'tag name'; - - } elseif('a' <= $char && $char <= 'z') { - /* U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z - Create a new start tag token, set its tag name to the input - character, then switch to the tag name state. (Don't emit - the token yet; further details will be filled in before it - is emitted.) 
*/ - $this->token = array( - 'name' => $char, - 'type' => self::STARTTAG, - 'attr' => array() - ); - - $state = 'tag name'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Emit a U+003C LESS-THAN SIGN character token and a - U+003E GREATER-THAN SIGN character token. Switch to the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-tag-name-but-got-right-bracket' - )); - $this->emitToken(array( - 'type' => self::CHARACTER, - 'data' => '<>' - )); - - $state = 'data'; - - } elseif($char === '?') { - /* U+003F QUESTION MARK (?) - Parse error. Switch to the bogus comment state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-tag-name-but-got-question-mark' - )); - $this->token = array( - 'data' => '?', - 'type' => self::COMMENT - ); - $state = 'bogus comment'; - - } else { - /* Anything else - Parse error. Emit a U+003C LESS-THAN SIGN character token and - reconsume the current input character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-tag-name' - )); - $this->emitToken(array( - 'type' => self::CHARACTER, - 'data' => '<' - )); - - $state = 'data'; - $this->stream->unget(); - } - break; - } - break; - - case 'close tag open': - if ( - $this->content_model === self::RCDATA || - $this->content_model === self::CDATA - ) { - /* If the content model flag is set to the RCDATA or CDATA - states... 
*/ - $name = strtolower($this->stream->charsWhile(self::ALPHA)); - $following = $this->stream->char(); - $this->stream->unget(); - if ( - !$this->token || - $this->token['name'] !== $name || - $this->token['name'] === $name && !in_array($following, array("\x09", "\x0A", "\x0C", "\x20", "\x3E", "\x2F", false)) - ) { - /* if no start tag token has ever been emitted by this instance - of the tokenizer (fragment case), or, if the next few - characters do not match the tag name of the last start tag - token emitted (compared in an ASCII case-insensitive manner), - or if they do but they are not immediately followed by one of - the following characters: - - * U+0009 CHARACTER TABULATION - * U+000A LINE FEED (LF) - * U+000C FORM FEED (FF) - * U+0020 SPACE - * U+003E GREATER-THAN SIGN (>) - * U+002F SOLIDUS (/) - * EOF - - ...then emit a U+003C LESS-THAN SIGN character token, a - U+002F SOLIDUS character token, and switch to the data - state to process the next input character. */ - // XXX: Probably ought to replace in_array with $following === x ||... - - // We also need to emit $name now we've consumed that, as we - // know it'll just be emitted as a character token. - $this->emitToken(array( - 'type' => self::CHARACTER, - 'data' => 'token = array( - 'name' => $name, - 'type' => self::ENDTAG - ); - - // Change to tag name state. - $state = 'tag name'; - } - } elseif ($this->content_model === self::PCDATA) { - /* Otherwise, if the content model flag is set to the PCDATA - state [...]: */ - $char = $this->stream->char(); - - if ('A' <= $char && $char <= 'Z') { - /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z - Create a new end tag token, set its tag name to the lowercase version - of the input character (add 0x0020 to the character's code point), then - switch to the tag name state. (Don't emit the token yet; further details - will be filled in before it is emitted.) 
*/ - $this->token = array( - 'name' => strtolower($char), - 'type' => self::ENDTAG - ); - - $state = 'tag name'; - - } elseif ('a' <= $char && $char <= 'z') { - /* U+0061 LATIN SMALL LETTER A through to U+007A LATIN SMALL LETTER Z - Create a new end tag token, set its tag name to the - input character, then switch to the tag name state. - (Don't emit the token yet; further details will be - filled in before it is emitted.) */ - $this->token = array( - 'name' => $char, - 'type' => self::ENDTAG - ); - - $state = 'tag name'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Switch to the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-closing-tag-but-got-right-bracket' - )); - $state = 'data'; - - } elseif($char === false) { - /* EOF - Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F - SOLIDUS character token. Reconsume the EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-closing-tag-but-got-eof' - )); - $this->emitToken(array( - 'type' => self::CHARACTER, - 'data' => 'stream->unget(); - $state = 'data'; - - } else { - /* Parse error. Switch to the bogus comment state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-closing-tag-but-got-char' - )); - $this->token = array( - 'data' => $char, - 'type' => self::COMMENT - ); - $state = 'bogus comment'; - } - } - break; - - case 'tag name': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. */ - $state = 'before attribute name'; - - } elseif($char === '/') { - /* U+002F SOLIDUS (/) - Switch to the self-closing start tag state. 
*/ - $state = 'self-closing start tag'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $state = 'data'; - - } elseif('A' <= $char && $char <= 'Z') { - /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Append the lowercase version of the current input - character (add 0x0020 to the character's code point) to - the current tag token's tag name. Stay in the tag name state. */ - $chars = $this->stream->charsWhile(self::UPPER_ALPHA); - - $this->token['name'] .= strtolower($char . $chars); - $state = 'tag name'; - - } elseif($char === false) { - /* EOF - Parse error. Reconsume the EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-tag-name' - )); - - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Append the current input character to the current tag token's tag name. - Stay in the tag name state. */ - $chars = $this->stream->charsUntil("\t\n\x0C />" . self::UPPER_ALPHA); - - $this->token['name'] .= $char . $chars; - $state = 'tag name'; - } - break; - - case 'before attribute name': - /* Consume the next input character: */ - $char = $this->stream->char(); - - // this conditional is optimized, check bottom - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute name state. */ - $state = 'before attribute name'; - - } elseif($char === '/') { - /* U+002F SOLIDUS (/) - Switch to the self-closing start tag state. */ - $state = 'self-closing start tag'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. 
*/ - $this->emitToken($this->token); - $state = 'data'; - - } elseif('A' <= $char && $char <= 'Z') { - /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Start a new attribute in the current tag token. Set that - attribute's name to the lowercase version of the current - input character (add 0x0020 to the character's code - point), and its value to the empty string. Switch to the - attribute name state.*/ - $this->token['attr'][] = array( - 'name' => strtolower($char), - 'value' => '' - ); - - $state = 'attribute name'; - - } elseif($char === false) { - /* EOF - Parse error. Reconsume the EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-attribute-name-but-got-eof' - )); - - $this->stream->unget(); - $state = 'data'; - - } else { - /* U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - U+003C LESS-THAN SIGN (<) - U+003D EQUALS SIGN (=) - Parse error. Treat it as per the "anything else" entry - below. */ - if($char === '"' || $char === "'" || $char === '<' || $char === '=') { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'invalid-character-in-attribute-name' - )); - } - - /* Anything else - Start a new attribute in the current tag token. Set that attribute's - name to the current input character, and its value to the empty string. - Switch to the attribute name state. */ - $this->token['attr'][] = array( - 'name' => $char, - 'value' => '' - ); - - $state = 'attribute name'; - } - break; - - case 'attribute name': - // Consume the next input character: - $char = $this->stream->char(); - - // this conditional is optimized, check bottom - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the after attribute name state. 
*/ - $state = 'after attribute name'; - - } elseif($char === '/') { - /* U+002F SOLIDUS (/) - Switch to the self-closing start tag state. */ - $state = 'self-closing start tag'; - - } elseif($char === '=') { - /* U+003D EQUALS SIGN (=) - Switch to the before attribute value state. */ - $state = 'before attribute value'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $state = 'data'; - - } elseif('A' <= $char && $char <= 'Z') { - /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Append the lowercase version of the current input - character (add 0x0020 to the character's code point) to - the current attribute's name. Stay in the attribute name - state. */ - $chars = $this->stream->charsWhile(self::UPPER_ALPHA); - - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['name'] .= strtolower($char . $chars); - - $state = 'attribute name'; - - } elseif($char === false) { - /* EOF - Parse error. Reconsume the EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-attribute-name' - )); - - $this->stream->unget(); - $state = 'data'; - - } else { - /* U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - U+003C LESS-THAN SIGN (<) - Parse error. Treat it as per the "anything else" - entry below. */ - if($char === '"' || $char === "'" || $char === '<') { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'invalid-character-in-attribute-name' - )); - } - - /* Anything else - Append the current input character to the current attribute's name. - Stay in the attribute name state. */ - $chars = $this->stream->charsUntil("\t\n\x0C /=>\"'" . self::UPPER_ALPHA); - - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['name'] .= $char . 
$chars; - - $state = 'attribute name'; - } - - /* When the user agent leaves the attribute name state - (and before emitting the tag token, if appropriate), the - complete attribute's name must be compared to the other - attributes on the same token; if there is already an - attribute on the token with the exact same name, then this - is a parse error and the new attribute must be dropped, along - with the value that gets associated with it (if any). */ - // this might be implemented in the emitToken method - break; - - case 'after attribute name': - // Consume the next input character: - $char = $this->stream->char(); - - // this is an optimized conditional, check the bottom - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after attribute name state. */ - $state = 'after attribute name'; - - } elseif($char === '/') { - /* U+002F SOLIDUS (/) - Switch to the self-closing start tag state. */ - $state = 'self-closing start tag'; - - } elseif($char === '=') { - /* U+003D EQUALS SIGN (=) - Switch to the before attribute value state. */ - $state = 'before attribute value'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $state = 'data'; - - } elseif('A' <= $char && $char <= 'Z') { - /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Start a new attribute in the current tag token. Set that - attribute's name to the lowercase version of the current - input character (add 0x0020 to the character's code - point), and its value to the empty string. Switch to the - attribute name state. */ - $this->token['attr'][] = array( - 'name' => strtolower($char), - 'value' => '' - ); - - $state = 'attribute name'; - - } elseif($char === false) { - /* EOF - Parse error. Reconsume the EOF character in the data state. 
*/ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-end-of-tag-but-got-eof' - )); - - $this->stream->unget(); - $state = 'data'; - - } else { - /* U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - U+003C LESS-THAN SIGN(<) - Parse error. Treat it as per the "anything else" - entry below. */ - if($char === '"' || $char === "'" || $char === "<") { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'invalid-character-after-attribute-name' - )); - } - - /* Anything else - Start a new attribute in the current tag token. Set that attribute's - name to the current input character, and its value to the empty string. - Switch to the attribute name state. */ - $this->token['attr'][] = array( - 'name' => $char, - 'value' => '' - ); - - $state = 'attribute name'; - } - break; - - case 'before attribute value': - // Consume the next input character: - $char = $this->stream->char(); - - // this is an optimized conditional - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before attribute value state. */ - $state = 'before attribute value'; - - } elseif($char === '"') { - /* U+0022 QUOTATION MARK (") - Switch to the attribute value (double-quoted) state. */ - $state = 'attribute value (double-quoted)'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the attribute value (unquoted) state and reconsume - this input character. */ - $this->stream->unget(); - $state = 'attribute value (unquoted)'; - - } elseif($char === '\'') { - /* U+0027 APOSTROPHE (') - Switch to the attribute value (single-quoted) state. */ - $state = 'attribute value (single-quoted)'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Emit the current tag token. Switch to the data state. 
*/ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-attribute-value-but-got-right-bracket' - )); - $this->emitToken($this->token); - $state = 'data'; - - } elseif($char === false) { - /* EOF - Parse error. Reconsume the EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-attribute-value-but-got-eof' - )); - $this->stream->unget(); - $state = 'data'; - - } else { - /* U+003D EQUALS SIGN (=) - * U+003C LESS-THAN SIGN (<) - Parse error. Treat it as per the "anything else" entry below. */ - if($char === '=' || $char === '<') { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'equals-in-unquoted-attribute-value' - )); - } - - /* Anything else - Append the current input character to the current attribute's value. - Switch to the attribute value (unquoted) state. */ - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char; - - $state = 'attribute value (unquoted)'; - } - break; - - case 'attribute value (double-quoted)': - // Consume the next input character: - $char = $this->stream->char(); - - if($char === '"') { - /* U+0022 QUOTATION MARK (") - Switch to the after attribute value (quoted) state. */ - $state = 'after attribute value (quoted)'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the character reference in attribute value - state, with the additional allowed character - being U+0022 QUOTATION MARK ("). */ - $this->characterReferenceInAttributeValue('"'); - - } elseif($char === false) { - /* EOF - Parse error. Reconsume the EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-attribute-value-double-quote' - )); - - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's value. - Stay in the attribute value (double-quoted) state. 
*/ - $chars = $this->stream->charsUntil('"&'); - - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char . $chars; - - $state = 'attribute value (double-quoted)'; - } - break; - - case 'attribute value (single-quoted)': - // Consume the next input character: - $char = $this->stream->char(); - - if($char === "'") { - /* U+0022 QUOTATION MARK (') - Switch to the after attribute value state. */ - $state = 'after attribute value (quoted)'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the entity in attribute value state. */ - $this->characterReferenceInAttributeValue("'"); - - } elseif($char === false) { - /* EOF - Parse error. Reconsume the EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-attribute-value-single-quote' - )); - - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Append the current input character to the current attribute's value. - Stay in the attribute value (single-quoted) state. */ - $chars = $this->stream->charsUntil("'&"); - - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char . $chars; - - $state = 'attribute value (single-quoted)'; - } - break; - - case 'attribute value (unquoted)': - // Consume the next input character: - $char = $this->stream->char(); - - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. */ - $state = 'before attribute name'; - - } elseif($char === '&') { - /* U+0026 AMPERSAND (&) - Switch to the entity in attribute value state, with the - additional allowed character being U+003E - GREATER-THAN SIGN (>). */ - $this->characterReferenceInAttributeValue('>'); - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. 
*/ - $this->emitToken($this->token); - $state = 'data'; - - } elseif ($char === false) { - /* EOF - Parse error. Reconsume the EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-attribute-value-no-quotes' - )); - $this->stream->unget(); - $state = 'data'; - - } else { - /* U+0022 QUOTATION MARK (") - U+0027 APOSTROPHE (') - U+003C LESS-THAN SIGN (<) - U+003D EQUALS SIGN (=) - Parse error. Treat it as per the "anything else" - entry below. */ - if($char === '"' || $char === "'" || $char === '=' || $char == '<') { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-character-in-unquoted-attribute-value' - )); - } - - /* Anything else - Append the current input character to the current attribute's value. - Stay in the attribute value (unquoted) state. */ - $chars = $this->stream->charsUntil("\t\n\x0c &>\"'="); - - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char . $chars; - - $state = 'attribute value (unquoted)'; - } - break; - - case 'after attribute value (quoted)': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before attribute name state. */ - $state = 'before attribute name'; - - } elseif ($char === '/') { - /* U+002F SOLIDUS (/) - Switch to the self-closing start tag state. */ - $state = 'self-closing start tag'; - - } elseif ($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current tag token. Switch to the data state. */ - $this->emitToken($this->token); - $state = 'data'; - - } elseif ($char === false) { - /* EOF - Parse error. Reconsume the EOF character in the data state. 
*/ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-EOF-after-attribute-value' - )); - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Parse error. Reconsume the character in the before attribute - name state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-character-after-attribute-value' - )); - $this->stream->unget(); - $state = 'before attribute name'; - } - break; - - case 'self-closing start tag': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if ($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Set the self-closing flag of the current tag token. - Emit the current tag token. Switch to the data state. */ - // not sure if this is the name we want - $this->token['self-closing'] = true; - $this->emitToken($this->token); - $state = 'data'; - - } elseif ($char === false) { - /* EOF - Parse error. Reconsume the EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-eof-after-self-closing' - )); - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Parse error. Reconsume the character in the before attribute name state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-character-after-self-closing' - )); - $this->stream->unget(); - $state = 'before attribute name'; - } - break; - - case 'bogus comment': - /* (This can only happen if the content model flag is set to the PCDATA state.) */ - /* Consume every character up to the first U+003E GREATER-THAN SIGN - character (>) or the end of the file (EOF), whichever comes first. 
Emit - a comment token whose data is the concatenation of all the characters - starting from and including the character that caused the state machine - to switch into the bogus comment state, up to and including the last - consumed character before the U+003E character, if any, or up to the - end of the file otherwise. (If the comment was started by the end of - the file (EOF), the token is empty.) */ - $this->token['data'] .= (string) $this->stream->charsUntil('>'); - $this->stream->char(); - - $this->emitToken($this->token); - - /* Switch to the data state. */ - $state = 'data'; - break; - - case 'markup declaration open': - // Consume for below - $hyphens = $this->stream->charsWhile('-', 2); - if ($hyphens === '-') { - $this->stream->unget(); - } - if ($hyphens !== '--') { - $alpha = $this->stream->charsWhile(self::ALPHA, 7); - } - - /* If the next two characters are both U+002D HYPHEN-MINUS (-) - characters, consume those two characters, create a comment token whose - data is the empty string, and switch to the comment state. */ - if($hyphens === '--') { - $state = 'comment start'; - $this->token = array( - 'data' => '', - 'type' => self::COMMENT - ); - - /* Otherwise if the next seven characters are a case-insensitive match - for the word "DOCTYPE", then consume those characters and switch to the - DOCTYPE state. */ - } elseif(strtoupper($alpha) === 'DOCTYPE') { - $state = 'DOCTYPE'; - - // XXX not implemented - /* Otherwise, if the insertion mode is "in foreign content" - and the current node is not an element in the HTML namespace - and the next seven characters are an ASCII case-sensitive - match for the string "[CDATA[" (the five uppercase letters - "CDATA" with a U+005B LEFT SQUARE BRACKET character before - and after), then consume those characters and switch to the - CDATA section state (which is unrelated to the content model - flag's CDATA state). */ - - /* Otherwise, is is a parse error. Switch to the bogus comment state. 
- The next character that is consumed, if any, is the first character - that will be in the comment. */ - } else { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-dashes-or-doctype' - )); - $this->token = array( - 'data' => (string) $alpha, - 'type' => self::COMMENT - ); - $state = 'bogus comment'; - } - break; - - case 'comment start': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if ($char === '-') { - /* U+002D HYPHEN-MINUS (-) - Switch to the comment start dash state. */ - $state = 'comment start dash'; - } elseif ($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Emit the comment token. Switch to the - data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'incorrect-comment' - )); - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === false) { - /* EOF - Parse error. Emit the comment token. Reconsume the - EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-comment' - )); - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - /* Anything else - Append the input character to the comment token's - data. Switch to the comment state. */ - $this->token['data'] .= $char; - $state = 'comment'; - } - break; - - case 'comment start dash': - /* Consume the next input character: */ - $char = $this->stream->char(); - if ($char === '-') { - /* U+002D HYPHEN-MINUS (-) - Switch to the comment end state */ - $state = 'comment end'; - } elseif ($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Emit the comment token. Switch to the - data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'incorrect-comment' - )); - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === false) { - /* Parse error. Emit the comment token. Reconsume the - EOF character in the data state. 
*/ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-comment' - )); - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - $this->token['data'] .= '-' . $char; - $state = 'comment'; - } - break; - - case 'comment': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === '-') { - /* U+002D HYPHEN-MINUS (-) - Switch to the comment end dash state */ - $state = 'comment end dash'; - - } elseif($char === false) { - /* EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-comment' - )); - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Append the input character to the comment token's data. Stay in - the comment state. */ - $chars = $this->stream->charsUntil('-'); - - $this->token['data'] .= $char . $chars; - } - break; - - case 'comment end dash': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === '-') { - /* U+002D HYPHEN-MINUS (-) - Switch to the comment end state */ - $state = 'comment end'; - - } elseif($char === false) { - /* EOF - Parse error. Emit the comment token. Reconsume the EOF character - in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-comment-end-dash' - )); - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Append a U+002D HYPHEN-MINUS (-) character and the input - character to the comment token's data. Switch to the comment state. */ - $this->token['data'] .= '-'.$char; - $state = 'comment'; - } - break; - - case 'comment end': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the comment token. Switch to the data state. 
*/ - $this->emitToken($this->token); - $state = 'data'; - - } elseif($char === '-') { - /* U+002D HYPHEN-MINUS (-) - Parse error. Append a U+002D HYPHEN-MINUS (-) character - to the comment token's data. Stay in the comment end - state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-dash-after-double-dash-in-comment' - )); - $this->token['data'] .= '-'; - - } elseif($char === "\t" || $char === "\n" || $char === "\x0a" || $char === ' ') { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-space-after-double-dash-in-comment' - )); - $this->token['data'] .= '--' . $char; - $state = 'comment end space'; - - } elseif($char === '!') { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-bang-after-double-dash-in-comment' - )); - $state = 'comment end bang'; - - } elseif($char === false) { - /* EOF - Parse error. Emit the comment token. Reconsume the - EOF character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-comment-double-dash' - )); - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Parse error. Append two U+002D HYPHEN-MINUS (-) - characters and the input character to the comment token's - data. Switch to the comment state. 
*/ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-char-in-comment' - )); - $this->token['data'] .= '--'.$char; - $state = 'comment'; - } - break; - - case 'comment end bang': - $char = $this->stream->char(); - if ($char === '>') { - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === "-") { - $this->token['data'] .= '--!'; - $state = 'comment end dash'; - } elseif ($char === false) { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-comment-end-bang' - )); - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - $this->token['data'] .= '--!' . $char; - $state = 'comment'; - } - break; - - case 'comment end space': - $char = $this->stream->char(); - if ($char === '>') { - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === '-') { - $state = 'comment end dash'; - } elseif ($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - $this->token['data'] .= $char; - } elseif ($char === false) { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-eof-in-comment-end-space', - )); - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - $this->token['data'] .= $char; - $state = 'comment'; - } - break; - - case 'DOCTYPE': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the before DOCTYPE name state. */ - $state = 'before DOCTYPE name'; - - } elseif($char === false) { - /* EOF - Parse error. Create a new DOCTYPE token. Set its - force-quirks flag to on. Emit the token. Reconsume the - EOF character in the data state. 
*/ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'need-space-after-doctype-but-got-eof' - )); - $this->emitToken(array( - 'name' => '', - 'type' => self::DOCTYPE, - 'force-quirks' => true, - 'error' => true - )); - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Parse error. Reconsume the current character in the - before DOCTYPE name state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'need-space-after-doctype' - )); - $this->stream->unget(); - $state = 'before DOCTYPE name'; - } - break; - - case 'before DOCTYPE name': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before DOCTYPE name state. */ - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Create a new DOCTYPE token. Set its - force-quirks flag to on. Emit the token. Switch to the - data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-doctype-name-but-got-right-bracket' - )); - $this->emitToken(array( - 'name' => '', - 'type' => self::DOCTYPE, - 'force-quirks' => true, - 'error' => true - )); - - $state = 'data'; - - } elseif('A' <= $char && $char <= 'Z') { - /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Create a new DOCTYPE token. Set the token's name to the - lowercase version of the input character (add 0x0020 to - the character's code point). Switch to the DOCTYPE name - state. */ - $this->token = array( - 'name' => strtolower($char), - 'type' => self::DOCTYPE, - 'error' => true - ); - - $state = 'DOCTYPE name'; - - } elseif($char === false) { - /* EOF - Parse error. Create a new DOCTYPE token. Set its - force-quirks flag to on. Emit the token. Reconsume the - EOF character in the data state. 
*/ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-doctype-name-but-got-eof' - )); - $this->emitToken(array( - 'name' => '', - 'type' => self::DOCTYPE, - 'force-quirks' => true, - 'error' => true - )); - - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Create a new DOCTYPE token. Set the token's name to the - current input character. Switch to the DOCTYPE name state. */ - $this->token = array( - 'name' => $char, - 'type' => self::DOCTYPE, - 'error' => true - ); - - $state = 'DOCTYPE name'; - } - break; - - case 'DOCTYPE name': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Switch to the after DOCTYPE name state. */ - $state = 'after DOCTYPE name'; - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. */ - $this->emitToken($this->token); - $state = 'data'; - - } elseif('A' <= $char && $char <= 'Z') { - /* U+0041 LATIN CAPITAL LETTER A through to U+005A LATIN CAPITAL LETTER Z - Append the lowercase version of the input character - (add 0x0020 to the character's code point) to the current - DOCTYPE token's name. Stay in the DOCTYPE name state. */ - $this->token['name'] .= strtolower($char); - - } elseif($char === false) { - /* EOF - Parse error. Set the DOCTYPE token's force-quirks flag - to on. Emit that DOCTYPE token. Reconsume the EOF - character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-doctype-name' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Append the current input character to the current - DOCTYPE token's name. Stay in the DOCTYPE name state. 
*/ - $this->token['name'] .= $char; - } - - // XXX this is probably some sort of quirks mode designation, - // check tree-builder to be sure. In general 'error' needs - // to be specc'ified, this probably means removing it at the end - $this->token['error'] = ($this->token['name'] === 'HTML') - ? false - : true; - break; - - case 'after DOCTYPE name': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after DOCTYPE name state. */ - - } elseif($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. */ - $this->emitToken($this->token); - $state = 'data'; - - } elseif($char === false) { - /* EOF - Parse error. Set the DOCTYPE token's force-quirks flag - to on. Emit that DOCTYPE token. Reconsume the EOF - character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else */ - - $nextSix = strtoupper($char . $this->stream->charsWhile(self::ALPHA, 5)); - if ($nextSix === 'PUBLIC') { - /* If the next six characters are an ASCII - case-insensitive match for the word "PUBLIC", then - consume those characters and switch to the before - DOCTYPE public identifier state. */ - $state = 'before DOCTYPE public identifier'; - - } elseif ($nextSix === 'SYSTEM') { - /* Otherwise, if the next six characters are an ASCII - case-insensitive match for the word "SYSTEM", then - consume those characters and switch to the before - DOCTYPE system identifier state. */ - $state = 'before DOCTYPE system identifier'; - - } else { - /* Otherwise, this is the parse error. Set the DOCTYPE - token's force-quirks flag to on. 
Switch to the bogus - DOCTYPE state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-space-or-right-bracket-in-doctype' - )); - $this->token['force-quirks'] = true; - $this->token['error'] = true; - $state = 'bogus DOCTYPE'; - } - } - break; - - case 'before DOCTYPE public identifier': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before DOCTYPE public identifier state. */ - } elseif ($char === '"') { - /* U+0022 QUOTATION MARK (") - Set the DOCTYPE token's public identifier to the empty - string (not missing), then switch to the DOCTYPE public - identifier (double-quoted) state. */ - $this->token['public'] = ''; - $state = 'DOCTYPE public identifier (double-quoted)'; - } elseif ($char === "'") { - /* U+0027 APOSTROPHE (') - Set the DOCTYPE token's public identifier to the empty - string (not missing), then switch to the DOCTYPE public - identifier (single-quoted) state. */ - $this->token['public'] = ''; - $state = 'DOCTYPE public identifier (single-quoted)'; - } elseif ($char === '>') { - /* Parse error. Set the DOCTYPE token's force-quirks flag - to on. Emit that DOCTYPE token. Switch to the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-end-of-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === false) { - /* Parse error. Set the DOCTYPE token's force-quirks - flag to on. Emit that DOCTYPE token. Reconsume the EOF - character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - /* Parse error. 
Set the DOCTYPE token's force-quirks flag - to on. Switch to the bogus DOCTYPE state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-char-in-doctype' - )); - $this->token['force-quirks'] = true; - $state = 'bogus DOCTYPE'; - } - break; - - case 'DOCTYPE public identifier (double-quoted)': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if ($char === '"') { - /* U+0022 QUOTATION MARK (") - Switch to the after DOCTYPE public identifier state. */ - $state = 'after DOCTYPE public identifier'; - } elseif ($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag - to on. Emit that DOCTYPE token. Switch to the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-end-of-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === false) { - /* EOF - Parse error. Set the DOCTYPE token's force-quirks flag - to on. Emit that DOCTYPE token. Reconsume the EOF - character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - /* Anything else - Append the current input character to the current - DOCTYPE token's public identifier. Stay in the DOCTYPE - public identifier (double-quoted) state. */ - $this->token['public'] .= $char; - } - break; - - case 'DOCTYPE public identifier (single-quoted)': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if ($char === "'") { - /* U+0027 APOSTROPHE (') - Switch to the after DOCTYPE public identifier state. */ - $state = 'after DOCTYPE public identifier'; - } elseif ($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag - to on. 
Emit that DOCTYPE token. Switch to the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-end-of-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === false) { - /* EOF - Parse error. Set the DOCTYPE token's force-quirks flag - to on. Emit that DOCTYPE token. Reconsume the EOF - character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - /* Anything else - Append the current input character to the current - DOCTYPE token's public identifier. Stay in the DOCTYPE - public identifier (double-quoted) state. */ - $this->token['public'] .= $char; - } - break; - - case 'after DOCTYPE public identifier': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after DOCTYPE public identifier state. */ - } elseif ($char === '"') { - /* U+0022 QUOTATION MARK (") - Set the DOCTYPE token's system identifier to the - empty string (not missing), then switch to the DOCTYPE - system identifier (double-quoted) state. */ - $this->token['system'] = ''; - $state = 'DOCTYPE system identifier (double-quoted)'; - } elseif ($char === "'") { - /* U+0027 APOSTROPHE (') - Set the DOCTYPE token's system identifier to the - empty string (not missing), then switch to the DOCTYPE - system identifier (single-quoted) state. */ - $this->token['system'] = ''; - $state = 'DOCTYPE system identifier (single-quoted)'; - } elseif ($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. 
*/ - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === false) { - /* Parse error. Set the DOCTYPE token's force-quirks - flag to on. Emit that DOCTYPE token. Reconsume the EOF - character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - /* Anything else - Parse error. Set the DOCTYPE token's force-quirks flag - to on. Switch to the bogus DOCTYPE state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-char-in-doctype' - )); - $this->token['force-quirks'] = true; - $state = 'bogus DOCTYPE'; - } - break; - - case 'before DOCTYPE system identifier': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the before DOCTYPE system identifier state. */ - } elseif ($char === '"') { - /* U+0022 QUOTATION MARK (") - Set the DOCTYPE token's system identifier to the empty - string (not missing), then switch to the DOCTYPE system - identifier (double-quoted) state. */ - $this->token['system'] = ''; - $state = 'DOCTYPE system identifier (double-quoted)'; - } elseif ($char === "'") { - /* U+0027 APOSTROPHE (') - Set the DOCTYPE token's system identifier to the empty - string (not missing), then switch to the DOCTYPE system - identifier (single-quoted) state. */ - $this->token['system'] = ''; - $state = 'DOCTYPE system identifier (single-quoted)'; - } elseif ($char === '>') { - /* Parse error. Set the DOCTYPE token's force-quirks flag - to on. Emit that DOCTYPE token. Switch to the data state. 
*/ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-char-in-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === false) { - /* Parse error. Set the DOCTYPE token's force-quirks - flag to on. Emit that DOCTYPE token. Reconsume the EOF - character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - /* Parse error. Set the DOCTYPE token's force-quirks flag - to on. Switch to the bogus DOCTYPE state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-char-in-doctype' - )); - $this->token['force-quirks'] = true; - $state = 'bogus DOCTYPE'; - } - break; - - case 'DOCTYPE system identifier (double-quoted)': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if ($char === '"') { - /* U+0022 QUOTATION MARK (") - Switch to the after DOCTYPE system identifier state. */ - $state = 'after DOCTYPE system identifier'; - } elseif ($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag - to on. Emit that DOCTYPE token. Switch to the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-end-of-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === false) { - /* EOF - Parse error. Set the DOCTYPE token's force-quirks flag - to on. Emit that DOCTYPE token. Reconsume the EOF - character in the data state. 
*/ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - /* Anything else - Append the current input character to the current - DOCTYPE token's system identifier. Stay in the DOCTYPE - system identifier (double-quoted) state. */ - $this->token['system'] .= $char; - } - break; - - case 'DOCTYPE system identifier (single-quoted)': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if ($char === "'") { - /* U+0027 APOSTROPHE (') - Switch to the after DOCTYPE system identifier state. */ - $state = 'after DOCTYPE system identifier'; - } elseif ($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Parse error. Set the DOCTYPE token's force-quirks flag - to on. Emit that DOCTYPE token. Switch to the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-end-of-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === false) { - /* EOF - Parse error. Set the DOCTYPE token's force-quirks flag - to on. Emit that DOCTYPE token. Reconsume the EOF - character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - /* Anything else - Append the current input character to the current - DOCTYPE token's system identifier. Stay in the DOCTYPE - system identifier (double-quoted) state. 
*/ - $this->token['system'] .= $char; - } - break; - - case 'after DOCTYPE system identifier': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if($char === "\t" || $char === "\n" || $char === "\x0c" || $char === ' ') { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - Stay in the after DOCTYPE system identifier state. */ - } elseif ($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the current DOCTYPE token. Switch to the data state. */ - $this->emitToken($this->token); - $state = 'data'; - } elseif ($char === false) { - /* Parse error. Set the DOCTYPE token's force-quirks - flag to on. Emit that DOCTYPE token. Reconsume the EOF - character in the data state. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'eof-in-doctype' - )); - $this->token['force-quirks'] = true; - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - } else { - /* Anything else - Parse error. Switch to the bogus DOCTYPE state. - (This does not set the DOCTYPE token's force-quirks - flag to on.) */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'unexpected-char-in-doctype' - )); - $state = 'bogus DOCTYPE'; - } - break; - - case 'bogus DOCTYPE': - /* Consume the next input character: */ - $char = $this->stream->char(); - - if ($char === '>') { - /* U+003E GREATER-THAN SIGN (>) - Emit the DOCTYPE token. Switch to the data state. */ - $this->emitToken($this->token); - $state = 'data'; - - } elseif($char === false) { - /* EOF - Emit the DOCTYPE token. Reconsume the EOF character in - the data state. */ - $this->emitToken($this->token); - $this->stream->unget(); - $state = 'data'; - - } else { - /* Anything else - Stay in the bogus DOCTYPE state. */ - } - break; - - // case 'cdataSection': - - } - } - } - - /** - * Returns a serialized representation of the tree. 
- */ - public function save() { - return $this->tree->save(); - } - - /** - * Returns the input stream. - */ - public function stream() { - return $this->stream; - } - - private function consumeCharacterReference($allowed = false, $inattr = false) { - // This goes quite far against spec, and is far closer to the Python - // impl., mainly because we don't do the large unconsuming the spec - // requires. - - // All consumed characters. - $chars = $this->stream->char(); - - /* This section defines how to consume a character - reference. This definition is used when parsing character - references in text and in attributes. - - The behavior depends on the identity of the next character - (the one immediately after the U+0026 AMPERSAND character): */ - - if ( - $chars[0] === "\x09" || - $chars[0] === "\x0A" || - $chars[0] === "\x0C" || - $chars[0] === "\x20" || - $chars[0] === '<' || - $chars[0] === '&' || - $chars === false || - $chars[0] === $allowed - ) { - /* U+0009 CHARACTER TABULATION - U+000A LINE FEED (LF) - U+000C FORM FEED (FF) - U+0020 SPACE - U+003C LESS-THAN SIGN - U+0026 AMPERSAND - EOF - The additional allowed character, if there is one - Not a character reference. No characters are consumed, - and nothing is returned. (This is not an error, either.) */ - // We already consumed, so unconsume. - $this->stream->unget(); - return '&'; - } elseif ($chars[0] === '#') { - /* Consume the U+0023 NUMBER SIGN. */ - // Um, yeah, we already did that. - /* The behavior further depends on the character after - the U+0023 NUMBER SIGN: */ - $chars .= $this->stream->char(); - if (isset($chars[1]) && ($chars[1] === 'x' || $chars[1] === 'X')) { - /* U+0078 LATIN SMALL LETTER X - U+0058 LATIN CAPITAL LETTER X */ - /* Consume the X. */ - // Um, yeah, we already did that. 
- /* Follow the steps below, but using the range of - characters U+0030 DIGIT ZERO through to U+0039 DIGIT - NINE, U+0061 LATIN SMALL LETTER A through to U+0066 - LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER - A, through to U+0046 LATIN CAPITAL LETTER F (in other - words, 0123456789, ABCDEF, abcdef). */ - $char_class = self::HEX; - /* When it comes to interpreting the - number, interpret it as a hexadecimal number. */ - $hex = true; - } else { - /* Anything else */ - // Unconsume because we shouldn't have consumed this. - $chars = $chars[0]; - $this->stream->unget(); - /* Follow the steps below, but using the range of - characters U+0030 DIGIT ZERO through to U+0039 DIGIT - NINE (i.e. just 0123456789). */ - $char_class = self::DIGIT; - /* When it comes to interpreting the number, - interpret it as a decimal number. */ - $hex = false; - } - - /* Consume as many characters as match the range of characters given above. */ - $consumed = $this->stream->charsWhile($char_class); - if ($consumed === '' || $consumed === false) { - /* If no characters match the range, then don't consume - any characters (and unconsume the U+0023 NUMBER SIGN - character and, if appropriate, the X character). This - is a parse error; nothing is returned. */ - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-numeric-entity' - )); - return '&' . $chars; - } else { - /* Otherwise, if the next character is a U+003B SEMICOLON, - consume that too. If it isn't, there is a parse error. */ - if ($this->stream->char() !== ';') { - $this->stream->unget(); - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'numeric-entity-without-semicolon' - )); - } - - /* If one or more characters match the range, then take - them all and interpret the string of characters as a number - (either hexadecimal or decimal as appropriate). */ - $codepoint = $hex ? 
hexdec($consumed) : (int) $consumed; - - /* If that number is one of the numbers in the first column - of the following table, then this is a parse error. Find the - row with that number in the first column, and return a - character token for the Unicode character given in the - second column of that row. */ - $new_codepoint = HTML5_Data::getRealCodepoint($codepoint); - if ($new_codepoint) { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'illegal-windows-1252-entity' - )); - return HTML5_Data::utf8chr($new_codepoint); - } else { - /* Otherwise, if the number is greater than 0x10FFFF, then - * this is a parse error. Return a U+FFFD REPLACEMENT - * CHARACTER. */ - if ($codepoint > 0x10FFFF) { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'overlong-character-entity' // XXX probably not correct - )); - return "\xEF\xBF\xBD"; - } - /* Otherwise, return a character token for the Unicode - * character whose code point is that number. If the - * number is in the range 0x0001 to 0x0008, 0x000E to - * 0x001F, 0x007F to 0x009F, 0xD800 to 0xDFFF, 0xFDD0 to - * 0xFDEF, or is one of 0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, - * 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, - * 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, - * 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, - * 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, - * 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, - * or 0x10FFFF, then this is a parse error. 
*/ - // && has higher precedence than || - if ( - $codepoint >= 0x0000 && $codepoint <= 0x0008 || - $codepoint === 0x000B || - $codepoint >= 0x000E && $codepoint <= 0x001F || - $codepoint >= 0x007F && $codepoint <= 0x009F || - $codepoint >= 0xD800 && $codepoint <= 0xDFFF || - $codepoint >= 0xFDD0 && $codepoint <= 0xFDEF || - ($codepoint & 0xFFFE) === 0xFFFE || - $codepoint == 0x10FFFF || $codepoint == 0x10FFFE - ) { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'illegal-codepoint-for-numeric-entity' - )); - } - return HTML5_Data::utf8chr($codepoint); - } - } - - } else { - /* Anything else */ - - /* Consume the maximum number of characters possible, - with the consumed characters matching one of the - identifiers in the first column of the named character - references table (in a case-sensitive manner). */ - // What we actually do here is consume as much as we can while it - // matches the start of one of the identifiers in the first column. - - $refs = HTML5_Data::getNamedCharacterReferences(); - - // Get the longest string which is the start of an identifier - // ($chars) as well as the longest identifier which matches ($id) - // and its codepoint ($codepoint). - $codepoint = false; - $char = $chars; - while ($char !== false && isset($refs[$char])) { - $refs = $refs[$char]; - if (isset($refs['codepoint'])) { - $id = $chars; - $codepoint = $refs['codepoint']; - } - $chars .= $char = $this->stream->char(); - } - - // Unconsume the one character we just took which caused the while - // statement to fail. This could be anything and could cause state - // changes (as if it matches the while loop it must be - // alphanumeric so we can just concat it to whatever we get later). - $this->stream->unget(); - if ($char !== false) { - $chars = substr($chars, 0, -1); - } - - /* If no match can be made, then this is a parse error. - No characters are consumed, and nothing is returned. 
*/ - if (!$codepoint) { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'expected-named-entity' - )); - return '&' . $chars; - } - - /* If the last character matched is not a U+003B SEMICOLON - (;), there is a parse error. */ - $semicolon = true; - if (substr($id, -1) !== ';') { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'named-entity-without-semicolon' - )); - $semicolon = false; - } - - /* If the character reference is being consumed as part of - an attribute, and the last character matched is not a - U+003B SEMICOLON (;), and the next character is in the - range U+0030 DIGIT ZERO to U+0039 DIGIT NINE, U+0041 - LATIN CAPITAL LETTER A to U+005A LATIN CAPITAL LETTER Z, - or U+0061 LATIN SMALL LETTER A to U+007A LATIN SMALL LETTER Z, - then, for historical reasons, all the characters that were - matched after the U+0026 AMPERSAND (&) must be unconsumed, - and nothing is returned. */ - if ($inattr && !$semicolon) { - // The next character is either the next character in $chars or in the stream. - if (strlen($chars) > strlen($id)) { - $next = substr($chars, strlen($id), 1); - } else { - $next = $this->stream->char(); - $this->stream->unget(); - } - if ( - '0' <= $next && $next <= '9' || - 'A' <= $next && $next <= 'Z' || - 'a' <= $next && $next <= 'z' - ) { - return '&' . $chars; - } - } - - /* Otherwise, return a character token for the character - corresponding to the character reference name (as given - by the second column of the named character references table). */ - return HTML5_Data::utf8chr($codepoint) . substr($chars, strlen($id)); - } - } - - private function characterReferenceInAttributeValue($allowed = false) { - /* Attempt to consume a character reference. */ - $entity = $this->consumeCharacterReference($allowed, true); - - /* If nothing is returned, append a U+0026 AMPERSAND - character to the current attribute's value. - - Otherwise, append the returned character token to the - current attribute's value. 
*/ - $char = (!$entity) - ? '&' - : $entity; - - $last = count($this->token['attr']) - 1; - $this->token['attr'][$last]['value'] .= $char; - - /* Finally, switch back to the attribute value state that you - were in when were switched into this state. */ - } - - /** - * Emits a token, passing it on to the tree builder. - */ - protected function emitToken($token, $checkStream = true, $dry = false) { - if ($checkStream) { - // Emit errors from input stream. - while ($this->stream->errors) { - $this->emitToken(array_shift($this->stream->errors), false); - } - } - if($token['type'] === self::ENDTAG && !empty($token['attr'])) { - for ($i = 0; $i < count($token['attr']); $i++) { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'attributes-in-end-tag' - )); - } - } - if($token['type'] === self::ENDTAG && !empty($token['self-closing'])) { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'self-closing-flag-on-end-tag', - )); - } - if($token['type'] === self::STARTTAG) { - // This could be changed to actually pass the tree-builder a hash - $hash = array(); - foreach ($token['attr'] as $keypair) { - if (isset($hash[$keypair['name']])) { - $this->emitToken(array( - 'type' => self::PARSEERROR, - 'data' => 'duplicate-attribute', - )); - } else { - $hash[$keypair['name']] = $keypair['value']; - } - } - } - - if(!$dry) { - // the current structure of attributes is not a terribly good one - $this->tree->emitToken($token); - } - - if(!$dry && is_int($this->tree->content_model)) { - $this->content_model = $this->tree->content_model; - $this->tree->content_model = null; - - } elseif($token['type'] === self::ENDTAG) { - $this->content_model = self::PCDATA; - } - } -} - diff --git a/libraries/html5/TreeBuilder.php b/libraries/html5/TreeBuilder.php deleted file mode 100644 index c4a48b2..0000000 --- a/libraries/html5/TreeBuilder.php +++ /dev/null @@ -1,3849 +0,0 @@ - -Copyright 2009 Edward Z. 
Yang - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -*/ - -// Tags for FIX ME!!!: (in order of priority) -// XXX - should be fixed NAO! -// XERROR - with regards to parse errors -// XSCRIPT - with regards to scripting mode -// XENCODING - with regards to encoding (for reparsing tests) -// XDOM - DOM specific code (tagName is explicitly not marked). -// this is not (yet) in helper functions. 
- -class HTML5_TreeBuilder { - public $stack = array(); - public $content_model; - - private $mode; - private $original_mode; - private $secondary_mode; - private $dom; - // Whether or not normal insertion of nodes should actually foster - // parent (used in one case in spec) - private $foster_parent = false; - private $a_formatting = array(); - - private $head_pointer = null; - private $form_pointer = null; - - private $flag_frameset_ok = true; - private $flag_force_quirks = false; - private $ignored = false; - private $quirks_mode = null; - // this gets to 2 when we want to ignore the next lf character, and - // is decrement at the beginning of each processed token (this way, - // code can check for (bool)$ignore_lf_token, but it phases out - // appropriately) - private $ignore_lf_token = 0; - private $fragment = false; - private $root; - - private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject'); - private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u'); - // dl and ds are speculative - private $special = array('address','area','article','aside','base','basefont','bgsound', - 'blockquote','body','br','center','col','colgroup','command','dc','dd','details','dir','div','dl','ds', - 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5', - 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link', - 'listing','menu','meta','nav','noembed','noframes','noscript','ol', - 'p','param','plaintext','pre','script','select','spacer','style', - 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'); - - private $pendingTableCharacters; - private $pendingTableCharactersDirty; - - // Tree construction modes - const INITIAL = 0; - const BEFORE_HTML = 1; - const BEFORE_HEAD = 2; - const IN_HEAD = 3; - const IN_HEAD_NOSCRIPT = 4; - const AFTER_HEAD = 5; - const IN_BODY = 6; - const IN_CDATA_RCDATA = 
7; - const IN_TABLE = 8; - const IN_TABLE_TEXT = 9; - const IN_CAPTION = 10; - const IN_COLUMN_GROUP = 11; - const IN_TABLE_BODY = 12; - const IN_ROW = 13; - const IN_CELL = 14; - const IN_SELECT = 15; - const IN_SELECT_IN_TABLE= 16; - const IN_FOREIGN_CONTENT= 17; - const AFTER_BODY = 18; - const IN_FRAMESET = 19; - const AFTER_FRAMESET = 20; - const AFTER_AFTER_BODY = 21; - const AFTER_AFTER_FRAMESET = 22; - - /** - * Converts a magic number to a readable name. Use for debugging. - */ - private function strConst($number) { - static $lookup; - if (!$lookup) { - $lookup = array(); - $r = new ReflectionClass('HTML5_TreeBuilder'); - $consts = $r->getConstants(); - foreach ($consts as $const => $num) { - if (!is_int($num)) continue; - $lookup[$num] = $const; - } - } - return $lookup[$number]; - } - - // The different types of elements. - const SPECIAL = 100; - const SCOPING = 101; - const FORMATTING = 102; - const PHRASING = 103; - - // Quirks modes in $quirks_mode - const NO_QUIRKS = 200; - const QUIRKS_MODE = 201; - const LIMITED_QUIRKS_MODE = 202; - - // Marker to be placed in $a_formatting - const MARKER = 300; - - // Namespaces for foreign content - const NS_HTML = null; // to prevent DOM from requiring NS on everything - const NS_XHTML = 'http://www.w3.org/1999/xhtml'; - const NS_MATHML = 'http://www.w3.org/1998/Math/MathML'; - const NS_SVG = 'http://www.w3.org/2000/svg'; - const NS_XLINK = 'http://www.w3.org/1999/xlink'; - const NS_XML = 'http://www.w3.org/XML/1998/namespace'; - const NS_XMLNS = 'http://www.w3.org/2000/xmlns/'; - - // Different types of scopes to test for elements - const SCOPE = 0; - const SCOPE_LISTITEM = 1; - const SCOPE_TABLE = 2; - - public function __construct() { - $this->mode = self::INITIAL; - $this->dom = new DOMDocument; - - $this->dom->encoding = 'UTF-8'; - $this->dom->preserveWhiteSpace = true; - $this->dom->substituteEntities = true; - $this->dom->strictErrorChecking = false; - } - - // Process tag tokens - public function 
emitToken($token, $mode = null) { - // XXX: ignore parse errors... why are we emitting them, again? - if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return; - if ($mode === null) $mode = $this->mode; - - /* - $backtrace = debug_backtrace(); - if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n"; - echo $this->strConst($mode); - if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")"; - echo "\n "; - token_dump($token); - $this->printStack(); - $this->printActiveFormattingElements(); - if ($this->foster_parent) echo " -> this is a foster parent mode\n"; - if ($this->flag_frameset_ok) echo " -> frameset ok\n"; - */ - - if ($this->ignore_lf_token) $this->ignore_lf_token--; - $this->ignored = false; - // indenting is a little wonky, this can be changed later on - switch ($mode) { - - case self::INITIAL: - - /* A character token that is one of U+0009 CHARACTER TABULATION, - * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */ - if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { - /* Ignore the token. */ - $this->ignored = true; - } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) { - if ( - $token['name'] !== 'html' || !empty($token['public']) || - !empty($token['system']) || $token !== 'about:legacy-compat' - ) { - /* If the DOCTYPE token's name is not a case-sensitive match - * for the string "html", or if the token's public identifier - * is not missing, or if the token's system identifier is - * neither missing nor a case-sensitive match for the string - * "about:legacy-compat", then there is a parse error (this - * is the DOCTYPE parse error). 
*/ - // DOCTYPE parse error - } - /* Append a DocumentType node to the Document node, with the name - * attribute set to the name given in the DOCTYPE token, or the - * empty string if the name was missing; the publicId attribute - * set to the public identifier given in the DOCTYPE token, or - * the empty string if the public identifier was missing; the - * systemId attribute set to the system identifier given in the - * DOCTYPE token, or the empty string if the system identifier - * was missing; and the other attributes specific to - * DocumentType objects set to null and empty lists as - * appropriate. Associate the DocumentType node with the - * Document object so that it is returned as the value of the - * doctype attribute of the Document object. */ - if (!isset($token['public'])) $token['public'] = null; - if (!isset($token['system'])) $token['system'] = null; - // XDOM - // Yes this is hacky. I'm kind of annoyed that I can't appendChild - // a doctype to DOMDocument. Maybe I haven't chanted the right - // syllables. - $impl = new DOMImplementation(); - // This call can fail for particularly pathological cases (namely, - // the qualifiedName parameter ($token['name']) could be missing. - if ($token['name']) { - $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']); - $this->dom->appendChild($doctype); - } else { - // It looks like libxml's not actually *able* to express this case. - // So... don't. - $this->dom->emptyDoctype = true; - } - $public = is_null($token['public']) ? false : strtolower($token['public']); - $system = is_null($token['system']) ? 
false : strtolower($token['system']); - $publicStartsWithForQuirks = array( - "+//silmaril//dtd html pro v0r11 19970101//", - "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", - "-//as//dtd html 3.0 aswedit + extensions//", - "-//ietf//dtd html 2.0 level 1//", - "-//ietf//dtd html 2.0 level 2//", - "-//ietf//dtd html 2.0 strict level 1//", - "-//ietf//dtd html 2.0 strict level 2//", - "-//ietf//dtd html 2.0 strict//", - "-//ietf//dtd html 2.0//", - "-//ietf//dtd html 2.1e//", - "-//ietf//dtd html 3.0//", - "-//ietf//dtd html 3.2 final//", - "-//ietf//dtd html 3.2//", - "-//ietf//dtd html 3//", - "-//ietf//dtd html level 0//", - "-//ietf//dtd html level 1//", - "-//ietf//dtd html level 2//", - "-//ietf//dtd html level 3//", - "-//ietf//dtd html strict level 0//", - "-//ietf//dtd html strict level 1//", - "-//ietf//dtd html strict level 2//", - "-//ietf//dtd html strict level 3//", - "-//ietf//dtd html strict//", - "-//ietf//dtd html//", - "-//metrius//dtd metrius presentational//", - "-//microsoft//dtd internet explorer 2.0 html strict//", - "-//microsoft//dtd internet explorer 2.0 html//", - "-//microsoft//dtd internet explorer 2.0 tables//", - "-//microsoft//dtd internet explorer 3.0 html strict//", - "-//microsoft//dtd internet explorer 3.0 html//", - "-//microsoft//dtd internet explorer 3.0 tables//", - "-//netscape comm. corp.//dtd html//", - "-//netscape comm. 
corp.//dtd strict html//", - "-//o'reilly and associates//dtd html 2.0//", - "-//o'reilly and associates//dtd html extended 1.0//", - "-//o'reilly and associates//dtd html extended relaxed 1.0//", - "-//spyglass//dtd html 2.0 extended//", - "-//sq//dtd html 2.0 hotmetal + extensions//", - "-//sun microsystems corp.//dtd hotjava html//", - "-//sun microsystems corp.//dtd hotjava strict html//", - "-//w3c//dtd html 3 1995-03-24//", - "-//w3c//dtd html 3.2 draft//", - "-//w3c//dtd html 3.2 final//", - "-//w3c//dtd html 3.2//", - "-//w3c//dtd html 3.2s draft//", - "-//w3c//dtd html 4.0 frameset//", - "-//w3c//dtd html 4.0 transitional//", - "-//w3c//dtd html experimental 19960712//", - "-//w3c//dtd html experimental 970421//", - "-//w3c//dtd w3 html//", - "-//w3o//dtd w3 html 3.0//", - "-//webtechs//dtd mozilla html 2.0//", - "-//webtechs//dtd mozilla html//", - ); - $publicSetToForQuirks = array( - "-//w3o//dtd w3 html strict 3.0//", - "-/w3c/dtd html 4.0 transitional/en", - "html", - ); - $publicStartsWithAndSystemForQuirks = array( - "-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//", - ); - $publicStartsWithForLimitedQuirks = array( - "-//w3c//dtd xhtml 1.0 frameset//", - "-//w3c//dtd xhtml 1.0 transitional//", - ); - $publicStartsWithAndSystemForLimitedQuirks = array( - "-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//", - ); - // first, do easy checks - if ( - !empty($token['force-quirks']) || - strtolower($token['name']) !== 'html' - ) { - $this->quirks_mode = self::QUIRKS_MODE; - } else { - do { - if ($system) { - foreach ($publicStartsWithAndSystemForQuirks as $x) { - if (strncmp($public, $x, strlen($x)) === 0) { - $this->quirks_mode = self::QUIRKS_MODE; - break; - } - } - if (!is_null($this->quirks_mode)) break; - foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) { - if (strncmp($public, $x, strlen($x)) === 0) { - $this->quirks_mode = self::LIMITED_QUIRKS_MODE; - break; - } - } - if 
(!is_null($this->quirks_mode)) break; - } - foreach ($publicSetToForQuirks as $x) { - if ($public === $x) { - $this->quirks_mode = self::QUIRKS_MODE; - break; - } - } - if (!is_null($this->quirks_mode)) break; - foreach ($publicStartsWithForLimitedQuirks as $x) { - if (strncmp($public, $x, strlen($x)) === 0) { - $this->quirks_mode = self::LIMITED_QUIRKS_MODE; - } - } - if (!is_null($this->quirks_mode)) break; - if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") { - $this->quirks_mode = self::QUIRKS_MODE; - break; - } - foreach ($publicStartsWithForQuirks as $x) { - if (strncmp($public, $x, strlen($x)) === 0) { - $this->quirks_mode = self::QUIRKS_MODE; - break; - } - } - if (is_null($this->quirks_mode)) { - $this->quirks_mode = self::NO_QUIRKS; - } - } while (false); - } - $this->mode = self::BEFORE_HTML; - } else { - // parse error - /* Switch the insertion mode to "before html", then reprocess the - * current token. */ - $this->mode = self::BEFORE_HTML; - $this->quirks_mode = self::QUIRKS_MODE; - $this->emitToken($token); - } - break; - - case self::BEFORE_HTML: - - /* A DOCTYPE token */ - if($token['type'] === HTML5_Tokenizer::DOCTYPE) { - // Parse error. Ignore the token. - $this->ignored = true; - - /* A comment token */ - } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { - /* Append a Comment node to the Document object with the data - attribute set to the data given in the comment token. */ - // XDOM - $comment = $this->dom->createComment($token['data']); - $this->dom->appendChild($comment); - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { - /* Ignore the token. 
*/ - $this->ignored = true; - - /* A start tag whose tag name is "html" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') { - /* Create an element for the token in the HTML namespace. Append it - * to the Document object. Put this element in the stack of open - * elements. */ - // XDOM - $html = $this->insertElement($token, false); - $this->dom->appendChild($html); - $this->stack[] = $html; - - $this->mode = self::BEFORE_HEAD; - - } else { - /* Create an html element. Append it to the Document object. Put - * this element in the stack of open elements. */ - // XDOM - $html = $this->dom->createElementNS(self::NS_HTML, 'html'); - $this->dom->appendChild($html); - $this->stack[] = $html; - - /* Switch the insertion mode to "before head", then reprocess the - * current token. */ - $this->mode = self::BEFORE_HEAD; - $this->emitToken($token); - } - break; - - case self::BEFORE_HEAD: - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { - /* Ignore the token. */ - $this->ignored = true; - - /* A comment token */ - } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { - /* Append a Comment node to the current node with the data attribute - set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* A DOCTYPE token */ - } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { - /* Parse error. Ignore the token */ - $this->ignored = true; - // parse error - - /* A start tag token with the tag name "html" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { - /* Process the token using the rules for the "in body" - * insertion mode. 
*/ - $this->processWithRulesFor($token, self::IN_BODY); - - /* A start tag token with the tag name "head" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') { - /* Insert an HTML element for the token. */ - $element = $this->insertElement($token); - - /* Set the head element pointer to this new element node. */ - $this->head_pointer = $element; - - /* Change the insertion mode to "in head". */ - $this->mode = self::IN_HEAD; - - /* An end tag whose tag name is one of: "head", "body", "html", "br" */ - } elseif( - $token['type'] === HTML5_Tokenizer::ENDTAG && ( - $token['name'] === 'head' || $token['name'] === 'body' || - $token['name'] === 'html' || $token['name'] === 'br' - )) { - /* Act as if a start tag token with the tag name "head" and no - * attributes had been seen, then reprocess the current token. */ - $this->emitToken(array( - 'name' => 'head', - 'type' => HTML5_Tokenizer::STARTTAG, - 'attr' => array() - )); - $this->emitToken($token); - - /* Any other end tag */ - } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) { - /* Parse error. Ignore the token. */ - $this->ignored = true; - - } else { - /* Act as if a start tag token with the tag name "head" and no - * attributes had been seen, then reprocess the current token. - * Note: This will result in an empty head element being - * generated, with the current token being reprocessed in the - * "after head" insertion mode. */ - $this->emitToken(array( - 'name' => 'head', - 'type' => HTML5_Tokenizer::STARTTAG, - 'attr' => array() - )); - $this->emitToken($token); - } - break; - - case self::IN_HEAD: - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE. */ - if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { - /* Insert the character into the current node. 
*/ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { - /* Append a Comment node to the current node with the data attribute - set to the data given in the comment token. */ - $this->insertComment($token['data']); - - /* A DOCTYPE token */ - } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { - /* Parse error. Ignore the token. */ - $this->ignored = true; - // parse error - - /* A start tag whose tag name is "html" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && - $token['name'] === 'html') { - $this->processWithRulesFor($token, self::IN_BODY); - - /* A start tag whose tag name is one of: "base", "command", "link" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && - ($token['name'] === 'base' || $token['name'] === 'command' || - $token['name'] === 'link')) { - /* Insert an HTML element for the token. Immediately pop the - * current node off the stack of open elements. */ - $this->insertElement($token); - array_pop($this->stack); - - // YYY: Acknowledge the token's self-closing flag, if it is set. - - /* A start tag whose tag name is "meta" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') { - /* Insert an HTML element for the token. Immediately pop the - * current node off the stack of open elements. */ - $this->insertElement($token); - array_pop($this->stack); - - // XERROR: Acknowledge the token's self-closing flag, if it is set. - - // XENCODING: If the element has a charset attribute, and its value is a - // supported encoding, and the confidence is currently tentative, - // then change the encoding to the encoding given by the value of - // the charset attribute. 
- // - // Otherwise, if the element has a content attribute, and applying - // the algorithm for extracting an encoding from a Content-Type to - // its value returns a supported encoding encoding, and the - // confidence is currently tentative, then change the encoding to - // the encoding encoding. - - /* A start tag with the tag name "title" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') { - $this->insertRCDATAElement($token); - - /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or - * A start tag whose tag name is one of: "noframes", "style" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && - ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) { - // XSCRIPT: Scripting flag not respected - $this->insertCDATAElement($token); - - // XSCRIPT: Scripting flag disable not implemented - - /* A start tag with the tag name "script" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') { - /* 1. Create an element for the token in the HTML namespace. */ - $node = $this->insertElement($token, false); - - /* 2. Mark the element as being "parser-inserted" */ - // Uhhh... XSCRIPT - - /* 3. If the parser was originally created for the HTML - * fragment parsing algorithm, then mark the script element as - * "already executed". (fragment case) */ - // ditto... XSCRIPT - - /* 4. Append the new element to the current node and push it onto - * the stack of open elements. */ - end($this->stack)->appendChild($node); - $this->stack[] = $node; - // I guess we could squash these together - - /* 6. Let the original insertion mode be the current insertion mode. */ - $this->original_mode = $this->mode; - /* 7. Switch the insertion mode to "in CDATA/RCDATA" */ - $this->mode = self::IN_CDATA_RCDATA; - /* 5. Switch the tokeniser's content model flag to the CDATA state. 
*/ - $this->content_model = HTML5_Tokenizer::CDATA; - - /* An end tag with the tag name "head" */ - } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') { - /* Pop the current node (which will be the head element) off the stack of open elements. */ - array_pop($this->stack); - - /* Change the insertion mode to "after head". */ - $this->mode = self::AFTER_HEAD; - - // Slight logic inversion here to minimize duplication - /* A start tag with the tag name "head". */ - /* An end tag whose tag name is not one of: "body", "html", "br" */ - } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') || - ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' && - $token['name'] !== 'body' && $token['name'] !== 'br')) { - // Parse error. Ignore the token. - $this->ignored = true; - - /* Anything else */ - } else { - /* Act as if an end tag token with the tag name "head" had been - * seen, and reprocess the current token. */ - $this->emitToken(array( - 'name' => 'head', - 'type' => HTML5_Tokenizer::ENDTAG - )); - - /* Then, reprocess the current token. */ - $this->emitToken($token); - } - break; - - case self::IN_HEAD_NOSCRIPT: - if ($token['type'] === HTML5_Tokenizer::DOCTYPE) { - // parse error - } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { - $this->processWithRulesFor($token, self::IN_BODY); - } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') { - /* Pop the current node (which will be a noscript element) from the - * stack of open elements; the new current node will be a head - * element. 
*/ - array_pop($this->stack); - $this->mode = self::IN_HEAD; - } elseif ( - ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) || - ($token['type'] === HTML5_Tokenizer::COMMENT) || - ($token['type'] === HTML5_Tokenizer::STARTTAG && ( - $token['name'] === 'link' || $token['name'] === 'meta' || - $token['name'] === 'noframes' || $token['name'] === 'style'))) { - $this->processWithRulesFor($token, self::IN_HEAD); - // inverted logic - } elseif ( - ($token['type'] === HTML5_Tokenizer::STARTTAG && ( - $token['name'] === 'head' || $token['name'] === 'noscript')) || - ($token['type'] === HTML5_Tokenizer::ENDTAG && - $token['name'] !== 'br')) { - // parse error - } else { - // parse error - $this->emitToken(array( - 'type' => HTML5_Tokenizer::ENDTAG, - 'name' => 'noscript', - )); - $this->emitToken($token); - } - break; - - case self::AFTER_HEAD: - /* Handle the token as follows: */ - - /* A character token that is one of one of U+0009 CHARACTER TABULATION, - U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), - or U+0020 SPACE */ - if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { - /* Append the character to the current node. */ - $this->insertText($token['data']); - - /* A comment token */ - } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { - /* Append a Comment node to the current node with the data attribute - set to the data given in the comment token. */ - $this->insertComment($token['data']); - - } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) { - // parse error - - } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { - $this->processWithRulesFor($token, self::IN_BODY); - - /* A start tag token with the tag name "body" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') { - $this->insertElement($token); - - /* Set the frameset-ok flag to "not ok". */ - $this->flag_frameset_ok = false; - - /* Change the insertion mode to "in body". 
*/ - $this->mode = self::IN_BODY; - - /* A start tag token with the tag name "frameset" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') { - /* Insert a frameset element for the token. */ - $this->insertElement($token); - - /* Change the insertion mode to "in frameset". */ - $this->mode = self::IN_FRAMESET; - - /* A start tag token whose tag name is one of: "base", "link", "meta", - "script", "style", "title" */ - } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'], - array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) { - // parse error - /* Push the node pointed to by the head element pointer onto the - * stack of open elements. */ - $this->stack[] = $this->head_pointer; - $this->processWithRulesFor($token, self::IN_HEAD); - array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1); - - // inversion of specification - } elseif( - ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') || - ($token['type'] === HTML5_Tokenizer::ENDTAG && - $token['name'] !== 'body' && $token['name'] !== 'html' && - $token['name'] !== 'br')) { - // parse error - - /* Anything else */ - } else { - $this->emitToken(array( - 'name' => 'body', - 'type' => HTML5_Tokenizer::STARTTAG, - 'attr' => array() - )); - $this->flag_frameset_ok = true; - $this->emitToken($token); - } - break; - - case self::IN_BODY: - /* Handle the token as follows: */ - - switch($token['type']) { - /* A character token */ - case HTML5_Tokenizer::CHARACTER: - case HTML5_Tokenizer::SPACECHARACTER: - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Append the token's character to the current node. */ - $this->insertText($token['data']); - - /* If the token is not one of U+0009 CHARACTER TABULATION, - * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 - * SPACE, then set the frameset-ok flag to "not ok". 
*/ - // i.e., if any of the characters is not whitespace - if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) { - $this->flag_frameset_ok = false; - } - break; - - /* A comment token */ - case HTML5_Tokenizer::COMMENT: - /* Append a Comment node to the current node with the data - attribute set to the data given in the comment token. */ - $this->insertComment($token['data']); - break; - - case HTML5_Tokenizer::DOCTYPE: - // parse error - break; - - case HTML5_Tokenizer::EOF: - // parse error - break; - - case HTML5_Tokenizer::STARTTAG: - switch($token['name']) { - case 'html': - // parse error - /* For each attribute on the token, check to see if the - * attribute is already present on the top element of the - * stack of open elements. If it is not, add the attribute - * and its corresponding value to that element. */ - foreach($token['attr'] as $attr) { - if(!$this->stack[0]->hasAttribute($attr['name'])) { - $this->stack[0]->setAttribute($attr['name'], $attr['value']); - } - } - break; - - case 'base': case 'command': case 'link': case 'meta': case 'noframes': - case 'script': case 'style': case 'title': - /* Process the token as if the insertion mode had been "in - head". */ - $this->processWithRulesFor($token, self::IN_HEAD); - break; - - /* A start tag token with the tag name "body" */ - case 'body': - /* Parse error. If the second element on the stack of open - elements is not a body element, or, if the stack of open - elements has only one node on it, then ignore the token. - (fragment case) */ - if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') { - $this->ignored = true; - // Ignore - - /* Otherwise, for each attribute on the token, check to see - if the attribute is already present on the body element (the - second element) on the stack of open elements. If it is not, - add the attribute and its corresponding value to that - element. 
*/ - } else { - foreach($token['attr'] as $attr) { - if(!$this->stack[1]->hasAttribute($attr['name'])) { - $this->stack[1]->setAttribute($attr['name'], $attr['value']); - } - } - } - break; - - case 'frameset': - // parse error - /* If the second element on the stack of open elements is - * not a body element, or, if the stack of open elements - * has only one node on it, then ignore the token. - * (fragment case) */ - if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') { - $this->ignored = true; - // Ignore - } elseif (!$this->flag_frameset_ok) { - $this->ignored = true; - // Ignore - } else { - /* 1. Remove the second element on the stack of open - * elements from its parent node, if it has one. */ - if($this->stack[1]->parentNode) { - $this->stack[1]->parentNode->removeChild($this->stack[1]); - } - - /* 2. Pop all the nodes from the bottom of the stack of - * open elements, from the current node up to the root - * html element. */ - array_splice($this->stack, 1); - - $this->insertElement($token); - $this->mode = self::IN_FRAMESET; - } - break; - - // in spec, there is a diversion here - - case 'address': case 'article': case 'aside': case 'blockquote': - case 'center': case 'datagrid': case 'details': case 'dir': - case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer': - case 'header': case 'hgroup': case 'menu': case 'nav': - case 'ol': case 'p': case 'section': case 'ul': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been - seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5_Tokenizer::ENDTAG - )); - } - - /* Insert an HTML element for the token. 
*/ - $this->insertElement($token); - break; - - /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", - "h5", "h6" */ - case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5_Tokenizer::ENDTAG - )); - } - - /* If the current node is an element whose tag name is one - * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a - * parse error; pop the current node off the stack of open - * elements. */ - $peek = array_pop($this->stack); - if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) { - // parse error - } else { - $this->stack[] = $peek; - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - break; - - case 'pre': case 'listing': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5_Tokenizer::ENDTAG - )); - } - $this->insertElement($token); - /* If the next token is a U+000A LINE FEED (LF) character - * token, then ignore that token and move on to the next - * one. (Newlines at the start of pre blocks are ignored as - * an authoring convenience.) */ - $this->ignore_lf_token = 2; - $this->flag_frameset_ok = false; - break; - - /* A start tag whose tag name is "form" */ - case 'form': - /* If the form element pointer is not null, ignore the - token with a parse error. */ - if($this->form_pointer !== null) { - $this->ignored = true; - // Ignore. - - /* Otherwise: */ - } else { - /* If the stack of open elements has a p element in - scope, then act as if an end tag with the tag name p - had been seen. 
*/ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5_Tokenizer::ENDTAG - )); - } - - /* Insert an HTML element for the token, and set the - form element pointer to point to the element created. */ - $element = $this->insertElement($token); - $this->form_pointer = $element; - } - break; - - // condensed specification - case 'li': case 'dc': case 'dd': case 'ds': case 'dt': - /* 1. Set the frameset-ok flag to "not ok". */ - $this->flag_frameset_ok = false; - - $stack_length = count($this->stack) - 1; - for($n = $stack_length; 0 <= $n; $n--) { - /* 2. Initialise node to be the current node (the - bottommost node of the stack). */ - $stop = false; - $node = $this->stack[$n]; - $cat = $this->getElementCategory($node); - - // for case 'li': - /* 3. If node is an li element, then act as if an end - * tag with the tag name "li" had been seen, then jump - * to the last step. */ - // for case 'dc': case 'dd': case 'ds': case 'dt': - /* If node is a dc, dd, ds or dt element, then act as if an end - * tag with the same tag name as node had been seen, then - * jump to the last step. */ - if(($token['name'] === 'li' && $node->tagName === 'li') || - ($token['name'] !== 'li' && ($node->tagName == 'dc' || $node->tagName === 'dd' || $node->tagName == 'ds' || $node->tagName === 'dt'))) { // limited conditional - $this->emitToken(array( - 'type' => HTML5_Tokenizer::ENDTAG, - 'name' => $node->tagName, - )); - break; - } - - /* 4. If node is not in the formatting category, and is - not in the phrasing category, and is not an address, - div or p element, then stop this algorithm. */ - if($cat !== self::FORMATTING && $cat !== self::PHRASING && - $node->tagName !== 'address' && $node->tagName !== 'div' && - $node->tagName !== 'p') { - break; - } - - /* 5. Otherwise, set node to the previous entry in the - * stack of open elements and return to step 2. */ - } - - /* 6. This is the last step. 
*/ - - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been - seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5_Tokenizer::ENDTAG - )); - } - - /* Finally, insert an HTML element with the same tag - name as the token's. */ - $this->insertElement($token); - break; - - /* A start tag token whose tag name is "plaintext" */ - case 'plaintext': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been - seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5_Tokenizer::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - $this->content_model = HTML5_Tokenizer::PLAINTEXT; - break; - - // more diversions - - /* A start tag whose tag name is "a" */ - case 'a': - /* If the list of active formatting elements contains - an element whose tag name is "a" between the end of the - list and the last marker on the list (or the start of - the list if there is no marker on the list), then this - is a parse error; act as if an end tag with the tag name - "a" had been seen, then remove that element from the list - of active formatting elements and the stack of open - elements if the end tag didn't already remove it (it - might not have if the element is not in table scope). 
*/ - $leng = count($this->a_formatting); - - for($n = $leng - 1; $n >= 0; $n--) { - if($this->a_formatting[$n] === self::MARKER) { - break; - - } elseif($this->a_formatting[$n]->tagName === 'a') { - $a = $this->a_formatting[$n]; - $this->emitToken(array( - 'name' => 'a', - 'type' => HTML5_Tokenizer::ENDTAG - )); - if (in_array($a, $this->a_formatting)) { - $a_i = array_search($a, $this->a_formatting, true); - if($a_i !== false) array_splice($this->a_formatting, $a_i, 1); - } - if (in_array($a, $this->stack)) { - $a_i = array_search($a, $this->stack, true); - if ($a_i !== false) array_splice($this->stack, $a_i, 1); - } - break; - } - } - - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $el = $this->insertElement($token); - - /* Add that element to the list of active formatting - elements. */ - $this->a_formatting[] = $el; - break; - - case 'b': case 'big': case 'code': case 'em': case 'font': case 'i': - case 's': case 'small': case 'strike': - case 'strong': case 'tt': case 'u': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $el = $this->insertElement($token); - - /* Add that element to the list of active formatting - elements. */ - $this->a_formatting[] = $el; - break; - - case 'nobr': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* If the stack of open elements has a nobr element in - * scope, then this is a parse error; act as if an end tag - * with the tag name "nobr" had been seen, then once again - * reconstruct the active formatting elements, if any. */ - if ($this->elementInScope('nobr')) { - $this->emitToken(array( - 'name' => 'nobr', - 'type' => HTML5_Tokenizer::ENDTAG, - )); - $this->reconstructActiveFormattingElements(); - } - - /* Insert an HTML element for the token. 
*/ - $el = $this->insertElement($token); - - /* Add that element to the list of active formatting - elements. */ - $this->a_formatting[] = $el; - break; - - // another diversion - - /* A start tag token whose tag name is "button" */ - case 'button': - /* If the stack of open elements has a button element in scope, - then this is a parse error; act as if an end tag with the tag - name "button" had been seen, then reprocess the token. (We don't - do that. Unnecessary.) (I hope you're right! -- ezyang) */ - if($this->elementInScope('button')) { - $this->emitToken(array( - 'name' => 'button', - 'type' => HTML5_Tokenizer::ENDTAG - )); - } - - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Insert a marker at the end of the list of active - formatting elements. */ - $this->a_formatting[] = self::MARKER; - - $this->flag_frameset_ok = false; - break; - - case 'applet': case 'marquee': case 'object': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Insert a marker at the end of the list of active - formatting elements. */ - $this->a_formatting[] = self::MARKER; - - $this->flag_frameset_ok = false; - break; - - // spec diversion - - /* A start tag whose tag name is "table" */ - case 'table': - /* If the Document is not set to quirks mode, and the - * stack of open elements has a p element in scope, then - * act as if an end tag with the tag name "p" had been - * seen. */ - if($this->quirks_mode !== self::QUIRKS_MODE && - $this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5_Tokenizer::ENDTAG - )); - } - - /* Insert an HTML element for the token. 
*/ - $this->insertElement($token); - - $this->flag_frameset_ok = false; - - /* Change the insertion mode to "in table". */ - $this->mode = self::IN_TABLE; - break; - - /* A start tag whose tag name is one of: "area", "basefont", - "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ - case 'area': case 'basefont': case 'bgsound': case 'br': - case 'embed': case 'img': case 'input': case 'keygen': case 'spacer': - case 'wbr': - /* Reconstruct the active formatting elements, if any. */ - $this->reconstructActiveFormattingElements(); - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Immediately pop the current node off the stack of open elements. */ - array_pop($this->stack); - - // YYY: Acknowledge the token's self-closing flag, if it is set. - - $this->flag_frameset_ok = false; - break; - - case 'param': case 'source': - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Immediately pop the current node off the stack of open elements. */ - array_pop($this->stack); - - // YYY: Acknowledge the token's self-closing flag, if it is set. - break; - - /* A start tag whose tag name is "hr" */ - case 'hr': - /* If the stack of open elements has a p element in scope, - then act as if an end tag with the tag name p had been seen. */ - if($this->elementInScope('p')) { - $this->emitToken(array( - 'name' => 'p', - 'type' => HTML5_Tokenizer::ENDTAG - )); - } - - /* Insert an HTML element for the token. */ - $this->insertElement($token); - - /* Immediately pop the current node off the stack of open elements. */ - array_pop($this->stack); - - // YYY: Acknowledge the token's self-closing flag, if it is set. - - $this->flag_frameset_ok = false; - break; - - /* A start tag whose tag name is "image" */ - case 'image': - /* Parse error. Change the token's tag name to "img" and - reprocess it. (Don't ask.) 
*/ - $token['name'] = 'img'; - $this->emitToken($token); - break; - - /* A start tag whose tag name is "isindex" */ - case 'isindex': - /* Parse error. */ - - /* If the form element pointer is not null, - then ignore the token. */ - if($this->form_pointer === null) { - /* Act as if a start tag token with the tag name "form" had - been seen. */ - /* If the token has an attribute called "action", set - * the action attribute on the resulting form - * element to the value of the "action" attribute of - * the token. */ - $attr = array(); - $action = $this->getAttr($token, 'action'); - if ($action !== false) { - $attr[] = array('name' => 'action', 'value' => $action); - } - $this->emitToken(array( - 'name' => 'form', - 'type' => HTML5_Tokenizer::STARTTAG, - 'attr' => $attr - )); - - /* Act as if a start tag token with the tag name "hr" had - been seen. */ - $this->emitToken(array( - 'name' => 'hr', - 'type' => HTML5_Tokenizer::STARTTAG, - 'attr' => array() - )); - - /* Act as if a start tag token with the tag name "label" - had been seen. */ - $this->emitToken(array( - 'name' => 'label', - 'type' => HTML5_Tokenizer::STARTTAG, - 'attr' => array() - )); - - /* Act as if a stream of character tokens had been seen. */ - $prompt = $this->getAttr($token, 'prompt'); - if ($prompt === false) { - $prompt = 'This is a searchable index. '. - 'Insert your search keywords here: '; - } - $this->emitToken(array( - 'data' => $prompt, - 'type' => HTML5_Tokenizer::CHARACTER, - )); - - /* Act as if a start tag token with the tag name "input" - had been seen, with all the attributes from the "isindex" - token, except with the "name" attribute set to the value - "isindex" (ignoring any explicit "name" attribute). 
*/ - $attr = array(); - foreach ($token['attr'] as $keypair) { - if ($keypair['name'] === 'name' || $keypair['name'] === 'action' || - $keypair['name'] === 'prompt') continue; - $attr[] = $keypair; - } - $attr[] = array('name' => 'name', 'value' => 'isindex'); - - $this->emitToken(array( - 'name' => 'input', - 'type' => HTML5_Tokenizer::STARTTAG, - 'attr' => $attr - )); - - /* Act as if an end tag token with the tag name "label" - had been seen. */ - $this->emitToken(array( - 'name' => 'label', - 'type' => HTML5_Tokenizer::ENDTAG - )); - - /* Act as if a start tag token with the tag name "hr" had - been seen. */ - $this->emitToken(array( - 'name' => 'hr', - 'type' => HTML5_Tokenizer::STARTTAG - )); - - /* Act as if an end tag token with the tag name "form" had - been seen. */ - $this->emitToken(array( - 'name' => 'form', - 'type' => HTML5_Tokenizer::ENDTAG - )); - } else { - $this->ignored = true; - } - break; - - /* A start tag whose tag name is "textarea" */ - case 'textarea': - $this->insertElement($token); - - /* If the next token is a U+000A LINE FEED (LF) - * character token, then ignore that token and move on to - * the next one. (Newlines at the start of textarea - * elements are ignored as an authoring convenience.) - * need flag, see also
     */
    -                    $this->ignore_lf_token = 2;
    -
    -                    $this->original_mode = $this->mode;
    -                    $this->flag_frameset_ok = false;
    -                    $this->mode = self::IN_CDATA_RCDATA;
    -
    -                    /* Switch the tokeniser's content model flag to the
    -                    RCDATA state. */
    -                    $this->content_model = HTML5_Tokenizer::RCDATA;
    -                break;
    -
    -                /* A start tag token whose tag name is "xmp" */
    -                case 'xmp':
    -                    /* If the stack of open elements has a p element in
    -                    scope, then act as if an end tag with the tag name
    -                    "p" has been seen. */
    -                    if ($this->elementInScope('p')) {
    -                        $this->emitToken(array(
    -                            'name' => 'p',
    -                            'type' => HTML5_Tokenizer::ENDTAG
    -                        ));
    -                    }
    -
    -                    /* Reconstruct the active formatting elements, if any. */
    -                    $this->reconstructActiveFormattingElements();
    -
    -                    $this->flag_frameset_ok = false;
    -
    -                    $this->insertCDATAElement($token);
    -                break;
    -
    -                case 'iframe':
    -                    $this->flag_frameset_ok = false;
    -                    $this->insertCDATAElement($token);
    -                break;
    -
    -                case 'noembed': case 'noscript':
    -                    // XSCRIPT: should check scripting flag
    -                    $this->insertCDATAElement($token);
    -                break;
    -
    -                /* A start tag whose tag name is "select" */
    -                case 'select':
    -                    /* Reconstruct the active formatting elements, if any. */
    -                    $this->reconstructActiveFormattingElements();
    -
    -                    /* Insert an HTML element for the token. */
    -                    $this->insertElement($token);
    -
    -                    $this->flag_frameset_ok = false;
    -
    -                    /* If the insertion mode is one of in table", "in caption",
    -                     * "in column group", "in table body", "in row", or "in
    -                     * cell", then switch the insertion mode to "in select in
    -                     * table". Otherwise, switch the insertion mode  to "in
    -                     * select". */
    -                    if (
    -                        $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
    -                        $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY ||
    -                        $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
    -                    ) {
    -                        $this->mode = self::IN_SELECT_IN_TABLE;
    -                    } else {
    -                        $this->mode = self::IN_SELECT;
    -                    }
    -                break;
    -
    -                case 'option': case 'optgroup':
    -                    if ($this->elementInScope('option')) {
    -                        $this->emitToken(array(
    -                            'name' => 'option',
    -                            'type' => HTML5_Tokenizer::ENDTAG,
    -                        ));
    -                    }
    -                    $this->reconstructActiveFormattingElements();
    -                    $this->insertElement($token);
    -                break;
    -
    -                case 'rp': case 'rt':
    -                    /* If the stack of open elements has a ruby element in scope, then generate
    -                     * implied end tags. If the current node is not then a ruby element, this is
    -                     * a parse error; pop all the nodes from the current node up to the node
    -                     * immediately before the bottommost ruby element on the stack of open elements.
    -                     */
    -                    if ($this->elementInScope('ruby')) {
    -                        $this->generateImpliedEndTags();
    -                    }
    -                    $peek = false;
    -                    do {
    -                        if ($peek) {
    -                            // parse error
    -                        }
    -                        $peek = array_pop($this->stack);
    -                    } while ($peek->tagName !== 'ruby');
    -                    $this->stack[] = $peek; // we popped one too many
    -                    $this->insertElement($token);
    -                break;
    -
    -                // spec diversion
    -
    -                case 'math':
    -                    $this->reconstructActiveFormattingElements();
    -                    $token = $this->adjustMathMLAttributes($token);
    -                    $token = $this->adjustForeignAttributes($token);
    -                    $this->insertForeignElement($token, self::NS_MATHML);
    -                    if (isset($token['self-closing'])) {
    -                        // XERROR: acknowledge the token's self-closing flag
    -                        array_pop($this->stack);
    -                    }
    -                    if ($this->mode !== self::IN_FOREIGN_CONTENT) {
    -                        $this->secondary_mode = $this->mode;
    -                        $this->mode = self::IN_FOREIGN_CONTENT;
    -                    }
    -                break;
    -
    -                case 'svg':
    -                    $this->reconstructActiveFormattingElements();
    -                    $token = $this->adjustSVGAttributes($token);
    -                    $token = $this->adjustForeignAttributes($token);
    -                    $this->insertForeignElement($token, self::NS_SVG);
    -                    if (isset($token['self-closing'])) {
    -                        // XERROR: acknowledge the token's self-closing flag
    -                        array_pop($this->stack);
    -                    }
    -                    if ($this->mode !== self::IN_FOREIGN_CONTENT) {
    -                        $this->secondary_mode = $this->mode;
    -                        $this->mode = self::IN_FOREIGN_CONTENT;
    -                    }
    -                break;
    -
    -                case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
    -                case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
    -                    // parse error
    -                break;
    -
    -                /* A start tag token not covered by the previous entries */
    -                default:
    -                    /* Reconstruct the active formatting elements, if any. */
    -                    $this->reconstructActiveFormattingElements();
    -
    -                    $this->insertElement($token);
    -                    /* This element will be a phrasing  element. */
    -                break;
    -            }
    -            break;
    -
    -            case HTML5_Tokenizer::ENDTAG:
    -            switch($token['name']) {
    -                /* An end tag with the tag name "body" */
    -                case 'body':
    -                    /* If the stack of open elements does not have a body 
    -                     * element in scope, this is a parse error; ignore the 
    -                     * token. */
    -                    if(!$this->elementInScope('body')) {
    -                        $this->ignored = true;
    -
    -                    /* Otherwise, if there is a node in the stack of open 
    -                     * elements that is not either a dc element, a dd element, 
    -                     * a ds element, a dt element, an li element, an optgroup 
    -                     * element, an option element, a p element, an rp element, 
    -                     * an rt element, a tbody element, a td element, a tfoot 
    -                     * element, a th element, a thead element, a tr element, 
    -                     * the body element, or the html element, then this is a 
    -                     * parse error.
    -                     */
    -                    } else {
    -                        // XERROR: implement this check for parse error
    -                    }
    -
    -                    /* Change the insertion mode to "after body". */
    -                    $this->mode = self::AFTER_BODY;
    -                break;
    -
    -                /* An end tag with the tag name "html" */
    -                case 'html':
    -                    /* Act as if an end tag with tag name "body" had been seen,
    -                    then, if that token wasn't ignored, reprocess the current
    -                    token. */
    -                    $this->emitToken(array(
    -                        'name' => 'body',
    -                        'type' => HTML5_Tokenizer::ENDTAG
    -                    ));
    -
    -                    if (!$this->ignored) $this->emitToken($token);
    -                break;
    -
    -                case 'address': case 'article': case 'aside': case 'blockquote':
    -                case 'center': case 'datagrid': case 'details': case 'dir':
    -                case 'div': case 'dl': case 'fieldset': case 'footer':
    -                case 'header': case 'hgroup': case 'listing': case 'menu':
    -                case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
    -                    /* If the stack of open elements has an element in scope
    -                    with the same tag name as that of the token, then generate
    -                    implied end tags. */
    -                    if($this->elementInScope($token['name'])) {
    -                        $this->generateImpliedEndTags();
    -
    -                        /* Now, if the current node is not an element with
    -                        the same tag name as that of the token, then this
    -                        is a parse error. */
    -                        // XERROR: implement parse error logic
    -
    -                        /* If the stack of open elements has an element in
    -                        scope with the same tag name as that of the token,
    -                        then pop elements from this stack until an element
    -                        with that tag name has been popped from the stack. */
    -                        do {
    -                            $node = array_pop($this->stack);
    -                        } while ($node->tagName !== $token['name']);
    -                    } else {
    -                        // parse error
    -                    }
    -                break;
    -
    -                /* An end tag whose tag name is "form" */
    -                case 'form':
    -                    /* Let node be the element that the form element pointer is set to. */
    -                    $node = $this->form_pointer;
    -                    /* Set the form element pointer  to null. */
    -                    $this->form_pointer = null;
    -                    /* If node is null or the stack of open elements does not 
    -                        * have node in scope, then this is a parse error; ignore the token. */
    -                    if ($node === null || !in_array($node, $this->stack)) {
    -                        // parse error
    -                        $this->ignored = true;
    -                    } else {
    -                        /* 1. Generate implied end tags. */
    -                        $this->generateImpliedEndTags();
    -                        /* 2. If the current node is not node, then this is a parse error.  */
    -                        if (end($this->stack) !== $node) {
    -                            // parse error
    -                        }
    -                        /* 3. Remove node from the stack of open elements. */
    -                        array_splice($this->stack, array_search($node, $this->stack, true), 1);
    -                    }
    -
    -                break;
    -
    -                /* An end tag whose tag name is "p" */
    -                case 'p':
    -                    /* If the stack of open elements has a p element in scope,
    -                    then generate implied end tags, except for p elements. */
    -                    if($this->elementInScope('p')) {
    -                        /* Generate implied end tags, except for elements with
    -                         * the same tag name as the token. */
    -                        $this->generateImpliedEndTags(array('p'));
    -
    -                        /* If the current node is not a p element, then this is
    -                        a parse error. */
    -                        // XERROR: implement
    -
    -                        /* Pop elements from the stack of open elements  until
    -                         * an element with the same tag name as the token has
    -                         * been popped from the stack. */
    -                        do {
    -                            $node = array_pop($this->stack);
    -                        } while ($node->tagName !== 'p');
    -
    -                    } else {
    -                        // parse error
    -                        $this->emitToken(array(
    -                            'name' => 'p',
    -                            'type' => HTML5_Tokenizer::STARTTAG,
    -                        ));
    -                        $this->emitToken($token);
    -                    }
    -                break;
    -
    -                /* An end tag whose tag name is "li" */
    -                case 'li':
    -                    /* If the stack of open elements does not have an element
    -                     * in list item scope with the same tag name as that of the
    -                     * token, then this is a parse error; ignore the token. */
    -                    if ($this->elementInScope($token['name'], self::SCOPE_LISTITEM)) {
    -                        /* Generate implied end tags, except for elements with the
    -                         * same tag name as the token. */
    -                        $this->generateImpliedEndTags(array($token['name']));
    -                        /* If the current node is not an element with the same tag
    -                         * name as that of the token, then this is a parse error. */
    -                        // XERROR: parse error
    -                        /* Pop elements from the stack of open elements  until an
    -                         * element with the same tag name as the token has been
    -                         * popped from the stack. */
    -                        do {
    -                            $node = array_pop($this->stack);
    -                        } while ($node->tagName !== $token['name']);
    -                    } else {
    -                        // XERROR: parse error
    -                    }
    -                break;
    -
    -                /* An end tag whose tag name is "dc", "dd", "ds", "dt" */
    -                case 'dc': case 'dd': case 'ds': case 'dt':
    -                    if($this->elementInScope($token['name'])) {
    -                        $this->generateImpliedEndTags(array($token['name']));
    -
    -                        /* If the current node is not an element with the same
    -                        tag name as the token, then this is a parse error. */
    -                        // XERROR: implement parse error
    -
    -                        /* Pop elements from the stack of open elements  until
    -                         * an element with the same tag name as the token has
    -                         * been popped from the stack. */
    -                        do {
    -                            $node = array_pop($this->stack);
    -                        } while ($node->tagName !== $token['name']);
    -
    -                    } else {
    -                        // XERROR: parse error
    -                    }
    -                break;
    -
    -                /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
    -                "h5", "h6" */
    -                case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
    -                    $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
    -
    -                    /* If the stack of open elements has in scope an element whose
    -                    tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
    -                    generate implied end tags. */
    -                    if($this->elementInScope($elements)) {
    -                        $this->generateImpliedEndTags();
    -
    -                        /* Now, if the current node is not an element with the same
    -                        tag name as that of the token, then this is a parse error. */
    -                        // XERROR: implement parse error
    -
    -                        /* If the stack of open elements has in scope an element
    -                        whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
    -                        "h6", then pop elements from the stack until an element
    -                        with one of those tag names has been popped from the stack. */
    -                        do {
    -                            $node = array_pop($this->stack);
    -                        } while (!in_array($node->tagName, $elements));
    -                    } else {
    -                        // parse error
    -                    }
    -                break;
    -
    -                /* An end tag whose tag name is one of: "a", "b", "big", "em",
    -                "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
    -                case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
    -                case 'i': case 'nobr': case 's': case 'small': case 'strike':
    -                case 'strong': case 'tt': case 'u':
    -                    // XERROR: generally speaking this needs parse error logic
    -                    /* 1. Let the formatting element be the last element in
    -                    the list of active formatting elements that:
    -                        * is between the end of the list and the last scope
    -                        marker in the list, if any, or the start of the list
    -                        otherwise, and
    -                        * has the same tag name as the token.
    -                    */
    -                    while(true) {
    -                        for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
    -                            if($this->a_formatting[$a] === self::MARKER) {
    -                                break;
    -
    -                            } elseif($this->a_formatting[$a]->tagName === $token['name']) {
    -                                $formatting_element = $this->a_formatting[$a];
    -                                $in_stack = in_array($formatting_element, $this->stack, true);
    -                                $fe_af_pos = $a;
    -                                break;
    -                            }
    -                        }
    -
    -                        /* If there is no such node, or, if that node is
    -                        also in the stack of open elements but the element
    -                        is not in scope, then this is a parse error. Abort
    -                        these steps. The token is ignored. */
    -                        if(!isset($formatting_element) || ($in_stack &&
    -                        !$this->elementInScope($token['name']))) {
    -                            $this->ignored = true;
    -                            break;
    -
    -                        /* Otherwise, if there is such a node, but that node
    -                        is not in the stack of open elements, then this is a
    -                        parse error; remove the element from the list, and
    -                        abort these steps. */
    -                        } elseif(isset($formatting_element) && !$in_stack) {
    -                            unset($this->a_formatting[$fe_af_pos]);
    -                            $this->a_formatting = array_merge($this->a_formatting);
    -                            break;
    -                        }
    -
    -                        /* Otherwise, there is a formatting element and that
    -                         * element is in the stack and is in scope. If the
    -                         * element is not the current node, this is a parse
    -                         * error. In any case, proceed with the algorithm as
    -                         * written in the following steps. */
    -                        // XERROR: implement me
    -
    -                        /* 2. Let the furthest block be the topmost node in the
    -                        stack of open elements that is lower in the stack
    -                        than the formatting element, and is not an element in
    -                        the phrasing or formatting categories. There might
    -                        not be one. */
    -                        $fe_s_pos = array_search($formatting_element, $this->stack, true);
    -                        $length = count($this->stack);
    -
    -                        for($s = $fe_s_pos + 1; $s < $length; $s++) {
    -                            $category = $this->getElementCategory($this->stack[$s]);
    -
    -                            if($category !== self::PHRASING && $category !== self::FORMATTING) {
    -                                $furthest_block = $this->stack[$s];
    -                                break;
    -                            }
    -                        }
    -
    -                        /* 3. If there is no furthest block, then the UA must
    -                        skip the subsequent steps and instead just pop all
    -                        the nodes from the bottom of the stack of open
    -                        elements, from the current node up to the formatting
    -                        element, and remove the formatting element from the
    -                        list of active formatting elements. */
    -                        if(!isset($furthest_block)) {
    -                            for($n = $length - 1; $n >= $fe_s_pos; $n--) {
    -                                array_pop($this->stack);
    -                            }
    -
    -                            unset($this->a_formatting[$fe_af_pos]);
    -                            $this->a_formatting = array_merge($this->a_formatting);
    -                            break;
    -                        }
    -
    -                        /* 4. Let the common ancestor be the element
    -                        immediately above the formatting element in the stack
    -                        of open elements. */
    -                        $common_ancestor = $this->stack[$fe_s_pos - 1];
    -
    -                        /* 5. Let a bookmark note the position of the
    -                        formatting element in the list of active formatting
    -                        elements relative to the elements on either side
    -                        of it in the list. */
    -                        $bookmark = $fe_af_pos;
    -
    -                        /* 6. Let node and last node  be the furthest block.
    -                        Follow these steps: */
    -                        $node = $furthest_block;
    -                        $last_node = $furthest_block;
    -
    -                        while(true) {
    -                            for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
    -                                /* 6.1 Let node be the element immediately
    -                                prior to node in the stack of open elements. */
    -                                $node = $this->stack[$n];
    -
    -                                /* 6.2 If node is not in the list of active
    -                                formatting elements, then remove node from
    -                                the stack of open elements and then go back
    -                                to step 1. */
    -                                if(!in_array($node, $this->a_formatting, true)) {
    -                                    array_splice($this->stack, $n, 1);
    -
    -                                } else {
    -                                    break;
    -                                }
    -                            }
    -
    -                            /* 6.3 Otherwise, if node is the formatting
    -                            element, then go to the next step in the overall
    -                            algorithm. */
    -                            if($node === $formatting_element) {
    -                                break;
    -
    -                            /* 6.4 Otherwise, if last node is the furthest
    -                            block, then move the aforementioned bookmark to
    -                            be immediately after the node in the list of
    -                            active formatting elements. */
    -                            } elseif($last_node === $furthest_block) {
    -                                $bookmark = array_search($node, $this->a_formatting, true) + 1;
    -                            }
    -
    -                            /* 6.5 Create an element for the token for which
    -                             * the element node was created, replace the entry
    -                             * for node in the list of active formatting
    -                             * elements with an entry for the new element,
    -                             * replace the entry for node in the stack of open
    -                             * elements with an entry for the new element, and
    -                             * let node be the new element. */
    -                            // we don't know what the token is anymore
    -                            // XDOM
    -                            $clone = $node->cloneNode();
    -                            $a_pos = array_search($node, $this->a_formatting, true);
    -                            $s_pos = array_search($node, $this->stack, true);
    -                            $this->a_formatting[$a_pos] = $clone;
    -                            $this->stack[$s_pos] = $clone;
    -                            $node = $clone;
    -
    -                            /* 6.6 Insert last node into node, first removing
    -                            it from its previous parent node if any. */
    -                            // XDOM
    -                            if($last_node->parentNode !== null) {
    -                                $last_node->parentNode->removeChild($last_node);
    -                            }
    -
    -                            // XDOM
    -                            $node->appendChild($last_node);
    -
    -                            /* 6.7 Let last node be node. */
    -                            $last_node = $node;
    -
    -                            /* 6.8 Return to step 1 of this inner set of steps. */
    -                        }
    -
    -                        /* 7. If the common ancestor node is a table, tbody,
    -                         * tfoot, thead, or tr element, then, foster parent
    -                         * whatever last node ended up being in the previous
    -                         * step, first removing it from its previous parent
    -                         * node if any. */
    -                        // XDOM
    -                        if ($last_node->parentNode) { // common step
    -                            $last_node->parentNode->removeChild($last_node);
    -                        }
    -                        if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
    -                            $this->fosterParent($last_node);
    -                        /* Otherwise, append whatever last node  ended up being
    -                         * in the previous step to the common ancestor node,
    -                         * first removing it from its previous parent node if
    -                         * any. */
    -                        } else {
    -                            // XDOM
    -                            $common_ancestor->appendChild($last_node);
    -                        }
    -
    -                        /* 8. Create an element for the token for which the
    -                         * formatting element was created. */
    -                        // XDOM
    -                        $clone = $formatting_element->cloneNode();
    -
    -                        /* 9. Take all of the child nodes of the furthest
    -                        block and append them to the element created in the
    -                        last step. */
    -                        // XDOM
    -                        while($furthest_block->hasChildNodes()) {
    -                            $child = $furthest_block->firstChild;
    -                            $furthest_block->removeChild($child);
    -                            $clone->appendChild($child);
    -                        }
    -
    -                        /* 10. Append that clone to the furthest block. */
    -                        // XDOM
    -                        $furthest_block->appendChild($clone);
    -
    -                        /* 11. Remove the formatting element from the list
    -                        of active formatting elements, and insert the new element
    -                        into the list of active formatting elements at the
    -                        position of the aforementioned bookmark. */
    -                        $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
    -                        array_splice($this->a_formatting, $fe_af_pos, 1);
    -
    -                        $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
    -                        $af_part2 = array_slice($this->a_formatting, $bookmark);
    -                        $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
    -
    -                        /* 12. Remove the formatting element from the stack
    -                        of open elements, and insert the new element into the stack
    -                        of open elements immediately below the position of the
    -                        furthest block in that stack. */
    -                        $fe_s_pos = array_search($formatting_element, $this->stack, true);
    -                        array_splice($this->stack, $fe_s_pos, 1);
    -
    -                        $fb_s_pos = array_search($furthest_block, $this->stack, true);
    -                        $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
    -                        $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
    -                        $this->stack = array_merge($s_part1, array($clone), $s_part2);
    -
    -                        /* 13. Jump back to step 1 in this series of steps. */
    -                        unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
    -                    }
    -                break;
    -
    -                case 'applet': case 'button': case 'marquee': case 'object':
    -                    /* If the stack of open elements has an element in scope whose
    -                    tag name matches the tag name of the token, then generate implied
    -                    tags. */
    -                    if($this->elementInScope($token['name'])) {
    -                        $this->generateImpliedEndTags();
    -
    -                        /* Now, if the current node is not an element with the same
    -                        tag name as the token, then this is a parse error. */
    -                        // XERROR: implement logic
    -
    -                        /* Pop elements from the stack of open elements  until
    -                         * an element with the same tag name as the token has
    -                         * been popped from the stack. */
    -                        do {
    -                            $node = array_pop($this->stack);
    -                        } while ($node->tagName !== $token['name']);
    -
    -                        /* Clear the list of active formatting elements up to the
    -                         * last marker. */
    -                        $keys = array_keys($this->a_formatting, self::MARKER, true);
    -                        $marker = end($keys);
    -
    -                        for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
    -                            array_pop($this->a_formatting);
    -                        }
    -                    } else {
    -                        // parse error
    -                    }
    -                break;
    -
    -                case 'br':
    -                    // Parse error
    -                    $this->emitToken(array(
    -                        'name' => 'br',
    -                        'type' => HTML5_Tokenizer::STARTTAG,
    -                    ));
    -                break;
    -
    -                /* An end tag token not covered by the previous entries */
    -                default:
    -                    for($n = count($this->stack) - 1; $n >= 0; $n--) {
    -                        /* Initialise node to be the current node (the bottommost
    -                        node of the stack). */
    -                        $node = $this->stack[$n];
    -
    -                        /* If node has the same tag name as the end tag token,
    -                        then: */
    -                        if($token['name'] === $node->tagName) {
    -                            /* Generate implied end tags. */
    -                            $this->generateImpliedEndTags();
    -
    -                            /* If the tag name of the end tag token does not
    -                            match the tag name of the current node, this is a
    -                            parse error. */
    -                            // XERROR: implement this
    -
    -                            /* Pop all the nodes from the current node up to
    -                            node, including node, then stop these steps. */
    -                            // XSKETCHY
    -                            do {
    -                                $pop = array_pop($this->stack);
    -                            } while ($pop !== $node);
    -                            break;
    -
    -                        } else {
    -                            $category = $this->getElementCategory($node);
    -
    -                            if($category !== self::FORMATTING && $category !== self::PHRASING) {
    -                                /* Otherwise, if node is in neither the formatting
    -                                category nor the phrasing category, then this is a
    -                                parse error. Stop this algorithm. The end tag token
    -                                is ignored. */
    -                                $this->ignored = true;
    -                                break;
    -                                // parse error
    -                            }
    -                        }
    -                        /* Set node to the previous entry in the stack of open elements. Loop. */
    -                    }
    -                break;
    -            }
    -            break;
    -        }
    -        break;
    -
    -    case self::IN_CDATA_RCDATA:
    -        if (
    -            $token['type'] === HTML5_Tokenizer::CHARACTER ||
    -            $token['type'] === HTML5_Tokenizer::SPACECHARACTER
    -        ) {
    -            $this->insertText($token['data']);
    -        } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
    -            // parse error
    -            /* If the current node is a script  element, mark the script
    -             * element as "already executed". */
    -            // probably not necessary
    -            array_pop($this->stack);
    -            $this->mode = $this->original_mode;
    -            $this->emitToken($token);
    -        } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') {
    -            array_pop($this->stack);
    -            $this->mode = $this->original_mode;
    -            // we're ignoring all of the execution stuff
    -        } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
    -            array_pop($this->stack);
    -            $this->mode = $this->original_mode;
    -        }
    -    break;
    -
    -    case self::IN_TABLE:
    -        $clear = array('html', 'table');
    -
    -        /* A character token */
    -        if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
    -            $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
    -            /* Let the pending table character tokens
    -             * be an empty list of tokens. */
    -            $this->pendingTableCharacters = "";
    -            $this->pendingTableCharactersDirty = false;
    -            /* Let the original insertion mode be the current
    -             * insertion mode. */
    -            $this->original_mode = $this->mode;
    -            /* Switch the insertion mode to
    -             * "in table text" and
    -             * reprocess the token. */
    -            $this->mode = self::IN_TABLE_TEXT;
    -            $this->emitToken($token);
    -
    -        /* A comment token */
    -        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
    -            /* Append a Comment node to the current node with the data
    -            attribute set to the data given in the comment token. */
    -            $this->insertComment($token['data']);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
    -            // parse error
    -
    -        /* A start tag whose tag name is "caption" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        $token['name'] === 'caption') {
    -            /* Clear the stack back to a table context. */
    -            $this->clearStackToTableContext($clear);
    -
    -            /* Insert a marker at the end of the list of active
    -            formatting elements. */
    -            $this->a_formatting[] = self::MARKER;
    -
    -            /* Insert an HTML element for the token, then switch the
    -            insertion mode to "in caption". */
    -            $this->insertElement($token);
    -            $this->mode = self::IN_CAPTION;
    -
    -        /* A start tag whose tag name is "colgroup" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        $token['name'] === 'colgroup') {
    -            /* Clear the stack back to a table context. */
    -            $this->clearStackToTableContext($clear);
    -
    -            /* Insert an HTML element for the token, then switch the
    -            insertion mode to "in column group". */
    -            $this->insertElement($token);
    -            $this->mode = self::IN_COLUMN_GROUP;
    -
    -        /* A start tag whose tag name is "col" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        $token['name'] === 'col') {
    -            $this->emitToken(array(
    -                'name' => 'colgroup',
    -                'type' => HTML5_Tokenizer::STARTTAG,
    -                'attr' => array()
    -            ));
    -
    -            $this->emitToken($token);
    -
    -        /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
    -        array('tbody', 'tfoot', 'thead'))) {
    -            /* Clear the stack back to a table context. */
    -            $this->clearStackToTableContext($clear);
    -
    -            /* Insert an HTML element for the token, then switch the insertion
    -            mode to "in table body". */
    -            $this->insertElement($token);
    -            $this->mode = self::IN_TABLE_BODY;
    -
    -        /* A start tag whose tag name is one of: "td", "th", "tr" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        in_array($token['name'], array('td', 'th', 'tr'))) {
    -            /* Act as if a start tag token with the tag name "tbody" had been
    -            seen, then reprocess the current token. */
    -            $this->emitToken(array(
    -                'name' => 'tbody',
    -                'type' => HTML5_Tokenizer::STARTTAG,
    -                'attr' => array()
    -            ));
    -
    -            $this->emitToken($token);
    -
    -        /* A start tag whose tag name is "table" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        $token['name'] === 'table') {
    -            /* Parse error. Act as if an end tag token with the tag name "table"
    -            had been seen, then, if that token wasn't ignored, reprocess the
    -            current token. */
    -            $this->emitToken(array(
    -                'name' => 'table',
    -                'type' => HTML5_Tokenizer::ENDTAG
    -            ));
    -
    -            if (!$this->ignored) $this->emitToken($token);
    -
    -        /* An end tag whose tag name is "table" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        $token['name'] === 'table') {
    -            /* If the stack of open elements does not have an element in table
    -            scope with the same tag name as the token, this is a parse error.
    -            Ignore the token. (fragment case) */
    -            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
    -                $this->ignored = true;
    -
    -            /* Otherwise: */
    -            } else {
    -                do {
    -                    $node = array_pop($this->stack);
    -                } while ($node->tagName !== 'table');
    -
    -                /* Reset the insertion mode appropriately. */
    -                $this->resetInsertionMode();
    -            }
    -
    -        /* An end tag whose tag name is one of: "body", "caption", "col",
    -        "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
    -        array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
    -        'tfoot', 'th', 'thead', 'tr'))) {
    -            // Parse error. Ignore the token.
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        ($token['name'] === 'style' || $token['name'] === 'script')) {
    -            $this->processWithRulesFor($token, self::IN_HEAD);
    -
    -        } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' &&
    -        // assignment is intentional
    -        /* If the token does not have an attribute with the name "type", or
    -         * if it does, but that attribute's value is not an ASCII
    -         * case-insensitive match for the string "hidden", then: act as
    -         * described in the "anything else" entry below. */
    -        ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
    -            // I.e., if its an input with the type attribute == 'hidden'
    -            /* Otherwise */
    -            // parse error
    -            $this->insertElement($token);
    -            array_pop($this->stack);
    -        } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
    -            /* If the current node is not the root html element, then this is a parse error. */
    -            if (end($this->stack)->tagName !== 'html') {
    -                // Note: It can only be the current node in the fragment case.
    -                // parse error
    -            }
    -            /* Stop parsing. */
    -        /* Anything else */
    -        } else {
    -            /* Parse error. Process the token as if the insertion mode was "in
    -            body", with the following exception: */
    -
    -            $old = $this->foster_parent;
    -            $this->foster_parent = true;
    -            $this->processWithRulesFor($token, self::IN_BODY);
    -            $this->foster_parent = $old;
    -        }
    -    break;
    -
    -    case self::IN_TABLE_TEXT:
    -        /* A character token */
    -        if($token['type'] === HTML5_Tokenizer::CHARACTER) {
    -            /* Append the character token to the pending table
    -             * character tokens list. */
    -            $this->pendingTableCharacters .= $token['data'];
    -            $this->pendingTableCharactersDirty = true;
    -        } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
    -            $this->pendingTableCharacters .= $token['data'];
    -        /* Anything else */
    -        } else {
    -            if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
    -                /* If any of the tokens in the pending table character tokens list 
    -                 * are character tokens that are not one of U+0009 CHARACTER 
    -                 * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or 
    -                 * U+0020 SPACE, then reprocess those character tokens using the 
    -                 * rules given in the "anything else" entry in the in table" 
    -                 * insertion mode.*/
    -                if ($this->pendingTableCharactersDirty) {
    -                    /* Parse error. Process the token using the rules for the 
    -                     * "in body" insertion mode, except that if the current 
    -                     * node is a table, tbody, tfoot, thead, or tr element, 
    -                     * then, whenever a node would be inserted into the current 
    -                     * node, it must instead be foster parented. */
    -                    // XERROR
    -                    $old = $this->foster_parent;
    -                    $this->foster_parent = true;
    -                    $text_token = array(
    -                        'type' => HTML5_Tokenizer::CHARACTER,
    -                        'data' => $this->pendingTableCharacters,
    -                    );
    -                    $this->processWithRulesFor($text_token, self::IN_BODY);
    -                    $this->foster_parent = $old;
    -
    -                /* Otherwise, insert the characters given by the pending table 
    -                 * character tokens list into the current node. */
    -                } else {
    -                    $this->insertText($this->pendingTableCharacters);
    -                }
    -                $this->pendingTableCharacters = null;
    -                $this->pendingTableCharactersNull = null;
    -            }
    -
    -            /* Switch the insertion mode to the original insertion mode and 
    -             * reprocess the token.
    -             */
    -            $this->mode = $this->original_mode;
    -            $this->emitToken($token);
    -        }
    -    break;
    -
    -    case self::IN_CAPTION:
    -        /* An end tag whose tag name is "caption" */
    -        if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
    -            /* If the stack of open elements does not have an element in table
    -            scope with the same tag name as the token, this is a parse error.
    -            Ignore the token. (fragment case) */
    -            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
    -                $this->ignored = true;
    -                // Ignore
    -
    -            /* Otherwise: */
    -            } else {
    -                /* Generate implied end tags. */
    -                $this->generateImpliedEndTags();
    -
    -                /* Now, if the current node is not a caption element, then this
    -                is a parse error. */
    -                // XERROR: implement
    -
    -                /* Pop elements from this stack until a caption element has
    -                been popped from the stack. */
    -                do {
    -                    $node = array_pop($this->stack);
    -                } while ($node->tagName !== 'caption');
    -
    -                /* Clear the list of active formatting elements up to the last
    -                marker. */
    -                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
    -
    -                /* Switch the insertion mode to "in table". */
    -                $this->mode = self::IN_TABLE;
    -            }
    -
    -        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    -        "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
    -        name is "table" */
    -        } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
    -        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
    -        'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        $token['name'] === 'table')) {
    -            /* Parse error. Act as if an end tag with the tag name "caption"
    -            had been seen, then, if that token wasn't ignored, reprocess the
    -            current token. */
    -            $this->emitToken(array(
    -                'name' => 'caption',
    -                'type' => HTML5_Tokenizer::ENDTAG
    -            ));
    -
    -            if (!$this->ignored) $this->emitToken($token);
    -
    -        /* An end tag whose tag name is one of: "body", "col", "colgroup",
    -        "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
    -        array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
    -        'thead', 'tr'))) {
    -            // Parse error. Ignore the token.
    -            $this->ignored = true;
    -
    -        /* Anything else */
    -        } else {
    -            /* Process the token as if the insertion mode was "in body". */
    -            $this->processWithRulesFor($token, self::IN_BODY);
    -        }
    -    break;
    -
    -    case self::IN_COLUMN_GROUP:
    -        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
    -        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    -        or U+0020 SPACE */
    -        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
    -            /* Append the character to the current node. */
    -            $this->insertText($token['data']);
    -
    -        /* A comment token */
    -        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
    -            /* Append a Comment node to the current node with the data
    -            attribute set to the data given in the comment token. */
    -            $this->insertToken($token['data']);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
    -            // parse error
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
    -            $this->processWithRulesFor($token, self::IN_BODY);
    -
    -        /* A start tag whose tag name is "col" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
    -            /* Insert a col element for the token. Immediately pop the current
    -            node off the stack of open elements. */
    -            $this->insertElement($token);
    -            array_pop($this->stack);
    -            // XERROR: Acknowledge the token's self-closing flag, if it is set.
    -
    -        /* An end tag whose tag name is "colgroup" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        $token['name'] === 'colgroup') {
    -            /* If the current node is the root html element, then this is a
    -            parse error, ignore the token. (fragment case) */
    -            if(end($this->stack)->tagName === 'html') {
    -                $this->ignored = true;
    -
    -            /* Otherwise, pop the current node (which will be a colgroup
    -            element) from the stack of open elements. Switch the insertion
    -            mode to "in table". */
    -            } else {
    -                array_pop($this->stack);
    -                $this->mode = self::IN_TABLE;
    -            }
    -
    -        /* An end tag whose tag name is "col" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
    -            /* Parse error. Ignore the token. */
    -            $this->ignored = true;
    -
    -        /* An end-of-file token */
    -        /* If the current node is the root html  element */
    -        } elseif($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
    -            /* Stop parsing */
    -
    -        /* Anything else */
    -        } else {
    -            /* Act as if an end tag with the tag name "colgroup" had been seen,
    -            and then, if that token wasn't ignored, reprocess the current token. */
    -            $this->emitToken(array(
    -                'name' => 'colgroup',
    -                'type' => HTML5_Tokenizer::ENDTAG
    -            ));
    -
    -            if (!$this->ignored) $this->emitToken($token);
    -        }
    -    break;
    -
    -    case self::IN_TABLE_BODY:
    -        $clear = array('tbody', 'tfoot', 'thead', 'html');
    -
    -        /* A start tag whose tag name is "tr" */
    -        if($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') {
    -            /* Clear the stack back to a table body context. */
    -            $this->clearStackToTableContext($clear);
    -
    -            /* Insert a tr element for the token, then switch the insertion
    -            mode to "in row". */
    -            $this->insertElement($token);
    -            $this->mode = self::IN_ROW;
    -
    -        /* A start tag whose tag name is one of: "th", "td" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        ($token['name'] === 'th' ||    $token['name'] === 'td')) {
    -            /* Parse error. Act as if a start tag with the tag name "tr" had
    -            been seen, then reprocess the current token. */
    -            $this->emitToken(array(
    -                'name' => 'tr',
    -                'type' => HTML5_Tokenizer::STARTTAG,
    -                'attr' => array()
    -            ));
    -
    -            $this->emitToken($token);
    -
    -        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
    -            /* If the stack of open elements does not have an element in table
    -            scope with the same tag name as the token, this is a parse error.
    -            Ignore the token. */
    -            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
    -                // Parse error
    -                $this->ignored = true;
    -
    -            /* Otherwise: */
    -            } else {
    -                /* Clear the stack back to a table body context. */
    -                $this->clearStackToTableContext($clear);
    -
    -                /* Pop the current node from the stack of open elements. Switch
    -                the insertion mode to "in table". */
    -                array_pop($this->stack);
    -                $this->mode = self::IN_TABLE;
    -            }
    -
    -        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    -        "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
    -        } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
    -        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
    -        ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
    -            /* If the stack of open elements does not have a tbody, thead, or
    -            tfoot element in table scope, this is a parse error. Ignore the
    -            token. (fragment case) */
    -            if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), self::SCOPE_TABLE)) {
    -                // parse error
    -                $this->ignored = true;
    -
    -            /* Otherwise: */
    -            } else {
    -                /* Clear the stack back to a table body context. */
    -                $this->clearStackToTableContext($clear);
    -
    -                /* Act as if an end tag with the same tag name as the current
    -                node ("tbody", "tfoot", or "thead") had been seen, then
    -                reprocess the current token. */
    -                $this->emitToken(array(
    -                    'name' => end($this->stack)->tagName,
    -                    'type' => HTML5_Tokenizer::ENDTAG
    -                ));
    -
    -                $this->emitToken($token);
    -            }
    -
    -        /* An end tag whose tag name is one of: "body", "caption", "col",
    -        "colgroup", "html", "td", "th", "tr" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
    -        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
    -            /* Parse error. Ignore the token. */
    -            $this->ignored = true;
    -
    -        /* Anything else */
    -        } else {
    -            /* Process the token as if the insertion mode was "in table". */
    -            $this->processWithRulesFor($token, self::IN_TABLE);
    -        }
    -    break;
    -
    -    case self::IN_ROW:
    -        $clear = array('tr', 'html');
    -
    -        /* A start tag whose tag name is one of: "th", "td" */
    -        if($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        ($token['name'] === 'th' || $token['name'] === 'td')) {
    -            /* Clear the stack back to a table row context. */
    -            $this->clearStackToTableContext($clear);
    -
    -            /* Insert an HTML element for the token, then switch the insertion
    -            mode to "in cell". */
    -            $this->insertElement($token);
    -            $this->mode = self::IN_CELL;
    -
    -            /* Insert a marker at the end of the list of active formatting
    -            elements. */
    -            $this->a_formatting[] = self::MARKER;
    -
    -        /* An end tag whose tag name is "tr" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') {
    -            /* If the stack of open elements does not have an element in table
    -            scope with the same tag name as the token, this is a parse error.
    -            Ignore the token. (fragment case) */
    -            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
    -                // Ignore.
    -                $this->ignored = true;
    -
    -            /* Otherwise: */
    -            } else {
    -                /* Clear the stack back to a table row context. */
    -                $this->clearStackToTableContext($clear);
    -
    -                /* Pop the current node (which will be a tr element) from the
    -                stack of open elements. Switch the insertion mode to "in table
    -                body". */
    -                array_pop($this->stack);
    -                $this->mode = self::IN_TABLE_BODY;
    -            }
    -
    -        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    -        "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
    -        } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
    -        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
    -        ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
    -            /* Act as if an end tag with the tag name "tr" had been seen, then,
    -            if that token wasn't ignored, reprocess the current token. */
    -            $this->emitToken(array(
    -                'name' => 'tr',
    -                'type' => HTML5_Tokenizer::ENDTAG
    -            ));
    -            if (!$this->ignored) $this->emitToken($token);
    -
    -        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
    -            /* If the stack of open elements does not have an element in table
    -            scope with the same tag name as the token, this is a parse error.
    -            Ignore the token. */
    -            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
    -                $this->ignored = true;
    -
    -            /* Otherwise: */
    -            } else {
    -                /* Otherwise, act as if an end tag with the tag name "tr" had
    -                been seen, then reprocess the current token. */
    -                $this->emitToken(array(
    -                    'name' => 'tr',
    -                    'type' => HTML5_Tokenizer::ENDTAG
    -                ));
    -
    -                $this->emitToken($token);
    -            }
    -
    -        /* An end tag whose tag name is one of: "body", "caption", "col",
    -        "colgroup", "html", "td", "th" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
    -        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
    -            /* Parse error. Ignore the token. */
    -            $this->ignored = true;
    -
    -        /* Anything else */
    -        } else {
    -            /* Process the token as if the insertion mode was "in table". */
    -            $this->processWithRulesFor($token, self::IN_TABLE);
    -        }
    -    break;
    -
    -    case self::IN_CELL:
    -        /* An end tag whose tag name is one of: "td", "th" */
    -        if($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        ($token['name'] === 'td' || $token['name'] === 'th')) {
    -            /* If the stack of open elements does not have an element in table
    -            scope with the same tag name as that of the token, then this is a
    -            parse error and the token must be ignored. */
    -            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
    -                $this->ignored = true;
    -
    -            /* Otherwise: */
    -            } else {
    -                /* Generate implied end tags, except for elements with the same
    -                tag name as the token. */
    -                $this->generateImpliedEndTags(array($token['name']));
    -
    -                /* Now, if the current node is not an element with the same tag
    -                name as the token, then this is a parse error. */
    -                // XERROR: Implement parse error code
    -
    -                /* Pop elements from this stack until an element with the same
    -                tag name as the token has been popped from the stack. */
    -                do {
    -                    $node = array_pop($this->stack);
    -                } while ($node->tagName !== $token['name']);
    -
    -                /* Clear the list of active formatting elements up to the last
    -                marker. */
    -                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
    -
    -                /* Switch the insertion mode to "in row". (The current node
    -                will be a tr element at this point.) */
    -                $this->mode = self::IN_ROW;
    -            }
    -
    -        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    -        "tbody", "td", "tfoot", "th", "thead", "tr" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
    -        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
    -        'thead', 'tr'))) {
    -            /* If the stack of open elements does not have a td or th element
    -            in table scope, then this is a parse error; ignore the token.
    -            (fragment case) */
    -            if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
    -                // parse error
    -                $this->ignored = true;
    -
    -            /* Otherwise, close the cell (see below) and reprocess the current
    -            token. */
    -            } else {
    -                $this->closeCell();
    -                $this->emitToken($token);
    -            }
    -
    -        /* An end tag whose tag name is one of: "body", "caption", "col",
    -        "colgroup", "html" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
    -        array('body', 'caption', 'col', 'colgroup', 'html'))) {
    -            /* Parse error. Ignore the token. */
    -            $this->ignored = true;
    -
    -        /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
    -        "thead", "tr" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
    -        array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
    -            /* If the stack of open elements does not have a td or th element
    -            in table scope, then this is a parse error; ignore the token.
    -            (innerHTML case) */
    -            if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
    -                // Parse error
    -                $this->ignored = true;
    -
    -            /* Otherwise, close the cell (see below) and reprocess the current
    -            token. */
    -            } else {
    -                $this->closeCell();
    -                $this->emitToken($token);
    -            }
    -
    -        /* Anything else */
    -        } else {
    -            /* Process the token as if the insertion mode was "in body". */
    -            $this->processWithRulesFor($token, self::IN_BODY);
    -        }
    -    break;
    -
    -    case self::IN_SELECT:
    -        /* Handle the token as follows: */
    -
    -        /* A character token */
    -        if(
    -            $token['type'] === HTML5_Tokenizer::CHARACTER ||
    -            $token['type'] === HTML5_Tokenizer::SPACECHARACTER
    -        ) {
    -            /* Append the token's character to the current node. */
    -            $this->insertText($token['data']);
    -
    -        /* A comment token */
    -        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
    -            /* Append a Comment node to the current node with the data
    -            attribute set to the data given in the comment token. */
    -            $this->insertComment($token['data']);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
    -            // parse error
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
    -            $this->processWithRulesFor($token, self::INBODY);
    -
    -        /* A start tag token whose tag name is "option" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        $token['name'] === 'option') {
    -            /* If the current node is an option element, act as if an end tag
    -            with the tag name "option" had been seen. */
    -            if(end($this->stack)->tagName === 'option') {
    -                $this->emitToken(array(
    -                    'name' => 'option',
    -                    'type' => HTML5_Tokenizer::ENDTAG
    -                ));
    -            }
    -
    -            /* Insert an HTML element for the token. */
    -            $this->insertElement($token);
    -
    -        /* A start tag token whose tag name is "optgroup" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        $token['name'] === 'optgroup') {
    -            /* If the current node is an option element, act as if an end tag
    -            with the tag name "option" had been seen. */
    -            if(end($this->stack)->tagName === 'option') {
    -                $this->emitToken(array(
    -                    'name' => 'option',
    -                    'type' => HTML5_Tokenizer::ENDTAG
    -                ));
    -            }
    -
    -            /* If the current node is an optgroup element, act as if an end tag
    -            with the tag name "optgroup" had been seen. */
    -            if(end($this->stack)->tagName === 'optgroup') {
    -                $this->emitToken(array(
    -                    'name' => 'optgroup',
    -                    'type' => HTML5_Tokenizer::ENDTAG
    -                ));
    -            }
    -
    -            /* Insert an HTML element for the token. */
    -            $this->insertElement($token);
    -
    -        /* An end tag token whose tag name is "optgroup" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        $token['name'] === 'optgroup') {
    -            /* First, if the current node is an option element, and the node
    -            immediately before it in the stack of open elements is an optgroup
    -            element, then act as if an end tag with the tag name "option" had
    -            been seen. */
    -            $elements_in_stack = count($this->stack);
    -
    -            if($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
    -            $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
    -                $this->emitToken(array(
    -                    'name' => 'option',
    -                    'type' => HTML5_Tokenizer::ENDTAG
    -                ));
    -            }
    -
    -            /* If the current node is an optgroup element, then pop that node
    -            from the stack of open elements. Otherwise, this is a parse error,
    -            ignore the token. */
    -            if(end($this->stack)->tagName === 'optgroup') {
    -                array_pop($this->stack);
    -            } else {
    -                // parse error
    -                $this->ignored = true;
    -            }
    -
    -        /* An end tag token whose tag name is "option" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        $token['name'] === 'option') {
    -            /* If the current node is an option element, then pop that node
    -            from the stack of open elements. Otherwise, this is a parse error,
    -            ignore the token. */
    -            if(end($this->stack)->tagName === 'option') {
    -                array_pop($this->stack);
    -            } else {
    -                // parse error
    -                $this->ignored = true;
    -            }
    -
    -        /* An end tag whose tag name is "select" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        $token['name'] === 'select') {
    -            /* If the stack of open elements does not have an element in table
    -            scope with the same tag name as the token, this is a parse error.
    -            Ignore the token. (fragment case) */
    -            if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
    -                $this->ignored = true;
    -                // parse error
    -
    -            /* Otherwise: */
    -            } else {
    -                /* Pop elements from the stack of open elements until a select
    -                element has been popped from the stack. */
    -                do {
    -                    $node = array_pop($this->stack);
    -                } while ($node->tagName !== 'select');
    -
    -                /* Reset the insertion mode appropriately. */
    -                $this->resetInsertionMode();
    -            }
    -
    -        /* A start tag whose tag name is "select" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
    -            /* Parse error. Act as if the token had been an end tag with the
    -            tag name "select" instead. */
    -            $this->emitToken(array(
    -                'name' => 'select',
    -                'type' => HTML5_Tokenizer::ENDTAG
    -            ));
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        ($token['name'] === 'input' || $token['name'] === 'keygen' ||  $token['name'] === 'textarea')) {
    -            // parse error
    -            $this->emitToken(array(
    -                'name' => 'select',
    -                'type' => HTML5_Tokenizer::ENDTAG
    -            ));
    -            $this->emitToken($token);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
    -            $this->processWithRulesFor($token, self::IN_HEAD);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
    -            // XERROR: If the current node is not the root html element, then this is a parse error.
    -            /* Stop parsing */
    -
    -        /* Anything else */
    -        } else {
    -            /* Parse error. Ignore the token. */
    -            $this->ignored = true;
    -        }
    -    break;
    -
    -    case self::IN_SELECT_IN_TABLE:
    -
    -        if($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        in_array($token['name'], array('caption', 'table', 'tbody',
    -        'tfoot', 'thead', 'tr', 'td', 'th'))) {
    -            // parse error
    -            $this->emitToken(array(
    -                'name' => 'select',
    -                'type' => HTML5_Tokenizer::ENDTAG,
    -            ));
    -            $this->emitToken($token);
    -
    -        /* An end tag whose tag name is one of: "caption", "table", "tbody",
    -        "tfoot", "thead", "tr", "td", "th" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th')))  {
    -            /* Parse error. */
    -            // parse error
    -
    -            /* If the stack of open elements has an element in table scope with
    -            the same tag name as that of the token, then act as if an end tag
    -            with the tag name "select" had been seen, and reprocess the token.
    -            Otherwise, ignore the token. */
    -            if($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
    -                $this->emitToken(array(
    -                    'name' => 'select',
    -                    'type' => HTML5_Tokenizer::ENDTAG
    -                ));
    -
    -                $this->emitToken($token);
    -            } else {
    -                $this->ignored = true;
    -            }
    -        } else {
    -            $this->processWithRulesFor($token, self::IN_SELECT);
    -        }
    -    break;
    -
    -    case self::IN_FOREIGN_CONTENT:
    -        if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
    -        $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
    -            $this->insertText($token['data']);
    -        } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
    -            $this->insertComment($token['data']);
    -        } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
    -            // XERROR: parse error
    -        } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
    -        // XDOM
    -        end($this->stack)->namespaceURI === self::NS_SVG) {
    -            array_pop($this->stack);
    -            // a bunch of script running mumbo jumbo
    -        } elseif (
    -            ($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -                ((
    -                    $token['name'] !== 'mglyph' &&
    -                    $token['name'] !== 'malignmark' &&
    -                    // XDOM
    -                    end($this->stack)->namespaceURI === self::NS_MATHML &&
    -                    in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext'))
    -                ) ||
    -                (
    -                    $token['name'] === 'svg' &&
    -                    // XDOM
    -                    end($this->stack)->namespaceURI === self::NS_MATHML &&
    -                    end($this->stack)->tagName === 'annotation-xml'
    -                ) ||
    -                (
    -                    // XDOM
    -                    end($this->stack)->namespaceURI === self::NS_SVG &&
    -                    in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title'))
    -                ) ||
    -                (
    -                    // XSKETCHY && XDOM
    -                    end($this->stack)->namespaceURI === self::NS_HTML
    -                ))
    -            ) || $token['type'] === HTML5_Tokenizer::ENDTAG
    -        ) {
    -            $this->processWithRulesFor($token, $this->secondary_mode);
    -            /* If, after doing so, the insertion mode is still "in foreign 
    -             * content", but there is no element in scope that has a namespace 
    -             * other than the HTML namespace, switch the insertion mode to the 
    -             * secondary insertion mode. */
    -            if ($this->mode === self::IN_FOREIGN_CONTENT) {
    -                $found = false;
    -                // this basically duplicates elementInScope()
    -                for ($i = count($this->stack) - 1; $i >= 0; $i--) {
    -                    // XDOM
    -                    $node = $this->stack[$i];
    -                    if ($node->namespaceURI !== self::NS_HTML) {
    -                        $found = true;
    -                        break;
    -                    } elseif (in_array($node->tagName, array('table', 'html',
    -                    'applet', 'caption', 'td', 'th', 'button', 'marquee',
    -                    'object')) || ($node->tagName === 'foreignObject' &&
    -                    $node->namespaceURI === self::NS_SVG)) {
    -                        break;
    -                    }
    -                }
    -                if (!$found) {
    -                    $this->mode = $this->secondary_mode;
    -                }
    -            }
    -        } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
    -        $token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        (in_array($token['name'], array('b', "big", "blockquote", "body", "br", 
    -        "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2", 
    -        "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing", 
    -        "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s",  "small", 
    -        "span", "strong", "strike",  "sub", "sup", "table", "tt", "u", "ul", 
    -        "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
    -        $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
    -            // XERROR: parse error
    -            do {
    -                $node = array_pop($this->stack);
    -                // XDOM
    -            } while ($node->namespaceURI !== self::NS_HTML);
    -            $this->stack[] = $node;
    -            $this->mode = $this->secondary_mode;
    -            $this->emitToken($token);
    -        } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
    -            static $svg_lookup = array(
    -                'altglyph' => 'altGlyph',
    -                'altglyphdef' => 'altGlyphDef',
    -                'altglyphitem' => 'altGlyphItem',
    -                'animatecolor' => 'animateColor',
    -                'animatemotion' => 'animateMotion',
    -                'animatetransform' => 'animateTransform',
    -                'clippath' => 'clipPath',
    -                'feblend' => 'feBlend',
    -                'fecolormatrix' => 'feColorMatrix',
    -                'fecomponenttransfer' => 'feComponentTransfer',
    -                'fecomposite' => 'feComposite',
    -                'feconvolvematrix' => 'feConvolveMatrix',
    -                'fediffuselighting' => 'feDiffuseLighting',
    -                'fedisplacementmap' => 'feDisplacementMap',
    -                'fedistantlight' => 'feDistantLight',
    -                'feflood' => 'feFlood',
    -                'fefunca' => 'feFuncA',
    -                'fefuncb' => 'feFuncB',
    -                'fefuncg' => 'feFuncG',
    -                'fefuncr' => 'feFuncR',
    -                'fegaussianblur' => 'feGaussianBlur',
    -                'feimage' => 'feImage',
    -                'femerge' => 'feMerge',
    -                'femergenode' => 'feMergeNode',
    -                'femorphology' => 'feMorphology',
    -                'feoffset' => 'feOffset',
    -                'fepointlight' => 'fePointLight',
    -                'fespecularlighting' => 'feSpecularLighting',
    -                'fespotlight' => 'feSpotLight',
    -                'fetile' => 'feTile',
    -                'feturbulence' => 'feTurbulence',
    -                'foreignobject' => 'foreignObject',
    -                'glyphref' => 'glyphRef',
    -                'lineargradient' => 'linearGradient',
    -                'radialgradient' => 'radialGradient',
    -                'textpath' => 'textPath',
    -            );
    -            // XDOM
    -            $current = end($this->stack);
    -            if ($current->namespaceURI === self::NS_MATHML) {
    -                $token = $this->adjustMathMLAttributes($token);
    -            }
    -            if ($current->namespaceURI === self::NS_SVG &&
    -            isset($svg_lookup[$token['name']])) {
    -                $token['name'] = $svg_lookup[$token['name']];
    -            }
    -            if ($current->namespaceURI === self::NS_SVG) {
    -                $token = $this->adjustSVGAttributes($token);
    -            }
    -            $token = $this->adjustForeignAttributes($token);
    -            $this->insertForeignElement($token, $current->namespaceURI);
    -            if (isset($token['self-closing'])) {
    -                array_pop($this->stack);
    -                // XERROR: acknowledge self-closing flag
    -            }
    -        }
    -    break;
    -
    -    case self::AFTER_BODY:
    -        /* Handle the token as follows: */
    -
    -        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
    -        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    -        or U+0020 SPACE */
    -        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
    -            /* Process the token as it would be processed if the insertion mode
    -            was "in body". */
    -            $this->processWithRulesFor($token, self::IN_BODY);
    -
    -        /* A comment token */
    -        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
    -            /* Append a Comment node to the first element in the stack of open
    -            elements (the html element), with the data attribute set to the
    -            data given in the comment token. */
    -            // XDOM
    -            $comment = $this->dom->createComment($token['data']);
    -            $this->stack[0]->appendChild($comment);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
    -            // parse error
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
    -            $this->processWithRulesFor($token, self::IN_BODY);
    -
    -        /* An end tag with the tag name "html" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
    -            /*     If the parser was originally created as part of the HTML
    -             *     fragment parsing algorithm, this is a parse error; ignore
    -             *     the token. (fragment case) */
    -            $this->ignored = true;
    -            // XERROR: implement this
    -
    -            $this->mode = self::AFTER_AFTER_BODY;
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
    -            /* Stop parsing */
    -
    -        /* Anything else */
    -        } else {
    -            /* Parse error. Set the insertion mode to "in body" and reprocess
    -            the token. */
    -            $this->mode = self::IN_BODY;
    -            $this->emitToken($token);
    -        }
    -    break;
    -
    -    case self::IN_FRAMESET:
    -        /* Handle the token as follows: */
    -
    -        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
    -        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    -        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
    -        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
    -            /* Append the character to the current node. */
    -            $this->insertText($token['data']);
    -
    -        /* A comment token */
    -        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
    -            /* Append a Comment node to the current node with the data
    -            attribute set to the data given in the comment token. */
    -            $this->insertComment($token['data']);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
    -            // parse error
    -
    -        /* A start tag with the tag name "frameset" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        $token['name'] === 'frameset') {
    -            $this->insertElement($token);
    -
    -        /* An end tag with the tag name "frameset" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        $token['name'] === 'frameset') {
    -            /* If the current node is the root html element, then this is a
    -            parse error; ignore the token. (fragment case) */
    -            if(end($this->stack)->tagName === 'html') {
    -                $this->ignored = true;
    -                // Parse error
    -
    -            } else {
    -                /* Otherwise, pop the current node from the stack of open
    -                elements. */
    -                array_pop($this->stack);
    -
    -                /* If the parser was not originally created as part of the HTML 
    -                 * fragment parsing algorithm  (fragment case), and the current 
    -                 * node is no longer a frameset element, then switch the 
    -                 * insertion mode to "after frameset". */
    -                $this->mode = self::AFTER_FRAMESET;
    -            }
    -
    -        /* A start tag with the tag name "frame" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        $token['name'] === 'frame') {
    -            /* Insert an HTML element for the token. */
    -            $this->insertElement($token);
    -
    -            /* Immediately pop the current node off the stack of open elements. */
    -            array_pop($this->stack);
    -
    -            // XERROR: Acknowledge the token's self-closing flag, if it is set.
    -
    -        /* A start tag with the tag name "noframes" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        $token['name'] === 'noframes') {
    -            /* Process the token using the rules for the "in head" insertion mode. */
    -            $this->processwithRulesFor($token, self::IN_HEAD);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
    -            // XERROR: If the current node is not the root html element, then this is a parse error.
    -            /* Stop parsing */
    -        /* Anything else */
    -        } else {
    -            /* Parse error. Ignore the token. */
    -            $this->ignored = true;
    -        }
    -    break;
    -
    -    case self::AFTER_FRAMESET:
    -        /* Handle the token as follows: */
    -
    -        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
    -        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    -        U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
    -        if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
    -            /* Append the character to the current node. */
    -            $this->insertText($token['data']);
    -
    -        /* A comment token */
    -        } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
    -            /* Append a Comment node to the current node with the data
    -            attribute set to the data given in the comment token. */
    -            $this->insertComment($token['data']);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
    -            // parse error
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
    -            $this->processWithRulesFor($token, self::IN_BODY);
    -
    -        /* An end tag with the tag name "html" */
    -        } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
    -        $token['name'] === 'html') {
    -            $this->mode = self::AFTER_AFTER_FRAMESET;
    -
    -        /* A start tag with the tag name "noframes" */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
    -        $token['name'] === 'noframes') {
    -            $this->processWithRulesFor($token, self::IN_HEAD);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
    -            /* Stop parsing */
    -
    -        /* Anything else */
    -        } else {
    -            /* Parse error. Ignore the token. */
    -            $this->ignored = true;
    -        }
    -    break;
    -
    -    case self::AFTER_AFTER_BODY:
    -        /* A comment token */
    -        if($token['type'] === HTML5_Tokenizer::COMMENT) {
    -            /* Append a Comment node to the Document object with the data
    -            attribute set to the data given in the comment token. */
    -            // XDOM
    -            $comment = $this->dom->createComment($token['data']);
    -            $this->dom->appendChild($comment);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
    -        $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
    -        ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
    -            $this->processWithRulesFor($token, self::IN_BODY);
    -
    -        /* An end-of-file token */
    -        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
    -            /* OMG DONE!! */
    -        } else {
    -            // parse error
    -            $this->mode = self::IN_BODY;
    -            $this->emitToken($token);
    -        }
    -    break;
    -
    -    case self::AFTER_AFTER_FRAMESET:
    -        /* A comment token */
    -        if($token['type'] === HTML5_Tokenizer::COMMENT) {
    -            /* Append a Comment node to the Document object with the data
    -            attribute set to the data given in the comment token. */
    -            // XDOM
    -            $comment = $this->dom->createComment($token['data']);
    -            $this->dom->appendChild($comment);
    -
    -        } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
    -        $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
    -        ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
    -            $this->processWithRulesFor($token, self::IN_BODY);
    -
    -        /* An end-of-file token */
    -        } elseif($token['type'] === HTML5_Tokenizer::EOF) {
    -            /* OMG DONE!! */
    -        } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
    -            $this->processWithRulesFor($token, self::IN_HEAD);
    -        } else {
    -            // parse error
    -        }
    -    break;
    -    }
    -        // end funky indenting
    -        }
    -
    -    private function insertElement($token, $append = true) {
    -        //$el = $this->dom->createElementNS(self::NS_HTML, $token['name']);
    -        $namespaceURI = strpos($token['name'], ':') ? self::NS_XHTML : self::NS_HTML;
    -        $el = $this->dom->createElementNS($namespaceURI, $token['name']);
    -
    -        if (!empty($token['attr'])) {
    -            foreach($token['attr'] as $attr) {
    -
    -				// mike@macgirvin.com 2011-11-17, check attribute name for
    -				// validity (ignoring extenders and combiners) as illegal chars in names
    -				// causes everything to abort
    -
    - 				$valid = preg_match('/^[a-zA-Z\_\:]([\-a-zA-Z0-9\_\:\.]+$)/',$attr['name']);
    -                if($attr['name'] && (!$el->hasAttribute($attr['name'])) && ($valid)) {
    -                    $el->setAttribute($attr['name'], $attr['value']);
    -                }
    -            }
    -        }
    -        if ($append) {
    -            $this->appendToRealParent($el);
    -            $this->stack[] = $el;
    -        }
    -
    -        return $el;
    -    }
    -
    -    private function insertText($data) {
    -        if ($data === '') return;
    -        if ($this->ignore_lf_token) {
    -            if ($data[0] === "\n") {
    -                $data = substr($data, 1);
    -                if ($data === false) return;
    -            }
    -        }
    -        $text = $this->dom->createTextNode($data);
    -        $this->appendToRealParent($text);
    -    }
    -
    -    private function insertComment($data) {
    -        $comment = $this->dom->createComment($data);
    -        $this->appendToRealParent($comment);
    -    }
    -
    -    private function appendToRealParent($node) {
    -        // this is only for the foster_parent case
    -        /* If the current node is a table, tbody, tfoot, thead, or tr
    -        element, then, whenever a node would be inserted into the current
    -        node, it must instead be inserted into the foster parent element. */
    -        if(!$this->foster_parent || !in_array(end($this->stack)->tagName,
    -        array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
    -            end($this->stack)->appendChild($node);
    -        } else {
    -            $this->fosterParent($node);
    -        }
    -    }
    -
    -    private function elementInScope($el, $scope = self::SCOPE) {
    -        if(is_array($el)) {
    -            foreach($el as $element) {
    -                if($this->elementInScope($element, $scope)) {
    -                    return true;
    -                }
    -            }
    -
    -            return false;
    -        }
    -
    -        $leng = count($this->stack);
    -
    -        for($n = 0; $n < $leng; $n++) {
    -            /* 1. Initialise node to be the current node (the bottommost node of
    -            the stack). */
    -            $node = $this->stack[$leng - 1 - $n];
    -
    -            if($node->tagName === $el) {
    -                /* 2. If node is the target node, terminate in a match state. */
    -                return true;
    -
    -                // We've expanded the logic for these states a little differently;
    -                // Hixie's refactoring into "specific scope" is more general, but
    -                // this "gets the job done"
    -
    -            // these are the common states for all scopes
    -            } elseif($node->tagName === 'table' || $node->tagName === 'html') {
    -                return false;
    -
    -            // these are valid for "in scope" and "in list item scope"
    -            } elseif($scope !== self::SCOPE_TABLE &&
    -            (in_array($node->tagName, array('applet', 'caption', 'td',
    -                'th', 'button', 'marquee', 'object')) ||
    -                $node->tagName === 'foreignObject' && $node->namespaceURI === self::NS_SVG)) {
    -                return false;
    -
    -
    -            // these are valid for "in list item scope"
    -            } elseif($scope === self::SCOPE_LISTITEM && in_array($node->tagName, array('ol', 'ul'))) {
    -                return false;
    -            }
    -
    -            /* Otherwise, set node to the previous entry in the stack of open
    -            elements and return to step 2. (This will never fail, since the loop
    -            will always terminate in the previous step if the top of the stack
    -            is reached.) */
    -        }
    -    }
    -
    -    private function reconstructActiveFormattingElements() {
    -        /* 1. If there are no entries in the list of active formatting elements,
    -        then there is nothing to reconstruct; stop this algorithm. */
    -        $formatting_elements = count($this->a_formatting);
    -
    -        if($formatting_elements === 0) {
    -            return false;
    -        }
    -
    -        /* 3. Let entry be the last (most recently added) element in the list
    -        of active formatting elements. */
    -        $entry = end($this->a_formatting);
    -
    -        /* 2. If the last (most recently added) entry in the list of active
    -        formatting elements is a marker, or if it is an element that is in the
    -        stack of open elements, then there is nothing to reconstruct; stop this
    -        algorithm. */
    -        if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
    -            return false;
    -        }
    -
    -        for($a = $formatting_elements - 1; $a >= 0; true) {
    -            /* 4. If there are no entries before entry in the list of active
    -            formatting elements, then jump to step 8. */
    -            if($a === 0) {
    -                $step_seven = false;
    -                break;
    -            }
    -
    -            /* 5. Let entry be the entry one earlier than entry in the list of
    -            active formatting elements. */
    -            $a--;
    -            $entry = $this->a_formatting[$a];
    -
    -            /* 6. If entry is neither a marker nor an element that is also in
    -            thetack of open elements, go to step 4. */
    -            if($entry === self::MARKER || in_array($entry, $this->stack, true)) {
    -                break;
    -            }
    -        }
    -
    -        while(true) {
    -            /* 7. Let entry be the element one later than entry in the list of
    -            active formatting elements. */
    -            if(isset($step_seven) && $step_seven === true) {
    -                $a++;
    -                $entry = $this->a_formatting[$a];
    -            }
    -
    -            /* 8. Perform a shallow clone of the element entry to obtain clone. */
    -            $clone = $entry->cloneNode();
    -
    -            /* 9. Append clone to the current node and push it onto the stack
    -            of open elements  so that it is the new current node. */
    -            $this->appendToRealParent($clone);
    -            $this->stack[] = $clone;
    -
    -            /* 10. Replace the entry for entry in the list with an entry for
    -            clone. */
    -            $this->a_formatting[$a] = $clone;
    -
    -            /* 11. If the entry for clone in the list of active formatting
    -            elements is not the last entry in the list, return to step 7. */
    -            if(end($this->a_formatting) !== $clone) {
    -                $step_seven = true;
    -            } else {
    -                break;
    -            }
    -        }
    -    }
    -
    -    private function clearTheActiveFormattingElementsUpToTheLastMarker() {
    -        /* When the steps below require the UA to clear the list of active
    -        formatting elements up to the last marker, the UA must perform the
    -        following steps: */
    -
    -        while(true) {
    -            /* 1. Let entry be the last (most recently added) entry in the list
    -            of active formatting elements. */
    -            $entry = end($this->a_formatting);
    -
    -            /* 2. Remove entry from the list of active formatting elements. */
    -            array_pop($this->a_formatting);
    -
    -            /* 3. If entry was a marker, then stop the algorithm at this point.
    -            The list has been cleared up to the last marker. */
    -            if($entry === self::MARKER) {
    -                break;
    -            }
    -        }
    -    }
    -
    -    private function generateImpliedEndTags($exclude = array()) {
    -        /* When the steps below require the UA to generate implied end tags, 
    -         * then, while the current node is a dc element, a dd element, a ds 
    -         * element, a dt element, an li element, an option element, an optgroup 
    -         * element, a p element, an rp element, or an rt element, the UA must 
    -         * pop the current node off the stack of open elements. */
    -        $node = end($this->stack);
    -        $elements = array_diff(array('dc', 'dd', 'ds', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);
    -
    -        while(in_array(end($this->stack)->tagName, $elements)) {
    -            array_pop($this->stack);
    -        }
    -    }
    -
    -    private function getElementCategory($node) {
    -        if (!is_object($node)) debug_print_backtrace();
    -        $name = $node->tagName;
    -        if(in_array($name, $this->special))
    -            return self::SPECIAL;
    -
    -        elseif(in_array($name, $this->scoping))
    -            return self::SCOPING;
    -
    -        elseif(in_array($name, $this->formatting))
    -            return self::FORMATTING;
    -
    -        else
    -            return self::PHRASING;
    -    }
    -
    -    private function clearStackToTableContext($elements) {
    -        /* When the steps above require the UA to clear the stack back to a
    -        table context, it means that the UA must, while the current node is not
    -        a table element or an html element, pop elements from the stack of open
    -        elements. */
    -        while(true) {
    -            $name = end($this->stack)->tagName;
    -
    -            if(in_array($name, $elements)) {
    -                break;
    -            } else {
    -                array_pop($this->stack);
    -            }
    -        }
    -    }
    -
    -    private function resetInsertionMode($context = null) {
    -        /* 1. Let last be false. */
    -        $last = false;
    -        $leng = count($this->stack);
    -
    -        for($n = $leng - 1; $n >= 0; $n--) {
    -            /* 2. Let node be the last node in the stack of open elements. */
    -            $node = $this->stack[$n];
    -
    -            /* 3. If node is the first node in the stack of open elements, then 
    -             * set last to true and set node to the context  element. (fragment 
    -             * case) */
    -            if($this->stack[0]->isSameNode($node)) {
    -                $last = true;
    -                $node = $context;
    -            }
    -
    -            /* 4. If node is a select element, then switch the insertion mode to
    -            "in select" and abort these steps. (fragment case) */
    -            if($node->tagName === 'select') {
    -                $this->mode = self::IN_SELECT;
    -                break;
    -
    -            /* 5. If node is a td or th element, then switch the insertion mode
    -            to "in cell" and abort these steps. */
    -            } elseif($node->tagName === 'td' || $node->nodeName === 'th') {
    -                $this->mode = self::IN_CELL;
    -                break;
    -
    -            /* 6. If node is a tr element, then switch the insertion mode to
    -            "in    row" and abort these steps. */
    -            } elseif($node->tagName === 'tr') {
    -                $this->mode = self::IN_ROW;
    -                break;
    -
    -            /* 7. If node is a tbody, thead, or tfoot element, then switch the
    -            insertion mode to "in table body" and abort these steps. */
    -            } elseif(in_array($node->tagName, array('tbody', 'thead', 'tfoot'))) {
    -                $this->mode = self::IN_TABLE_BODY;
    -                break;
    -
    -            /* 8. If node is a caption element, then switch the insertion mode
    -            to "in caption" and abort these steps. */
    -            } elseif($node->tagName === 'caption') {
    -                $this->mode = self::IN_CAPTION;
    -                break;
    -
    -            /* 9. If node is a colgroup element, then switch the insertion mode
    -            to "in column group" and abort these steps. (innerHTML case) */
    -            } elseif($node->tagName === 'colgroup') {
    -                $this->mode = self::IN_COLUMN_GROUP;
    -                break;
    -
    -            /* 10. If node is a table element, then switch the insertion mode
    -            to "in table" and abort these steps. */
    -            } elseif($node->tagName === 'table') {
    -                $this->mode = self::IN_TABLE;
    -                break;
    -
    -            /* 11. If node is an element from the MathML namespace or the SVG 
    -             * namespace, then switch the insertion mode to "in foreign 
    -             * content", let the secondary insertion mode be "in body", and 
    -             * abort these steps. */
    -            } elseif($node->namespaceURI === self::NS_SVG ||
    -            $node->namespaceURI === self::NS_MATHML) {
    -                $this->mode = self::IN_FOREIGN_CONTENT;
    -                $this->secondary_mode = self::IN_BODY;
    -                break;
    -
    -            /* 12. If node is a head element, then switch the insertion mode
    -            to "in body" ("in body"! not "in head"!) and abort these steps.
    -            (fragment case) */
    -            } elseif($node->tagName === 'head') {
    -                $this->mode = self::IN_BODY;
    -                break;
    -
    -            /* 13. If node is a body element, then switch the insertion mode to
    -            "in body" and abort these steps. */
    -            } elseif($node->tagName === 'body') {
    -                $this->mode = self::IN_BODY;
    -                break;
    -
    -            /* 14. If node is a frameset element, then switch the insertion
    -            mode to "in frameset" and abort these steps. (fragment case) */
    -            } elseif($node->tagName === 'frameset') {
    -                $this->mode = self::IN_FRAMESET;
    -                break;
    -
    -            /* 15. If node is an html element, then: if the head element
    -            pointer is null, switch the insertion mode to "before head",
    -            otherwise, switch the insertion mode to "after head". In either
    -            case, abort these steps. (fragment case) */
    -            } elseif($node->tagName === 'html') {
    -                $this->mode = ($this->head_pointer === null)
    -                    ? self::BEFORE_HEAD
    -                    : self::AFTER_HEAD;
    -
    -                break;
    -
    -            /* 16. If last is true, then set the insertion mode to "in body"
    -            and    abort these steps. (fragment case) */
    -            } elseif($last) {
    -                $this->mode = self::IN_BODY;
    -                break;
    -            }
    -        }
    -    }
    -
    -    private function closeCell() {
    -        /* If the stack of open elements has a td or th element in table scope,
    -        then act as if an end tag token with that tag name had been seen. */
    -        foreach(array('td', 'th') as $cell) {
    -            if($this->elementInScope($cell, self::SCOPE_TABLE)) {
    -                $this->emitToken(array(
    -                    'name' => $cell,
    -                    'type' => HTML5_Tokenizer::ENDTAG
    -                ));
    -
    -                break;
    -            }
    -        }
    -    }
    -
    -    private function processWithRulesFor($token, $mode) {
    -        /* "using the rules for the m insertion mode", where m is one of these
    -         * modes, the user agent must use the rules described under the m
    -         * insertion mode's section, but must leave the insertion mode
    -         * unchanged unless the rules in m themselves switch the insertion mode
    -         * to a new value. */
    -        return $this->emitToken($token, $mode);
    -    }
    -
    -    private function insertCDATAElement($token) {
    -        $this->insertElement($token);
    -        $this->original_mode = $this->mode;
    -        $this->mode = self::IN_CDATA_RCDATA;
    -        $this->content_model = HTML5_Tokenizer::CDATA;
    -    }
    -
    -    private function insertRCDATAElement($token) {
    -        $this->insertElement($token);
    -        $this->original_mode = $this->mode;
    -        $this->mode = self::IN_CDATA_RCDATA;
    -        $this->content_model = HTML5_Tokenizer::RCDATA;
    -    }
    -
    -    private function getAttr($token, $key) {
    -        if (!isset($token['attr'])) return false;
    -        $ret = false;
    -        foreach ($token['attr'] as $keypair) {
    -            if ($keypair['name'] === $key) $ret = $keypair['value'];
    -        }
    -        return $ret;
    -    }
    -
    -    private function getCurrentTable() {
    -        /* The current table is the last table  element in the stack of open 
    -         * elements, if there is one. If there is no table element in the stack 
    -         * of open elements (fragment case), then the current table is the 
    -         * first element in the stack of open elements (the html element). */
    -        for ($i = count($this->stack) - 1; $i >= 0; $i--) {
    -            if ($this->stack[$i]->tagName === 'table') {
    -                return $this->stack[$i];
    -            }
    -        }
    -        return $this->stack[0];
    -    }
    -
    -    private function getFosterParent() {
    -        /* The foster parent element is the parent element of the last
    -        table element in the stack of open elements, if there is a
    -        table element and it has such a parent element. If there is no
    -        table element in the stack of open elements (innerHTML case),
    -        then the foster parent element is the first element in the
    -        stack of open elements (the html  element). Otherwise, if there
    -        is a table element in the stack of open elements, but the last
    -        table element in the stack of open elements has no parent, or
    -        its parent node is not an element, then the foster parent
    -        element is the element before the last table element in the
    -        stack of open elements. */
    -        for($n = count($this->stack) - 1; $n >= 0; $n--) {
    -            if($this->stack[$n]->tagName === 'table') {
    -                $table = $this->stack[$n];
    -                break;
    -            }
    -        }
    -
    -        if(isset($table) && $table->parentNode !== null) {
    -            return $table->parentNode;
    -
    -        } elseif(!isset($table)) {
    -            return $this->stack[0];
    -
    -        } elseif(isset($table) && ($table->parentNode === null ||
    -        $table->parentNode->nodeType !== XML_ELEMENT_NODE)) {
    -            return $this->stack[$n - 1];
    -        }
    -    }
    -
    -    public function fosterParent($node) {
    -        $foster_parent = $this->getFosterParent();
    -        $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
    -        /* When a node node is to be foster parented, the node node must be
    -         * be inserted into the foster parent element. */
    -        /* If the foster parent element is the parent element of the last table 
    -         * element in the stack of open elements, then node must be inserted 
    -         * immediately before the last table element in the stack of open 
    -         * elements in the foster parent element; otherwise, node must be 
    -         * appended to the foster parent element. */
    -        if ($table->tagName === 'table' && $table->parentNode->isSameNode($foster_parent)) {
    -            $foster_parent->insertBefore($node, $table);
    -        } else {
    -            $foster_parent->appendChild($node);
    -        }
    -    }
    -
    -    /**
    -     * For debugging, prints the stack
    -     */
    -    private function printStack() {
    -        $names = array();
    -        foreach ($this->stack as $i => $element) {
    -            $names[] = $element->tagName;
    -        }
    -        echo "  -> stack [" . implode(', ', $names) . "]\n";
    -    }
    -
    -    /**
    -     * For debugging, prints active formatting elements
    -     */
    -    private function printActiveFormattingElements() {
    -        if (!$this->a_formatting) return;
    -        $names = array();
    -        foreach ($this->a_formatting as $node) {
    -            if ($node === self::MARKER) $names[] = 'MARKER';
    -            else $names[] = $node->tagName;
    -        }
    -        echo "  -> active formatting [" . implode(', ', $names) . "]\n";
    -    }
    -
    -    public function currentTableIsTainted() {
    -        return !empty($this->getCurrentTable()->tainted);
    -    }
    -
    -    /**
    -     * Sets up the tree constructor for building a fragment.
    -     */
    -    public function setupContext($context = null) {
    -        $this->fragment = true;
    -        if ($context) {
    -            $context = $this->dom->createElementNS(self::NS_HTML, $context);
    -            /* 4.1. Set the HTML parser's tokenization  stage's content model
    -             * flag according to the context element, as follows: */
    -            switch ($context->tagName) {
    -            case 'title': case 'textarea':
    -                $this->content_model = HTML5_Tokenizer::RCDATA;
    -                break;
    -            case 'style': case 'script': case 'xmp': case 'iframe':
    -            case 'noembed': case 'noframes':
    -                $this->content_model = HTML5_Tokenizer::CDATA;
    -                break;
    -            case 'noscript':
    -                // XSCRIPT: assuming scripting is enabled
    -                $this->content_model = HTML5_Tokenizer::CDATA;
    -                break;
    -            case 'plaintext':
    -                $this->content_model = HTML5_Tokenizer::PLAINTEXT;
    -                break;
    -            }
    -            /* 4.2. Let root be a new html element with no attributes. */
    -            $root = $this->dom->createElementNS(self::NS_HTML, 'html');
    -            $this->root = $root;
    -            /* 4.3 Append the element root to the Document node created above. */
    -            $this->dom->appendChild($root);
    -            /* 4.4 Set up the parser's stack of open elements so that it 
    -             * contains just the single element root. */
    -            $this->stack = array($root);
    -            /* 4.5 Reset the parser's insertion mode appropriately. */
    -            $this->resetInsertionMode($context);
    -            /* 4.6 Set the parser's form element pointer  to the nearest node 
    -             * to the context element that is a form element (going straight up 
    -             * the ancestor chain, and including the element itself, if it is a 
    -             * form element), or, if there is no such form element, to null. */
    -            $node = $context;
    -            do {
    -                if ($node->tagName === 'form') {
    -                    $this->form_pointer = $node;
    -                    break;
    -                }
    -            } while ($node = $node->parentNode);
    -        }
    -    }
    -
    -    public function adjustMathMLAttributes($token) {
    -        foreach ($token['attr'] as &$kp) {
    -            if ($kp['name'] === 'definitionurl') {
    -                $kp['name'] = 'definitionURL';
    -            }
    -        }
    -        return $token;
    -    }
    -
    -    public function adjustSVGAttributes($token) {
    -        static $lookup = array(
    -            'attributename' => 'attributeName',
    -            'attributetype' => 'attributeType',
    -            'basefrequency' => 'baseFrequency',
    -            'baseprofile' => 'baseProfile',
    -            'calcmode' => 'calcMode',
    -            'clippathunits' => 'clipPathUnits',
    -            'contentscripttype' => 'contentScriptType',
    -            'contentstyletype' => 'contentStyleType',
    -            'diffuseconstant' => 'diffuseConstant',
    -            'edgemode' => 'edgeMode',
    -            'externalresourcesrequired' => 'externalResourcesRequired',
    -            'filterres' => 'filterRes',
    -            'filterunits' => 'filterUnits',
    -            'glyphref' => 'glyphRef',
    -            'gradienttransform' => 'gradientTransform',
    -            'gradientunits' => 'gradientUnits',
    -            'kernelmatrix' => 'kernelMatrix',
    -            'kernelunitlength' => 'kernelUnitLength',
    -            'keypoints' => 'keyPoints',
    -            'keysplines' => 'keySplines',
    -            'keytimes' => 'keyTimes',
    -            'lengthadjust' => 'lengthAdjust',
    -            'limitingconeangle' => 'limitingConeAngle',
    -            'markerheight' => 'markerHeight',
    -            'markerunits' => 'markerUnits',
    -            'markerwidth' => 'markerWidth',
    -            'maskcontentunits' => 'maskContentUnits',
    -            'maskunits' => 'maskUnits',
    -            'numoctaves' => 'numOctaves',
    -            'pathlength' => 'pathLength',
    -            'patterncontentunits' => 'patternContentUnits',
    -            'patterntransform' => 'patternTransform',
    -            'patternunits' => 'patternUnits',
    -            'pointsatx' => 'pointsAtX',
    -            'pointsaty' => 'pointsAtY',
    -            'pointsatz' => 'pointsAtZ',
    -            'preservealpha' => 'preserveAlpha',
    -            'preserveaspectratio' => 'preserveAspectRatio',
    -            'primitiveunits' => 'primitiveUnits',
    -            'refx' => 'refX',
    -            'refy' => 'refY',
    -            'repeatcount' => 'repeatCount',
    -            'repeatdur' => 'repeatDur',
    -            'requiredextensions' => 'requiredExtensions',
    -            'requiredfeatures' => 'requiredFeatures',
    -            'specularconstant' => 'specularConstant',
    -            'specularexponent' => 'specularExponent',
    -            'spreadmethod' => 'spreadMethod',
    -            'startoffset' => 'startOffset',
    -            'stddeviation' => 'stdDeviation',
    -            'stitchtiles' => 'stitchTiles',
    -            'surfacescale' => 'surfaceScale',
    -            'systemlanguage' => 'systemLanguage',
    -            'tablevalues' => 'tableValues',
    -            'targetx' => 'targetX',
    -            'targety' => 'targetY',
    -            'textlength' => 'textLength',
    -            'viewbox' => 'viewBox',
    -            'viewtarget' => 'viewTarget',
    -            'xchannelselector' => 'xChannelSelector',
    -            'ychannelselector' => 'yChannelSelector',
    -            'zoomandpan' => 'zoomAndPan',
    -        );
    -        foreach ($token['attr'] as &$kp) {
    -            if (isset($lookup[$kp['name']])) {
    -                $kp['name'] = $lookup[$kp['name']];
    -            }
    -        }
    -        return $token;
    -    }
    -
    -    public function adjustForeignAttributes($token) {
    -        static $lookup = array(
    -            'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
    -            'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
    -            'xlink:href' => array('xlink', 'href', self::NS_XLINK),
    -            'xlink:role' => array('xlink', 'role', self::NS_XLINK),
    -            'xlink:show' => array('xlink', 'show', self::NS_XLINK),
    -            'xlink:title' => array('xlink', 'title', self::NS_XLINK),
    -            'xlink:type' => array('xlink', 'type', self::NS_XLINK),
    -            'xml:base' => array('xml', 'base', self::NS_XML),
    -            'xml:lang' => array('xml', 'lang', self::NS_XML),
    -            'xml:space' => array('xml', 'space', self::NS_XML),
    -            'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
    -            'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
    -        );
    -        foreach ($token['attr'] as &$kp) {
    -            if (isset($lookup[$kp['name']])) {
    -                $kp['name'] = $lookup[$kp['name']];
    -            }
    -        }
    -        return $token;
    -    }
    -
    -    public function insertForeignElement($token, $namespaceURI) {
    -        $el = $this->dom->createElementNS($namespaceURI, $token['name']);
    -        if (!empty($token['attr'])) {
    -            foreach ($token['attr'] as $kp) {
    -                $attr = $kp['name'];
    -                if (is_array($attr)) {
    -                    $ns = $attr[2];
    -                    $attr = $attr[1];
    -                } else {
    -                    $ns = self::NS_HTML;
    -                }
    -                if (!$el->hasAttributeNS($ns, $attr)) {
    -                    // XSKETCHY: work around godawful libxml bug
    -                    if ($ns === self::NS_XLINK) {
    -                        $el->setAttribute('xlink:'.$attr, $kp['value']);
    -                    } elseif ($ns === self::NS_HTML) {
    -                        // Another godawful libxml bug
    -                        $el->setAttribute($attr, $kp['value']);
    -                    } else {
    -                        $el->setAttributeNS($ns, $attr, $kp['value']);
    -                    }
    -                }
    -            }
    -        }
    -        $this->appendToRealParent($el);
    -        $this->stack[] = $el;
    -        // XERROR: see below
    -        /* If the newly created element has an xmlns attribute in the XMLNS 
    -         * namespace  whose value is not exactly the same as the element's 
    -         * namespace, that is a parse error. Similarly, if the newly created 
    -         * element has an xmlns:xlink attribute in the XMLNS namespace whose 
    -         * value is not the XLink Namespace, that is a parse error. */
    -    }
    -
    -    public function save() {
    -        $this->dom->normalize();
    -        if (!$this->fragment) {
    -            return $this->dom;
    -        } else {
    -            if ($this->root) {
    -                return $this->root->childNodes;
    -            } else {
    -                return $this->dom->childNodes;
    -            }
    -        }
    -    }
    -}
    -
    diff --git a/libraries/html5/named-character-references.ser b/libraries/html5/named-character-references.ser
    deleted file mode 100644
    index e3ae050..0000000
    --- a/libraries/html5/named-character-references.ser
    +++ /dev/null
    @@ -1 +0,0 @@
    -a:52:{s:1:"A";a:16:{s:1:"E";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:198;}s:9:"codepoint";i:198;}}}}s:1:"M";a:1:{s:1:"P";a:2:{s:1:";";a:1:{s:9:"codepoint";i:38;}s:9:"codepoint";i:38;}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:193;}s:9:"codepoint";i:193;}}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:258;}}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:194;}s:9:"codepoint";i:194;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1040;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120068;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:192;}s:9:"codepoint";i:192;}}}}}s:1:"l";a:1:{s:1:"p";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:913;}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:256;}}}}}s:1:"n";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10835;}}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:260;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120120;}}}}s:1:"p";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"y";a:1:{s:1:"F";a:1:{s:1:"u";a:1:{s:1:"n";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8289;}}}}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:197;}s:9:"codepoint";i:197;}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119964;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8788;}}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:195;}s:9:"codepoint";i:195;}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:196;}s:9:"codepoint";i:196;}}}}s:1:"B";a:8:{s:1:"a";a:2:{s:1:"c";a:1:{s:1:"k";a:1:{
s:1:"s";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8726;}}}}}}}}s:1:"r";a:2:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10983;}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8966;}}}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1041;}}}s:1:"e";a:3:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8757;}}}}}}s:1:"r";a:1:{s:1:"n";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8492;}}}}}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:914;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120069;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120121;}}}}s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:728;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8492;}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"p";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8782;}}}}}}}s:1:"C";a:14:{s:1:"H";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1063;}}}}s:1:"O";a:1:{s:1:"P";a:1:{s:1:"Y";a:2:{s:1:";";a:1:{s:9:"codepoint";i:169;}s:9:"codepoint";i:169;}}}s:1:"a";a:3:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:262;}}}}}s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8914;}s:1:"i";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"D";a:1:{s:1:"i";a:1:{s:1:"f";a:1:{s:1:"f";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8517;}}}}}}}}}}}}}}}}}}}s:1:"y";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"y";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8493;}}}}}}}s:1:"c";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:268;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i
:199;}s:9:"codepoint";i:199;}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:264;}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8752;}}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:266;}}}}s:1:"e";a:2:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:184;}}}}}}s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:183;}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8493;}}}s:1:"h";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:935;}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"l";a:1:{s:1:"e";a:4:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8857;}}}}s:1:"M";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8854;}}}}}}s:1:"P";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8853;}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8855;}}}}}}}}}}}s:1:"l";a:1:{s:1:"o";a:2:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"w";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"C";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"I";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8754;}}}}}}}}}}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"C";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"l";a:1:{s:1:"y";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"Q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8221;}}}}}}}}}}}}s:1:"Q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8217;}}}}}}}}}}}}}}}s:1:"o";a:4:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8759;}s:1:"e";a:1:{s:1:";";
a:1:{s:9:"codepoint";i:10868;}}}}}s:1:"n";a:3:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8801;}}}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8751;}}}}s:1:"t";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"I";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8750;}}}}}}}}}}}}}}s:1:"p";a:2:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8450;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"d";a:1:{s:1:"u";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8720;}}}}}}}}s:1:"u";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"C";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"w";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"C";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"I";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8755;}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10799;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119966;}}}}s:1:"u";a:1:{s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8915;}s:1:"C";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8781;}}}}}}}s:1:"D";a:11:{s:1:"D";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8517;}s:1:"o";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"h";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10513;}}}}}}}}s:1:"J";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1026;}}}}s:1:"S";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1029;}}}}s:1:"Z";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1039;}}}}s:1:"a";a:3:{s:1:"g";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8225;}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8609;}}}s:
1:"s";a:1:{s:1:"h";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10980;}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:270;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1044;}}}s:1:"e";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8711;}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:916;}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120071;}}}s:1:"i";a:2:{s:1:"a";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:4:{s:1:"A";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:180;}}}}}}s:1:"D";a:1:{s:1:"o";a:2:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:729;}}s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"A";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:733;}}}}}}}}}}}}s:1:"G";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:96;}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:732;}}}}}}}}}}}}}}s:1:"m";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8900;}}}}}}s:1:"f";a:1:{s:1:"f";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8518;}}}}}}}}}}}}}s:1:"o";a:4:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120123;}}}s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:168;}s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8412;}}}}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8784;}}}}}}}s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:6:{s:1:"C";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"I";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepo
int";i:8751;}}}}}}}}}}}}}}}}s:1:"D";a:1:{s:1:"o";a:2:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:168;}}s:1:"w";a:1:{s:1:"n";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8659;}}}}}}}}}}s:1:"L";a:2:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:3:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8656;}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8660;}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10980;}}}}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"g";a:2:{s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10232;}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10234;}}}}}}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10233;}}}}}}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8658;}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8872;}}}}}}}}}s:1:"U";a:1:{s:1:"p";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8657;}}}}}}s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8661;}}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8741;}}}}}}}}}}}}}}}}s:1:"w";a:1:{s:1:"n";a:6:{s:
1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8595;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10515;}}}}s:1:"U";a:1:{s:1:"p";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8693;}}}}}}}}}}}}}s:1:"B";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:785;}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:3:{s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10576;}}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10590;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8637;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10582;}}}}}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10591;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8641;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10583;}}}}}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8868;}s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8615;}}}}}}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8659;}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119967;}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:272;}}}}}}}s:1:"E";a:16:{s:1:"N";a:1:{s:1:"G";a:1:{s:1:";";a:1:{s:9
:"codepoint";i:330;}}}s:1:"T";a:1:{s:1:"H";a:2:{s:1:";";a:1:{s:9:"codepoint";i:208;}s:9:"codepoint";i:208;}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:201;}s:9:"codepoint";i:201;}}}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:282;}}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:202;}s:9:"codepoint";i:202;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1069;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:278;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120072;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:200;}s:9:"codepoint";i:200;}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8712;}}}}}}}s:1:"m";a:2:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:274;}}}}s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:2:{s:1:"S";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"S";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9723;}}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"y";a:1:{s:1:"S";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"S";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9643;}}}}}}}}}}}}}}}}}}}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:280;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120124;}}}}s:1:"p";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:917;}}}}}}}s:1:"q";a:1:{s:1:"u";a:2:{s:1:"a";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10869;}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8770;}}}}}}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"b";a:1:{s:1:"r";a:1
:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8652;}}}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8496;}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10867;}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:919;}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:203;}s:9:"codepoint";i:203;}}}s:1:"x";a:2:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8707;}}}}}s:1:"p";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8519;}}}}}}}}}}}}}s:1:"F";a:5:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1060;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120073;}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"d";a:2:{s:1:"S";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"S";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9724;}}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"y";a:1:{s:1:"S";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"S";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9642;}}}}}}}}}}}}}}}}}}}}}s:1:"o";a:3:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120125;}}}s:1:"r";a:1:{s:1:"A";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8704;}}}}}s:1:"u";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8497;}}}}}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8497;}}}}}s:1:"G";a:12:{s:1:"J";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1027;}}}}s:1:"T";a:2:{s:1:";";a:1:{s:9:"codepoint";i:62;}s:9:"codepoint";i:62;}s:1:"a";a:1:{s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:2:{s:1:";";a:1:{s:9:"codepoint";i:915;}s:1:"d";a:1
:{s:1:";";a:1:{s:9:"codepoint";i:988;}}}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:286;}}}}}}s:1:"c";a:3:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:290;}}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:284;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1043;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:288;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120074;}}}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8921;}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120126;}}}}s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:6:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8805;}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8923;}}}}}}}}}}s:1:"F";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8807;}}}}}}}}}}s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10914;}}}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8823;}}}}}s:1:"S";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10878;}}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8819;}}}}}}}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119970;}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8811;}}}s:1:"H";a:8:{s:1:"A";a:1:{s:1:"R";a:1:{s:1:"D";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1066;}}}}}}s:1:"a";a:2:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:711;}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"cod
epoint";i:94;}}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:292;}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8460;}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"b";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8459;}}}}}}}}}}}}s:1:"o";a:2:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8461;}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"z";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"L";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9472;}}}}}}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8459;}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:294;}}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"p";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"H";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8782;}}}}}}}}}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8783;}}}}}}}}}}s:1:"I";a:14:{s:1:"E";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1045;}}}}s:1:"J";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:306;}}}}}s:1:"O";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1025;}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:205;}s:9:"codepoint";i:205;}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:206;}s:9:"codepoint";i:206;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1048;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:304;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8465;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:204;}s:9:"codepoint";i:204;}}}}}s:1:"m";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8465;}s:1:
"a";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:298;}}}s:1:"g";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"y";a:1:{s:1:"I";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8520;}}}}}}}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8658;}}}}}}}s:1:"n";a:2:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8748;}s:1:"e";a:2:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8747;}}}}}s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8898;}}}}}}}}}}}s:1:"v";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:"C";a:1:{s:1:"o";a:1:{s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8291;}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8290;}}}}}}}}}}}}}}s:1:"o";a:3:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:302;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120128;}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:921;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8464;}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:296;}}}}}}s:1:"u";a:2:{s:1:"k";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1030;}}}}s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:207;}s:9:"codepoint";i:207;}}}}s:1:"J";a:5:{s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:308;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1049;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120077;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120129;}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119973;}}}s:1:"e";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoin
t";i:1032;}}}}}}s:1:"u";a:1:{s:1:"k";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1028;}}}}}}s:1:"K";a:7:{s:1:"H";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1061;}}}}s:1:"J";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1036;}}}}s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:922;}}}}}s:1:"c";a:2:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:310;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1050;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120078;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120130;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119974;}}}}}s:1:"L";a:11:{s:1:"J";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1033;}}}}s:1:"T";a:2:{s:1:";";a:1:{s:9:"codepoint";i:60;}s:9:"codepoint";i:60;}s:1:"a";a:5:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:313;}}}}}s:1:"m";a:1:{s:1:"b";a:1:{s:1:"d";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:923;}}}}}s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10218;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8466;}}}}}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8606;}}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:317;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:315;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1051;}}}s:1:"e";a:2:{s:1:"f";a:1:{s:1:"t";a:10:{s:1:"A";a:2:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10216;}}}}}}}}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8592;}s:1:"B";a:1:{s:1
:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8676;}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8646;}}}}}}}}}}}}}}}}s:1:"C";a:1:{s:1:"e";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8968;}}}}}}}}s:1:"D";a:1:{s:1:"o";a:2:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10214;}}}}}}}}}}}}s:1:"w";a:1:{s:1:"n";a:2:{s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10593;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8643;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10585;}}}}}}}}}}}}}}s:1:"F";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8970;}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8596;}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10574;}}}}}}}}}}}}s:1:"T";a:2:{s:1:"e";a:1:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8867;}s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8612;}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10586;}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8882;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10703;}}}}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"cod
epoint";i:8884;}}}}}}}}}}}}}}s:1:"U";a:1:{s:1:"p";a:3:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10577;}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10592;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8639;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10584;}}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8636;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10578;}}}}}}}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8656;}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8660;}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"s";a:6:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8922;}}}}}}}}}}}}}s:1:"F";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8806;}}}}}}}}}}s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8822;}}}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10913;}}}}}s:1:"S";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10877;}}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8818;}}}}}}}}}s:
1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120079;}}}s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8920;}s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8666;}}}}}}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:319;}}}}}}s:1:"o";a:3:{s:1:"n";a:1:{s:1:"g";a:4:{s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10229;}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10231;}}}}}}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10230;}}}}}}}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10232;}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10234;}}}}}}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10233;}}}}}}}}}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120131;}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"r";a:2:{s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8601;}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8600;}}}}}}}}}}}}}}}s:1:"s";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8466;}}}s:1:"h";a:1:{s:1:";"
;a:1:{s:9:"codepoint";i:8624;}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:321;}}}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8810;}}}s:1:"M";a:8:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10501;}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1052;}}}s:1:"e";a:2:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8287;}}}}}}}}}}s:1:"l";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8499;}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120080;}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:"P";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8723;}}}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120132;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8499;}}}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:924;}}}s:1:"N";a:9:{s:1:"J";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1034;}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:323;}}}}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:327;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:325;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1053;}}}s:1:"e";a:3:{s:1:"g";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"v";a:1:{s:1:"e";a:3:{s:1:"M";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8203;}}}}}}}}}}}}s:1:"T";a:1:{s:1:"h";a:1:{s:1:"i";a:2:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8203;}}}}}}}}s:1:"n";a:1:{s:1:"S";a:1:{s:1:"p";a:1:
{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8203;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"y";a:1:{s:1:"T";a:1:{s:1:"h";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8203;}}}}}}}}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"d";a:2:{s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8811;}}}}}}}}}}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8810;}}}}}}}}}}}}}s:1:"w";a:1:{s:1:"L";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10;}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120081;}}}s:1:"o";a:4:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8288;}}}}}}s:1:"n";a:1:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"k";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:160;}}}}}}}}}}}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8469;}}}s:1:"t";a:11:{s:1:";";a:1:{s:9:"codepoint";i:10988;}s:1:"C";a:2:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8802;}}}}}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:"C";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8813;}}}}}}}s:1:"D";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8742;}}}}}}}}}}}}}}}}}}s:1:"E";a:3:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"m";a:1:{s:1:"e";a:1:
{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8713;}}}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8800;}}}}}s:1:"x";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8708;}}}}}}}s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8815;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8817;}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8825;}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8821;}}}}}}}}}}}}}s:1:"L";a:1:{s:1:"e";a:2:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"T";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8938;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8940;}}}}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"s";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8814;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8816;}}}}}}s:1:"G";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8824;}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8820;}}}}}}}}}}s:1:"P";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8832;}s:1:"S";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8928;}}}}}}}}}}}}}}}}}}}s:1:"R";a:2:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"E";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8716;}}}}}}}}}}}}}}s:1
:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"T";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8939;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8941;}}}}}}}}}}}}}}}}}}}s:1:"S";a:2:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"S";a:1:{s:1:"u";a:2:{s:1:"b";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8930;}}}}}}}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8931;}}}}}}}}}}}}}}}}}}}s:1:"u";a:3:{s:1:"b";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8840;}}}}}}}}}}s:1:"c";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8833;}s:1:"S";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8929;}}}}}}}}}}}}}}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8841;}}}}}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8769;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8772;}}}}}}s:1:"F";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8775;}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8777;}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"c";a
:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8740;}}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119977;}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:209;}s:9:"codepoint";i:209;}}}}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:925;}}}s:1:"O";a:14:{s:1:"E";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:338;}}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:211;}s:9:"codepoint";i:211;}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:212;}s:9:"codepoint";i:212;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1054;}}}s:1:"d";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:336;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120082;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:210;}s:9:"codepoint";i:210;}}}}}s:1:"m";a:3:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:332;}}}}s:1:"e";a:1:{s:1:"g";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:937;}}}}s:1:"i";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:927;}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120134;}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"C";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"l";a:1:{s:1:"y";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"Q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8220;}}}}}}}}}}}}s:1:"Q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8216;}}}}}}}}}}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10836;}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119978;}}}s:1:"l";a:1:{s:1:"a";a
:1:{s:1:"s";a:1:{s:1:"h";a:2:{s:1:";";a:1:{s:9:"codepoint";i:216;}s:9:"codepoint";i:216;}}}}}s:1:"t";a:1:{s:1:"i";a:2:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:213;}s:9:"codepoint";i:213;}}}s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10807;}}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:214;}s:9:"codepoint";i:214;}}}s:1:"v";a:1:{s:1:"e";a:1:{s:1:"r";a:2:{s:1:"B";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:175;}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9182;}}s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9140;}}}}}}}}s:1:"P";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9180;}}}}}}}}}}}}}}}}s:1:"P";a:9:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8706;}}}}}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1055;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120083;}}}s:1:"h";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:934;}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:928;}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:"M";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:177;}}}}}}}}}s:1:"o";a:2:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8460;}}}}}}}}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8473;}}}}s:1:"r";a:4:{s:1:";";a:1:{s:9:"codepoint";i:10939;}s:1:"e";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:"s";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8826;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10927;}}}}}}s:1:"S";a:1:{s:1:"l";a:1:{s:1
:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8828;}}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8830;}}}}}}}}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8243;}}}}s:1:"o";a:2:{s:1:"d";a:1:{s:1:"u";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8719;}}}}}s:1:"p";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8759;}s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8733;}}}}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119979;}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:936;}}}}s:1:"Q";a:4:{s:1:"U";a:1:{s:1:"O";a:1:{s:1:"T";a:2:{s:1:";";a:1:{s:9:"codepoint";i:34;}s:9:"codepoint";i:34;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120084;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8474;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119980;}}}}}s:1:"R";a:12:{s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10512;}}}}}s:1:"E";a:1:{s:1:"G";a:2:{s:1:";";a:1:{s:9:"codepoint";i:174;}s:9:"codepoint";i:174;}}s:1:"a";a:3:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:340;}}}}}s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10219;}}}s:1:"r";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8608;}s:1:"t";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10518;}}}}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:344;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:342;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1056;}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8476;}s:1:"v";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:2:{s:1:"E";a:2:{s:1:"l"
;a:1:{s:1:"e";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8715;}}}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"b";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8651;}}}}}}}}}}}}s:1:"U";a:1:{s:1:"p";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"b";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10607;}}}}}}}}}}}}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8476;}}}s:1:"h";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:929;}}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:8:{s:1:"A";a:2:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10217;}}}}}}}}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8594;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8677;}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8644;}}}}}}}}}}}}}}}s:1:"C";a:1:{s:1:"e";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8969;}}}}}}}}s:1:"D";a:1:{s:1:"o";a:2:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"B";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10215;}}}}}}}}}}}}s:1:"w";a:1:{s:1:"n";a:2:{s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10589;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8642;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10581;}}}}}}}}}}}}}}s:1:"F";a:1:
{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8971;}}}}}}s:1:"T";a:2:{s:1:"e";a:1:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8866;}s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8614;}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10587;}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8883;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10704;}}}}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8885;}}}}}}}}}}}}}}s:1:"U";a:1:{s:1:"p";a:3:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10575;}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10588;}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8638;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10580;}}}}}}}}}}}}s:1:"V";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8640;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10579;}}}}}}}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8658;}}}}}}}}}}s:1:"o";a:2:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8477;}}}s:1:"u";a:1:{s:1:"n";a:1:{s:1:"d";a:1:{s:1:"I";a:1:{s:1:"m";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10608;}}}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"
;";a:1:{s:9:"codepoint";i:8667;}}}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8475;}}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8625;}}}s:1:"u";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"D";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"y";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10740;}}}}}}}}}}}}s:1:"S";a:13:{s:1:"H";a:2:{s:1:"C";a:1:{s:1:"H";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1065;}}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1064;}}}}s:1:"O";a:1:{s:1:"F";a:1:{s:1:"T";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1068;}}}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:346;}}}}}}s:1:"c";a:5:{s:1:";";a:1:{s:9:"codepoint";i:10940;}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:352;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:350;}}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:348;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1057;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120086;}}}s:1:"h";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"t";a:4:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8595;}}}}}}}}}}s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8592;}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8594;}}}}}}}}}}}s:1:"U";a:1:{s:1:"p";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8593;}}}}}}}}}}}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:931;}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l
";a:1:{s:1:"l";a:1:{s:1:"C";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8728;}}}}}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120138;}}}}s:1:"q";a:2:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8730;}}}s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:9633;}s:1:"I";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8851;}}}}}}}}}}}}}s:1:"S";a:1:{s:1:"u";a:2:{s:1:"b";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8847;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8849;}}}}}}}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8848;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8850;}}}}}}}}}}}}}}s:1:"U";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8852;}}}}}}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119982;}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8902;}}}}s:1:"u";a:4:{s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8912;}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8912;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8838;}}}}}}}}}}s:1:"c";a:2:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"s";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8827;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10928;}}}}}}s:1:"S";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8829;}}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1
:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8831;}}}}}}}}}}}s:1:"h";a:1:{s:1:"T";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8715;}}}}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8721;}}s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8913;}s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8835;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8839;}}}}}}}}}}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8913;}}}}}}}s:1:"T";a:11:{s:1:"H";a:1:{s:1:"O";a:1:{s:1:"R";a:1:{s:1:"N";a:2:{s:1:";";a:1:{s:9:"codepoint";i:222;}s:9:"codepoint";i:222;}}}}s:1:"R";a:1:{s:1:"A";a:1:{s:1:"D";a:1:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8482;}}}}}s:1:"S";a:2:{s:1:"H";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1035;}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1062;}}}}s:1:"a";a:2:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:932;}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:356;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:354;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1058;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120087;}}}s:1:"h";a:2:{s:1:"e";a:2:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8756;}}}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:920;}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8201;}}}}}}}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8764;}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8771;}}}}}}s:1:"F";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:
"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8773;}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8776;}}}}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120139;}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8411;}}}}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119983;}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:358;}}}}}}}s:1:"U";a:14:{s:1:"a";a:2:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:218;}s:9:"codepoint";i:218;}}}}s:1:"r";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8607;}s:1:"o";a:1:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10569;}}}}}}}}s:1:"b";a:1:{s:1:"r";a:2:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1038;}}}s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:364;}}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:219;}s:9:"codepoint";i:219;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1059;}}}s:1:"d";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:368;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120088;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:217;}s:9:"codepoint";i:217;}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:362;}}}}}s:1:"n";a:2:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:"r";a:2:{s:1:"B";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:818;}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"c";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9183;}}s:1:"k";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9141;}}}}}}}}s:1:"P";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e"
;a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9181;}}}}}}}}}}}}}}}s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8899;}s:1:"P";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8846;}}}}}}}}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:370;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120140;}}}}s:1:"p";a:8:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8593;}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10514;}}}}s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8645;}}}}}}}}}}}}}}}s:1:"D";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8597;}}}}}}}}}}s:1:"E";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"b";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10606;}}}}}}}}}}}}s:1:"T";a:1:{s:1:"e";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8869;}s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8613;}}}}}}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8657;}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8661;}}}}}}}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"r";a:2:{s:1:"L";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8598;}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s
:9:"codepoint";i:8599;}}}}}}}}}}}}}}s:1:"s";a:1:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:978;}s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:933;}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:366;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119984;}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:360;}}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:220;}s:9:"codepoint";i:220;}}}}s:1:"V";a:9:{s:1:"D";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8875;}}}}}s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10987;}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1042;}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8873;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10982;}}}}}}s:1:"e";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8897;}}s:1:"r";a:3:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8214;}}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8214;}s:1:"i";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:4:{s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8739;}}}}s:1:"L";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:124;}}}}}s:1:"S";a:1:{s:1:"e";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10072;}}}}}}}}}}s:1:"T";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8768;}}}}}}}}}}}s:1:"y";a:1:{s:1:"T";a:1:{s:1:"h";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8202;}}}}}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120089;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120141;}}}}s:1:"s";a:1:{
s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119985;}}}}s:1:"v";a:1:{s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8874;}}}}}}}s:1:"W";a:5:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:372;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8896;}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120090;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120142;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119986;}}}}}s:1:"X";a:4:{s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120091;}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:926;}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120143;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119987;}}}}}s:1:"Y";a:9:{s:1:"A";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1071;}}}}s:1:"I";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1031;}}}}s:1:"U";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1070;}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:221;}s:9:"codepoint";i:221;}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:374;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1067;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120092;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120144;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119988;}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:376;}}}}}s:1:"Z";a:8:{s:1:"H";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1046;}}}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:377;}}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1
:{s:9:"codepoint";i:381;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1047;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:379;}}}}s:1:"e";a:2:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"W";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:"S";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8203;}}}}}}}}}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:918;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8488;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8484;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119989;}}}}}s:1:"a";a:16:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:225;}s:9:"codepoint";i:225;}}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:259;}}}}}}s:1:"c";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8766;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8767;}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:226;}s:9:"codepoint";i:226;}}}s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:180;}s:9:"codepoint";i:180;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1072;}}}s:1:"e";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:230;}s:9:"codepoint";i:230;}}}}s:1:"f";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8289;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120094;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:224;}s:9:"codepoint";i:224;}}}}}s:1:"l";a:2:{s:1:"e";a:2:{s:1:"f";a:1:{s:1:"s";a:1:{s:1:"y";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8501;}}}}}s:1:"p";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8501;}}}}s:1:"p";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:945;}}}}}s:1:"m";a:2:{s:1:"a";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:257;}}}s:1:"l";a:1:{s:1:"g";a:1:{s:1:"
;";a:1:{s:9:"codepoint";i:10815;}}}}s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:38;}s:9:"codepoint";i:38;}}s:1:"n";a:2:{s:1:"d";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8743;}s:1:"a";a:1:{s:1:"n";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10837;}}}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10844;}}s:1:"s";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10840;}}}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10842;}}}s:1:"g";a:7:{s:1:";";a:1:{s:9:"codepoint";i:8736;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10660;}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8736;}}}s:1:"m";a:1:{s:1:"s";a:1:{s:1:"d";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8737;}s:1:"a";a:8:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10664;}}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10665;}}s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10666;}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10667;}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10668;}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10669;}}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10670;}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10671;}}}}}}s:1:"r";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8735;}s:1:"v";a:1:{s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8894;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10653;}}}}}}s:1:"s";a:2:{s:1:"p";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8738;}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8491;}}}s:1:"z";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9084;}}}}}}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:261;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120146;}}}}s:1:"p";a:7:{s:1:";";a:1:{s:9:"codepoint";i:8776;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10864;}}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10863;}}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8778;}}s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"code
point";i:8779;}}}s:1:"o";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:39;}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8776;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8778;}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:229;}s:9:"codepoint";i:229;}}}}s:1:"s";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119990;}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:42;}}s:1:"y";a:1:{s:1:"m";a:1:{s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8776;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8781;}}}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:227;}s:9:"codepoint";i:227;}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:228;}s:9:"codepoint";i:228;}}}s:1:"w";a:2:{s:1:"c";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8755;}}}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10769;}}}}}}s:1:"b";a:16:{s:1:"N";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10989;}}}}s:1:"a";a:2:{s:1:"c";a:1:{s:1:"k";a:4:{s:1:"c";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8780;}}}}}s:1:"e";a:1:{s:1:"p";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1014;}}}}}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8245;}}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8765;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8909;}}}}}}}}s:1:"r";a:2:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8893;}}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8965;}s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8965;}}}}}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9141;}s
:1:"t";a:1:{s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9142;}}}}}}}}s:1:"c";a:2:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8780;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1073;}}}s:1:"d";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8222;}}}}}s:1:"e";a:5:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"u";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8757;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8757;}}}}}}s:1:"m";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10672;}}}}}}s:1:"p";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1014;}}}}s:1:"r";a:1:{s:1:"n";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8492;}}}}}s:1:"t";a:3:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:946;}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8502;}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8812;}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120095;}}}s:1:"i";a:1:{s:1:"g";a:7:{s:1:"c";a:3:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8898;}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9711;}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8899;}}}}s:1:"o";a:3:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10752;}}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10753;}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10754;}}}}}}}s:1:"s";a:2:{s:1:"q";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10758;}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9733;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9661;}}}}}s:1:"u";a:1:{s:1:"p";a:1:{
s:1:";";a:1:{s:9:"codepoint";i:9651;}}}}}}}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10756;}}}}}}s:1:"v";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8897;}}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8896;}}}}}}}}s:1:"k";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10509;}}}}}}s:1:"l";a:3:{s:1:"a";a:2:{s:1:"c";a:1:{s:1:"k";a:3:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"z";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10731;}}}}}}}}s:1:"s";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9642;}}}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:9652;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9662;}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9666;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9656;}}}}}}}}}}}}}}}}s:1:"n";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9251;}}}}s:1:"k";a:2:{i:1;a:2:{i:2;a:1:{s:1:";";a:1:{s:9:"codepoint";i:9618;}}i:4;a:1:{s:1:";";a:1:{s:9:"codepoint";i:9617;}}}i:3;a:1:{i:4;a:1:{s:1:";";a:1:{s:9:"codepoint";i:9619;}}}}s:1:"o";a:1:{s:1:"c";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9608;}}}}}s:1:"n";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8976;}}}}s:1:"o";a:4:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120147;}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8869;}s:1:"t";a:1:{s:1:"o";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8869;}}}}}s:1:"w";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8904;}}}}}s:1:"x";a:12:{s:1:"D";a:4:{s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9559;}}s:1:"R";a:1:{s:
1:";";a:1:{s:9:"codepoint";i:9556;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9558;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9555;}}}s:1:"H";a:5:{s:1:";";a:1:{s:9:"codepoint";i:9552;}s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9574;}}s:1:"U";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9577;}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9572;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9575;}}}s:1:"U";a:4:{s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9565;}}s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9562;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9564;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9561;}}}s:1:"V";a:7:{s:1:";";a:1:{s:9:"codepoint";i:9553;}s:1:"H";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9580;}}s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9571;}}s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9568;}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9579;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9570;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9567;}}}s:1:"b";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10697;}}}}s:1:"d";a:4:{s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9557;}}s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9554;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9488;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9484;}}}s:1:"h";a:5:{s:1:";";a:1:{s:9:"codepoint";i:9472;}s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9573;}}s:1:"U";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9576;}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9516;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9524;}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8863;}}}}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8862;}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8864;}}}}}}s:1:"u";a:4:{s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9563;}}s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9560;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9496;}}s:1:"
r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9492;}}}s:1:"v";a:7:{s:1:";";a:1:{s:9:"codepoint";i:9474;}s:1:"H";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9578;}}s:1:"L";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9569;}}s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9566;}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9532;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9508;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9500;}}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8245;}}}}}}s:1:"r";a:2:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:728;}}}}s:1:"v";a:1:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:166;}s:9:"codepoint";i:166;}}}}}s:1:"s";a:4:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119991;}}}s:1:"e";a:1:{s:1:"m";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8271;}}}}s:1:"i";a:1:{s:1:"m";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8765;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8909;}}}}s:1:"o";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:92;}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10693;}}}}}s:1:"u";a:2:{s:1:"l";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8226;}s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8226;}}}}}s:1:"m";a:1:{s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8782;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10926;}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8783;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8783;}}}}}}}s:1:"c";a:15:{s:1:"a";a:3:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:263;}}}}}s:1:"p";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8745;}s:1:"a";a:1:{s:1:"n";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10820;}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10825;}}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10827;}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10823;}}}}s:1:"d";a:1:{s:1:"o";a:
1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10816;}}}}}s:1:"r";a:2:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8257;}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:711;}}}}}s:1:"c";a:4:{s:1:"a";a:2:{s:1:"p";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10829;}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:269;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:231;}s:9:"codepoint";i:231;}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:265;}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10828;}s:1:"s";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10832;}}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:267;}}}}s:1:"e";a:3:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:184;}s:9:"codepoint";i:184;}}}s:1:"m";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10674;}}}}}}s:1:"n";a:1:{s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:162;}s:9:"codepoint";i:162;s:1:"e";a:1:{s:1:"r";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:183;}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120096;}}}s:1:"h";a:3:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1095;}}}s:1:"e";a:1:{s:1:"c";a:1:{s:1:"k";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10003;}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10003;}}}}}}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:967;}}}s:1:"i";a:1:{s:1:"r";a:7:{s:1:";";a:1:{s:9:"codepoint";i:9675;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10691;}}s:1:"c";a:3:{s:1:";";a:1:{s:9:"codepoint";i:710;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8791;}}}s:1:"l";a:1:{s:1:"e";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8634;}}}}}s:1:"r";a:1:{
s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8635;}}}}}}}}}}}s:1:"d";a:5:{s:1:"R";a:1:{s:1:";";a:1:{s:9:"codepoint";i:174;}}s:1:"S";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9416;}}s:1:"a";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8859;}}}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8858;}}}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8861;}}}}}}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8791;}}s:1:"f";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10768;}}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10991;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10690;}}}}}}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9827;}s:1:"u";a:1:{s:1:"i";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9827;}}}}}}}}s:1:"o";a:4:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:58;}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8788;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8788;}}}}}}s:1:"m";a:2:{s:1:"m";a:1:{s:1:"a";a:2:{s:1:";";a:1:{s:9:"codepoint";i:44;}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64;}}}}s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8705;}s:1:"f";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8728;}}}s:1:"l";a:1:{s:1:"e";a:2:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8705;}}}}}s:1:"x";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8450;}}}}}}}}s:1:"n";a:2:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8773;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10861;}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8750;}}}}}s:1:"p";a:3:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120148;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8720;}}}}s:1:"y";
a:3:{s:1:";";a:1:{s:9:"codepoint";i:169;}s:9:"codepoint";i:169;s:1:"s";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8471;}}}}}}s:1:"r";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8629;}}}}s:1:"o";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10007;}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119992;}}}s:1:"u";a:2:{s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10959;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10961;}}}s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10960;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10962;}}}}}s:1:"t";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8943;}}}}}s:1:"u";a:7:{s:1:"d";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:2:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10552;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10549;}}}}}}s:1:"e";a:2:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8926;}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8927;}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8630;}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10557;}}}}}}s:1:"p";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8746;}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10824;}}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10822;}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10826;}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8845;}}}}s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10821;}}}}s:1:"r";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8631;}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10556;}}}}}s:1:"l";a:1:{s:1:"y";a:3:{s:1:"e";a:1:{s:1:"q";a:2:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8926;}}}}}s:1:"s";a:1:{s:1:"u";a:1:{s:1:"c";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8927;}}}}}}}s:1:
"v";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8910;}}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8911;}}}}}}}}s:1:"r";a:1:{s:1:"e";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:164;}s:9:"codepoint";i:164;}}}s:1:"v";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8630;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8631;}}}}}}}}}}}}}}s:1:"v";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8910;}}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8911;}}}}}s:1:"w";a:2:{s:1:"c";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8754;}}}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8753;}}}}}s:1:"y";a:1:{s:1:"l";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9005;}}}}}}}s:1:"d";a:19:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8659;}}}}s:1:"H";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10597;}}}}s:1:"a";a:4:{s:1:"g";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8224;}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8504;}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8595;}}}s:1:"s";a:1:{s:1:"h";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8208;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8867;}}}}}s:1:"b";a:2:{s:1:"k";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10511;}}}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:733;}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:271;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1076;}}}s:1:"d";a:3:{s:1:";";a:1:
{s:9:"codepoint";i:8518;}s:1:"a";a:2:{s:1:"g";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8225;}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8650;}}}}s:1:"o";a:1:{s:1:"t";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10871;}}}}}}}s:1:"e";a:3:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:176;}s:9:"codepoint";i:176;}s:1:"l";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:948;}}}}s:1:"m";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10673;}}}}}}}s:1:"f";a:2:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10623;}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120097;}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8643;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8642;}}}}}s:1:"i";a:5:{s:1:"a";a:1:{s:1:"m";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8900;}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"d";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8900;}s:1:"s";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9830;}}}}}}}}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9830;}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:168;}}s:1:"g";a:1:{s:1:"a";a:1:{s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:989;}}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8946;}}}}s:1:"v";a:3:{s:1:";";a:1:{s:9:"codepoint";i:247;}s:1:"i";a:1:{s:1:"d";a:1:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:247;}s:9:"codepoint";i:247;s:1:"o";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8903;}}}}}}}}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8903;}}}}}}s:1:"j";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1106;}}}}s:1:"l";a:1:{s:1:"c";a:2:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8990;}}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p
";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8973;}}}}}}s:1:"o";a:5:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:36;}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120149;}}}s:1:"t";a:5:{s:1:";";a:1:{s:9:"codepoint";i:729;}s:1:"e";a:1:{s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8784;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8785;}}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8760;}}}}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8724;}}}}}s:1:"s";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8865;}}}}}}}}s:1:"u";a:1:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8966;}}}}}}}}}}}}}s:1:"w";a:1:{s:1:"n";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8595;}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8650;}}}}}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8643;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8642;}}}}}}}}}}}}}}}}s:1:"r";a:2:{s:1:"b";a:1:{s:1:"k";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10512;}}}}}}}s:1:"c";a:2:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8991;}}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8972;}}}}}}s:1:"s";a:3:{s:1:"c";a:2:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119993;}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1109;}}}s:1:"o";a:
1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10742;}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:273;}}}}}}s:1:"t";a:2:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8945;}}}}s:1:"r";a:1:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9663;}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9662;}}}}}s:1:"u";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8693;}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10607;}}}}}s:1:"w";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10662;}}}}}}}s:1:"z";a:2:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1119;}}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10239;}}}}}}}}}s:1:"e";a:18:{s:1:"D";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10871;}}}}s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8785;}}}}s:1:"a";a:2:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:233;}s:9:"codepoint";i:233;}}}}s:1:"s";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10862;}}}}}}s:1:"c";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:283;}}}}}s:1:"i";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8790;}s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:234;}s:9:"codepoint";i:234;}}}s:1:"o";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8789;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1101;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:279;}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8519;}}s:1:"f";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8786;}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120098;}}}s:1:"g";a:3:{s:1:";";a:1:{s:9:"codepoint";i:10906;}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1
:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:232;}s:9:"codepoint";i:232;}}}}s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10902;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10904;}}}}}}s:1:"l";a:4:{s:1:";";a:1:{s:9:"codepoint";i:10905;}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9191;}}}}}}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8467;}}s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10901;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10903;}}}}}}s:1:"m";a:3:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:275;}}}}s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8709;}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8709;}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8709;}}}}}s:1:"s";a:1:{s:1:"p";a:2:{i:1;a:2:{i:3;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8196;}}i:4;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8197;}}}s:1:";";a:1:{s:9:"codepoint";i:8195;}}}}s:1:"n";a:2:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:331;}}s:1:"s";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8194;}}}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:281;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120150;}}}}s:1:"p";a:3:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8917;}s:1:"s";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10723;}}}}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10865;}}}}s:1:"s";a:1:{s:1:"i";a:3:{s:1:";";a:1:{s:9:"codepoint";i:1013;}s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:949;}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:949;}}}}}s:1:"q";a:4:{s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8790;}}}}s:1:"o";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8789;}}}}}}s:1:"s";a:2:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"co
depoint";i:8770;}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:2:{s:1:"g";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10902;}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10901;}}}}}}}}}}s:1:"u";a:3:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:61;}}}}s:1:"e";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8799;}}}}s:1:"i";a:1:{s:1:"v";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8801;}s:1:"D";a:1:{s:1:"D";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10872;}}}}}}s:1:"v";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10725;}}}}}}}}s:1:"r";a:2:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8787;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10609;}}}}}s:1:"s";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8495;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8784;}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8770;}}}}s:1:"t";a:2:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:951;}}s:1:"h";a:2:{s:1:";";a:1:{s:9:"codepoint";i:240;}s:9:"codepoint";i:240;}}s:1:"u";a:2:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:235;}s:9:"codepoint";i:235;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8364;}}}}s:1:"x";a:3:{s:1:"c";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:33;}}}s:1:"i";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8707;}}}}s:1:"p";a:2:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8496;}}}}}}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8519;}}}}}}}}}}}}}s:1:"f";a:11:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"s";a:1:{s:
1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8786;}}}}}}}}}}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1092;}}}s:1:"e";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9792;}}}}}}s:1:"f";a:3:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64259;}}}}}s:1:"l";a:2:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64256;}}}s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64260;}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120099;}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64257;}}}}}s:1:"l";a:3:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9837;}}}s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:64258;}}}}s:1:"t";a:1:{s:1:"n";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9649;}}}}}s:1:"n";a:1:{s:1:"o";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:402;}}}}s:1:"o";a:2:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120151;}}}s:1:"r";a:2:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8704;}}}}s:1:"k";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8916;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10969;}}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10765;}}}}}}}}s:1:"r";a:2:{s:1:"a";a:2:{s:1:"c";a:6:{i:1;a:6:{i:2;a:2:{s:1:";";a:1:{s:9:"codepoint";i:189;}s:9:"codepoint";i:189;}i:3;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8531;}}i:4;a:2:{s:1:";";a:1:{s:9:"codepoint";i:188;}s:9:"codepoint";i:188;}i:5;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8533;}}i:6;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8537;}}i:8;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8539;}}}i:2;a:2:{i:3;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8532;}}i:5;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8534;}}}i:3;a:3:{i:4;a:2:{s:1:";";a:1:{s:9:"codepoint";i:190;}s:9:"codepoint";i:190;}i:5;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8535;}}i:8;a:1:{s
:1:";";a:1:{s:9:"codepoint";i:8540;}}}i:4;a:1:{i:5;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8536;}}}i:5;a:2:{i:6;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8538;}}i:8;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8541;}}}i:7;a:1:{i:8;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8542;}}}}s:1:"s";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8260;}}}}s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8994;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119995;}}}}}s:1:"g";a:16:{s:1:"E";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8807;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10892;}}}s:1:"a";a:3:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:501;}}}}}s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:2:{s:1:";";a:1:{s:9:"codepoint";i:947;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:989;}}}}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10886;}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:287;}}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:285;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1075;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:289;}}}}s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8805;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8923;}}s:1:"q";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8805;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8807;}}s:1:"s";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10878;}}}}}}}s:1:"s";a:4:{s:1:";";a:1:{s:9:"codepoint";i:10878;}s:1:"c";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10921;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10880;}s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10882;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10884;}}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10900;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120100;}}}s:1:"g";a:2:{s:1:";";a:1
:{s:9:"codepoint";i:8811;}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8921;}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8503;}}}}}s:1:"j";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1107;}}}}s:1:"l";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8823;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10898;}}s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10917;}}s:1:"j";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10916;}}}s:1:"n";a:4:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8809;}}s:1:"a";a:1:{s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10890;}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10890;}}}}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10888;}s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10888;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8809;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8935;}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120152;}}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:96;}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8458;}}}s:1:"i";a:1:{s:1:"m";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8819;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10894;}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10896;}}}}}s:1:"t";a:7:{s:1:";";a:1:{s:9:"codepoint";i:62;}s:9:"codepoint";i:62;s:1:"c";a:2:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10919;}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10874;}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8919;}}}}s:1:"l";a:1:{s:1:"P";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10645;}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10876;}}}}}}s:1:"r";a:5:{s:1:"a";a:2:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10886;}}}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"code
point";i:10616;}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8919;}}}}s:1:"e";a:1:{s:1:"q";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8923;}}}}}s:1:"q";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10892;}}}}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8823;}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8819;}}}}}}}s:1:"h";a:10:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8660;}}}}s:1:"a";a:4:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8202;}}}}}s:1:"l";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:189;}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8459;}}}}}s:1:"r";a:2:{s:1:"d";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1098;}}}}s:1:"r";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8596;}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10568;}}}}s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8621;}}}}}s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8463;}}}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:293;}}}}}s:1:"e";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9829;}s:1:"u";a:1:{s:1:"i";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9829;}}}}}}}}s:1:"l";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8230;}}}}}s:1:"r";a:1:{s:1:"c";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8889;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120101;}}}s:1:"k";a:1:{s:1:"s";a:2:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10533;}}}}}}s:1:"w";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10534;}}}}}}}}s:1:"o";a:5:
{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8703;}}}}s:1:"m";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8763;}}}}}s:1:"o";a:1:{s:1:"k";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8617;}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8618;}}}}}}}}}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120153;}}}s:1:"r";a:1:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8213;}}}}}}s:1:"s";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119997;}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8463;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:295;}}}}}}s:1:"y";a:2:{s:1:"b";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8259;}}}}}s:1:"p";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8208;}}}}}}}s:1:"i";a:15:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:237;}s:9:"codepoint";i:237;}}}}}s:1:"c";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8291;}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:238;}s:9:"codepoint";i:238;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1080;}}}s:1:"e";a:2:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1077;}}}s:1:"x";a:1:{s:1:"c";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:161;}s:9:"codepoint";i:161;}}}}s:1:"f";a:2:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8660;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120102;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:236;}s:9:"codepoint";i:236;}}}}}s:1:"i";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8520;}s:1:"i";a:2:
{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10764;}}}}s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8749;}}}}s:1:"n";a:1:{s:1:"f";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10716;}}}}}s:1:"o";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8489;}}}}}s:1:"j";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:307;}}}}}s:1:"m";a:3:{s:1:"a";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:299;}}}s:1:"g";a:3:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8465;}}s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8464;}}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8465;}}}}}}s:1:"t";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:305;}}}}s:1:"o";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8887;}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:437;}}}}}s:1:"n";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8712;}s:1:"c";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8453;}}}}}s:1:"f";a:1:{s:1:"i";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8734;}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10717;}}}}}}}s:1:"o";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:305;}}}}}s:1:"t";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8747;}s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8890;}}}}s:1:"e";a:2:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8484;}}}}}s:1:"r";a:1:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8890;}}}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10775;}}}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10812;}}}}}}}s:1:"o";a:4:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1105;}}}s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n
";a:1:{s:1:";";a:1:{s:9:"codepoint";i:303;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120154;}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:953;}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10812;}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:191;}s:9:"codepoint";i:191;}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119998;}}}s:1:"i";a:1:{s:1:"n";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8712;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8953;}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8949;}}}}s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8948;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8947;}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8712;}}}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8290;}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:297;}}}}}}s:1:"u";a:2:{s:1:"k";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1110;}}}}s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:239;}s:9:"codepoint";i:239;}}}}s:1:"j";a:6:{s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:309;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1081;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120103;}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:567;}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120155;}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:119999;}}}s:1:"e";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1112;}}}}}}s:1:"u";a:1:{s:1:"k";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1108;}}}}}}s:1:"k";a:8:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"a";a:2:{s:1:";";a:1:{s:9:"codepoint";i:954;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1008;}}}}}}s:1:"c";a:2:{s:1:"e";a:1:{s:
1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:311;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1082;}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120104;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:312;}}}}}}s:1:"h";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1093;}}}}s:1:"j";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1116;}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120156;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120000;}}}}}s:1:"l";a:22:{s:1:"A";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8666;}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8656;}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10523;}}}}}}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10510;}}}}}s:1:"E";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8806;}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10891;}}}s:1:"H";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10594;}}}}s:1:"a";a:9:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:314;}}}}}s:1:"e";a:1:{s:1:"m";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10676;}}}}}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8466;}}}}}s:1:"m";a:1:{s:1:"b";a:1:{s:1:"d";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:955;}}}}}s:1:"n";a:1:{s:1:"g";a:3:{s:1:";";a:1:{s:9:"codepoint";i:10216;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10641;}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10216;}}}}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10885;}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:171;}s:9:"codepoint";i:171;}}}s:1:"r";a:1:{s:1:"r";a:8:{s:1:";";a:1:{s:9:"codepoint";i:8592;}s:1:"b";a:2:{s:1:";";a:
1:{s:9:"codepoint";i:8676;}s:1:"f";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10527;}}}}s:1:"f";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10525;}}}s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8617;}}}s:1:"l";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8619;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10553;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10611;}}}}s:1:"t";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8610;}}}}}s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:10923;}s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10521;}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10925;}}}}s:1:"b";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10508;}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10098;}}}}s:1:"r";a:2:{s:1:"a";a:1:{s:1:"c";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:123;}}s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:91;}}}}s:1:"k";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10635;}}s:1:"s";a:1:{s:1:"l";a:2:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10639;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10637;}}}}}}}s:1:"c";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:318;}}}}}s:1:"e";a:2:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:316;}}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8968;}}}}s:1:"u";a:1:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:123;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1083;}}}s:1:"d";a:4:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10550;}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8220;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8222;}}}}}s:1:"r";a:2:{s:1:"d";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10599;}}}}}s:1:"u";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10571;}}}
}}}}s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8626;}}}}s:1:"e";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8804;}s:1:"f";a:1:{s:1:"t";a:5:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8592;}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8610;}}}}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8637;}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8636;}}}}}}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8647;}}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8596;}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8646;}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8651;}}}}}}}}}s:1:"s";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8621;}}}}}}}}}}}}}}}}s:1:"t";a:1:{s:1:"h";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8907;}}}}}}}}}}}}}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8922;}}s:1:"q";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8804;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8806;}}s:1:"s";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10877;}}}}}}}s:1:"s";a:5:{s:1:";";a:1:{s:9:"codepoint";i:10877;}s:1:"c";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10920;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10879;}s:1:"o";a:2:{s:1:";";a:1:{s:9:"co
depoint";i:10881;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10883;}}}}}}s:1:"g";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10899;}}}}s:1:"s";a:5:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10885;}}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8918;}}}}s:1:"e";a:1:{s:1:"q";a:2:{s:1:"g";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8922;}}}}s:1:"q";a:1:{s:1:"g";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10891;}}}}}}}s:1:"g";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8822;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8818;}}}}}}}s:1:"f";a:3:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10620;}}}}}s:1:"l";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8970;}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120105;}}}s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8822;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10897;}}}s:1:"h";a:2:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8637;}}s:1:"u";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8636;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10602;}}}}}s:1:"b";a:1:{s:1:"l";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9604;}}}}}s:1:"j";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1113;}}}}s:1:"l";a:5:{s:1:";";a:1:{s:9:"codepoint";i:8810;}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8647;}}}}s:1:"c";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8990;}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10603;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9722;}}}}}s:1:"m";a:2:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:320;}}}}}s:1:"o";a:1:{s:1:"u";a:1:{s:1:
"s";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9136;}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9136;}}}}}}}}}}s:1:"n";a:4:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8808;}}s:1:"a";a:1:{s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10889;}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10889;}}}}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10887;}s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10887;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8808;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8934;}}}}}s:1:"o";a:8:{s:1:"a";a:2:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10220;}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8701;}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10214;}}}}s:1:"n";a:1:{s:1:"g";a:3:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10229;}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10231;}}}}}}}}}}}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10236;}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10230;}}}}}}}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8619;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8620;}}}}}}}}}}}}}s:1:"p";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10629;}}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120157;}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";"
;a:1:{s:9:"codepoint";i:10797;}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10804;}}}}}}s:1:"w";a:2:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8727;}}}}s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:95;}}}}}s:1:"z";a:3:{s:1:";";a:1:{s:9:"codepoint";i:9674;}s:1:"e";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9674;}}}}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10731;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:40;}s:1:"l";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10643;}}}}}}s:1:"r";a:5:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8646;}}}}s:1:"c";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8991;}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8651;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10605;}}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8206;}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8895;}}}}}s:1:"s";a:6:{s:1:"a";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8249;}}}}}s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120001;}}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8624;}}s:1:"i";a:1:{s:1:"m";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8818;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10893;}}s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10895;}}}}s:1:"q";a:2:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:91;}}s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8216;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8218;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:322;}}}}}}s:1:"t";a:9:{s:1:";";a:1:{s:9:"codepoint";i:60;}s:9:"codepoint";i:60;s:1:"c";a:2:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10918;}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10873;}}}}s:1:"d
";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8918;}}}}s:1:"h";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8907;}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8905;}}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10614;}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10875;}}}}}}s:1:"r";a:2:{s:1:"P";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10646;}}}}s:1:"i";a:3:{s:1:";";a:1:{s:9:"codepoint";i:9667;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8884;}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9666;}}}}}s:1:"u";a:1:{s:1:"r";a:2:{s:1:"d";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10570;}}}}}}s:1:"u";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10598;}}}}}}}}s:1:"m";a:14:{s:1:"D";a:1:{s:1:"D";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8762;}}}}}s:1:"a";a:4:{s:1:"c";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:175;}s:9:"codepoint";i:175;}}s:1:"l";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9794;}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10016;}s:1:"e";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10016;}}}}}}s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8614;}s:1:"s";a:1:{s:1:"t";a:1:{s:1:"o";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8614;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8615;}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8612;}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8613;}}}}}}}s:1:"r";a:1:{s:1:"k";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9646;}}}}}}s:1:"c";a:2:{s:1:"o";a:1:{s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10793;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1084;}}}s:1:"d";a:1:{s:1:"a";a:
1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8212;}}}}}s:1:"e";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8737;}}}}}}}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120106;}}}s:1:"h";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8487;}}}s:1:"i";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:181;}s:9:"codepoint";i:181;}}}s:1:"d";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8739;}s:1:"a";a:1:{s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:42;}}}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10992;}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:183;}s:9:"codepoint";i:183;}}}}s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8722;}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8863;}}s:1:"d";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8760;}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10794;}}}}}}}s:1:"l";a:2:{s:1:"c";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10971;}}}s:1:"d";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8230;}}}}s:1:"n";a:1:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8723;}}}}}}s:1:"o";a:2:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8871;}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120158;}}}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8723;}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120002;}}}s:1:"t";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8766;}}}}}}s:1:"u";a:3:{s:1:";";a:1:{s:9:"codepoint";i:956;}s:1:"l";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8888;}}}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8888;}}}}}}s:1:"n";a:23:{s:1:"L";a:1:{s:1:"e";a
:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8653;}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8654;}}}}}}}}}}}}}}}s:1:"R";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8655;}}}}}}}}}}}s:1:"V";a:2:{s:1:"D";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8879;}}}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8878;}}}}}}s:1:"a";a:4:{s:1:"b";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8711;}}}}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:324;}}}}}s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8777;}s:1:"o";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:329;}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8777;}}}}}}s:1:"t";a:1:{s:1:"u";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9838;}s:1:"a";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9838;}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8469;}}}}}}}}s:1:"b";a:1:{s:1:"s";a:1:{s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:160;}s:9:"codepoint";i:160;}}}s:1:"c";a:5:{s:1:"a";a:2:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10819;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:328;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:326;}}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8775;}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10818;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1085;}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8211;}}}}}s:1:"e";a:6:{s:1:";";a:1:{s:9:"codepoint";i:8800;}s:1:"A";a:1:{s:1:"r";a:1:{s:
1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8663;}}}}s:1:"a";a:1:{s:1:"r";a:2:{s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10532;}}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8599;}s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8599;}}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8802;}}}}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10536;}}}}}s:1:"x";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8708;}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8708;}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120107;}}}s:1:"g";a:3:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8817;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8817;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8821;}}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8815;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8815;}}}}s:1:"h";a:3:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8654;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8622;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10994;}}}}}s:1:"i";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8715;}s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8956;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8954;}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8715;}}}s:1:"j";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1114;}}}}s:1:"l";a:6:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8653;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8602;}}}}s:1:"d";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8229;}}}s:1:"e";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8816;}s:1:"f";a:1:{s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8602;}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1
:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8622;}}}}}}}}}}}}}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8816;}}s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8814;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8820;}}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8814;}s:1:"r";a:1:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8938;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8940;}}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8740;}}}}s:1:"o";a:2:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120159;}}}s:1:"t";a:4:{s:1:";";a:1:{s:9:"codepoint";i:172;}s:9:"codepoint";i:172;s:1:"i";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8713;}s:1:"v";a:3:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8713;}}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8951;}}s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8950;}}}}}s:1:"n";a:1:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8716;}s:1:"v";a:3:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8716;}}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8958;}}s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8957;}}}}}}}s:1:"p";a:3:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8742;}s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8742;}}}}}}}}s:1:"o";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10772;}}}}}}s:1:"r";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8832;}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8928;}}}}s:1:"e";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8832;}}}}}s:1:"r";a:4:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8655;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8603;}}}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8603;}}}}}}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s
:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8939;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8941;}}}}}}s:1:"s";a:7:{s:1:"c";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8833;}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8929;}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120003;}}}s:1:"h";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"t";a:2:{s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8740;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8742;}}}}}}}}}}}}}s:1:"i";a:1:{s:1:"m";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8769;}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8772;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8772;}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8740;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8742;}}}}s:1:"q";a:1:{s:1:"s";a:1:{s:1:"u";a:2:{s:1:"b";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8930;}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8931;}}}}}}s:1:"u";a:3:{s:1:"b";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8836;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8840;}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8840;}}}}}}}s:1:"c";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8833;}}}s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8837;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8841;}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8841;}}}}}}}}}s:1:"t";a:4:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8825;}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:241;}s:9:"codepoint";i:241;}}}}s:1:"l";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8824;}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"cod
epoint";i:8938;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8940;}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8939;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8941;}}}}}}}}}}}}}}}}s:1:"u";a:2:{s:1:";";a:1:{s:9:"codepoint";i:957;}s:1:"m";a:3:{s:1:";";a:1:{s:9:"codepoint";i:35;}s:1:"e";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8470;}}}}s:1:"s";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8199;}}}}}s:1:"v";a:6:{s:1:"D";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8877;}}}}}s:1:"H";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10500;}}}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8876;}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"f";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10718;}}}}}}s:1:"l";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10498;}}}}}s:1:"r";a:1:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10499;}}}}}}s:1:"w";a:3:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8662;}}}}s:1:"a";a:1:{s:1:"r";a:2:{s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10531;}}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8598;}s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8598;}}}}}}s:1:"n";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10535;}}}}}}}s:1:"o";a:18:{s:1:"S";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9416;}}s:1:"a";a:2:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:243;}s:9:"codepoint";i:243;}}}}s:1:"s";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8859;}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8858;}s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:244;}s:9:"codepoint";i:244;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1086;}}}s:1:"d";a:5:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"
h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8861;}}}}s:1:"b";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:337;}}}}}s:1:"i";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10808;}}}s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8857;}}}s:1:"s";a:1:{s:1:"o";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10684;}}}}}}s:1:"e";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:339;}}}}}s:1:"f";a:2:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10687;}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120108;}}}s:1:"g";a:3:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:731;}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:242;}s:9:"codepoint";i:242;}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10689;}}}s:1:"h";a:2:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10677;}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8486;}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8750;}}}}s:1:"l";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8634;}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10686;}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"s";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10683;}}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8254;}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10688;}}}s:1:"m";a:3:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:333;}}}}s:1:"e";a:1:{s:1:"g";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:969;}}}}s:1:"i";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:959;}}}}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10678;}}s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8854;}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120160;}}}}s:1:"p";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:
1:";";a:1:{s:9:"codepoint";i:10679;}}}s:1:"e";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10681;}}}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8853;}}}}}s:1:"r";a:7:{s:1:";";a:1:{s:9:"codepoint";i:8744;}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8635;}}}}s:1:"d";a:4:{s:1:";";a:1:{s:9:"codepoint";i:10845;}s:1:"e";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8500;}s:1:"o";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8500;}}}}}s:1:"f";a:2:{s:1:";";a:1:{s:9:"codepoint";i:170;}s:9:"codepoint";i:170;}s:1:"m";a:2:{s:1:";";a:1:{s:9:"codepoint";i:186;}s:9:"codepoint";i:186;}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8886;}}}}}s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10838;}}}s:1:"s";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10839;}}}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10843;}}}s:1:"s";a:3:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8500;}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:2:{s:1:";";a:1:{s:9:"codepoint";i:248;}s:9:"codepoint";i:248;}}}}s:1:"o";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8856;}}}}s:1:"t";a:1:{s:1:"i";a:2:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:245;}s:9:"codepoint";i:245;}}}s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8855;}s:1:"a";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10806;}}}}}}}}s:1:"u";a:1:{s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:246;}s:9:"codepoint";i:246;}}}s:1:"v";a:1:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9021;}}}}}}s:1:"p";a:12:{s:1:"a";a:1:{s:1:"r";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8741;}s:1:"a";a:3:{s:1:";";a:1:{s:9:"codepoint";i:182;}s:9:"codepoint";i:182;s:1:"l";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8741;}}}}}}s:1:"s";a:2:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"cod
epoint";i:10995;}}}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:11005;}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8706;}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1087;}}}s:1:"e";a:1:{s:1:"r";a:5:{s:1:"c";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:37;}}}}s:1:"i";a:1:{s:1:"o";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:46;}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8240;}}}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8869;}}s:1:"t";a:1:{s:1:"e";a:1:{s:1:"n";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8241;}}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120109;}}}s:1:"h";a:3:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:966;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:966;}}}s:1:"m";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8499;}}}}}s:1:"o";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9742;}}}}}s:1:"i";a:3:{s:1:";";a:1:{s:9:"codepoint";i:960;}s:1:"t";a:1:{s:1:"c";a:1:{s:1:"h";a:1:{s:1:"f";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8916;}}}}}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:982;}}}s:1:"l";a:2:{s:1:"a";a:1:{s:1:"n";a:2:{s:1:"c";a:1:{s:1:"k";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8463;}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8462;}}}}s:1:"k";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8463;}}}}}s:1:"u";a:1:{s:1:"s";a:9:{s:1:";";a:1:{s:9:"codepoint";i:43;}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10787;}}}}}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8862;}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10786;}}}}s:1:"d";a:2:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8724;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10789;}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10866;}}s:1:"m";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:177;}s:9:"codepoint";i:177;}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:
9:"codepoint";i:10790;}}}}s:1:"t";a:1:{s:1:"w";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10791;}}}}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:177;}}s:1:"o";a:3:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10773;}}}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120161;}}}s:1:"u";a:1:{s:1:"n";a:1:{s:1:"d";a:2:{s:1:";";a:1:{s:9:"codepoint";i:163;}s:9:"codepoint";i:163;}}}}s:1:"r";a:10:{s:1:";";a:1:{s:9:"codepoint";i:8826;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10931;}}s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10935;}}}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8828;}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10927;}s:1:"c";a:6:{s:1:";";a:1:{s:9:"codepoint";i:8826;}s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10935;}}}}}}}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"l";a:1:{s:1:"y";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8828;}}}}}}}}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10927;}}}s:1:"n";a:3:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10937;}}}}}}}s:1:"e";a:1:{s:1:"q";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10933;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8936;}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8830;}}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8242;}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8473;}}}}}s:1:"n";a:3:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10933;}}s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10937;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8936;}}}}}s:1:"o";a:3:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8719;}}s:1:"f";a:3:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"co
depoint";i:9006;}}}}}s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8978;}}}}}s:1:"s";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8979;}}}}}}s:1:"p";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8733;}s:1:"t";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8733;}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8830;}}}}s:1:"u";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8880;}}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120005;}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:968;}}}s:1:"u";a:1:{s:1:"n";a:1:{s:1:"c";a:1:{s:1:"s";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8200;}}}}}}}s:1:"q";a:6:{s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120110;}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10764;}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120162;}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8279;}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120006;}}}}s:1:"u";a:3:{s:1:"a";a:1:{s:1:"t";a:2:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"n";a:1:{s:1:"i";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8461;}}}}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10774;}}}}}}s:1:"e";a:1:{s:1:"s";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:63;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8799;}}}}}}s:1:"o";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:34;}s:9:"codepoint";i:34;}}}}s:1:"r";a:21:{s:1:"A";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8667;}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8658;}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10524;}}}}}}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1
0511;}}}}}s:1:"H";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10596;}}}}s:1:"a";a:7:{s:1:"c";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10714;}}s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:341;}}}}}s:1:"d";a:1:{s:1:"i";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8730;}}}}s:1:"e";a:1:{s:1:"m";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"y";a:1:{s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10675;}}}}}}}s:1:"n";a:1:{s:1:"g";a:4:{s:1:";";a:1:{s:9:"codepoint";i:10217;}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10642;}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10661;}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10217;}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:187;}s:9:"codepoint";i:187;}}}s:1:"r";a:1:{s:1:"r";a:11:{s:1:";";a:1:{s:9:"codepoint";i:8594;}s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10613;}}}s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8677;}s:1:"f";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10528;}}}}s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10547;}}s:1:"f";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10526;}}}s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8618;}}}s:1:"l";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8620;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10565;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10612;}}}}s:1:"t";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8611;}}}s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8605;}}}}s:1:"t";a:2:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10522;}}}}s:1:"i";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8758;}s:1:"n";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8474;}}}}}}}}}s:1:"b";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10509;}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10099;}}}}s:1:"r";a:2:{s:1:"a";a:1:{s:1:"
c";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:125;}}s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:93;}}}}s:1:"k";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10636;}}s:1:"s";a:1:{s:1:"l";a:2:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10638;}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10640;}}}}}}}s:1:"c";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:345;}}}}}s:1:"e";a:2:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:343;}}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8969;}}}}s:1:"u";a:1:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:125;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1088;}}}s:1:"d";a:4:{s:1:"c";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10551;}}}s:1:"l";a:1:{s:1:"d";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10601;}}}}}}s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8221;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8221;}}}}}s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8627;}}}}s:1:"e";a:3:{s:1:"a";a:1:{s:1:"l";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8476;}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8475;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8476;}}}}}s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8477;}}}}s:1:"c";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9645;}}}s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:174;}s:9:"codepoint";i:174;}}s:1:"f";a:3:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10621;}}}}}s:1:"l";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8971;}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120111;}}}s:1:"h";a:2:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8641;}}s:1:"u";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8640;}s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10604;}}}}}s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";
i:961;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1009;}}}}s:1:"i";a:3:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:6:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8594;}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8611;}}}}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8641;}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8640;}}}}}}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8644;}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8652;}}}}}}}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8649;}}}}}}}}}}}}s:1:"s";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8605;}}}}}}}}}}}s:1:"t";a:1:{s:1:"h";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8908;}}}}}}}}}}}}}}s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:730;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8787;}}}}}}}}}}}}s:1:"l";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8644;}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8652;}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8207;}}}s:1:"m";a:1:{s:1:"o";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint"
;i:9137;}s:1:"a";a:1:{s:1:"c";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9137;}}}}}}}}}}s:1:"n";a:1:{s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10990;}}}}}s:1:"o";a:4:{s:1:"a";a:2:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10221;}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8702;}}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10215;}}}}s:1:"p";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10630;}}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120163;}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10798;}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10805;}}}}}}}s:1:"p";a:2:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:41;}s:1:"g";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10644;}}}}}s:1:"p";a:1:{s:1:"o";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10770;}}}}}}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8649;}}}}}s:1:"s";a:4:{s:1:"a";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8250;}}}}}s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120007;}}}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8625;}}s:1:"q";a:2:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:93;}}s:1:"u";a:1:{s:1:"o";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8217;}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8217;}}}}}}s:1:"t";a:3:{s:1:"h";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8908;}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8906;}}}}}s:1:"r";a:1:{s:1:"i";a:4:{s:1:";";a:1:{s:9:"codepoint";i:9657;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8885;}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9656;}}s:1:"l";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10702;}}}}}}}}s:1:"u";a:1:{s:1
:"l";a:1:{s:1:"u";a:1:{s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10600;}}}}}}}s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8478;}}}s:1:"s";a:19:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:347;}}}}}}s:1:"b";a:1:{s:1:"q";a:1:{s:1:"u";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8218;}}}}}s:1:"c";a:10:{s:1:";";a:1:{s:9:"codepoint";i:8827;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10932;}}s:1:"a";a:2:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10936;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:353;}}}}}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8829;}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10928;}s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:351;}}}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:349;}}}}s:1:"n";a:3:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10934;}}s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10938;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8937;}}}}}s:1:"p";a:1:{s:1:"o";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10771;}}}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8831;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1089;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8901;}s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8865;}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10854;}}}}}s:1:"e";a:7:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8664;}}}}s:1:"a";a:1:{s:1:"r";a:2:{s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10533;}}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8600;}s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8600;}}}}}}s:1:"c";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:167;}s:9:"codepoint";i:167;}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:59;}
}}s:1:"s";a:1:{s:1:"w";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10537;}}}}}s:1:"t";a:1:{s:1:"m";a:2:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8726;}}}}}s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8726;}}}}s:1:"x";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10038;}}}}s:1:"f";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:120112;}s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8994;}}}}}}s:1:"h";a:4:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9839;}}}}s:1:"c";a:2:{s:1:"h";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1097;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1096;}}}s:1:"o";a:1:{s:1:"r";a:1:{s:1:"t";a:2:{s:1:"m";a:1:{s:1:"i";a:1:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8739;}}}}s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8741;}}}}}}}}}}}}s:1:"y";a:2:{s:1:";";a:1:{s:9:"codepoint";i:173;}s:9:"codepoint";i:173;}}s:1:"i";a:2:{s:1:"g";a:1:{s:1:"m";a:1:{s:1:"a";a:3:{s:1:";";a:1:{s:9:"codepoint";i:963;}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:962;}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:962;}}}}}s:1:"m";a:8:{s:1:";";a:1:{s:9:"codepoint";i:8764;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10858;}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8771;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8771;}}}s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10910;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10912;}}}s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10909;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10911;}}}s:1:"n";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8774;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10788;}}}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10610;}}}}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:
1:";";a:1:{s:9:"codepoint";i:8592;}}}}}s:1:"m";a:4:{s:1:"a";a:2:{s:1:"l";a:1:{s:1:"l";a:1:{s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"m";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8726;}}}}}}}}}}}s:1:"s";a:1:{s:1:"h";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10803;}}}}}s:1:"e";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"s";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10724;}}}}}}}s:1:"i";a:2:{s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8739;}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8995;}}}}s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10922;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10924;}}}}s:1:"o";a:3:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1100;}}}}}s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:47;}s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10692;}s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9023;}}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120164;}}}}s:1:"p";a:1:{s:1:"a";a:2:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:"s";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9824;}s:1:"u";a:1:{s:1:"i";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9824;}}}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8741;}}}}s:1:"q";a:3:{s:1:"c";a:2:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8851;}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8852;}}}}s:1:"s";a:1:{s:1:"u";a:2:{s:1:"b";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8847;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8849;}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8847;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8849;}}}}}}}s:1:"p";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8848;}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8850;}}s:1:"s";a:1:{s:1:"e";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8848;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8850;}}}}}}}}}s:1:"u";a:3:{s:1:";";a:1:{s:9:"codepoint";i:9633;}s:1:"a";a:1:{s:1
:"r";a:2:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9633;}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9642;}}}}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9642;}}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8594;}}}}}s:1:"s";a:4:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120008;}}}s:1:"e";a:1:{s:1:"t";a:1:{s:1:"m";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8726;}}}}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8995;}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8902;}}}}}}s:1:"t";a:2:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9734;}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9733;}}}}s:1:"r";a:2:{s:1:"a";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:"e";a:1:{s:1:"p";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1013;}}}}}}}}s:1:"p";a:1:{s:1:"h";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:981;}}}}}}}}}s:1:"n";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:175;}}}}}s:1:"u";a:5:{s:1:"b";a:9:{s:1:";";a:1:{s:9:"codepoint";i:8834;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10949;}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10941;}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8838;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10947;}}}}}s:1:"m";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10945;}}}}}s:1:"n";a:2:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10955;}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8842;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10943;}}}}}s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10617;}}}}}s:1:"s";a:3:{s:1:"e";a:1:{s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8834;}s:1:"e";a:1:{s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8838;}s:1:"q";a:1:{s:1:";";a:1:
{s:9:"codepoint";i:10949;}}}}s:1:"n";a:1:{s:1:"e";a:1:{s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8842;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10955;}}}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10951;}}}s:1:"u";a:2:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10965;}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10963;}}}}}s:1:"c";a:1:{s:1:"c";a:6:{s:1:";";a:1:{s:9:"codepoint";i:8827;}s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10936;}}}}}}}s:1:"c";a:1:{s:1:"u";a:1:{s:1:"r";a:1:{s:1:"l";a:1:{s:1:"y";a:1:{s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8829;}}}}}}}}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10928;}}}s:1:"n";a:3:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10938;}}}}}}}s:1:"e";a:1:{s:1:"q";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10934;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8937;}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8831;}}}}}}s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8721;}}s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9834;}}}s:1:"p";a:13:{i:1;a:2:{s:1:";";a:1:{s:9:"codepoint";i:185;}s:9:"codepoint";i:185;}i:2;a:2:{s:1:";";a:1:{s:9:"codepoint";i:178;}s:9:"codepoint";i:178;}i:3;a:2:{s:1:";";a:1:{s:9:"codepoint";i:179;}s:9:"codepoint";i:179;}s:1:";";a:1:{s:9:"codepoint";i:8835;}s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10950;}}s:1:"d";a:2:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10942;}}}s:1:"s";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10968;}}}}}s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8839;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10948;}}}}}s:1:"h";a:1:{s:1:"s";a:1:{s:1:"u";a:1:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10967;}}}}}s:1:"l";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i
:10619;}}}}}s:1:"m";a:1:{s:1:"u";a:1:{s:1:"l";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10946;}}}}}s:1:"n";a:2:{s:1:"E";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10956;}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8843;}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10944;}}}}}s:1:"s";a:3:{s:1:"e";a:1:{s:1:"t";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8835;}s:1:"e";a:1:{s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8839;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10950;}}}}s:1:"n";a:1:{s:1:"e";a:1:{s:1:"q";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8843;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10956;}}}}}}}s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10952;}}}s:1:"u";a:2:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10964;}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10966;}}}}}}s:1:"w";a:3:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8665;}}}}s:1:"a";a:1:{s:1:"r";a:2:{s:1:"h";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10534;}}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8601;}s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8601;}}}}}}s:1:"n";a:1:{s:1:"w";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10538;}}}}}}s:1:"z";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"g";a:2:{s:1:";";a:1:{s:9:"codepoint";i:223;}s:9:"codepoint";i:223;}}}}}s:1:"t";a:13:{s:1:"a";a:2:{s:1:"r";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8982;}}}}}s:1:"u";a:1:{s:1:";";a:1:{s:9:"codepoint";i:964;}}}s:1:"b";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9140;}}}}s:1:"c";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:357;}}}}}s:1:"e";a:1:{s:1:"d";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:355;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1090;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8411;}}}}s:1:"e";a:1:{s:1:"l";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:
9:"codepoint";i:8981;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120113;}}}s:1:"h";a:4:{s:1:"e";a:2:{s:1:"r";a:1:{s:1:"e";a:2:{i:4;a:1:{s:1:";";a:1:{s:9:"codepoint";i:8756;}}s:1:"f";a:1:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8756;}}}}}}}s:1:"t";a:1:{s:1:"a";a:3:{s:1:";";a:1:{s:9:"codepoint";i:952;}s:1:"s";a:1:{s:1:"y";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:977;}}}}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:977;}}}}}s:1:"i";a:2:{s:1:"c";a:1:{s:1:"k";a:2:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"x";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8776;}}}}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8764;}}}}}}s:1:"n";a:1:{s:1:"s";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8201;}}}}}s:1:"k";a:2:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8776;}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8764;}}}}}s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:254;}s:9:"codepoint";i:254;}}}}s:1:"i";a:3:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:732;}}}}s:1:"m";a:1:{s:1:"e";a:1:{s:1:"s";a:4:{s:1:";";a:1:{s:9:"codepoint";i:215;}s:9:"codepoint";i:215;s:1:"b";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8864;}s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10801;}}}}s:1:"d";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10800;}}}}}s:1:"n";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8749;}}}}s:1:"o";a:3:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10536;}}}s:1:"p";a:4:{s:1:";";a:1:{s:9:"codepoint";i:8868;}s:1:"b";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9014;}}}}s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10993;}}}}s:1:"f";a:2:{s:1:";";a:1:{s:9:"codepoint";i:120165;}s:1:"o";a:1:{s:1:"r";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10970;}}}}}}s:1:"s";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10537;}}}}s:1:"p";a:1:{s:1:"r";a:1
:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8244;}}}}}}s:1:"r";a:3:{s:1:"a";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8482;}}}}s:1:"i";a:7:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:5:{s:1:";";a:1:{s:9:"codepoint";i:9653;}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9663;}}}}}s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9667;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8884;}}}}}}}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8796;}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9657;}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8885;}}}}}}}}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9708;}}}}s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8796;}}s:1:"m";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10810;}}}}}}s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10809;}}}}}s:1:"s";a:1:{s:1:"b";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10701;}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10811;}}}}}}s:1:"p";a:1:{s:1:"e";a:1:{s:1:"z";a:1:{s:1:"i";a:1:{s:1:"u";a:1:{s:1:"m";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9186;}}}}}}}}s:1:"s";a:3:{s:1:"c";a:2:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120009;}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1094;}}}s:1:"h";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1115;}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:359;}}}}}}s:1:"w";a:2:{s:1:"i";a:1:{s:1:"x";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8812;}}}}s:1:"o";a:1:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:"a";a:1:{s:1:"d";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8606;}}}
}}}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8608;}}}}}}}}}}}}}}}}}}s:1:"u";a:18:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8657;}}}}s:1:"H";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10595;}}}}s:1:"a";a:2:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:250;}s:9:"codepoint";i:250;}}}}s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8593;}}}}s:1:"b";a:1:{s:1:"r";a:2:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1118;}}}s:1:"e";a:1:{s:1:"v";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:365;}}}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:2:{s:1:";";a:1:{s:9:"codepoint";i:251;}s:9:"codepoint";i:251;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1091;}}}s:1:"d";a:3:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8645;}}}}s:1:"b";a:1:{s:1:"l";a:1:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:369;}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10606;}}}}}s:1:"f";a:2:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10622;}}}}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120114;}}}s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"v";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:249;}s:9:"codepoint";i:249;}}}}}s:1:"h";a:2:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:"l";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8639;}}s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8638;}}}}s:1:"b";a:1:{s:1:"l";a:1:{s:1:"k";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9600;}}}}}s:1:"l";a:2:{s:1:"c";a:2:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8988;}s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8988;}}}}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8975;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9720;}}}}}
s:1:"m";a:2:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:363;}}}}s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:168;}s:9:"codepoint";i:168;}}s:1:"o";a:2:{s:1:"g";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:371;}}}}s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120166;}}}}s:1:"p";a:6:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8593;}}}}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"n";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8597;}}}}}}}}}}s:1:"h";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:"o";a:1:{s:1:"o";a:1:{s:1:"n";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8639;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8638;}}}}}}}}}}}}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8846;}}}}s:1:"s";a:1:{s:1:"i";a:3:{s:1:";";a:1:{s:9:"codepoint";i:965;}s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:978;}}s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:965;}}}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"w";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8648;}}}}}}}}}}s:1:"r";a:3:{s:1:"c";a:2:{s:1:"o";a:1:{s:1:"r";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8989;}s:1:"e";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8989;}}}}}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8974;}}}}}s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:367;}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9721;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120010;}}}}s:1:"t";a:3:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8944;}}}}s:1:"i";a:1:{s:1:"l";a:1:{s:1:"d";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:361;}}}}}s:
1:"r";a:1:{s:1:"i";a:2:{s:1:";";a:1:{s:9:"codepoint";i:9653;}s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9652;}}}}}s:1:"u";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8648;}}}}s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:252;}s:9:"codepoint";i:252;}}}s:1:"w";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10663;}}}}}}}}s:1:"v";a:14:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8661;}}}}s:1:"B";a:1:{s:1:"a";a:1:{s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:10984;}s:1:"v";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10985;}}}}}s:1:"D";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8872;}}}}}s:1:"a";a:2:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10652;}}}}}s:1:"r";a:7:{s:1:"e";a:1:{s:1:"p";a:1:{s:1:"s";a:1:{s:1:"i";a:1:{s:1:"l";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:949;}}}}}}}}s:1:"k";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:"p";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1008;}}}}}}s:1:"n";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:"i";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8709;}}}}}}}}s:1:"p";a:3:{s:1:"h";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:966;}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:982;}}s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:"t";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8733;}}}}}}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8597;}s:1:"h";a:1:{s:1:"o";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1009;}}}}s:1:"s";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"m";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:962;}}}}}}s:1:"t";a:2:{s:1:"h";a:1:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:977;}}}}}s:1:"r";a:1:{s:1:"i";a:1:{s:1:"a";a:1:{s:1:"n";a:1:{s:1:"g";a:1:{s:1:"l";a:1:{s:1:"e";a:2:{s:1:"l";a:1:{s:1:"e";a:1:{s:1:"f";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8882;}}}}}s:1:"r";a:1:{s:1:"i";a:
1:{s:1:"g";a:1:{s:1:"h";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8883;}}}}}}}}}}}}}}}}s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1074;}}}s:1:"d";a:1:{s:1:"a";a:1:{s:1:"s";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8866;}}}}}s:1:"e";a:3:{s:1:"e";a:3:{s:1:";";a:1:{s:9:"codepoint";i:8744;}s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8891;}}}}s:1:"e";a:1:{s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8794;}}}}s:1:"l";a:1:{s:1:"l";a:1:{s:1:"i";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8942;}}}}}s:1:"r";a:2:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:124;}}}}s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:124;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120115;}}}s:1:"l";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8882;}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120167;}}}}s:1:"p";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8733;}}}}}s:1:"r";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8883;}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120011;}}}}s:1:"z";a:1:{s:1:"i";a:1:{s:1:"g";a:1:{s:1:"z";a:1:{s:1:"a";a:1:{s:1:"g";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10650;}}}}}}}}s:1:"w";a:7:{s:1:"c";a:1:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:373;}}}}}s:1:"e";a:2:{s:1:"d";a:2:{s:1:"b";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10847;}}}}s:1:"g";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8743;}s:1:"q";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8793;}}}}}s:1:"i";a:1:{s:1:"e";a:1:{s:1:"r";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8472;}}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120116;}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120168;}}}}s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8472;}}s:1:"r";a:2:{s:1:";";a:1:{s:9:"codepoint";i:8768;}s:
1:"e";a:1:{s:1:"a";a:1:{s:1:"t";a:1:{s:1:"h";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8768;}}}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120012;}}}}}s:1:"x";a:14:{s:1:"c";a:3:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8898;}}}s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9711;}}}}s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8899;}}}}s:1:"d";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9661;}}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120117;}}}s:1:"h";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10234;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10231;}}}}}s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:958;}}s:1:"l";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10232;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10229;}}}}}s:1:"m";a:1:{s:1:"a";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10236;}}}}s:1:"n";a:1:{s:1:"i";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8955;}}}}s:1:"o";a:3:{s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10752;}}}}s:1:"p";a:2:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120169;}}s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10753;}}}}}s:1:"t";a:1:{s:1:"i";a:1:{s:1:"m";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10754;}}}}}}s:1:"r";a:2:{s:1:"A";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10233;}}}}s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10230;}}}}}s:1:"s";a:2:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120013;}}}s:1:"q";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"p";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10758;}}}}}}s:1:"u";a:2:{s:1:"p";a:1:{s:1:"l";a:1:{s:1:"u";a:1:{s:1:"s";a:1:{s:1:";";a:1:{s:9:"codepoint";i:10756;}}}}}s:1:"t";a:1:{s:1:"r";a:1:{s:1:"i";a:1:{s:1:";";a:1:{s:9:"codepoint";i:9651;}}}}}s
:1:"v";a:1:{s:1:"e";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8897;}}}}s:1:"w";a:1:{s:1:"e";a:1:{s:1:"d";a:1:{s:1:"g";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8896;}}}}}}}s:1:"y";a:8:{s:1:"a";a:1:{s:1:"c";a:2:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:2:{s:1:";";a:1:{s:9:"codepoint";i:253;}s:9:"codepoint";i:253;}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1103;}}}}s:1:"c";a:2:{s:1:"i";a:1:{s:1:"r";a:1:{s:1:"c";a:1:{s:1:";";a:1:{s:9:"codepoint";i:375;}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1099;}}}s:1:"e";a:1:{s:1:"n";a:2:{s:1:";";a:1:{s:9:"codepoint";i:165;}s:9:"codepoint";i:165;}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120118;}}}s:1:"i";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1111;}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120170;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120014;}}}}s:1:"u";a:2:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1102;}}}s:1:"m";a:1:{s:1:"l";a:2:{s:1:";";a:1:{s:9:"codepoint";i:255;}s:9:"codepoint";i:255;}}}}s:1:"z";a:10:{s:1:"a";a:1:{s:1:"c";a:1:{s:1:"u";a:1:{s:1:"t";a:1:{s:1:"e";a:1:{s:1:";";a:1:{s:9:"codepoint";i:378;}}}}}}s:1:"c";a:2:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"o";a:1:{s:1:"n";a:1:{s:1:";";a:1:{s:9:"codepoint";i:382;}}}}}s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1079;}}}s:1:"d";a:1:{s:1:"o";a:1:{s:1:"t";a:1:{s:1:";";a:1:{s:9:"codepoint";i:380;}}}}s:1:"e";a:2:{s:1:"e";a:1:{s:1:"t";a:1:{s:1:"r";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8488;}}}}}s:1:"t";a:1:{s:1:"a";a:1:{s:1:";";a:1:{s:9:"codepoint";i:950;}}}}s:1:"f";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120119;}}}s:1:"h";a:1:{s:1:"c";a:1:{s:1:"y";a:1:{s:1:";";a:1:{s:9:"codepoint";i:1078;}}}}s:1:"i";a:1:{s:1:"g";a:1:{s:1:"r";a:1:{s:1:"a";a:1:{s:1:"r";a:1:{s:1:"r";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8669;}}}}}}}s:1:"o";a:1:{s:1:"p";a:1:{s:1:"f";a:1:{s:1:";";a:1:{s:9:"codepoint";i:120171;}}}}s:1:"s";a:1:{s:1:"c";a:1:{s:1:"r";a:1:{s:1:";
";a:1:{s:9:"codepoint";i:120015;}}}}s:1:"w";a:2:{s:1:"j";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8205;}}s:1:"n";a:1:{s:1:"j";a:1:{s:1:";";a:1:{s:9:"codepoint";i:8204;}}}}}}
    \ No newline at end of file
    diff --git a/libraries/html5php/HTML5.php b/libraries/html5php/HTML5.php
    new file mode 100644
    index 0000000..7295fb4
    --- /dev/null
    +++ b/libraries/html5php/HTML5.php
    @@ -0,0 +1,220 @@
    + FALSE,
    +  );
    +
    +  /**
    +   * Load and parse an HTML file.
    +   *
    +   * This will apply the HTML5 parser, which is tolerant of many 
    +   * varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML 
    +   * 3. Note that in these cases, not all of the old data will be 
    +   * preserved. For example, XHTML's XML declaration will be removed.
    +   *
    +   * The rules governing parsing are set out in the HTML 5 spec.
    +   *
    +   * @param string $file
    +   *   The path to the file to parse. If this is a resource, it is 
    +   *   assumed to be an open stream whose pointer is set to the first 
    +   *   byte of input.
    +   * @return \DOMDocument
    +   *   A DOM document. This object type is defined by the libxml 
    +   *   library, and should have been included with your version of PHP.
    +   */
    +  public static function load($file) {
    +    // Anything that is not a resource is treated as a path and read
    +    // through a FileInputStream.
    +    if (!is_resource($file)) {
    +      return static::parse(new FileInputStream($file));
    +    }
    +
    +    // FIXME: We need a StreamInputStream class.
    +    // Until one exists, the remaining contents of the open stream are read
    +    // into a string and parsed via loadHTML().
    +    return static::loadHTML(stream_get_contents($file));
    +  }
    +
    +  /**
    +   * Parse a HTML Document from a string.
    +   * 
    +   * Take a string of HTML 5 (or earlier) and parse it into a 
    +   * DOMDocument.
    +   *
    +   * @param string $string
    +   *   A html5 document as a string.
    +   * @return \DOMDocument
    +   *   A DOM document. DOM is part of libxml, which is included with 
    +   *   almost all distributions of PHP.
    +   */
    +  public static function loadHTML($string) {
    +    // Wrap the markup in a string-backed input stream and hand it to the
    +    // shared parse() entry point.
    +    return static::parse(new StringInputStream($string));
    +  }
    +
    +  /**
    +   * Convenience function to load an HTML file.
    +   *
    +   * This is here to provide backwards compatibility with the
    +   * PHP DOM implementation. It simply calls load().
    +   *
    +   * @param string $file
    +   *   The path to the file to parse. If this is a resource, it is 
    +   *   assumed to be an open stream whose pointer is set to the first 
    +   *   byte of input.
    +   *
    +   * @return \DOMDocument
    +   *   A DOM document. This object type is defined by the libxml 
    +   *   library, and should have been included with your version of PHP.
    +   */
    +  public static function loadHTMLFile($file, $options = NULL) {
    +    // $options is forwarded unchanged. NOTE(review): load() as declared in
    +    // this class takes only $file, so the extra argument has no effect
    +    // unless a subclass overrides load() to accept it.
    +    $document = static::load($file, $options);
    +    return $document;
    +  }
    +
    +  /**
    +   * Parse a HTML fragment from a string.
    +   *
    +   * @param string $string
    +   *   The html5 fragment as a string.
    +   *
    +   * @return \DOMDocumentFragment
    +   *   A DOM fragment. The DOM is part of libxml, which is included with
    +   *   almost all distributions of PHP.
    +   */
    +  public static function loadHTMLFragment($string) {
    +    // Wrap the markup in a string-backed input stream and hand it to the
    +    // fragment parser.
    +    return static::parseFragment(new StringInputStream($string));
    +  }
    +
    +  /**
    +   * Save a DOM into a given file as HTML5.
    +   *
    +   * @param mixed $dom
    +   *   The DOM to be serialized.
    +   * @param string|resource $file
    +   *   The filename to be written, or an already-open writable stream. A
    +   *   stream passed in by the caller is written to but never closed here.
    +   * @param array $options
    +   *   Configuration options when serializing the DOM. These include:
    +   *   - encode_entities: Text written to the output is escaped by default and not all
    +   *     entities are encoded. If this is set to TRUE all entities will be encoded.
    +   *     Defaults to FALSE.
    +   *   Options given here take precedence over the class-wide defaults.
    +   *
    +   * @throws \RuntimeException
    +   *   If $file is a filename that cannot be opened for writing.
    +   */
    +  public static function save($dom, $file, $options = array()) {
    +    // Array union keeps the caller's values and only fills in missing keys.
    +    $options = $options + static::options();
    +
    +    // Only a stream opened by this method is closed by this method.
    +    $close = !is_resource($file);
    +    if ($close) {
    +      $stream = fopen($file, 'w');
    +      if ($stream === FALSE) {
    +        // Fail early instead of handing FALSE to the serializer.
    +        throw new \RuntimeException('Unable to open "' . $file . '" for writing.');
    +      }
    +    }
    +    else {
    +      $stream = $file;
    +    }
    +
    +    try {
    +      $rules = new OutputRules($stream, $options);
    +      $trav = new Traverser($dom, $stream, $rules, $options);
    +
    +      $trav->walk();
    +    }
    +    catch (\Exception $e) {
    +      // Do not leak the file handle when serialization fails part-way.
    +      if ($close) {
    +        fclose($stream);
    +      }
    +      throw $e;
    +    }
    +
    +    if ($close) {
    +      fclose($stream);
    +    }
    +  }
    +
    +  /**
    +   * Convert a DOM into an HTML5 string.
    +   *
    +   * The DOM is serialized into a temporary stream via save(), read back
    +   * in full, and the temporary stream is then closed.
    +   *
    +   * @param mixed $dom
    +   *   The DOM to be serialized.
    +   * @param array $options
    +   *   Configuration options when serializing the DOM. These include:
    +   *   - encode_entities: Text written to the output is escaped by default and not all
    +   *     entities are encoded. If this is set to TRUE all entities will be encoded.
    +   *     Defaults to FALSE.
    +   *
    +   * @return string
    +   *   An HTML5 document generated from the DOM.
    +   */
    +  public static function saveHTML($dom, $options = array()) {
    +    $stream = fopen('php://temp', 'w');
    +    static::save($dom, $stream, $options);
    +
    +    // Read everything back starting at offset 0. save() leaves streams it
    +    // was handed open, so the temporary stream must be released here.
    +    $html = stream_get_contents($stream, -1, 0);
    +    fclose($stream);
    +
    +    return $html;
    +  }
    +
    +  /**
    +   * Parse an input stream into a document.
    +   *
    +   * Lower-level loading function. This requires an input stream instead
    +   * of a string, file, or resource.
    +   *
    +   * @param \HTML5\Parser\InputStream $input
    +   *   The stream to read markup from.
    +   *
    +   * @return \DOMDocument
    +   *   The document produced by the tree builder once tokenizing is done.
    +   */
    +  public static function parse(\HTML5\Parser\InputStream $input) {
    +    // The tokenizer reads from the scanner and reports to the tree builder.
    +    $builder = new DOMTreeBuilder();
    +    $tokenizer = new Tokenizer(new Scanner($input), $builder);
    +    $tokenizer->parse();
    +
    +    return $builder->document();
    +  }
    +
    +  /**
    +   * Parse an input stream whose content is a fragment, not a full document.
    +   *
    +   * Lower-level loading function. This requires an input stream instead
    +   * of a string, file, or resource.
    +   *
    +   * @param \HTML5\Parser\InputStream $input
    +   *   The stream to read markup from.
    +   *
    +   * @return \DOMDocumentFragment
    +   *   The fragment produced by the tree builder once tokenizing is done.
    +   */
    +  public static function parseFragment(\HTML5\Parser\InputStream $input) {
    +    // TRUE is passed to the tree builder for fragment parsing.
    +    $builder = new DOMTreeBuilder(TRUE);
    +    $tokenizer = new Tokenizer(new Scanner($input), $builder);
    +    $tokenizer->parse();
    +
    +    return $builder->fragment();
    +  }
    +
    +  /**
    +   * Return the class-wide default options.
    +   *
    +   * @return array
    +   *   The default options, keyed by option name.
    +   */
    +  public static function options() {
    +    $defaults = static::$options;
    +
    +    return $defaults;
    +  }
    +
    +  /**
    +   * Set a default option.
    +   *
    +   * The value is stored in the class-wide static options array, replacing
    +   * any value already stored under the same name.
    +   *
    +   * @param string $name
    +   *   The option name.
    +   * @param mixed $value
    +   *   The option value.
    +   */
    +  public static function setOption($name, $value) {
    +    static::$options[$name] = $value;
    +  }
    +
    +}
    diff --git a/libraries/html5php/HTML5/Elements.php b/libraries/html5php/HTML5/Elements.php
    new file mode 100644
    index 0000000..69d3882
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Elements.php
    @@ -0,0 +1,614 @@
    + 1,
    +    "abbr" => 1,
    +    "address" => 89, // NORMAL | VOID_TAG |  AUTOCLOSE_P | BLOCK_TAG
    +    "area" => 9,  // NORMAL | VOID_TAG
    +    "article" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "aside" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "audio" => 65, // NORMAL | BLOCK_TAG
    +    "b" => 1,
    +    "base" => 9, // NORMAL | VOID_TAG
    +    "bdi" => 1,
    +    "bdo" => 1,
    +    "blockquote" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "body" => 1,
    +    "br" => 9, // NORMAL | VOID_TAG
    +    "button" => 1,
    +    "canvas" => 65, // NORMAL | BLOCK_TAG
    +    "caption" => 1,
    +    "cite" => 1,
    +    "code" => 1,
    +    "col" => 9,  // NORMAL | VOID_TAG
    +    "colgroup" => 1,
    +    "command" => 9, // NORMAL | VOID_TAG
    +    //"data" => 1,    // This is highly experimental and only part of the whatwg spec (not w3c). See https://developer.mozilla.org/en-US/docs/HTML/Element/data
    +    "datalist" => 1,
    +    "dd" => 65, // NORMAL | BLOCK_TAG 
    +    "del" => 1,
    +    "details" => 17, // NORMAL | AUTOCLOSE_P,
    +    "dfn" => 1,
    +    "dialog" => 17, // NORMAL | AUTOCLOSE_P,
    +    "div" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "dl" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "dt" => 1,
    +    "em" => 1,
    +    "embed" => 9, // NORMAL | VOID_TAG
    +    "fieldset" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "figcaption" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "figure" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "footer" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "form" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "h1" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "h2" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "h3" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "h4" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "h5" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "h6" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "head" => 1,
    +    "header" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "hgroup" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "hr" => 73, // NORMAL | VOID_TAG | BLOCK_TAG
    +    "html" => 1,
    +    "i" => 1,
    +    "iframe" => 3, // NORMAL | TEXT_RAW
    +    "img" => 9, // NORMAL | VOID_TAG
    +    "input" => 9, // NORMAL | VOID_TAG
    +    "kbd" => 1,
    +    "ins" => 1,
    +    "keygen" => 9, // NORMAL | VOID_TAG
    +    "label" => 1,
    +    "legend" => 1,
    +    "li" => 1,
    +    "link" => 9, // NORMAL | VOID_TAG
    +    "map" => 1,
    +    "mark" => 1,
    +    "menu" => 17, // NORMAL | AUTOCLOSE_P,
    +    "meta" => 9, // NORMAL | VOID_TAG
    +    "meter" => 1,
    +    "nav" => 17, // NORMAL | AUTOCLOSE_P,
    +    "noscript" => 67, // NORMAL | TEXT_RAW | BLOCK_TAG
    +    "object" => 1,
    +    "ol" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "optgroup" => 1,
    +    "option" => 1,
    +    "output" => 65, // NORMAL | BLOCK_TAG
    +    "p" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "param" => 9, // NORMAL | VOID_TAG
    +    "pre" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "progress" => 1,
    +    "q" => 1,
    +    "rp" => 1,
    +    "rt" => 1,
    +    "ruby" => 1,
    +    "s" => 1,
    +    "samp" => 1,
    +    "script" => 3, // NORMAL | TEXT_RAW
    +    "section" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "select" => 1,
    +    "small" => 1,
    +    "source" => 9, // NORMAL | VOID_TAG
    +    "span" => 1,
    +    "strong" => 1,
    +    "style" => 1,
    +    "sub" => 1,
    +    "summary" => 17, // NORMAL | AUTOCLOSE_P,
    +    "sup" => 1,
    +    "table" => 65, // NORMAL | BLOCK_TAG
    +    "tbody" => 1,
    +    "td" => 1,
    +    "textarea" => 5, // NORMAL | TEXT_RCDATA
    +    "tfoot" => 65, // NORMAL | BLOCK_TAG
    +    "th" => 1,
    +    "thead" => 1,
    +    "time" => 1,
    +    "title" => 5, // NORMAL | TEXT_RCDATA
    +    "tr" => 1,
    +    "track" => 9, // NORMAL | VOID_TAG
    +    "u" => 1,
    +    "ul" => 81, // NORMAL | AUTOCLOSE_P | BLOCK_TAG
    +    "var" => 1,
    +    "video" => 65, // NORMAL | BLOCK_TAG
    +    "wbr" => 9, // NORMAL | VOID_TAG
    +
    +    // Legacy?
    +    'basefont' => 8, // VOID_TAG
    +    'bgsound' => 8, // VOID_TAG
    +    'noframes' => 2, // RAW_TEXT
    +    'frame' => 9,  // NORMAL | VOID_TAG
    +    'frameset' => 1,
    +    'center' => 16, 'dir' => 16, 'listing' => 16, // AUTOCLOSE_P
    +    'plaintext' => 48, // AUTOCLOSE_P | TEXT_PLAINTEXT
    +    'applet' => 0,
    +    'marquee' => 0,
    +    'isindex' => 8, // VOID_TAG
    +    'xmp' => 20, // AUTOCLOSE_P | VOID_TAG | RAW_TEXT
    +    'noembed' => 2, // RAW_TEXT
    +  );
    +
    +  /**
    +   * The MathML elements. See http://www.w3.org/wiki/MathML/Elements.
    +   *
    +   * In our case we are only concerned with presentation MathML and not content
    +   * MathML. There is a nice list of this subset at https://developer.mozilla.org/en-US/docs/MathML/Element.
    +   * 
    +   * @var array
    +   */
    +  public static $mathml = array(
    +    "maction" => 1,
    +    "maligngroup" => 1,
    +    "malignmark" => 1,
    +    "math" => 1,
    +    "menclose" => 1,
    +    "merror" => 1,
    +    "mfenced" => 1,
    +    "mfrac" => 1,
    +    "mglyph" => 1,
    +    "mi" => 1,
    +    "mlabeledtr" => 1,
    +    "mlongdiv" => 1,
    +    "mmultiscripts" => 1,
    +    "mn" => 1,
    +    "mo" => 1,
    +    "mover" => 1,
    +    "mpadded" => 1,
    +    "mphantom" => 1,
    +    "mroot" => 1,
    +    "mrow" => 1,
    +    "ms" => 1,
    +    "mscarries" => 1,
    +    "mscarry" => 1,
    +    "msgroup" => 1,
    +    "msline" => 1,
    +    "mspace" => 1,
    +    "msqrt" => 1,
    +    "msrow" => 1,
    +    "mstack" => 1,
    +    "mstyle" => 1,
    +    "msub" => 1,
    +    "msup" => 1,
    +    "msubsup" => 1,
    +    "mtable" => 1,
    +    "mtd" => 1,
    +    "mtext" => 1,
    +    "mtr" => 1,
    +    "munder" => 1,
    +    "munderover" => 1,
    +  );
    +
    +  /**
    +   * The svg elements.
    +   *
    +   * The Mozilla documentation has a good list at https://developer.mozilla.org/en-US/docs/SVG/Element.
    +   * The w3c list appears to be lacking in some areas like filter effect elements.
    +   * That list can be found at http://www.w3.org/wiki/SVG/Elements.
    +   *
    +   * Note, Firefox appears to do a better job rendering filter effects than Chrome.
    +   * While they are in the spec I'm not sure how widely implemented they are.
    +   *
    +   * @var array
    +   */
    +  public static $svg = array(
    +    "a" => 1,
    +    "altGlyph" => 1,
    +    "altGlyphDef" => 1,
    +    "altGlyphItem" => 1,
    +    "animate" => 1,
    +    "animateColor" => 1,
    +    "animateMotion" => 1,
    +    "animateTransform" => 1,
    +    "circle" => 1,
    +    "clipPath" => 1,
    +    "color-profile" => 1,
    +    "cursor" => 1,
    +    "defs" => 1,
    +    "desc" => 1,
    +    "ellipse" => 1,
    +    "feBlend" => 1,
    +    "feColorMatrix" => 1,
    +    "feComponentTransfer" => 1,
    +    "feComposite" => 1,
    +    "feConvolveMatrix" => 1,
    +    "feDiffuseLighting" => 1,
    +    "feDisplacementMap" => 1,
    +    "feDistantLight" => 1,
    +    "feFlood" => 1,
    +    "feFuncA" => 1,
    +    "feFuncB" => 1,
    +    "feFuncG" => 1,
    +    "feFuncR" => 1,
    +    "feGaussianBlur" => 1,
    +    "feImage" => 1,
    +    "feMerge" => 1,
    +    "feMergeNode" => 1,
    +    "feMorphology" => 1,
    +    "feOffset" => 1,
    +    "fePointLight" => 1,
    +    "feSpecularLighting" => 1,
    +    "feSpotLight" => 1,
    +    "feTile" => 1,
    +    "feTurbulence" => 1,
    +    "filter" => 1,
    +    "font" => 1,
    +    "font-face" => 1,
    +    "font-face-format" => 1,
    +    "font-face-name" => 1,
    +    "font-face-src" => 1,
    +    "font-face-uri" => 1,
    +    "foreignObject" => 1,
    +    "g" => 1,
    +    "glyph" => 1,
    +    "glyphRef" => 1,
    +    "hkern" => 1,
    +    "image" => 1,
    +    "line" => 1,
    +    "linearGradient" => 1,
    +    "marker" => 1,
    +    "mask" => 1,
    +    "metadata" => 1,
    +    "missing-glyph" => 1,
    +    "mpath" => 1,
    +    "path" => 1,
    +    "pattern" => 1,
    +    "polygon" => 1,
    +    "polyline" => 1,
    +    "radialGradient" => 1,
    +    "rect" => 1,
    +    "script" => 3, // NORMAL | RAW_TEXT
    +    "set" => 1,
    +    "stop" => 1,
    +    "style" => 3, // NORMAL | RAW_TEXT
    +    "svg" => 1,
    +    "switch" => 1,
    +    "symbol" => 1,
    +    "text" => 1,
    +    "textPath" => 1,
    +    "title" => 1,
    +    "tref" => 1,
    +    "tspan" => 1,
    +    "use" => 1,
    +    "view" => 1,
    +    "vkern" => 1,
    +  );
    +
    +  /**
    +   * Some attributes in SVG are case sensitive.
    +   *
    +   * This map contains key/value pairs with the key as the lowercase attribute
    +   * name and the value with the correct casing.
    +   */
    +  public static $svgCaseSensitiveAttributeMap = array(
    +    'attributename' => 'attributeName',
    +    'attributetype' => 'attributeType',
    +    'basefrequency' => 'baseFrequency',
    +    'baseprofile' => 'baseProfile',
    +    'calcmode' => 'calcMode',
    +    'clippathunits' => 'clipPathUnits',
    +    'contentscripttype' => 'contentScriptType',
    +    'contentstyletype' => 'contentStyleType',
    +    'diffuseconstant' => 'diffuseConstant',
    +    'edgemode' => 'edgeMode',
    +    'externalresourcesrequired' => 'externalResourcesRequired',
    +    'filterres' => 'filterRes',
    +    'filterunits' => 'filterUnits',
    +    'glyphref' => 'glyphRef',
    +    'gradienttransform' => 'gradientTransform',
    +    'gradientunits' => 'gradientUnits',
    +    'kernelmatrix' => 'kernelMatrix',
    +    'kernelunitlength' => 'kernelUnitLength',
    +    'keypoints' => 'keyPoints',
    +    'keysplines' => 'keySplines',
    +    'keytimes' => 'keyTimes',
    +    'lengthadjust' => 'lengthAdjust',
    +    'limitingconeangle' => 'limitingConeAngle',
    +    'markerheight' => 'markerHeight',
    +    'markerunits' => 'markerUnits',
    +    'markerwidth' => 'markerWidth',
    +    'maskcontentunits' => 'maskContentUnits',
    +    'maskunits' => 'maskUnits',
    +    'numoctaves' => 'numOctaves',
    +    'pathlength' => 'pathLength',
    +    'patterncontentunits' => 'patternContentUnits',
    +    'patterntransform' => 'patternTransform',
    +    'patternunits' => 'patternUnits',
    +    'pointsatx' => 'pointsAtX',
    +    'pointsaty' => 'pointsAtY',
    +    'pointsatz' => 'pointsAtZ',
    +    'preservealpha' => 'preserveAlpha',
    +    'preserveaspectratio' => 'preserveAspectRatio',
    +    'primitiveunits' => 'primitiveUnits',
    +    'refx' => 'refX',
    +    'refy' => 'refY',
    +    'repeatcount' => 'repeatCount',
    +    'repeatdur' => 'repeatDur',
    +    'requiredextensions' => 'requiredExtensions',
    +    'requiredfeatures' => 'requiredFeatures',
    +    'specularconstant' => 'specularConstant',
    +    'specularexponent' => 'specularExponent',
    +    'spreadmethod' => 'spreadMethod',
    +    'startoffset' => 'startOffset',
    +    'stddeviation' => 'stdDeviation',
    +    'stitchtiles' => 'stitchTiles',
    +    'surfacescale' => 'surfaceScale',
    +    'systemlanguage' => 'systemLanguage',
    +    'tablevalues' => 'tableValues',
    +    'targetx' => 'targetX',
    +    'targety' => 'targetY',
    +    'textlength' => 'textLength',
    +    'viewbox' => 'viewBox',
    +    'viewtarget' => 'viewTarget',
    +    'xchannelselector' => 'xChannelSelector',
    +    'ychannelselector' => 'yChannelSelector',
    +    'zoomandpan' => 'zoomAndPan',
    +  );
    +
    +  /**
    +   * Some SVG elements are case sensitive. This map contains these.
    +   *
    +   * The map is a key/value store with the lowercase name as the key and
    +   * the correct casing as the value.
    +   */
    +  public static $svgCaseSensitiveElementMap = array(
    +    'altglyph' => 'altGlyph',
    +    'altglyphdef' => 'altGlyphDef',
    +    'altglyphitem' => 'altGlyphItem',
    +    'animatecolor' => 'animateColor',
    +    'animatemotion' => 'animateMotion',
    +    'animatetransform' => 'animateTransform',
    +    'clippath' => 'clipPath',
    +    'feblend' => 'feBlend',
    +    'fecolormatrix' => 'feColorMatrix',
    +    'fecomponenttransfer' => 'feComponentTransfer',
    +    'fecomposite' => 'feComposite',
    +    'feconvolvematrix' => 'feConvolveMatrix',
    +    'fediffuselighting' => 'feDiffuseLighting',
    +    'fedisplacementmap' => 'feDisplacementMap',
    +    'fedistantlight' => 'feDistantLight',
    +    'feflood' => 'feFlood',
    +    'fefunca' => 'feFuncA',
    +    'fefuncb' => 'feFuncB',
    +    'fefuncg' => 'feFuncG',
    +    'fefuncr' => 'feFuncR',
    +    'fegaussianblur' => 'feGaussianBlur',
    +    'feimage' => 'feImage',
    +    'femerge' => 'feMerge',
    +    'femergenode' => 'feMergeNode',
    +    'femorphology' => 'feMorphology',
    +    'feoffset' => 'feOffset',
    +    'fepointlight' => 'fePointLight',
    +    'fespecularlighting' => 'feSpecularLighting',
    +    'fespotlight' => 'feSpotLight',
    +    'fetile' => 'feTile',
    +    'feturbulence' => 'feTurbulence',
    +    'foreignobject' => 'foreignObject',
    +    'glyphref' => 'glyphRef',
    +    'lineargradient' => 'linearGradient',
    +    'radialgradient' => 'radialGradient',
    +    'textpath' => 'textPath',
    +  );
    +
    +  /**
    +   * Check whether the given element carries every flag in the given mask.
    +   *
    +   * Example:
    +   *
    +   *     Elements::isA('script', Elements::TEXT_RAW); // Returns true.
    +   *
    +   *     Elements::isA('script', Elements::TEXT_RCDATA); // Returns false.
    +   *
    +   * @param string $name
    +   *   The element name.
    +   * @param int $mask
    +   *   One of the constants on this class.
    +   * @return boolean
    +   *   TRUE if the element matches the mask, FALSE otherwise. Names that
    +   *   are not recognized elements never match.
    +   */
    +  public static function isA($name, $mask) {
    +    // Unknown names short-circuit to FALSE; otherwise all bits of $mask
    +    // must be present in the element's flags.
    +    return static::isElement($name) && (static::element($name) & $mask) == $mask;
    +  }
    +
    +  /**
    +   * Test whether a name is a known HTML5 element.
    +   *
    +   * @param string $name
    +   *   The name of the element.
    +   *
    +   * @return bool
    +   *   True if an HTML5 element and false otherwise.
    +   */
    +  public static function isHtml5Element($name) {
    +    // HTML5 element names are case insensitive, so look up the lowercase
    +    // form. Open question from the original author: is this needed, or
    +    // will all names passed here already be lowercase?
    +    $key = strtolower($name);
    +
    +    return isset(static::$html5[$key]);
    +  }
    +
    +  /**
    +   * Test whether a name is a known MathML presentation element.
    +   *
    +   * @param string $name
    +   *   The name of the element.
    +   *
    +   * @return bool
    +   *   True if a MathML name and false otherwise.
    +   */
    +  public static function isMathMLElement($name) {
    +    // MathML is case-sensitive unlike HTML5 elements, so the name is
    +    // looked up exactly as given.
    +    $known = isset(static::$mathml[$name]);
    +
    +    return $known;
    +  }
    +
    +  /**
    +   * Test whether a name is a known SVG element.
    +   *
    +   * @param string $name
    +   *   The name of the element.
    +   *
    +   * @return boolean
    +   *   True if an SVG element and false otherwise.
    +   */
    +  public static function isSvgElement($name) {
    +    // SVG is case-sensitive unlike HTML5 elements, so the name is looked
    +    // up exactly as given.
    +    $known = isset(static::$svg[$name]);
    +
    +    return $known;
    +  }
    +
    +  /**
    +   * Test whether an element name is valid in an HTML5 document.
    +   *
    +   * This includes HTML5 elements along with other allowed embedded content
    +   * such as SVG and MathML.
    +   *
    +   * @param string $name
    +   *   The name of the element.
    +   *
    +   * @return bool
    +   *   True if valid and false otherwise.
    +   */
    +  public static function isElement($name) {
    +    // Checked in the same order as before: HTML5, then MathML, then SVG.
    +    if (static::isHtml5Element($name)) {
    +      return TRUE;
    +    }
    +    if (static::isMathMLElement($name)) {
    +      return TRUE;
    +    }
    +
    +    return static::isSvgElement($name);
    +  }
    +
    +  /**
    +   * Get the element mask for the given element name.
    +   *
    +   * The name is first looked up exactly as given in the HTML5, SVG and
    +   * MathML tables, in that order. If none match, the lowercase form is
    +   * tried against the HTML5 table, because HTML5 element names are case
    +   * insensitive and isHtml5Element()/isElement() accept them in any case.
    +   *
    +   * @param string $name
    +   *   The name of the element.
    +   *
    +   * @return int|bool
    +   *   The element mask, or FALSE if the name is not a known element.
    +   */
    +  public static function element($name) {
    +    if (isset(static::$html5[$name])) {
    +      return static::$html5[$name];
    +    }
    +    if (isset(static::$svg[$name])) {
    +      return static::$svg[$name];
    +    }
    +    if (isset(static::$mathml[$name])) {
    +      return static::$mathml[$name];
    +    }
    +
    +    // Case-insensitive fallback for HTML5 names only. It runs last so every
    +    // name that resolved before still resolves to the same mask.
    +    $lower = strtolower($name);
    +    if (isset(static::$html5[$lower])) {
    +      return static::$html5[$lower];
    +    }
    +
    +    return FALSE;
    +  }
    +
    +  /**
    +   * Normalize an SVG element name to its proper case and form.
    +   *
    +   * @param string $name
    +   *   The name of the element.
    +   *
    +   * @return string
    +   *   The canonical mixed-case name if the case-sensitive element map has
    +   *   one, otherwise the lowercased name.
    +   */
    +  public static function normalizeSvgElement($name) {
    +    $key = strtolower($name);
    +
    +    return isset(static::$svgCaseSensitiveElementMap[$key])
    +      ? static::$svgCaseSensitiveElementMap[$key]
    +      : $key;
    +  }
    +
    +  /**
    +   * Normalize an SVG attribute name to its proper case and form.
    +   *
    +   * @param string $name
    +   *   The name of the attribute.
    +   *
    +   * @return string
    +   *   The canonical mixed-case name if the case-sensitive attribute map
    +   *   has one, otherwise the lowercased name.
    +   */
    +  public static function normalizeSvgAttribute($name) {
    +    $key = strtolower($name);
    +
    +    return isset(static::$svgCaseSensitiveAttributeMap[$key])
    +      ? static::$svgCaseSensitiveAttributeMap[$key]
    +      : $key;
    +  }
    +
    +  /**
    +   * Normalize a MathML attribute name to its proper case and form.
    +   *
    +   * Note, all MathML element names are lowercase.
    +   *
    +   * @param string $name
    +   *   The name of the attribute.
    +   *
    +   * @return string
    +   *   The lowercased attribute name, except for "definitionURL", which is
    +   *   returned in its mixed-case form.
    +   */
    +  public static function normalizeMathMlAttribute($name) {
    +    $lower = strtolower($name);
    +
    +    // Only one attribute has a mixed case form for MathML.
    +    return $lower === 'definitionurl' ? 'definitionURL' : $lower;
    +  }
    +}
    diff --git a/libraries/html5php/HTML5/Entities.php b/libraries/html5php/HTML5/Entities.php
    new file mode 100644
    index 0000000..e5de9e2
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Entities.php
    @@ -0,0 +1,2230 @@
    + 'Á',
    +  'Aacut' => 'Á',
    +  'aacute' => 'á',
    +  'aacut' => 'á',
    +  'Abreve' => 'Ă',
    +  'abreve' => 'ă',
    +  'ac' => '∾',
    +  'acd' => '∿',
    +  'acE' => '∾̳',
    +  'Acirc' => 'Â',
    +  'Acir' => 'Â',
    +  'acirc' => 'â',
    +  'acir' => 'â',
    +  'acute' => '´',
    +  'acut' => '´',
    +  'Acy' => 'А',
    +  'acy' => 'а',
    +  'AElig' => 'Æ',
    +  'AEli' => 'Æ',
    +  'aelig' => 'æ',
    +  'aeli' => 'æ',
    +  'af' => '⁡',
    +  'Afr' => '𝔄',
    +  'afr' => '𝔞',
    +  'Agrave' => 'À',
    +  'Agrav' => 'À',
    +  'agrave' => 'à',
    +  'agrav' => 'à',
    +  'alefsym' => 'ℵ',
    +  'aleph' => 'ℵ',
    +  'Alpha' => 'Α',
    +  'alpha' => 'α',
    +  'Amacr' => 'Ā',
    +  'amacr' => 'ā',
    +  'amalg' => '⨿',
    +  'AMP' => '&',
    +  'AM' => '&',
    +  'amp' => '&',
    +  'am' => '&',
    +  'And' => '⩓',
    +  'and' => '∧',
    +  'andand' => '⩕',
    +  'andd' => '⩜',
    +  'andslope' => '⩘',
    +  'andv' => '⩚',
    +  'ang' => '∠',
    +  'ange' => '⦤',
    +  'angle' => '∠',
    +  'angmsd' => '∡',
    +  'angmsdaa' => '⦨',
    +  'angmsdab' => '⦩',
    +  'angmsdac' => '⦪',
    +  'angmsdad' => '⦫',
    +  'angmsdae' => '⦬',
    +  'angmsdaf' => '⦭',
    +  'angmsdag' => '⦮',
    +  'angmsdah' => '⦯',
    +  'angrt' => '∟',
    +  'angrtvb' => '⊾',
    +  'angrtvbd' => '⦝',
    +  'angsph' => '∢',
    +  'angst' => 'Å',
    +  'angzarr' => '⍼',
    +  'Aogon' => 'Ą',
    +  'aogon' => 'ą',
    +  'Aopf' => '𝔸',
    +  'aopf' => '𝕒',
    +  'ap' => '≈',
    +  'apacir' => '⩯',
    +  'apE' => '⩰',
    +  'ape' => '≊',
    +  'apid' => '≋',
    +  'apos' => '\'',
    +  'ApplyFunction' => '⁡',
    +  'approx' => '≈',
    +  'approxeq' => '≊',
    +  'Aring' => 'Å',
    +  'Arin' => 'Å',
    +  'aring' => 'å',
    +  'arin' => 'å',
    +  'Ascr' => '𝒜',
    +  'ascr' => '𝒶',
    +  'Assign' => '≔',
    +  'ast' => '*',
    +  'asymp' => '≈',
    +  'asympeq' => '≍',
    +  'Atilde' => 'Ã',
    +  'Atild' => 'Ã',
    +  'atilde' => 'ã',
    +  'atild' => 'ã',
    +  'Auml' => 'Ä',
    +  'Aum' => 'Ä',
    +  'auml' => 'ä',
    +  'aum' => 'ä',
    +  'awconint' => '∳',
    +  'awint' => '⨑',
    +  'backcong' => '≌',
    +  'backepsilon' => '϶',
    +  'backprime' => '‵',
    +  'backsim' => '∽',
    +  'backsimeq' => '⋍',
    +  'Backslash' => '∖',
    +  'Barv' => '⫧',
    +  'barvee' => '⊽',
    +  'Barwed' => '⌆',
    +  'barwed' => '⌅',
    +  'barwedge' => '⌅',
    +  'bbrk' => '⎵',
    +  'bbrktbrk' => '⎶',
    +  'bcong' => '≌',
    +  'Bcy' => 'Б',
    +  'bcy' => 'б',
    +  'bdquo' => '„',
    +  'becaus' => '∵',
    +  'Because' => '∵',
    +  'because' => '∵',
    +  'bemptyv' => '⦰',
    +  'bepsi' => '϶',
    +  'bernou' => 'ℬ',
    +  'Bernoullis' => 'ℬ',
    +  'Beta' => 'Β',
    +  'beta' => 'β',
    +  'beth' => 'ℶ',
    +  'between' => '≬',
    +  'Bfr' => '𝔅',
    +  'bfr' => '𝔟',
    +  'bigcap' => '⋂',
    +  'bigcirc' => '◯',
    +  'bigcup' => '⋃',
    +  'bigodot' => '⨀',
    +  'bigoplus' => '⨁',
    +  'bigotimes' => '⨂',
    +  'bigsqcup' => '⨆',
    +  'bigstar' => '★',
    +  'bigtriangledown' => '▽',
    +  'bigtriangleup' => '△',
    +  'biguplus' => '⨄',
    +  'bigvee' => '⋁',
    +  'bigwedge' => '⋀',
    +  'bkarow' => '⤍',
    +  'blacklozenge' => '⧫',
    +  'blacksquare' => '▪',
    +  'blacktriangle' => '▴',
    +  'blacktriangledown' => '▾',
    +  'blacktriangleleft' => '◂',
    +  'blacktriangleright' => '▸',
    +  'blank' => '␣',
    +  'blk12' => '▒',
    +  'blk14' => '░',
    +  'blk34' => '▓',
    +  'block' => '█',
    +  'bne' => '=⃥',
    +  'bnequiv' => '≡⃥',
    +  'bNot' => '⫭',
    +  'bnot' => '⌐',
    +  'Bopf' => '𝔹',
    +  'bopf' => '𝕓',
    +  'bot' => '⊥',
    +  'bottom' => '⊥',
    +  'bowtie' => '⋈',
    +  'boxbox' => '⧉',
    +  'boxDL' => '╗',
    +  'boxDl' => '╖',
    +  'boxdL' => '╕',
    +  'boxdl' => '┐',
    +  'boxDR' => '╔',
    +  'boxDr' => '╓',
    +  'boxdR' => '╒',
    +  'boxdr' => '┌',
    +  'boxH' => '═',
    +  'boxh' => '─',
    +  'boxHD' => '╦',
    +  'boxHd' => '╤',
    +  'boxhD' => '╥',
    +  'boxhd' => '┬',
    +  'boxHU' => '╩',
    +  'boxHu' => '╧',
    +  'boxhU' => '╨',
    +  'boxhu' => '┴',
    +  'boxminus' => '⊟',
    +  'boxplus' => '⊞',
    +  'boxtimes' => '⊠',
    +  'boxUL' => '╝',
    +  'boxUl' => '╜',
    +  'boxuL' => '╛',
    +  'boxul' => '┘',
    +  'boxUR' => '╚',
    +  'boxUr' => '╙',
    +  'boxuR' => '╘',
    +  'boxur' => '└',
    +  'boxV' => '║',
    +  'boxv' => '│',
    +  'boxVH' => '╬',
    +  'boxVh' => '╫',
    +  'boxvH' => '╪',
    +  'boxvh' => '┼',
    +  'boxVL' => '╣',
    +  'boxVl' => '╢',
    +  'boxvL' => '╡',
    +  'boxvl' => '┤',
    +  'boxVR' => '╠',
    +  'boxVr' => '╟',
    +  'boxvR' => '╞',
    +  'boxvr' => '├',
    +  'bprime' => '‵',
    +  'Breve' => '˘',
    +  'breve' => '˘',
    +  'brvbar' => '¦',
    +  'brvba' => '¦',
    +  'Bscr' => 'ℬ',
    +  'bscr' => '𝒷',
    +  'bsemi' => '⁏',
    +  'bsim' => '∽',
    +  'bsime' => '⋍',
    +  'bsol' => '\\',
    +  'bsolb' => '⧅',
    +  'bsolhsub' => '⟈',
    +  'bull' => '•',
    +  'bullet' => '•',
    +  'bump' => '≎',
    +  'bumpE' => '⪮',
    +  'bumpe' => '≏',
    +  'Bumpeq' => '≎',
    +  'bumpeq' => '≏',
    +  'Cacute' => 'Ć',
    +  'cacute' => 'ć',
    +  'Cap' => '⋒',
    +  'cap' => '∩',
    +  'capand' => '⩄',
    +  'capbrcup' => '⩉',
    +  'capcap' => '⩋',
    +  'capcup' => '⩇',
    +  'capdot' => '⩀',
    +  'CapitalDifferentialD' => 'ⅅ',
    +  'caps' => '∩︀',
    +  'caret' => '⁁',
    +  'caron' => 'ˇ',
    +  'Cayleys' => 'ℭ',
    +  'ccaps' => '⩍',
    +  'Ccaron' => 'Č',
    +  'ccaron' => 'č',
    +  'Ccedil' => 'Ç',
    +  'Ccedi' => 'Ç',
    +  'ccedil' => 'ç',
    +  'ccedi' => 'ç',
    +  'Ccirc' => 'Ĉ',
    +  'ccirc' => 'ĉ',
    +  'Cconint' => '∰',
    +  'ccups' => '⩌',
    +  'ccupssm' => '⩐',
    +  'Cdot' => 'Ċ',
    +  'cdot' => 'ċ',
    +  'cedil' => '¸',
    +  'cedi' => '¸',
    +  'Cedilla' => '¸',
    +  'cemptyv' => '⦲',
    +  'cent' => '¢',
    +  'cen' => '¢',
    +  'CenterDot' => '·',
    +  'centerdot' => '·',
    +  'Cfr' => 'ℭ',
    +  'cfr' => '𝔠',
    +  'CHcy' => 'Ч',
    +  'chcy' => 'ч',
    +  'check' => '✓',
    +  'checkmark' => '✓',
    +  'Chi' => 'Χ',
    +  'chi' => 'χ',
    +  'cir' => '○',
    +  'circ' => 'ˆ',
    +  'circeq' => '≗',
    +  'circlearrowleft' => '↺',
    +  'circlearrowright' => '↻',
    +  'circledast' => '⊛',
    +  'circledcirc' => '⊚',
    +  'circleddash' => '⊝',
    +  'CircleDot' => '⊙',
    +  'circledR' => '®',
    +  'circledS' => 'Ⓢ',
    +  'CircleMinus' => '⊖',
    +  'CirclePlus' => '⊕',
    +  'CircleTimes' => '⊗',
    +  'cirE' => '⧃',
    +  'cire' => '≗',
    +  'cirfnint' => '⨐',
    +  'cirmid' => '⫯',
    +  'cirscir' => '⧂',
    +  'ClockwiseContourIntegral' => '∲',
    +  'CloseCurlyDoubleQuote' => '”',
    +  'CloseCurlyQuote' => '’',
    +  'clubs' => '♣',
    +  'clubsuit' => '♣',
    +  'Colon' => '∷',
    +  'colon' => ':',
    +  'Colone' => '⩴',
    +  'colone' => '≔',
    +  'coloneq' => '≔',
    +  'comma' => ',',
    +  'commat' => '@',
    +  'comp' => '∁',
    +  'compfn' => '∘',
    +  'complement' => '∁',
    +  'complexes' => 'ℂ',
    +  'cong' => '≅',
    +  'congdot' => '⩭',
    +  'Congruent' => '≡',
    +  'Conint' => '∯',
    +  'conint' => '∮',
    +  'ContourIntegral' => '∮',
    +  'Copf' => 'ℂ',
    +  'copf' => '𝕔',
    +  'coprod' => '∐',
    +  'Coproduct' => '∐',
    +  'COPY' => '©',
    +  'COP' => '©',
    +  'copy' => '©',
    +  'cop' => '©',
    +  'copysr' => '℗',
    +  'CounterClockwiseContourIntegral' => '∳',
    +  'crarr' => '↵',
    +  'Cross' => '⨯',
    +  'cross' => '✗',
    +  'Cscr' => '𝒞',
    +  'cscr' => '𝒸',
    +  'csub' => '⫏',
    +  'csube' => '⫑',
    +  'csup' => '⫐',
    +  'csupe' => '⫒',
    +  'ctdot' => '⋯',
    +  'cudarrl' => '⤸',
    +  'cudarrr' => '⤵',
    +  'cuepr' => '⋞',
    +  'cuesc' => '⋟',
    +  'cularr' => '↶',
    +  'cularrp' => '⤽',
    +  'Cup' => '⋓',
    +  'cup' => '∪',
    +  'cupbrcap' => '⩈',
    +  'CupCap' => '≍',
    +  'cupcap' => '⩆',
    +  'cupcup' => '⩊',
    +  'cupdot' => '⊍',
    +  'cupor' => '⩅',
    +  'cups' => '∪︀',
    +  'curarr' => '↷',
    +  'curarrm' => '⤼',
    +  'curlyeqprec' => '⋞',
    +  'curlyeqsucc' => '⋟',
    +  'curlyvee' => '⋎',
    +  'curlywedge' => '⋏',
    +  'curren' => '¤',
    +  'curre' => '¤',
    +  'curvearrowleft' => '↶',
    +  'curvearrowright' => '↷',
    +  'cuvee' => '⋎',
    +  'cuwed' => '⋏',
    +  'cwconint' => '∲',
    +  'cwint' => '∱',
    +  'cylcty' => '⌭',
    +  'Dagger' => '‡',
    +  'dagger' => '†',
    +  'daleth' => 'ℸ',
    +  'Darr' => '↡',
    +  'dArr' => '⇓',
    +  'darr' => '↓',
    +  'dash' => '‐',
    +  'Dashv' => '⫤',
    +  'dashv' => '⊣',
    +  'dbkarow' => '⤏',
    +  'dblac' => '˝',
    +  'Dcaron' => 'Ď',
    +  'dcaron' => 'ď',
    +  'Dcy' => 'Д',
    +  'dcy' => 'д',
    +  'DD' => 'ⅅ',
    +  'dd' => 'ⅆ',
    +  'ddagger' => '‡',
    +  'ddarr' => '⇊',
    +  'DDotrahd' => '⤑',
    +  'ddotseq' => '⩷',
    +  'deg' => '°',
    +  'de' => '°',
    +  'Del' => '∇',
    +  'Delta' => 'Δ',
    +  'delta' => 'δ',
    +  'demptyv' => '⦱',
    +  'dfisht' => '⥿',
    +  'Dfr' => '𝔇',
    +  'dfr' => '𝔡',
    +  'dHar' => '⥥',
    +  'dharl' => '⇃',
    +  'dharr' => '⇂',
    +  'DiacriticalAcute' => '´',
    +  'DiacriticalDot' => '˙',
    +  'DiacriticalDoubleAcute' => '˝',
    +  'DiacriticalGrave' => '`',
    +  'DiacriticalTilde' => '˜',
    +  'diam' => '⋄',
    +  'Diamond' => '⋄',
    +  'diamond' => '⋄',
    +  'diamondsuit' => '♦',
    +  'diams' => '♦',
    +  'die' => '¨',
    +  'DifferentialD' => 'ⅆ',
    +  'digamma' => 'ϝ',
    +  'disin' => '⋲',
    +  'div' => '÷',
    +  'divide' => '÷',
    +  'divid' => '÷',
    +  'divideontimes' => '⋇',
    +  'divonx' => '⋇',
    +  'DJcy' => 'Ђ',
    +  'djcy' => 'ђ',
    +  'dlcorn' => '⌞',
    +  'dlcrop' => '⌍',
    +  'dollar' => '$',
    +  'Dopf' => '𝔻',
    +  'dopf' => '𝕕',
    +  'Dot' => '¨',
    +  'dot' => '˙',
    +  'DotDot' => '⃜',
    +  'doteq' => '≐',
    +  'doteqdot' => '≑',
    +  'DotEqual' => '≐',
    +  'dotminus' => '∸',
    +  'dotplus' => '∔',
    +  'dotsquare' => '⊡',
    +  'doublebarwedge' => '⌆',
    +  'DoubleContourIntegral' => '∯',
    +  'DoubleDot' => '¨',
    +  'DoubleDownArrow' => '⇓',
    +  'DoubleLeftArrow' => '⇐',
    +  'DoubleLeftRightArrow' => '⇔',
    +  'DoubleLeftTee' => '⫤',
    +  'DoubleLongLeftArrow' => '⟸',
    +  'DoubleLongLeftRightArrow' => '⟺',
    +  'DoubleLongRightArrow' => '⟹',
    +  'DoubleRightArrow' => '⇒',
    +  'DoubleRightTee' => '⊨',
    +  'DoubleUpArrow' => '⇑',
    +  'DoubleUpDownArrow' => '⇕',
    +  'DoubleVerticalBar' => '∥',
    +  'DownArrow' => '↓',
    +  'Downarrow' => '⇓',
    +  'downarrow' => '↓',
    +  'DownArrowBar' => '⤓',
    +  'DownArrowUpArrow' => '⇵',
    +  'DownBreve' => '̑',
    +  'downdownarrows' => '⇊',
    +  'downharpoonleft' => '⇃',
    +  'downharpoonright' => '⇂',
    +  'DownLeftRightVector' => '⥐',
    +  'DownLeftTeeVector' => '⥞',
    +  'DownLeftVector' => '↽',
    +  'DownLeftVectorBar' => '⥖',
    +  'DownRightTeeVector' => '⥟',
    +  'DownRightVector' => '⇁',
    +  'DownRightVectorBar' => '⥗',
    +  'DownTee' => '⊤',
    +  'DownTeeArrow' => '↧',
    +  'drbkarow' => '⤐',
    +  'drcorn' => '⌟',
    +  'drcrop' => '⌌',
    +  'Dscr' => '𝒟',
    +  'dscr' => '𝒹',
    +  'DScy' => 'Ѕ',
    +  'dscy' => 'ѕ',
    +  'dsol' => '⧶',
    +  'Dstrok' => 'Đ',
    +  'dstrok' => 'đ',
    +  'dtdot' => '⋱',
    +  'dtri' => '▿',
    +  'dtrif' => '▾',
    +  'duarr' => '⇵',
    +  'duhar' => '⥯',
    +  'dwangle' => '⦦',
    +  'DZcy' => 'Џ',
    +  'dzcy' => 'џ',
    +  'dzigrarr' => '⟿',
    +  'Eacute' => 'É',
    +  'Eacut' => 'É',
    +  'eacute' => 'é',
    +  'eacut' => 'é',
    +  'easter' => '⩮',
    +  'Ecaron' => 'Ě',
    +  'ecaron' => 'ě',
    +  'ecir' => 'ê',
    +  'Ecirc' => 'Ê',
    +  'Ecir' => 'Ê',
    +  'ecirc' => 'ê',
    +  'ecolon' => '≕',
    +  'Ecy' => 'Э',
    +  'ecy' => 'э',
    +  'eDDot' => '⩷',
    +  'Edot' => 'Ė',
    +  'eDot' => '≑',
    +  'edot' => 'ė',
    +  'ee' => 'ⅇ',
    +  'efDot' => '≒',
    +  'Efr' => '𝔈',
    +  'efr' => '𝔢',
    +  'eg' => '⪚',
    +  'Egrave' => 'È',
    +  'Egrav' => 'È',
    +  'egrave' => 'è',
    +  'egrav' => 'è',
    +  'egs' => '⪖',
    +  'egsdot' => '⪘',
    +  'el' => '⪙',
    +  'Element' => '∈',
    +  'elinters' => '⏧',
    +  'ell' => 'ℓ',
    +  'els' => '⪕',
    +  'elsdot' => '⪗',
    +  'Emacr' => 'Ē',
    +  'emacr' => 'ē',
    +  'empty' => '∅',
    +  'emptyset' => '∅',
    +  'EmptySmallSquare' => '◻',
    +  'emptyv' => '∅',
    +  'EmptyVerySmallSquare' => '▫',
    +  'emsp' => ' ',
    +  'emsp13' => ' ',
    +  'emsp14' => ' ',
    +  'ENG' => 'Ŋ',
    +  'eng' => 'ŋ',
    +  'ensp' => ' ',
    +  'Eogon' => 'Ę',
    +  'eogon' => 'ę',
    +  'Eopf' => '𝔼',
    +  'eopf' => '𝕖',
    +  'epar' => '⋕',
    +  'eparsl' => '⧣',
    +  'eplus' => '⩱',
    +  'epsi' => 'ε',
    +  'Epsilon' => 'Ε',
    +  'epsilon' => 'ε',
    +  'epsiv' => 'ϵ',
    +  'eqcirc' => '≖',
    +  'eqcolon' => '≕',
    +  'eqsim' => '≂',
    +  'eqslantgtr' => '⪖',
    +  'eqslantless' => '⪕',
    +  'Equal' => '⩵',
    +  'equals' => '=',
    +  'EqualTilde' => '≂',
    +  'equest' => '≟',
    +  'Equilibrium' => '⇌',
    +  'equiv' => '≡',
    +  'equivDD' => '⩸',
    +  'eqvparsl' => '⧥',
    +  'erarr' => '⥱',
    +  'erDot' => '≓',
    +  'Escr' => 'ℰ',
    +  'escr' => 'ℯ',
    +  'esdot' => '≐',
    +  'Esim' => '⩳',
    +  'esim' => '≂',
    +  'Eta' => 'Η',
    +  'eta' => 'η',
    +  'ETH' => 'Ð',
    +  'ET' => 'Ð',
    +  'eth' => 'ð',
    +  'et' => 'ð',
    +  'Euml' => 'Ë',
    +  'Eum' => 'Ë',
    +  'euml' => 'ë',
    +  'eum' => 'ë',
    +  'euro' => '€',
    +  'excl' => '!',
    +  'exist' => '∃',
    +  'Exists' => '∃',
    +  'expectation' => 'ℰ',
    +  'ExponentialE' => 'ⅇ',
    +  'exponentiale' => 'ⅇ',
    +  'fallingdotseq' => '≒',
    +  'Fcy' => 'Ф',
    +  'fcy' => 'ф',
    +  'female' => '♀',
    +  'ffilig' => 'ffi',
    +  'fflig' => 'ff',
    +  'ffllig' => 'ffl',
    +  'Ffr' => '𝔉',
    +  'ffr' => '𝔣',
    +  'filig' => 'fi',
    +  'FilledSmallSquare' => '◼',
    +  'FilledVerySmallSquare' => '▪',
    +  'fjlig' => 'fj',
    +  'flat' => '♭',
    +  'fllig' => 'fl',
    +  'fltns' => '▱',
    +  'fnof' => 'ƒ',
    +  'Fopf' => '𝔽',
    +  'fopf' => '𝕗',
    +  'ForAll' => '∀',
    +  'forall' => '∀',
    +  'fork' => '⋔',
    +  'forkv' => '⫙',
    +  'Fouriertrf' => 'ℱ',
    +  'fpartint' => '⨍',
    +  'frac12' => '½',
    +  'frac1' => '¼',
    +  'frac13' => '⅓',
    +  'frac14' => '¼',
    +  'frac15' => '⅕',
    +  'frac16' => '⅙',
    +  'frac18' => '⅛',
    +  'frac23' => '⅔',
    +  'frac25' => '⅖',
    +  'frac34' => '¾',
    +  'frac3' => '¾',
    +  'frac35' => '⅗',
    +  'frac38' => '⅜',
    +  'frac45' => '⅘',
    +  'frac56' => '⅚',
    +  'frac58' => '⅝',
    +  'frac78' => '⅞',
    +  'frasl' => '⁄',
    +  'frown' => '⌢',
    +  'Fscr' => 'ℱ',
    +  'fscr' => '𝒻',
    +  'gacute' => 'ǵ',
    +  'Gamma' => 'Γ',
    +  'gamma' => 'γ',
    +  'Gammad' => 'Ϝ',
    +  'gammad' => 'ϝ',
    +  'gap' => '⪆',
    +  'Gbreve' => 'Ğ',
    +  'gbreve' => 'ğ',
    +  'Gcedil' => 'Ģ',
    +  'Gcirc' => 'Ĝ',
    +  'gcirc' => 'ĝ',
    +  'Gcy' => 'Г',
    +  'gcy' => 'г',
    +  'Gdot' => 'Ġ',
    +  'gdot' => 'ġ',
    +  'gE' => '≧',
    +  'ge' => '≥',
    +  'gEl' => '⪌',
    +  'gel' => '⋛',
    +  'geq' => '≥',
    +  'geqq' => '≧',
    +  'geqslant' => '⩾',
    +  'ges' => '⩾',
    +  'gescc' => '⪩',
    +  'gesdot' => '⪀',
    +  'gesdoto' => '⪂',
    +  'gesdotol' => '⪄',
    +  'gesl' => '⋛︀',
    +  'gesles' => '⪔',
    +  'Gfr' => '𝔊',
    +  'gfr' => '𝔤',
    +  'Gg' => '⋙',
    +  'gg' => '≫',
    +  'ggg' => '⋙',
    +  'gimel' => 'ℷ',
    +  'GJcy' => 'Ѓ',
    +  'gjcy' => 'ѓ',
    +  'gl' => '≷',
    +  'gla' => '⪥',
    +  'glE' => '⪒',
    +  'glj' => '⪤',
    +  'gnap' => '⪊',
    +  'gnapprox' => '⪊',
    +  'gnE' => '≩',
    +  'gne' => '⪈',
    +  'gneq' => '⪈',
    +  'gneqq' => '≩',
    +  'gnsim' => '⋧',
    +  'Gopf' => '𝔾',
    +  'gopf' => '𝕘',
    +  'grave' => '`',
    +  'GreaterEqual' => '≥',
    +  'GreaterEqualLess' => '⋛',
    +  'GreaterFullEqual' => '≧',
    +  'GreaterGreater' => '⪢',
    +  'GreaterLess' => '≷',
    +  'GreaterSlantEqual' => '⩾',
    +  'GreaterTilde' => '≳',
    +  'Gscr' => '𝒢',
    +  'gscr' => 'ℊ',
    +  'gsim' => '≳',
    +  'gsime' => '⪎',
    +  'gsiml' => '⪐',
    +  'GT' => '>',
    +  'G' => '>',
    +  'Gt' => '≫',
    +  'gt' => '>',
    +  'g' => '>',
    +  'gtcc' => '⪧',
    +  'gtcir' => '⩺',
    +  'gtdot' => '⋗',
    +  'gtlPar' => '⦕',
    +  'gtquest' => '⩼',
    +  'gtrapprox' => '⪆',
    +  'gtrarr' => '⥸',
    +  'gtrdot' => '⋗',
    +  'gtreqless' => '⋛',
    +  'gtreqqless' => '⪌',
    +  'gtrless' => '≷',
    +  'gtrsim' => '≳',
    +  'gvertneqq' => '≩︀',
    +  'gvnE' => '≩︀',
    +  'Hacek' => 'ˇ',
    +  'hairsp' => ' ',
    +  'half' => '½',
    +  'hamilt' => 'ℋ',
    +  'HARDcy' => 'Ъ',
    +  'hardcy' => 'ъ',
    +  'hArr' => '⇔',
    +  'harr' => '↔',
    +  'harrcir' => '⥈',
    +  'harrw' => '↭',
    +  'Hat' => '^',
    +  'hbar' => 'ℏ',
    +  'Hcirc' => 'Ĥ',
    +  'hcirc' => 'ĥ',
    +  'hearts' => '♥',
    +  'heartsuit' => '♥',
    +  'hellip' => '…',
    +  'hercon' => '⊹',
    +  'Hfr' => 'ℌ',
    +  'hfr' => '𝔥',
    +  'HilbertSpace' => 'ℋ',
    +  'hksearow' => '⤥',
    +  'hkswarow' => '⤦',
    +  'hoarr' => '⇿',
    +  'homtht' => '∻',
    +  'hookleftarrow' => '↩',
    +  'hookrightarrow' => '↪',
    +  'Hopf' => 'ℍ',
    +  'hopf' => '𝕙',
    +  'horbar' => '―',
    +  'HorizontalLine' => '─',
    +  'Hscr' => 'ℋ',
    +  'hscr' => '𝒽',
    +  'hslash' => 'ℏ',
    +  'Hstrok' => 'Ħ',
    +  'hstrok' => 'ħ',
    +  'HumpDownHump' => '≎',
    +  'HumpEqual' => '≏',
    +  'hybull' => '⁃',
    +  'hyphen' => '‐',
    +  'Iacute' => 'Í',
    +  'Iacut' => 'Í',
    +  'iacute' => 'í',
    +  'iacut' => 'í',
    +  'ic' => '⁣',
    +  'Icirc' => 'Î',
    +  'Icir' => 'Î',
    +  'icirc' => 'î',
    +  'icir' => 'î',
    +  'Icy' => 'И',
    +  'icy' => 'и',
    +  'Idot' => 'İ',
    +  'IEcy' => 'Е',
    +  'iecy' => 'е',
    +  'iexcl' => '¡',
    +  'iexc' => '¡',
    +  'iff' => '⇔',
    +  'Ifr' => 'ℑ',
    +  'ifr' => '𝔦',
    +  'Igrave' => 'Ì',
    +  'Igrav' => 'Ì',
    +  'igrave' => 'ì',
    +  'igrav' => 'ì',
    +  'ii' => 'ⅈ',
    +  'iiiint' => '⨌',
    +  'iiint' => '∭',
    +  'iinfin' => '⧜',
    +  'iiota' => '℩',
    +  'IJlig' => 'IJ',
    +  'ijlig' => 'ij',
    +  'Im' => 'ℑ',
    +  'Imacr' => 'Ī',
    +  'imacr' => 'ī',
    +  'image' => 'ℑ',
    +  'ImaginaryI' => 'ⅈ',
    +  'imagline' => 'ℐ',
    +  'imagpart' => 'ℑ',
    +  'imath' => 'ı',
    +  'imof' => '⊷',
    +  'imped' => 'Ƶ',
    +  'Implies' => '⇒',
    +  'in' => '∈',
    +  'incare' => '℅',
    +  'infin' => '∞',
    +  'infintie' => '⧝',
    +  'inodot' => 'ı',
    +  'Int' => '∬',
    +  'int' => '∫',
    +  'intcal' => '⊺',
    +  'integers' => 'ℤ',
    +  'Integral' => '∫',
    +  'intercal' => '⊺',
    +  'Intersection' => '⋂',
    +  'intlarhk' => '⨗',
    +  'intprod' => '⨼',
    +  'InvisibleComma' => '⁣',
    +  'InvisibleTimes' => '⁢',
    +  'IOcy' => 'Ё',
    +  'iocy' => 'ё',
    +  'Iogon' => 'Į',
    +  'iogon' => 'į',
    +  'Iopf' => '𝕀',
    +  'iopf' => '𝕚',
    +  'Iota' => 'Ι',
    +  'iota' => 'ι',
    +  'iprod' => '⨼',
    +  'iquest' => '¿',
    +  'iques' => '¿',
    +  'Iscr' => 'ℐ',
    +  'iscr' => '𝒾',
    +  'isin' => '∈',
    +  'isindot' => '⋵',
    +  'isinE' => '⋹',
    +  'isins' => '⋴',
    +  'isinsv' => '⋳',
    +  'isinv' => '∈',
    +  'it' => '⁢',
    +  'Itilde' => 'Ĩ',
    +  'itilde' => 'ĩ',
    +  'Iukcy' => 'І',
    +  'iukcy' => 'і',
    +  'Iuml' => 'Ï',
    +  'Ium' => 'Ï',
    +  'iuml' => 'ï',
    +  'ium' => 'ï',
    +  'Jcirc' => 'Ĵ',
    +  'jcirc' => 'ĵ',
    +  'Jcy' => 'Й',
    +  'jcy' => 'й',
    +  'Jfr' => '𝔍',
    +  'jfr' => '𝔧',
    +  'jmath' => 'ȷ',
    +  'Jopf' => '𝕁',
    +  'jopf' => '𝕛',
    +  'Jscr' => '𝒥',
    +  'jscr' => '𝒿',
    +  'Jsercy' => 'Ј',
    +  'jsercy' => 'ј',
    +  'Jukcy' => 'Є',
    +  'jukcy' => 'є',
    +  'Kappa' => 'Κ',
    +  'kappa' => 'κ',
    +  'kappav' => 'ϰ',
    +  'Kcedil' => 'Ķ',
    +  'kcedil' => 'ķ',
    +  'Kcy' => 'К',
    +  'kcy' => 'к',
    +  'Kfr' => '𝔎',
    +  'kfr' => '𝔨',
    +  'kgreen' => 'ĸ',
    +  'KHcy' => 'Х',
    +  'khcy' => 'х',
    +  'KJcy' => 'Ќ',
    +  'kjcy' => 'ќ',
    +  'Kopf' => '𝕂',
    +  'kopf' => '𝕜',
    +  'Kscr' => '𝒦',
    +  'kscr' => '𝓀',
    +  'lAarr' => '⇚',
    +  'Lacute' => 'Ĺ',
    +  'lacute' => 'ĺ',
    +  'laemptyv' => '⦴',
    +  'lagran' => 'ℒ',
    +  'Lambda' => 'Λ',
    +  'lambda' => 'λ',
    +  'Lang' => '⟪',
    +  'lang' => '⟨',
    +  'langd' => '⦑',
    +  'langle' => '⟨',
    +  'lap' => '⪅',
    +  'Laplacetrf' => 'ℒ',
    +  'laquo' => '«',
    +  'laqu' => '«',
    +  'Larr' => '↞',
    +  'lArr' => '⇐',
    +  'larr' => '←',
    +  'larrb' => '⇤',
    +  'larrbfs' => '⤟',
    +  'larrfs' => '⤝',
    +  'larrhk' => '↩',
    +  'larrlp' => '↫',
    +  'larrpl' => '⤹',
    +  'larrsim' => '⥳',
    +  'larrtl' => '↢',
    +  'lat' => '⪫',
    +  'lAtail' => '⤛',
    +  'latail' => '⤙',
    +  'late' => '⪭',
    +  'lates' => '⪭︀',
    +  'lBarr' => '⤎',
    +  'lbarr' => '⤌',
    +  'lbbrk' => '❲',
    +  'lbrace' => '{',
    +  'lbrack' => '[',
    +  'lbrke' => '⦋',
    +  'lbrksld' => '⦏',
    +  'lbrkslu' => '⦍',
    +  'Lcaron' => 'Ľ',
    +  'lcaron' => 'ľ',
    +  'Lcedil' => 'Ļ',
    +  'lcedil' => 'ļ',
    +  'lceil' => '⌈',
    +  'lcub' => '{',
    +  'Lcy' => 'Л',
    +  'lcy' => 'л',
    +  'ldca' => '⤶',
    +  'ldquo' => '“',
    +  'ldquor' => '„',
    +  'ldrdhar' => '⥧',
    +  'ldrushar' => '⥋',
    +  'ldsh' => '↲',
    +  'lE' => '≦',
    +  'le' => '≤',
    +  'LeftAngleBracket' => '⟨',
    +  'LeftArrow' => '←',
    +  'Leftarrow' => '⇐',
    +  'leftarrow' => '←',
    +  'LeftArrowBar' => '⇤',
    +  'LeftArrowRightArrow' => '⇆',
    +  'leftarrowtail' => '↢',
    +  'LeftCeiling' => '⌈',
    +  'LeftDoubleBracket' => '⟦',
    +  'LeftDownTeeVector' => '⥡',
    +  'LeftDownVector' => '⇃',
    +  'LeftDownVectorBar' => '⥙',
    +  'LeftFloor' => '⌊',
    +  'leftharpoondown' => '↽',
    +  'leftharpoonup' => '↼',
    +  'leftleftarrows' => '⇇',
    +  'LeftRightArrow' => '↔',
    +  'Leftrightarrow' => '⇔',
    +  'leftrightarrow' => '↔',
    +  'leftrightarrows' => '⇆',
    +  'leftrightharpoons' => '⇋',
    +  'leftrightsquigarrow' => '↭',
    +  'LeftRightVector' => '⥎',
    +  'LeftTee' => '⊣',
    +  'LeftTeeArrow' => '↤',
    +  'LeftTeeVector' => '⥚',
    +  'leftthreetimes' => '⋋',
    +  'LeftTriangle' => '⊲',
    +  'LeftTriangleBar' => '⧏',
    +  'LeftTriangleEqual' => '⊴',
    +  'LeftUpDownVector' => '⥑',
    +  'LeftUpTeeVector' => '⥠',
    +  'LeftUpVector' => '↿',
    +  'LeftUpVectorBar' => '⥘',
    +  'LeftVector' => '↼',
    +  'LeftVectorBar' => '⥒',
    +  'lEg' => '⪋',
    +  'leg' => '⋚',
    +  'leq' => '≤',
    +  'leqq' => '≦',
    +  'leqslant' => '⩽',
    +  'les' => '⩽',
    +  'lescc' => '⪨',
    +  'lesdot' => '⩿',
    +  'lesdoto' => '⪁',
    +  'lesdotor' => '⪃',
    +  'lesg' => '⋚︀',
    +  'lesges' => '⪓',
    +  'lessapprox' => '⪅',
    +  'lessdot' => '⋖',
    +  'lesseqgtr' => '⋚',
    +  'lesseqqgtr' => '⪋',
    +  'LessEqualGreater' => '⋚',
    +  'LessFullEqual' => '≦',
    +  'LessGreater' => '≶',
    +  'lessgtr' => '≶',
    +  'LessLess' => '⪡',
    +  'lesssim' => '≲',
    +  'LessSlantEqual' => '⩽',
    +  'LessTilde' => '≲',
    +  'lfisht' => '⥼',
    +  'lfloor' => '⌊',
    +  'Lfr' => '𝔏',
    +  'lfr' => '𝔩',
    +  'lg' => '≶',
    +  'lgE' => '⪑',
    +  'lHar' => '⥢',
    +  'lhard' => '↽',
    +  'lharu' => '↼',
    +  'lharul' => '⥪',
    +  'lhblk' => '▄',
    +  'LJcy' => 'Љ',
    +  'ljcy' => 'љ',
    +  'Ll' => '⋘',
    +  'll' => '≪',
    +  'llarr' => '⇇',
    +  'llcorner' => '⌞',
    +  'Lleftarrow' => '⇚',
    +  'llhard' => '⥫',
    +  'lltri' => '◺',
    +  'Lmidot' => 'Ŀ',
    +  'lmidot' => 'ŀ',
    +  'lmoust' => '⎰',
    +  'lmoustache' => '⎰',
    +  'lnap' => '⪉',
    +  'lnapprox' => '⪉',
    +  'lnE' => '≨',
    +  'lne' => '⪇',
    +  'lneq' => '⪇',
    +  'lneqq' => '≨',
    +  'lnsim' => '⋦',
    +  'loang' => '⟬',
    +  'loarr' => '⇽',
    +  'lobrk' => '⟦',
    +  'LongLeftArrow' => '⟵',
    +  'Longleftarrow' => '⟸',
    +  'longleftarrow' => '⟵',
    +  'LongLeftRightArrow' => '⟷',
    +  'Longleftrightarrow' => '⟺',
    +  'longleftrightarrow' => '⟷',
    +  'longmapsto' => '⟼',
    +  'LongRightArrow' => '⟶',
    +  'Longrightarrow' => '⟹',
    +  'longrightarrow' => '⟶',
    +  'looparrowleft' => '↫',
    +  'looparrowright' => '↬',
    +  'lopar' => '⦅',
    +  'Lopf' => '𝕃',
    +  'lopf' => '𝕝',
    +  'loplus' => '⨭',
    +  'lotimes' => '⨴',
    +  'lowast' => '∗',
    +  'lowbar' => '_',
    +  'LowerLeftArrow' => '↙',
    +  'LowerRightArrow' => '↘',
    +  'loz' => '◊',
    +  'lozenge' => '◊',
    +  'lozf' => '⧫',
    +  'lpar' => '(',
    +  'lparlt' => '⦓',
    +  'lrarr' => '⇆',
    +  'lrcorner' => '⌟',
    +  'lrhar' => '⇋',
    +  'lrhard' => '⥭',
    +  'lrm' => '‎',
    +  'lrtri' => '⊿',
    +  'lsaquo' => '‹',
    +  'Lscr' => 'ℒ',
    +  'lscr' => '𝓁',
    +  'Lsh' => '↰',
    +  'lsh' => '↰',
    +  'lsim' => '≲',
    +  'lsime' => '⪍',
    +  'lsimg' => '⪏',
    +  'lsqb' => '[',
    +  'lsquo' => '‘',
    +  'lsquor' => '‚',
    +  'Lstrok' => 'Ł',
    +  'lstrok' => 'ł',
    +  'LT' => '<',
    +  'L' => '<',
    +  'Lt' => '≪',
    +  'lt' => '<',
    +  'l' => '<',
    +  'ltcc' => '⪦',
    +  'ltcir' => '⩹',
    +  'ltdot' => '⋖',
    +  'lthree' => '⋋',
    +  'ltimes' => '⋉',
    +  'ltlarr' => '⥶',
    +  'ltquest' => '⩻',
    +  'ltri' => '◃',
    +  'ltrie' => '⊴',
    +  'ltrif' => '◂',
    +  'ltrPar' => '⦖',
    +  'lurdshar' => '⥊',
    +  'luruhar' => '⥦',
    +  'lvertneqq' => '≨︀',
    +  'lvnE' => '≨︀',
    +  'macr' => '¯',
    +  'mac' => '¯',
    +  'male' => '♂',
    +  'malt' => '✠',
    +  'maltese' => '✠',
    +  'Map' => '⤅',
    +  'map' => '↦',
    +  'mapsto' => '↦',
    +  'mapstodown' => '↧',
    +  'mapstoleft' => '↤',
    +  'mapstoup' => '↥',
    +  'marker' => '▮',
    +  'mcomma' => '⨩',
    +  'Mcy' => 'М',
    +  'mcy' => 'м',
    +  'mdash' => '—',
    +  'mDDot' => '∺',
    +  'measuredangle' => '∡',
    +  'MediumSpace' => ' ',
    +  'Mellintrf' => 'ℳ',
    +  'Mfr' => '𝔐',
    +  'mfr' => '𝔪',
    +  'mho' => '℧',
    +  'micro' => 'µ',
    +  'micr' => 'µ',
    +  'mid' => '∣',
    +  'midast' => '*',
    +  'midcir' => '⫰',
    +  'middot' => '·',
    +  'middo' => '·',
    +  'minus' => '−',
    +  'minusb' => '⊟',
    +  'minusd' => '∸',
    +  'minusdu' => '⨪',
    +  'MinusPlus' => '∓',
    +  'mlcp' => '⫛',
    +  'mldr' => '…',
    +  'mnplus' => '∓',
    +  'models' => '⊧',
    +  'Mopf' => '𝕄',
    +  'mopf' => '𝕞',
    +  'mp' => '∓',
    +  'Mscr' => 'ℳ',
    +  'mscr' => '𝓂',
    +  'mstpos' => '∾',
    +  'Mu' => 'Μ',
    +  'mu' => 'μ',
    +  'multimap' => '⊸',
    +  'mumap' => '⊸',
    +  'nabla' => '∇',
    +  'Nacute' => 'Ń',
    +  'nacute' => 'ń',
    +  'nang' => '∠⃒',
    +  'nap' => '≉',
    +  'napE' => '⩰̸',
    +  'napid' => '≋̸',
    +  'napos' => 'ʼn',
    +  'napprox' => '≉',
    +  'natur' => '♮',
    +  'natural' => '♮',
    +  'naturals' => 'ℕ',
    +  'nbsp' => ' ',
    +  'nbs' => ' ',
    +  'nbump' => '≎̸',
    +  'nbumpe' => '≏̸',
    +  'ncap' => '⩃',
    +  'Ncaron' => 'Ň',
    +  'ncaron' => 'ň',
    +  'Ncedil' => 'Ņ',
    +  'ncedil' => 'ņ',
    +  'ncong' => '≇',
    +  'ncongdot' => '⩭̸',
    +  'ncup' => '⩂',
    +  'Ncy' => 'Н',
    +  'ncy' => 'н',
    +  'ndash' => '–',
    +  'ne' => '≠',
    +  'nearhk' => '⤤',
    +  'neArr' => '⇗',
    +  'nearr' => '↗',
    +  'nearrow' => '↗',
    +  'nedot' => '≐̸',
    +  'NegativeMediumSpace' => '​',
    +  'NegativeThickSpace' => '​',
    +  'NegativeThinSpace' => '​',
    +  'NegativeVeryThinSpace' => '​',
    +  'nequiv' => '≢',
    +  'nesear' => '⤨',
    +  'nesim' => '≂̸',
    +  'NestedGreaterGreater' => '≫',
    +  'NestedLessLess' => '≪',
    +  'NewLine' => '
    +',
    +  'nexist' => '∄',
    +  'nexists' => '∄',
    +  'Nfr' => '𝔑',
    +  'nfr' => '𝔫',
    +  'ngE' => '≧̸',
    +  'nge' => '≱',
    +  'ngeq' => '≱',
    +  'ngeqq' => '≧̸',
    +  'ngeqslant' => '⩾̸',
    +  'nges' => '⩾̸',
    +  'nGg' => '⋙̸',
    +  'ngsim' => '≵',
    +  'nGt' => '≫⃒',
    +  'ngt' => '≯',
    +  'ngtr' => '≯',
    +  'nGtv' => '≫̸',
    +  'nhArr' => '⇎',
    +  'nharr' => '↮',
    +  'nhpar' => '⫲',
    +  'ni' => '∋',
    +  'nis' => '⋼',
    +  'nisd' => '⋺',
    +  'niv' => '∋',
    +  'NJcy' => 'Њ',
    +  'njcy' => 'њ',
    +  'nlArr' => '⇍',
    +  'nlarr' => '↚',
    +  'nldr' => '‥',
    +  'nlE' => '≦̸',
    +  'nle' => '≰',
    +  'nLeftarrow' => '⇍',
    +  'nleftarrow' => '↚',
    +  'nLeftrightarrow' => '⇎',
    +  'nleftrightarrow' => '↮',
    +  'nleq' => '≰',
    +  'nleqq' => '≦̸',
    +  'nleqslant' => '⩽̸',
    +  'nles' => '⩽̸',
    +  'nless' => '≮',
    +  'nLl' => '⋘̸',
    +  'nlsim' => '≴',
    +  'nLt' => '≪⃒',
    +  'nlt' => '≮',
    +  'nltri' => '⋪',
    +  'nltrie' => '⋬',
    +  'nLtv' => '≪̸',
    +  'nmid' => '∤',
    +  'NoBreak' => '⁠',
    +  'NonBreakingSpace' => ' ',
    +  'Nopf' => 'ℕ',
    +  'nopf' => '𝕟',
    +  'Not' => '⫬',
    +  'not' => '¬',
    +  'no' => '¬',
    +  'NotCongruent' => '≢',
    +  'NotCupCap' => '≭',
    +  'NotDoubleVerticalBar' => '∦',
    +  'NotElement' => '∉',
    +  'NotEqual' => '≠',
    +  'NotEqualTilde' => '≂̸',
    +  'NotExists' => '∄',
    +  'NotGreater' => '≯',
    +  'NotGreaterEqual' => '≱',
    +  'NotGreaterFullEqual' => '≧̸',
    +  'NotGreaterGreater' => '≫̸',
    +  'NotGreaterLess' => '≹',
    +  'NotGreaterSlantEqual' => '⩾̸',
    +  'NotGreaterTilde' => '≵',
    +  'NotHumpDownHump' => '≎̸',
    +  'NotHumpEqual' => '≏̸',
    +  'notin' => '∉',
    +  'notindot' => '⋵̸',
    +  'notinE' => '⋹̸',
    +  'notinva' => '∉',
    +  'notinvb' => '⋷',
    +  'notinvc' => '⋶',
    +  'NotLeftTriangle' => '⋪',
    +  'NotLeftTriangleBar' => '⧏̸',
    +  'NotLeftTriangleEqual' => '⋬',
    +  'NotLess' => '≮',
    +  'NotLessEqual' => '≰',
    +  'NotLessGreater' => '≸',
    +  'NotLessLess' => '≪̸',
    +  'NotLessSlantEqual' => '⩽̸',
    +  'NotLessTilde' => '≴',
    +  'NotNestedGreaterGreater' => '⪢̸',
    +  'NotNestedLessLess' => '⪡̸',
    +  'notni' => '∌',
    +  'notniva' => '∌',
    +  'notnivb' => '⋾',
    +  'notnivc' => '⋽',
    +  'NotPrecedes' => '⊀',
    +  'NotPrecedesEqual' => '⪯̸',
    +  'NotPrecedesSlantEqual' => '⋠',
    +  'NotReverseElement' => '∌',
    +  'NotRightTriangle' => '⋫',
    +  'NotRightTriangleBar' => '⧐̸',
    +  'NotRightTriangleEqual' => '⋭',
    +  'NotSquareSubset' => '⊏̸',
    +  'NotSquareSubsetEqual' => '⋢',
    +  'NotSquareSuperset' => '⊐̸',
    +  'NotSquareSupersetEqual' => '⋣',
    +  'NotSubset' => '⊂⃒',
    +  'NotSubsetEqual' => '⊈',
    +  'NotSucceeds' => '⊁',
    +  'NotSucceedsEqual' => '⪰̸',
    +  'NotSucceedsSlantEqual' => '⋡',
    +  'NotSucceedsTilde' => '≿̸',
    +  'NotSuperset' => '⊃⃒',
    +  'NotSupersetEqual' => '⊉',
    +  'NotTilde' => '≁',
    +  'NotTildeEqual' => '≄',
    +  'NotTildeFullEqual' => '≇',
    +  'NotTildeTilde' => '≉',
    +  'NotVerticalBar' => '∤',
    +  'npar' => '∦',
    +  'nparallel' => '∦',
    +  'nparsl' => '⫽⃥',
    +  'npart' => '∂̸',
    +  'npolint' => '⨔',
    +  'npr' => '⊀',
    +  'nprcue' => '⋠',
    +  'npre' => '⪯̸',
    +  'nprec' => '⊀',
    +  'npreceq' => '⪯̸',
    +  'nrArr' => '⇏',
    +  'nrarr' => '↛',
    +  'nrarrc' => '⤳̸',
    +  'nrarrw' => '↝̸',
    +  'nRightarrow' => '⇏',
    +  'nrightarrow' => '↛',
    +  'nrtri' => '⋫',
    +  'nrtrie' => '⋭',
    +  'nsc' => '⊁',
    +  'nsccue' => '⋡',
    +  'nsce' => '⪰̸',
    +  'Nscr' => '𝒩',
    +  'nscr' => '𝓃',
    +  'nshortmid' => '∤',
    +  'nshortparallel' => '∦',
    +  'nsim' => '≁',
    +  'nsime' => '≄',
    +  'nsimeq' => '≄',
    +  'nsmid' => '∤',
    +  'nspar' => '∦',
    +  'nsqsube' => '⋢',
    +  'nsqsupe' => '⋣',
    +  'nsub' => '⊄',
    +  'nsubE' => '⫅̸',
    +  'nsube' => '⊈',
    +  'nsubset' => '⊂⃒',
    +  'nsubseteq' => '⊈',
    +  'nsubseteqq' => '⫅̸',
    +  'nsucc' => '⊁',
    +  'nsucceq' => '⪰̸',
    +  'nsup' => '⊅',
    +  'nsupE' => '⫆̸',
    +  'nsupe' => '⊉',
    +  'nsupset' => '⊃⃒',
    +  'nsupseteq' => '⊉',
    +  'nsupseteqq' => '⫆̸',
    +  'ntgl' => '≹',
    +  'Ntilde' => 'Ñ',
    +  'Ntild' => 'Ñ',
    +  'ntilde' => 'ñ',
    +  'ntild' => 'ñ',
    +  'ntlg' => '≸',
    +  'ntriangleleft' => '⋪',
    +  'ntrianglelefteq' => '⋬',
    +  'ntriangleright' => '⋫',
    +  'ntrianglerighteq' => '⋭',
    +  'Nu' => 'Ν',
    +  'nu' => 'ν',
    +  'num' => '#',
    +  'numero' => '№',
    +  'numsp' => ' ',
    +  'nvap' => '≍⃒',
    +  'nVDash' => '⊯',
    +  'nVdash' => '⊮',
    +  'nvDash' => '⊭',
    +  'nvdash' => '⊬',
    +  'nvge' => '≥⃒',
    +  'nvgt' => '>⃒',
    +  'nvHarr' => '⤄',
    +  'nvinfin' => '⧞',
    +  'nvlArr' => '⤂',
    +  'nvle' => '≤⃒',
    +  'nvlt' => '<⃒',
    +  'nvltrie' => '⊴⃒',
    +  'nvrArr' => '⤃',
    +  'nvrtrie' => '⊵⃒',
    +  'nvsim' => '∼⃒',
    +  'nwarhk' => '⤣',
    +  'nwArr' => '⇖',
    +  'nwarr' => '↖',
    +  'nwarrow' => '↖',
    +  'nwnear' => '⤧',
    +  'Oacute' => 'Ó',
    +  'Oacut' => 'Ó',
    +  'oacute' => 'ó',
    +  'oacut' => 'ó',
    +  'oast' => '⊛',
    +  'ocir' => 'ô',
    +  'Ocirc' => 'Ô',
    +  'Ocir' => 'Ô',
    +  'ocirc' => 'ô',
    +  'Ocy' => 'О',
    +  'ocy' => 'о',
    +  'odash' => '⊝',
    +  'Odblac' => 'Ő',
    +  'odblac' => 'ő',
    +  'odiv' => '⨸',
    +  'odot' => '⊙',
    +  'odsold' => '⦼',
    +  'OElig' => 'Œ',
    +  'oelig' => 'œ',
    +  'ofcir' => '⦿',
    +  'Ofr' => '𝔒',
    +  'ofr' => '𝔬',
    +  'ogon' => '˛',
    +  'Ograve' => 'Ò',
    +  'Ograv' => 'Ò',
    +  'ograve' => 'ò',
    +  'ograv' => 'ò',
    +  'ogt' => '⧁',
    +  'ohbar' => '⦵',
    +  'ohm' => 'Ω',
    +  'oint' => '∮',
    +  'olarr' => '↺',
    +  'olcir' => '⦾',
    +  'olcross' => '⦻',
    +  'oline' => '‾',
    +  'olt' => '⧀',
    +  'Omacr' => 'Ō',
    +  'omacr' => 'ō',
    +  'Omega' => 'Ω',
    +  'omega' => 'ω',
    +  'Omicron' => 'Ο',
    +  'omicron' => 'ο',
    +  'omid' => '⦶',
    +  'ominus' => '⊖',
    +  'Oopf' => '𝕆',
    +  'oopf' => '𝕠',
    +  'opar' => '⦷',
    +  'OpenCurlyDoubleQuote' => '“',
    +  'OpenCurlyQuote' => '‘',
    +  'operp' => '⦹',
    +  'oplus' => '⊕',
    +  'Or' => '⩔',
    +  'or' => '∨',
    +  'orarr' => '↻',
    +  'ord' => 'º',
    +  'order' => 'ℴ',
    +  'orderof' => 'ℴ',
    +  'ordf' => 'ª',
    +  'ordm' => 'º',
    +  'origof' => '⊶',
    +  'oror' => '⩖',
    +  'orslope' => '⩗',
    +  'orv' => '⩛',
    +  'oS' => 'Ⓢ',
    +  'Oscr' => '𝒪',
    +  'oscr' => 'ℴ',
    +  'Oslash' => 'Ø',
    +  'Oslas' => 'Ø',
    +  'oslash' => 'ø',
    +  'oslas' => 'ø',
    +  'osol' => '⊘',
    +  'Otilde' => 'Õ',
    +  'Otild' => 'Õ',
    +  'otilde' => 'õ',
    +  'otild' => 'õ',
    +  'Otimes' => '⨷',
    +  'otimes' => '⊗',
    +  'otimesas' => '⨶',
    +  'Ouml' => 'Ö',
    +  'Oum' => 'Ö',
    +  'ouml' => 'ö',
    +  'oum' => 'ö',
    +  'ovbar' => '⌽',
    +  'OverBar' => '‾',
    +  'OverBrace' => '⏞',
    +  'OverBracket' => '⎴',
    +  'OverParenthesis' => '⏜',
    +  'par' => '¶',
    +  'para' => '¶',
    +  'parallel' => '∥',
    +  'parsim' => '⫳',
    +  'parsl' => '⫽',
    +  'part' => '∂',
    +  'PartialD' => '∂',
    +  'Pcy' => 'П',
    +  'pcy' => 'п',
    +  'percnt' => '%',
    +  'period' => '.',
    +  'permil' => '‰',
    +  'perp' => '⊥',
    +  'pertenk' => '‱',
    +  'Pfr' => '𝔓',
    +  'pfr' => '𝔭',
    +  'Phi' => 'Φ',
    +  'phi' => 'φ',
    +  'phiv' => 'ϕ',
    +  'phmmat' => 'ℳ',
    +  'phone' => '☎',
    +  'Pi' => 'Π',
    +  'pi' => 'π',
    +  'pitchfork' => '⋔',
    +  'piv' => 'ϖ',
    +  'planck' => 'ℏ',
    +  'planckh' => 'ℎ',
    +  'plankv' => 'ℏ',
    +  'plus' => '+',
    +  'plusacir' => '⨣',
    +  'plusb' => '⊞',
    +  'pluscir' => '⨢',
    +  'plusdo' => '∔',
    +  'plusdu' => '⨥',
    +  'pluse' => '⩲',
    +  'PlusMinus' => '±',
    +  'plusmn' => '±',
    +  'plusm' => '±',
    +  'plussim' => '⨦',
    +  'plustwo' => '⨧',
    +  'pm' => '±',
    +  'Poincareplane' => 'ℌ',
    +  'pointint' => '⨕',
    +  'Popf' => 'ℙ',
    +  'popf' => '𝕡',
    +  'pound' => '£',
    +  'poun' => '£',
    +  'Pr' => '⪻',
    +  'pr' => '≺',
    +  'prap' => '⪷',
    +  'prcue' => '≼',
    +  'prE' => '⪳',
    +  'pre' => '⪯',
    +  'prec' => '≺',
    +  'precapprox' => '⪷',
    +  'preccurlyeq' => '≼',
    +  'Precedes' => '≺',
    +  'PrecedesEqual' => '⪯',
    +  'PrecedesSlantEqual' => '≼',
    +  'PrecedesTilde' => '≾',
    +  'preceq' => '⪯',
    +  'precnapprox' => '⪹',
    +  'precneqq' => '⪵',
    +  'precnsim' => '⋨',
    +  'precsim' => '≾',
    +  'Prime' => '″',
    +  'prime' => '′',
    +  'primes' => 'ℙ',
    +  'prnap' => '⪹',
    +  'prnE' => '⪵',
    +  'prnsim' => '⋨',
    +  'prod' => '∏',
    +  'Product' => '∏',
    +  'profalar' => '⌮',
    +  'profline' => '⌒',
    +  'profsurf' => '⌓',
    +  'prop' => '∝',
    +  'Proportion' => '∷',
    +  'Proportional' => '∝',
    +  'propto' => '∝',
    +  'prsim' => '≾',
    +  'prurel' => '⊰',
    +  'Pscr' => '𝒫',
    +  'pscr' => '𝓅',
    +  'Psi' => 'Ψ',
    +  'psi' => 'ψ',
    +  'puncsp' => ' ',
    +  'Qfr' => '𝔔',
    +  'qfr' => '𝔮',
    +  'qint' => '⨌',
    +  'Qopf' => 'ℚ',
    +  'qopf' => '𝕢',
    +  'qprime' => '⁗',
    +  'Qscr' => '𝒬',
    +  'qscr' => '𝓆',
    +  'quaternions' => 'ℍ',
    +  'quatint' => '⨖',
    +  'quest' => '?',
    +  'questeq' => '≟',
    +  'QUOT' => '"',
    +  'QUO' => '"',
    +  'quot' => '"',
    +  'quo' => '"',
    +  'rAarr' => '⇛',
    +  'race' => '∽̱',
    +  'Racute' => 'Ŕ',
    +  'racute' => 'ŕ',
    +  'radic' => '√',
    +  'raemptyv' => '⦳',
    +  'Rang' => '⟫',
    +  'rang' => '⟩',
    +  'rangd' => '⦒',
    +  'range' => '⦥',
    +  'rangle' => '⟩',
    +  'raquo' => '»',
    +  'raqu' => '»',
    +  'Rarr' => '↠',
    +  'rArr' => '⇒',
    +  'rarr' => '→',
    +  'rarrap' => '⥵',
    +  'rarrb' => '⇥',
    +  'rarrbfs' => '⤠',
    +  'rarrc' => '⤳',
    +  'rarrfs' => '⤞',
    +  'rarrhk' => '↪',
    +  'rarrlp' => '↬',
    +  'rarrpl' => '⥅',
    +  'rarrsim' => '⥴',
    +  'Rarrtl' => '⤖',
    +  'rarrtl' => '↣',
    +  'rarrw' => '↝',
    +  'rAtail' => '⤜',
    +  'ratail' => '⤚',
    +  'ratio' => '∶',
    +  'rationals' => 'ℚ',
    +  'RBarr' => '⤐',
    +  'rBarr' => '⤏',
    +  'rbarr' => '⤍',
    +  'rbbrk' => '❳',
    +  'rbrace' => '}',
    +  'rbrack' => ']',
    +  'rbrke' => '⦌',
    +  'rbrksld' => '⦎',
    +  'rbrkslu' => '⦐',
    +  'Rcaron' => 'Ř',
    +  'rcaron' => 'ř',
    +  'Rcedil' => 'Ŗ',
    +  'rcedil' => 'ŗ',
    +  'rceil' => '⌉',
    +  'rcub' => '}',
    +  'Rcy' => 'Р',
    +  'rcy' => 'р',
    +  'rdca' => '⤷',
    +  'rdldhar' => '⥩',
    +  'rdquo' => '”',
    +  'rdquor' => '”',
    +  'rdsh' => '↳',
    +  'Re' => 'ℜ',
    +  'real' => 'ℜ',
    +  'realine' => 'ℛ',
    +  'realpart' => 'ℜ',
    +  'reals' => 'ℝ',
    +  'rect' => '▭',
    +  'REG' => '®',
    +  'RE' => '®',
    +  'reg' => '®',
    +  're' => '®',
    +  'ReverseElement' => '∋',
    +  'ReverseEquilibrium' => '⇋',
    +  'ReverseUpEquilibrium' => '⥯',
    +  'rfisht' => '⥽',
    +  'rfloor' => '⌋',
    +  'Rfr' => 'ℜ',
    +  'rfr' => '𝔯',
    +  'rHar' => '⥤',
    +  'rhard' => '⇁',
    +  'rharu' => '⇀',
    +  'rharul' => '⥬',
    +  'Rho' => 'Ρ',
    +  'rho' => 'ρ',
    +  'rhov' => 'ϱ',
    +  'RightAngleBracket' => '⟩',
    +  'RightArrow' => '→',
    +  'Rightarrow' => '⇒',
    +  'rightarrow' => '→',
    +  'RightArrowBar' => '⇥',
    +  'RightArrowLeftArrow' => '⇄',
    +  'rightarrowtail' => '↣',
    +  'RightCeiling' => '⌉',
    +  'RightDoubleBracket' => '⟧',
    +  'RightDownTeeVector' => '⥝',
    +  'RightDownVector' => '⇂',
    +  'RightDownVectorBar' => '⥕',
    +  'RightFloor' => '⌋',
    +  'rightharpoondown' => '⇁',
    +  'rightharpoonup' => '⇀',
    +  'rightleftarrows' => '⇄',
    +  'rightleftharpoons' => '⇌',
    +  'rightrightarrows' => '⇉',
    +  'rightsquigarrow' => '↝',
    +  'RightTee' => '⊢',
    +  'RightTeeArrow' => '↦',
    +  'RightTeeVector' => '⥛',
    +  'rightthreetimes' => '⋌',
    +  'RightTriangle' => '⊳',
    +  'RightTriangleBar' => '⧐',
    +  'RightTriangleEqual' => '⊵',
    +  'RightUpDownVector' => '⥏',
    +  'RightUpTeeVector' => '⥜',
    +  'RightUpVector' => '↾',
    +  'RightUpVectorBar' => '⥔',
    +  'RightVector' => '⇀',
    +  'RightVectorBar' => '⥓',
    +  'ring' => '˚',
    +  'risingdotseq' => '≓',
    +  'rlarr' => '⇄',
    +  'rlhar' => '⇌',
    +  'rlm' => '‏',
    +  'rmoust' => '⎱',
    +  'rmoustache' => '⎱',
    +  'rnmid' => '⫮',
    +  'roang' => '⟭',
    +  'roarr' => '⇾',
    +  'robrk' => '⟧',
    +  'ropar' => '⦆',
    +  'Ropf' => 'ℝ',
    +  'ropf' => '𝕣',
    +  'roplus' => '⨮',
    +  'rotimes' => '⨵',
    +  'RoundImplies' => '⥰',
    +  'rpar' => ')',
    +  'rpargt' => '⦔',
    +  'rppolint' => '⨒',
    +  'rrarr' => '⇉',
    +  'Rrightarrow' => '⇛',
    +  'rsaquo' => '›',
    +  'Rscr' => 'ℛ',
    +  'rscr' => '𝓇',
    +  'Rsh' => '↱',
    +  'rsh' => '↱',
    +  'rsqb' => ']',
    +  'rsquo' => '’',
    +  'rsquor' => '’',
    +  'rthree' => '⋌',
    +  'rtimes' => '⋊',
    +  'rtri' => '▹',
    +  'rtrie' => '⊵',
    +  'rtrif' => '▸',
    +  'rtriltri' => '⧎',
    +  'RuleDelayed' => '⧴',
    +  'ruluhar' => '⥨',
    +  'rx' => '℞',
    +  'Sacute' => 'Ś',
    +  'sacute' => 'ś',
    +  'sbquo' => '‚',
    +  'Sc' => '⪼',
    +  'sc' => '≻',
    +  'scap' => '⪸',
    +  'Scaron' => 'Š',
    +  'scaron' => 'š',
    +  'sccue' => '≽',
    +  'scE' => '⪴',
    +  'sce' => '⪰',
    +  'Scedil' => 'Ş',
    +  'scedil' => 'ş',
    +  'Scirc' => 'Ŝ',
    +  'scirc' => 'ŝ',
    +  'scnap' => '⪺',
    +  'scnE' => '⪶',
    +  'scnsim' => '⋩',
    +  'scpolint' => '⨓',
    +  'scsim' => '≿',
    +  'Scy' => 'С',
    +  'scy' => 'с',
    +  'sdot' => '⋅',
    +  'sdotb' => '⊡',
    +  'sdote' => '⩦',
    +  'searhk' => '⤥',
    +  'seArr' => '⇘',
    +  'searr' => '↘',
    +  'searrow' => '↘',
    +  'sect' => '§',
    +  'sec' => '§',
    +  'semi' => ';',
    +  'seswar' => '⤩',
    +  'setminus' => '∖',
    +  'setmn' => '∖',
    +  'sext' => '✶',
    +  'Sfr' => '𝔖',
    +  'sfr' => '𝔰',
    +  'sfrown' => '⌢',
    +  'sharp' => '♯',
    +  'SHCHcy' => 'Щ',
    +  'shchcy' => 'щ',
    +  'SHcy' => 'Ш',
    +  'shcy' => 'ш',
    +  'ShortDownArrow' => '↓',
    +  'ShortLeftArrow' => '←',
    +  'shortmid' => '∣',
    +  'shortparallel' => '∥',
    +  'ShortRightArrow' => '→',
    +  'ShortUpArrow' => '↑',
    +  'shy' => '­',
    +  'sh' => '­',
    +  'Sigma' => 'Σ',
    +  'sigma' => 'σ',
    +  'sigmaf' => 'ς',
    +  'sigmav' => 'ς',
    +  'sim' => '∼',
    +  'simdot' => '⩪',
    +  'sime' => '≃',
    +  'simeq' => '≃',
    +  'simg' => '⪞',
    +  'simgE' => '⪠',
    +  'siml' => '⪝',
    +  'simlE' => '⪟',
    +  'simne' => '≆',
    +  'simplus' => '⨤',
    +  'simrarr' => '⥲',
    +  'slarr' => '←',
    +  'SmallCircle' => '∘',
    +  'smallsetminus' => '∖',
    +  'smashp' => '⨳',
    +  'smeparsl' => '⧤',
    +  'smid' => '∣',
    +  'smile' => '⌣',
    +  'smt' => '⪪',
    +  'smte' => '⪬',
    +  'smtes' => '⪬︀',
    +  'SOFTcy' => 'Ь',
    +  'softcy' => 'ь',
    +  'sol' => '/',
    +  'solb' => '⧄',
    +  'solbar' => '⌿',
    +  'Sopf' => '𝕊',
    +  'sopf' => '𝕤',
    +  'spades' => '♠',
    +  'spadesuit' => '♠',
    +  'spar' => '∥',
    +  'sqcap' => '⊓',
    +  'sqcaps' => '⊓︀',
    +  'sqcup' => '⊔',
    +  'sqcups' => '⊔︀',
    +  'Sqrt' => '√',
    +  'sqsub' => '⊏',
    +  'sqsube' => '⊑',
    +  'sqsubset' => '⊏',
    +  'sqsubseteq' => '⊑',
    +  'sqsup' => '⊐',
    +  'sqsupe' => '⊒',
    +  'sqsupset' => '⊐',
    +  'sqsupseteq' => '⊒',
    +  'squ' => '□',
    +  'Square' => '□',
    +  'square' => '□',
    +  'SquareIntersection' => '⊓',
    +  'SquareSubset' => '⊏',
    +  'SquareSubsetEqual' => '⊑',
    +  'SquareSuperset' => '⊐',
    +  'SquareSupersetEqual' => '⊒',
    +  'SquareUnion' => '⊔',
    +  'squarf' => '▪',
    +  'squf' => '▪',
    +  'srarr' => '→',
    +  'Sscr' => '𝒮',
    +  'sscr' => '𝓈',
    +  'ssetmn' => '∖',
    +  'ssmile' => '⌣',
    +  'sstarf' => '⋆',
    +  'Star' => '⋆',
    +  'star' => '☆',
    +  'starf' => '★',
    +  'straightepsilon' => 'ϵ',
    +  'straightphi' => 'ϕ',
    +  'strns' => '¯',
    +  'Sub' => '⋐',
    +  'sub' => '⊂',
    +  'subdot' => '⪽',
    +  'subE' => '⫅',
    +  'sube' => '⊆',
    +  'subedot' => '⫃',
    +  'submult' => '⫁',
    +  'subnE' => '⫋',
    +  'subne' => '⊊',
    +  'subplus' => '⪿',
    +  'subrarr' => '⥹',
    +  'Subset' => '⋐',
    +  'subset' => '⊂',
    +  'subseteq' => '⊆',
    +  'subseteqq' => '⫅',
    +  'SubsetEqual' => '⊆',
    +  'subsetneq' => '⊊',
    +  'subsetneqq' => '⫋',
    +  'subsim' => '⫇',
    +  'subsub' => '⫕',
    +  'subsup' => '⫓',
    +  'succ' => '≻',
    +  'succapprox' => '⪸',
    +  'succcurlyeq' => '≽',
    +  'Succeeds' => '≻',
    +  'SucceedsEqual' => '⪰',
    +  'SucceedsSlantEqual' => '≽',
    +  'SucceedsTilde' => '≿',
    +  'succeq' => '⪰',
    +  'succnapprox' => '⪺',
    +  'succneqq' => '⪶',
    +  'succnsim' => '⋩',
    +  'succsim' => '≿',
    +  'SuchThat' => '∋',
    +  'Sum' => '∑',
    +  'sum' => '∑',
    +  'sung' => '♪',
    +  'Sup' => '⋑',
    +  'sup' => '³',
    +  'sup1' => '¹',
    +  'sup2' => '²',
    +  'sup3' => '³',
    +  'supdot' => '⪾',
    +  'supdsub' => '⫘',
    +  'supE' => '⫆',
    +  'supe' => '⊇',
    +  'supedot' => '⫄',
    +  'Superset' => '⊃',
    +  'SupersetEqual' => '⊇',
    +  'suphsol' => '⟉',
    +  'suphsub' => '⫗',
    +  'suplarr' => '⥻',
    +  'supmult' => '⫂',
    +  'supnE' => '⫌',
    +  'supne' => '⊋',
    +  'supplus' => '⫀',
    +  'Supset' => '⋑',
    +  'supset' => '⊃',
    +  'supseteq' => '⊇',
    +  'supseteqq' => '⫆',
    +  'supsetneq' => '⊋',
    +  'supsetneqq' => '⫌',
    +  'supsim' => '⫈',
    +  'supsub' => '⫔',
    +  'supsup' => '⫖',
    +  'swarhk' => '⤦',
    +  'swArr' => '⇙',
    +  'swarr' => '↙',
    +  'swarrow' => '↙',
    +  'swnwar' => '⤪',
    +  'szlig' => 'ß',
    +  'szli' => 'ß',
    +  'Tab' => '	',
    +  'target' => '⌖',
    +  'Tau' => 'Τ',
    +  'tau' => 'τ',
    +  'tbrk' => '⎴',
    +  'Tcaron' => 'Ť',
    +  'tcaron' => 'ť',
    +  'Tcedil' => 'Ţ',
    +  'tcedil' => 'ţ',
    +  'Tcy' => 'Т',
    +  'tcy' => 'т',
    +  'tdot' => '⃛',
    +  'telrec' => '⌕',
    +  'Tfr' => '𝔗',
    +  'tfr' => '𝔱',
    +  'there4' => '∴',
    +  'Therefore' => '∴',
    +  'therefore' => '∴',
    +  'Theta' => 'Θ',
    +  'theta' => 'θ',
    +  'thetasym' => 'ϑ',
    +  'thetav' => 'ϑ',
    +  'thickapprox' => '≈',
    +  'thicksim' => '∼',
    +  'ThickSpace' => '  ',
    +  'thinsp' => ' ',
    +  'ThinSpace' => ' ',
    +  'thkap' => '≈',
    +  'thksim' => '∼',
    +  'THORN' => 'Þ',
    +  'THOR' => 'Þ',
    +  'thorn' => 'þ',
    +  'thor' => 'þ',
    +  'Tilde' => '∼',
    +  'tilde' => '˜',
    +  'TildeEqual' => '≃',
    +  'TildeFullEqual' => '≅',
    +  'TildeTilde' => '≈',
    +  'times' => '×',
    +  'time' => '×',
    +  'timesb' => '⊠',
    +  'timesbar' => '⨱',
    +  'timesd' => '⨰',
    +  'tint' => '∭',
    +  'toea' => '⤨',
    +  'top' => '⊤',
    +  'topbot' => '⌶',
    +  'topcir' => '⫱',
    +  'Topf' => '𝕋',
    +  'topf' => '𝕥',
    +  'topfork' => '⫚',
    +  'tosa' => '⤩',
    +  'tprime' => '‴',
    +  'TRADE' => '™',
    +  'trade' => '™',
    +  'triangle' => '▵',
    +  'triangledown' => '▿',
    +  'triangleleft' => '◃',
    +  'trianglelefteq' => '⊴',
    +  'triangleq' => '≜',
    +  'triangleright' => '▹',
    +  'trianglerighteq' => '⊵',
    +  'tridot' => '◬',
    +  'trie' => '≜',
    +  'triminus' => '⨺',
    +  'TripleDot' => '⃛',
    +  'triplus' => '⨹',
    +  'trisb' => '⧍',
    +  'tritime' => '⨻',
    +  'trpezium' => '⏢',
    +  'Tscr' => '𝒯',
    +  'tscr' => '𝓉',
    +  'TScy' => 'Ц',
    +  'tscy' => 'ц',
    +  'TSHcy' => 'Ћ',
    +  'tshcy' => 'ћ',
    +  'Tstrok' => 'Ŧ',
    +  'tstrok' => 'ŧ',
    +  'twixt' => '≬',
    +  'twoheadleftarrow' => '↞',
    +  'twoheadrightarrow' => '↠',
    +  'Uacute' => 'Ú',
    +  'Uacut' => 'Ú',
    +  'uacute' => 'ú',
    +  'uacut' => 'ú',
    +  'Uarr' => '↟',
    +  'uArr' => '⇑',
    +  'uarr' => '↑',
    +  'Uarrocir' => '⥉',
    +  'Ubrcy' => 'Ў',
    +  'ubrcy' => 'ў',
    +  'Ubreve' => 'Ŭ',
    +  'ubreve' => 'ŭ',
    +  'Ucirc' => 'Û',
    +  'Ucir' => 'Û',
    +  'ucirc' => 'û',
    +  'ucir' => 'û',
    +  'Ucy' => 'У',
    +  'ucy' => 'у',
    +  'udarr' => '⇅',
    +  'Udblac' => 'Ű',
    +  'udblac' => 'ű',
    +  'udhar' => '⥮',
    +  'ufisht' => '⥾',
    +  'Ufr' => '𝔘',
    +  'ufr' => '𝔲',
    +  'Ugrave' => 'Ù',
    +  'Ugrav' => 'Ù',
    +  'ugrave' => 'ù',
    +  'ugrav' => 'ù',
    +  'uHar' => '⥣',
    +  'uharl' => '↿',
    +  'uharr' => '↾',
    +  'uhblk' => '▀',
    +  'ulcorn' => '⌜',
    +  'ulcorner' => '⌜',
    +  'ulcrop' => '⌏',
    +  'ultri' => '◸',
    +  'Umacr' => 'Ū',
    +  'umacr' => 'ū',
    +  'uml' => '¨',
    +  'um' => '¨',
    +  'UnderBar' => '_',
    +  'UnderBrace' => '⏟',
    +  'UnderBracket' => '⎵',
    +  'UnderParenthesis' => '⏝',
    +  'Union' => '⋃',
    +  'UnionPlus' => '⊎',
    +  'Uogon' => 'Ų',
    +  'uogon' => 'ų',
    +  'Uopf' => '𝕌',
    +  'uopf' => '𝕦',
    +  'UpArrow' => '↑',
    +  'Uparrow' => '⇑',
    +  'uparrow' => '↑',
    +  'UpArrowBar' => '⤒',
    +  'UpArrowDownArrow' => '⇅',
    +  'UpDownArrow' => '↕',
    +  'Updownarrow' => '⇕',
    +  'updownarrow' => '↕',
    +  'UpEquilibrium' => '⥮',
    +  'upharpoonleft' => '↿',
    +  'upharpoonright' => '↾',
    +  'uplus' => '⊎',
    +  'UpperLeftArrow' => '↖',
    +  'UpperRightArrow' => '↗',
    +  'Upsi' => 'ϒ',
    +  'upsi' => 'υ',
    +  'upsih' => 'ϒ',
    +  'Upsilon' => 'Υ',
    +  'upsilon' => 'υ',
    +  'UpTee' => '⊥',
    +  'UpTeeArrow' => '↥',
    +  'upuparrows' => '⇈',
    +  'urcorn' => '⌝',
    +  'urcorner' => '⌝',
    +  'urcrop' => '⌎',
    +  'Uring' => 'Ů',
    +  'uring' => 'ů',
    +  'urtri' => '◹',
    +  'Uscr' => '𝒰',
    +  'uscr' => '𝓊',
    +  'utdot' => '⋰',
    +  'Utilde' => 'Ũ',
    +  'utilde' => 'ũ',
    +  'utri' => '▵',
    +  'utrif' => '▴',
    +  'uuarr' => '⇈',
    +  'Uuml' => 'Ü',
    +  'Uum' => 'Ü',
    +  'uuml' => 'ü',
    +  'uum' => 'ü',
    +  'uwangle' => '⦧',
    +  'vangrt' => '⦜',
    +  'varepsilon' => 'ϵ',
    +  'varkappa' => 'ϰ',
    +  'varnothing' => '∅',
    +  'varphi' => 'ϕ',
    +  'varpi' => 'ϖ',
    +  'varpropto' => '∝',
    +  'vArr' => '⇕',
    +  'varr' => '↕',
    +  'varrho' => 'ϱ',
    +  'varsigma' => 'ς',
    +  'varsubsetneq' => '⊊︀',
    +  'varsubsetneqq' => '⫋︀',
    +  'varsupsetneq' => '⊋︀',
    +  'varsupsetneqq' => '⫌︀',
    +  'vartheta' => 'ϑ',
    +  'vartriangleleft' => '⊲',
    +  'vartriangleright' => '⊳',
    +  'Vbar' => '⫫',
    +  'vBar' => '⫨',
    +  'vBarv' => '⫩',
    +  'Vcy' => 'В',
    +  'vcy' => 'в',
    +  'VDash' => '⊫',
    +  'Vdash' => '⊩',
    +  'vDash' => '⊨',
    +  'vdash' => '⊢',
    +  'Vdashl' => '⫦',
    +  'Vee' => '⋁',
    +  'vee' => '∨',
    +  'veebar' => '⊻',
    +  'veeeq' => '≚',
    +  'vellip' => '⋮',
    +  'Verbar' => '‖',
    +  'verbar' => '|',
    +  'Vert' => '‖',
    +  'vert' => '|',
    +  'VerticalBar' => '∣',
    +  'VerticalLine' => '|',
    +  'VerticalSeparator' => '❘',
    +  'VerticalTilde' => '≀',
    +  'VeryThinSpace' => ' ',
    +  'Vfr' => '𝔙',
    +  'vfr' => '𝔳',
    +  'vltri' => '⊲',
    +  'vnsub' => '⊂⃒',
    +  'vnsup' => '⊃⃒',
    +  'Vopf' => '𝕍',
    +  'vopf' => '𝕧',
    +  'vprop' => '∝',
    +  'vrtri' => '⊳',
    +  'Vscr' => '𝒱',
    +  'vscr' => '𝓋',
    +  'vsubnE' => '⫋︀',
    +  'vsubne' => '⊊︀',
    +  'vsupnE' => '⫌︀',
    +  'vsupne' => '⊋︀',
    +  'Vvdash' => '⊪',
    +  'vzigzag' => '⦚',
    +  'Wcirc' => 'Ŵ',
    +  'wcirc' => 'ŵ',
    +  'wedbar' => '⩟',
    +  'Wedge' => '⋀',
    +  'wedge' => '∧',
    +  'wedgeq' => '≙',
    +  'weierp' => '℘',
    +  'Wfr' => '𝔚',
    +  'wfr' => '𝔴',
    +  'Wopf' => '𝕎',
    +  'wopf' => '𝕨',
    +  'wp' => '℘',
    +  'wr' => '≀',
    +  'wreath' => '≀',
    +  'Wscr' => '𝒲',
    +  'wscr' => '𝓌',
    +  'xcap' => '⋂',
    +  'xcirc' => '◯',
    +  'xcup' => '⋃',
    +  'xdtri' => '▽',
    +  'Xfr' => '𝔛',
    +  'xfr' => '𝔵',
    +  'xhArr' => '⟺',
    +  'xharr' => '⟷',
    +  'Xi' => 'Ξ',
    +  'xi' => 'ξ',
    +  'xlArr' => '⟸',
    +  'xlarr' => '⟵',
    +  'xmap' => '⟼',
    +  'xnis' => '⋻',
    +  'xodot' => '⨀',
    +  'Xopf' => '𝕏',
    +  'xopf' => '𝕩',
    +  'xoplus' => '⨁',
    +  'xotime' => '⨂',
    +  'xrArr' => '⟹',
    +  'xrarr' => '⟶',
    +  'Xscr' => '𝒳',
    +  'xscr' => '𝓍',
    +  'xsqcup' => '⨆',
    +  'xuplus' => '⨄',
    +  'xutri' => '△',
    +  'xvee' => '⋁',
    +  'xwedge' => '⋀',
    +  'Yacute' => 'Ý',
    +  'Yacut' => 'Ý',
    +  'yacute' => 'ý',
    +  'yacut' => 'ý',
    +  'YAcy' => 'Я',
    +  'yacy' => 'я',
    +  'Ycirc' => 'Ŷ',
    +  'ycirc' => 'ŷ',
    +  'Ycy' => 'Ы',
    +  'ycy' => 'ы',
    +  'yen' => '¥',
    +  'ye' => '¥',
    +  'Yfr' => '𝔜',
    +  'yfr' => '𝔶',
    +  'YIcy' => 'Ї',
    +  'yicy' => 'ї',
    +  'Yopf' => '𝕐',
    +  'yopf' => '𝕪',
    +  'Yscr' => '𝒴',
    +  'yscr' => '𝓎',
    +  'YUcy' => 'Ю',
    +  'yucy' => 'ю',
    +  'Yuml' => 'Ÿ',
    +  'yuml' => 'ÿ',
    +  'yum' => 'ÿ',
    +  'Zacute' => 'Ź',
    +  'zacute' => 'ź',
    +  'Zcaron' => 'Ž',
    +  'zcaron' => 'ž',
    +  'Zcy' => 'З',
    +  'zcy' => 'з',
    +  'Zdot' => 'Ż',
    +  'zdot' => 'ż',
    +  'zeetrf' => 'ℨ',
    +  'ZeroWidthSpace' => '​',
    +  'Zeta' => 'Ζ',
    +  'zeta' => 'ζ',
    +  'Zfr' => 'ℨ',
    +  'zfr' => '𝔷',
    +  'ZHcy' => 'Ж',
    +  'zhcy' => 'ж',
    +  'zigrarr' => '⇝',
    +  'Zopf' => 'ℤ',
    +  'zopf' => '𝕫',
    +  'Zscr' => '𝒵',
    +  'zscr' => '𝓏',
    +  'zwj' => '‍',
    +  'zwnj' => '‌',
    +);
    +}
    diff --git a/libraries/html5php/HTML5/Exception.php b/libraries/html5php/HTML5/Exception.php
    new file mode 100644
    index 0000000..aa650a6
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Exception.php
    @@ -0,0 +1,8 @@
    +createDocumentType('html');
    +    //$this->doc = \DOMImplementation::createDocument(NULL, 'html', $dt);
    +    $this->doc = $impl->createDocument(NULL, NULL, $dt);
    +    $this->doc->errors = array();
    +
    +    // $this->current = $this->doc->documentElement;
    +    $this->current = $this->doc; //->documentElement;
    +
    +    // Create a rules engine for tags.
    +    $this->rules = new TreeBuildingRules($this->doc);
    +
    +    if ($isFragment) {
    +      $this->isFragment = TRUE;
    +      $this->insertMode = static::IM_IN_BODY;
    +      $ele = $this->doc->createElement('html');
    +      $this->doc->appendChild($ele);
    +      $this->current = $ele;
    +    }
    +  }
    +
    +  /**
    +   * Retrieve the document this builder has been populating.
    +   *
    +   * @return mixed
    +   *   The object held in $this->doc (created with createDocument() during
    +   *   construction).
    +   */
    +  public function document() {
    +    $document = $this->doc;
    +    return $document;
    +  }
    +
    +  /**
    +   * Get the parsed content as a DOM fragment.
    +   *
    +   * Every child of the document element is moved into a newly created
    +   * document fragment; a fragment may therefore hold zero or more nodes at
    +   * its root. The parse errors recorded on the document are copied onto the
    +   * fragment's "errors" property.
    +   *
    +   * @see http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#concept-frag-parse-context
    +   *
    +   * @return \DOMDocumentFragment
    +   *   The fragment holding the moved nodes.
    +   */
    +  public function fragment() {
    +    $fragment = $this->doc->createDocumentFragment();
    +
    +    // The child list is live and shrinks as nodes are moved by appendChild(),
    +    // so take a snapshot of it before re-parenting anything.
    +    $children = iterator_to_array($this->doc->documentElement->childNodes);
    +    foreach ($children as $child) {
    +      $fragment->appendChild($child);
    +    }
    +
    +    $fragment->errors = $this->doc->errors;
    +    return $fragment;
    +  }
    +
    +  /**
    +   * Register the handler used for processing instructions.
    +   *
    +   * When a processor is set, processingInstruction() hands each PI to it
    +   * instead of inserting the PI into the DOM tree itself.
    +   *
    +   * @param \HTML5\InstructionProcessor $proc
    +   *   The processor to store on this builder.
    +   */
    +  public function setInstructionProcessor(\HTML5\InstructionProcessor $proc) {
    +    $this->processor = $proc;
    +  }
    +
    +  /**
    +   * Handle a DOCTYPE declaration.
    +   *
    +   * The inbound doctype is not preserved; the event only records the quirks
    +   * flag and, when seen in the initial insertion mode, advances the mode to
    +   * IM_BEFORE_HTML. A DOCTYPE seen any later is reported as a parse error.
    +   *
    +   * @param string $name
    +   *   The doctype name; only used in the error message.
    +   * @param int $idType
    +   *   Not used by this method.
    +   * @param string $id
    +   *   Not used by this method.
    +   * @param boolean $quirks
    +   *   Stored as the builder's quirks flag, even when the DOCTYPE is rejected.
    +   */
    +  public function doctype($name, $idType = 0, $id = NULL, $quirks = FALSE) {
    +    $this->quirks = $quirks;
    +
    +    if ($this->insertMode <= static::IM_INITIAL) {
    +      $this->insertMode = static::IM_BEFORE_HTML;
    +    }
    +    else {
    +      $this->parseError("Illegal placement of DOCTYPE tag. Ignoring: " . $name);
    +    }
    +  }
    +
    +  /**
    +   * Process the start tag.
    +   *
    +   * Creates a DOM element for the tag, copies the attributes onto it,
    +   * attaches it at the current insertion point and updates the insertion
    +   * mode. If the document has no root element yet, an html element is
    +   * started first.
    +   *
    +   * @todo
    +   *   - XMLNS namespace handling (we need to parse, even if it's not valid)
    +   *   - XLink, MathML and SVG namespace handling
    +   *   - Omission rules: 8.1.2.4 Optional tags
    +   *
    +   * @param string $name
    +   *   The tag name.
    +   * @param array $attributes
    +   *   Attribute name => value pairs to set on the new element.
    +   * @param boolean $selfClosing
    +   *   Whether the tag was self-closing. Not used by this method yet.
    +   * @return mixed
    +   *   The value of Elements::element() for the tag: the element mask the
    +   *   tokenizer can use to set various processing rules.
    +   */
    +  public function startTag($name, $attributes = array(), $selfClosing = FALSE) {
    +    $lname = $this->normalizeTagName($name);
    +
    +    // Make sure we have an html element.
    +    if (!$this->doc->documentElement && $name !== 'html') {
    +      $this->startTag('html');
    +    }
    +
    +    // Set quirks mode if we're at IM_INITIAL with no doctype.
    +    if ($this->insertMode == static::IM_INITIAL) {
    +      $this->quirks = TRUE;
    +      $this->parseError("No DOCTYPE specified.");
    +    }
    +
    +    // SPECIAL TAG HANDLING:
    +    // Spec says do this, and "don't ask."
    +    if ($name == 'image') {
    +      $name = 'img';
    +      // The element itself is created from $lname, which was computed before
    +      // the rename, so it has to be renamed too or an <image> element ends up
    +      // in the tree. Foreign content is left alone: <image> is a real SVG
    +      // element there.
    +      if ($this->insertMode != static::IM_IN_SVG && $this->insertMode != static::IM_IN_MATHML) {
    +        $lname = 'img';
    +      }
    +    }
    +
    +
    +    // Autoclose p tags where appropriate.
    +    if ($this->insertMode >= static::IM_IN_BODY && Elements::isA($name, Elements::AUTOCLOSE_P)) {
    +      $this->autoclose('p');
    +    }
    +
    +    // Set insert mode:
    +    switch ($name) {
    +    case 'html':
    +      $this->insertMode = static::IM_BEFORE_HEAD;
    +      break;
    +    case 'head':
    +      if ($this->insertMode > static::IM_BEFORE_HEAD) {
    +        $this->parseError("Unexpected head tag outside of head context.");
    +      }
    +      else {
    +        $this->insertMode = static::IM_IN_HEAD;
    +      }
    +      break;
    +    case 'body':
    +      $this->insertMode = static::IM_IN_BODY;
    +      break;
    +    case 'svg':
    +      $this->insertMode = static::IM_IN_SVG;
    +      break;
    +    case 'math':
    +      $this->insertMode = static::IM_IN_MATHML;
    +      break;
    +    case 'noscript':
    +      if ($this->insertMode == static::IM_IN_HEAD) {
    +        $this->insertMode = static::IM_IN_HEAD_NOSCRIPT;
    +      }
    +      break;
    +
    +    }
    +
    +    // Special case handling for SVG.
    +    if ($this->insertMode == static::IM_IN_SVG) {
    +      $lname = Elements::normalizeSvgElement($lname);
    +    }
    +
    +    try {
    +      $ele = $this->doc->createElement($lname);
    +    }
    +    catch(\DOMException $e) {
    +      // Names the DOM rejects are replaced by a placeholder element so the
    +      // tree structure is kept.
    +      $this->parseError("Illegal tag name: <$lname>. Replaced with <invalid>.");
    +      $ele = $this->doc->createElement('invalid');
    +    }
    +
    +    foreach ($attributes as $aName => $aVal) {
    +
    +      if ($this->insertMode == static::IM_IN_SVG) {
    +        $aName = Elements::normalizeSvgAttribute($aName);
    +      }
    +      elseif ($this->insertMode == static::IM_IN_MATHML) {
    +        $aName = Elements::normalizeMathMlAttribute($aName);
    +      }
    +
    +      try {
    +        $ele->setAttribute($aName, $aVal);
    +      }
    +      catch(\DOMException $e) {
    +        $this->parseError("Illegal attribute name for tag $name. Ignoring: $aName");
    +        continue;
    +      }
    +
    +      // This is necessary on a non-DTD schema, like HTML5.
    +      if ($aName == 'id') {
    +        $ele->setIdAttribute('id', TRUE);
    +      }
    +    }
    +
    +    // Some elements have special processing rules. Handle those separately.
    +    if ($this->rules->hasRules($name)) {
    +      $this->current = $this->rules->evaluate($ele, $this->current);
    +    }
    +    // Otherwise, it's a standard element.
    +    else {
    +      $this->current->appendChild($ele);
    +
    +      // XXX: Need to handle self-closing tags and unary tags.
    +      // Void elements never become the insertion point.
    +      if (!Elements::isA($name, Elements::VOID_TAG)) {
    +        $this->current = $ele;
    +      }
    +    }
    +
    +    // This is sort of a last-ditch attempt to correct for cases where no head/body
    +    // elements are provided.
    +    if ($this->insertMode <= static::IM_BEFORE_HEAD && $name != 'head' && $name != 'html') {
    +      $this->insertMode = static::IM_IN_BODY;
    +    }
    +
    +    // Return the element mask, which the tokenizer can then use to set
    +    // various processing rules.
    +    return Elements::element($name);
    +  }
    +
    +  /**
    +   * Process an end tag.
    +   *
    +   * Closes the nearest open element with the given name by moving the
    +   * insertion point to that element's parent, then updates the insertion
    +   * mode for the tags that control it. End tags for void elements and for
    +   * html are ignored.
    +   *
    +   * @param string $name
    +   *   The tag name.
    +   */
    +  public function endTag($name) {
    +    $lname = $this->normalizeTagName($name);
    +
    +    // Ignore closing tags for unary elements.
    +    if (Elements::isA($name, Elements::VOID_TAG)) {
    +      return;
    +    }
    +
    +    if ($this->insertMode <= static::IM_BEFORE_HTML) {
    +      // 8.2.5.4.2
    +      if (in_array($name, array('html', 'br', 'head', 'title'))) {
    +        // Open the implied html element, then handle this end tag again now
    +        // that the insertion mode has moved on.
    +        $this->startTag('html');
    +        $this->endTag($name);
    +        $this->insertMode = static::IM_BEFORE_HEAD;
    +        return;
    +      }
    +
    +      // Ignore the tag.
    +      $this->parseError("Illegal closing tag at global scope.");
    +      return;
    +    }
    +
    +    // Special case handling for SVG.
    +    if ($this->insertMode == static::IM_IN_SVG) {
    +      $lname = Elements::normalizeSvgElement($lname);
    +    }
    +
    +    // XXX: HTML has no parent. What do we do, though,
    +    // if this element appears in the wrong place?
    +    if ($lname == 'html') {
    +      return;
    +    }
    +
    +    if (!$this->autoclose($lname)) {
    +      $this->parseError('Could not find closing tag for ' . $lname);
    +    }
    +
    +    switch ($lname) {
    +    case "head":
    +      $this->insertMode = static::IM_AFTER_HEAD;
    +      break;
    +    case "body":
    +      $this->insertMode = static::IM_AFTER_BODY;
    +      break;
    +    case "svg":
    +    // The MathML root element is named "math"; that is the name startTag()
    +    // switches on when entering IM_IN_MATHML. "mathml" is kept so existing
    +    // behaviour for that name does not change.
    +    case "math":
    +    case "mathml":
    +      $this->insertMode = static::IM_IN_BODY;
    +      break;
    +    }
    +  }
    +
    +  /**
    +   * Insert a comment node at the current insertion point.
    +   *
    +   * @param string $cdata
    +   *   The text of the comment.
    +   */
    +  public function comment($cdata) {
    +    // TODO: Need to handle case where comment appears outside of the HTML tag.
    +    $this->current->appendChild($this->doc->createComment($cdata));
    +  }
    +
    +  /**
    +   * Insert character data at the current insertion point.
    +   *
    +   * Before the head has been reached no text node is created: whitespace is
    +   * dropped silently and any other text is dropped with a parse error.
    +   *
    +   * @param string $data
    +   *   The character data.
    +   */
    +  public function text($data) {
    +    // XXX: Hmmm.... should we really be this strict?
    +    if ($this->insertMode < static::IM_IN_HEAD) {
    +      // Per '8.2.5.4.3 The "before head" insertion mode' the characters
    +      // " \t\n\r\f" should be ignored but no mention of a parse error. This is
    +      // practical as most documents contain these characters. Other text is not
    +      // expected here so recording a parse error is necessary.
    +      $dataTmp = trim($data, " \t\n\r\f");
    +      // Compare against the empty string rather than using empty(): empty()
    +      // also treats the text "0" as empty, which would drop it unreported.
    +      if ($dataTmp !== '') {
    +        $this->parseError("Unexpected text. Ignoring: " . $dataTmp);
    +      }
    +      return;
    +    }
    +    $node = $this->doc->createTextNode($data);
    +    $this->current->appendChild($node);
    +  }
    +
    +  /**
    +   * Handle the end of the input.
    +   *
    +   * Currently a no-op: nothing is closed or finalized here.
    +   */
    +  public function eof() {
    +    // If the $current isn't the $root, do we need to do anything?
    +  }
    +
    +  /**
    +   * Record a parse error on the document.
    +   *
    +   * The message is appended to the document's "errors" array, prefixed with
    +   * the line and column it was reported for.
    +   *
    +   * @param string $msg
    +   *   The error message.
    +   * @param int $line
    +   *   The line number; defaults to 0.
    +   * @param int $col
    +   *   The column number; defaults to 0.
    +   */
    +  public function parseError($msg, $line = 0, $col = 0) {
    +    $entry = sprintf("Line %d, Col %d: %s", $line, $col, $msg);
    +    $this->doc->errors[] = $entry;
    +  }
    +
    +  /**
    +   * Insert a CDATA section at the current insertion point.
    +   *
    +   * @param string $data
    +   *   The content of the CDATA section.
    +   */
    +  public function cdata($data) {
    +    $this->current->appendChild($this->doc->createCDATASection($data));
    +  }
    +
    +  /**
    +   * Handle a processing instruction.
    +   *
    +   * An "xml" instruction seen in the initial insertion mode is dropped. When
    +   * an instruction processor has been registered it receives the PI and may
    +   * return a replacement for the current node; otherwise a plain PI node is
    +   * appended to the current node.
    +   *
    +   * @param string $name
    +   *   The name of the instruction (its target).
    +   * @param string $data
    +   *   The instruction data, if any.
    +   */
    +  public function processingInstruction($name, $data = NULL) {
    +    // XXX: Ignore initial XML declaration, per the spec.
    +    $isXmlDeclaration = $this->insertMode == static::IM_INITIAL && 'xml' == strtolower($name);
    +    if ($isXmlDeclaration) {
    +      return;
    +    }
    +
    +    if (!isset($this->processor)) {
    +      // No processor registered: this is just a dumb PI element.
    +      $this->current->appendChild($this->doc->createProcessingInstruction($name, $data));
    +      return;
    +    }
    +
    +    // Important: The processor may modify the current DOM tree however
    +    // it sees fit. A non-empty return value becomes the new current node.
    +    $replacement = $this->processor->process($this->current, $name, $data);
    +    if ($replacement) {
    +      $this->current = $replacement;
    +    }
    +  }
    +
    +  // ==========================================================================
    +  // UTILITIES
    +  // ==========================================================================
    +
    +  /**
    +   * Apply normalization rules to a tag name.
    +   *
    +   * See sections 2.9 and 8.1.2.
    +   *
    +   * At present the name is handed back untouched. Stripping a namespace
    +   * prefix (everything up to the last ':') used to be done here but was
    +   * disabled because section 2.9 suggests it should not be done.
    +   *
    +   * @param string $name
    +   *   The tag name.
    +   * @return string
    +   *   The tag name, unchanged.
    +   */
    +  protected function normalizeTagName($name) {
    +    $normalized = $name;
    +    return $normalized;
    +  }
    +
    +  /**
    +   * Placeholder for quirks-mode tree resolution.
    +   *
    +   * @param string $name
    +   *   Not used.
    +   * @throws \Exception
    +   *   Always; this method is not implemented.
    +   */
    +  protected function quirksTreeResolver($name) {
    +    throw new \Exception("Not implemented.");
    +  }
    +
    +  /**
    +   * Climb the tree from the current node and close the nearest $tag element.
    +   *
    +   * "Closing" means the insertion point is moved to the matching element's
    +   * parent. The climb stops at the first node that is not an element.
    +   *
    +   * @param string $tag
    +   *   The tag name to look for.
    +   * @return boolean
    +   *   TRUE if a matching element was found and closed, FALSE otherwise.
    +   */
    +  protected function autoclose($tag) {
    +    $node = $this->current;
    +    while (TRUE) {
    +      if ($node->nodeType != XML_ELEMENT_NODE) {
    +        return FALSE;
    +      }
    +      if ($node->tagName == $tag) {
    +        $this->current = $node->parentNode;
    +        return TRUE;
    +      }
    +      // Step up one level; running out of parents ends the search.
    +      $node = $node->parentNode;
    +      if (!$node) {
    +        return FALSE;
    +      }
    +    }
    +  }
    +
    +  /**
    +   * Checks whether an element with the given tag name is currently open.
    +   *
    +   * The current node and each of its parents are examined, for as long as
    +   * the node being examined is an element.
    +   *
    +   * @param string $tagname
    +   *   The tag name to look for.
    +   * @return boolean
    +   *   TRUE if the current node or one of the elements above it has that tag
    +   *   name, FALSE otherwise.
    +   */
    +  protected function isAncestor($tagname) {
    +    for ($node = $this->current; $node->nodeType === XML_ELEMENT_NODE; $node = $node->parentNode) {
    +      if ($node->tagName == $tagname) {
    +        return TRUE;
    +      }
    +    }
    +    return FALSE;
    +  }
    +
    +  /**
    +   * Returns TRUE if the immediate parent element is of the given tagname.
    +   *
    +   * The "immediate parent" is the current node, i.e. the node new children
    +   * are appended to.
    +   *
    +   * @param string $tagname
    +   *   The tag name to compare against.
    +   * @return boolean
    +   *   TRUE if the current node is an element with that tag name. FALSE
    +   *   otherwise, including when the current node is not an element.
    +   */
    +  protected function isParent($tagname) {
    +    // Only elements have a tagName; the current node can also be the document
    +    // itself, and reading tagName there raises an undefined-property notice.
    +    if ($this->current->nodeType !== XML_ELEMENT_NODE) {
    +      return FALSE;
    +    }
    +    return $this->current->tagName == $tagname;
    +  }
    +
    +
    +}
    diff --git a/libraries/html5php/HTML5/Parser/EventHandler.php b/libraries/html5php/HTML5/Parser/EventHandler.php
    new file mode 100644
    index 0000000..4034938
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Parser/EventHandler.php
    @@ -0,0 +1,111 @@
    +)
    +   * @return numeric
    +   *   One of the Tokenizer::TEXTMODE_* constants.
    +   */
    +  public function startTag($name, $attributes = array(), $selfClosing = FALSE);
    +  /**
    +   * An end-tag.
    +   *
    +   * @param string $name
    +   *   The tag name.
    +   */
    +  public function endTag($name);
    +  /**
    +   * A comment section (unparsed character data).
    +   *
    +   * @param string $cdata
    +   *   The unparsed character data of the comment.
    +   */
    +  public function comment($cdata);
    +  /**
    +   * A unit of parsed character data.
    +   *
    +   * Entities in this text are *already decoded*.
    +   *
    +   * @param string $cdata
    +   *   The parsed character data.
    +   */
    +  public function text($cdata);
    +  /**
    +   * Indicates that the document has been entirely processed.
    +   *
    +   * This event carries no arguments.
    +   */
    +  public function eof();
    +  /**
    +   * Emitted when the parser encounters an error condition.
    +   *
    +   * @param string $msg
    +   *   The error message.
    +   * @param int $line
    +   *   The line number associated with the error.
    +   * @param int $col
    +   *   The column number associated with the error.
    +   */
    +  public function parseError($msg, $line, $col);
    +
    +  /**
    +   * Emitted for a CDATA section.
    +   *
    +   * @param string $data
    +   *   The unparsed character data.
    +   */
    +  public function cdata($data);
    +  /**
    +   * Emitted for a processing instruction.
    +   *
    +   * This is a holdover from the XML spec.
    +   *
    +   * While user agents don't get PIs, server-side does.
    +   *
    +   * @param string $name
    +   *   The name of the processor (e.g. 'php').
    +   * @param string $data
    +   *   The unparsed data. Defaults to NULL when omitted.
    +   */
    +  public function processingInstruction($name, $data = NULL);
    +}
    diff --git a/libraries/html5php/HTML5/Parser/FileInputStream.php b/libraries/html5php/HTML5/Parser/FileInputStream.php
    new file mode 100644
    index 0000000..c1bb128
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Parser/FileInputStream.php
    @@ -0,0 +1,35 @@
    +is = $input;
    +  }
    +
    +  /**
    +   * Report where the scanner currently is in the input.
    +   *
    +   * @return int
    +   *   The current integer byte position, as given by the wrapped input's
    +   *   key().
    +   */
    +  public function position() {
    +    $offset = $this->is->key();
    +    return $offset;
    +  }
    +
    +  /**
    +   * Look at the next character without consuming it.
    +   *
    +   * @return string
    +   *   The next character, as given by the wrapped input's peek().
    +   */
    +  public function peek() {
    +    $upcoming = $this->is->peek();
    +    return $upcoming;
    +  }
    +
    +  /**
    +   * Advance the pointer and get the character it lands on.
    +   *
    +   * When the debug flag is set, the character is also printed to STDOUT.
    +   *
    +   * @return string
    +   *   The character at the new position, or FALSE if the wrapped input is
    +   *   no longer valid after advancing.
    +   */
    +  public function next() {
    +    $this->is->next();
    +    if (!$this->is->valid()) {
    +      return FALSE;
    +    }
    +
    +    if ($this->debug) {
    +      fprintf(STDOUT, "> %s\n", $this->is->current());
    +    }
    +    return $this->is->current();
    +  }
    +
    +  /**
    +   * Get the character at the pointer without moving the pointer.
    +   *
    +   * @return string
    +   *   The current character, or FALSE if the wrapped input is not valid at
    +   *   this position.
    +   */
    +  public function current() {
    +    return $this->is->valid() ? $this->is->current() : FALSE;
    +  }
    +
    +  /**
    +   * Silently consume N chars.
    +   *
    +   * Calls next() once per character and discards what it returns.
    +   *
    +   * @param int $count
    +   *   How many characters to consume; defaults to 1.
    +   */
    +  public function consume($count = 1) {
    +    $consumed = 0;
    +    while ($consumed < $count) {
    +      $this->next();
    +      ++$consumed;
    +    }
    +  }
    +
    +  /**
    +   * Move the data pointer backwards, un-reading some of the data.
    +   *
    +   * @param int $howMany
    +   *   The number of characters to move the pointer back; defaults to 1.
    +   */
    +  public function unconsume($howMany = 1) {
    +    $stream = $this->is;
    +    $stream->unconsume($howMany);
    +  }
    +
    +  /**
    +   * Read the run of hex characters at the pointer.
    +   *
    +   * The characters accepted are those in the CHARS_HEX class constant. The
    +   * data pointer is moved along with the read.
    +   *
    +   * @return string
    +   *   The next group of hex characters.
    +   */
    +  public function getHex() {
    +    $hex = $this->is->charsWhile(static::CHARS_HEX);
    +    return $hex;
    +  }
    +
    +  /**
    +   * Read the run of ASCII alpha characters at the pointer.
    +   *
    +   * The characters accepted are those in the CHARS_ALPHA class constant. The
    +   * data pointer is moved along with the read.
    +   *
    +   * @return string
    +   *   The next group of ASCII alpha characters.
    +   */
    +  public function getAsciiAlpha() {
    +    $letters = $this->is->charsWhile(static::CHARS_ALPHA);
    +    return $letters;
    +  }
    +
    +  /**
    +   * Read the run of ASCII letters and digits at the pointer.
    +   *
    +   * The characters accepted are those in the CHARS_ALNUM class constant. The
    +   * data pointer is moved along with the read.
    +   *
    +   * @return string
    +   *   The next group of ASCII alpha characters and numbers.
    +   */
    +  public function getAsciiAlphaNum() {
    +    $alnum = $this->is->charsWhile(static::CHARS_ALNUM);
    +    return $alnum;
    +  }
    +
    +  /**
    +   * Read the run of decimal digits (0-9) at the pointer.
    +   *
    +   * The data pointer is moved along with the read.
    +   *
    +   * @return string
    +   *   The next group of numbers.
    +   */
    +  public function getNumeric() {
    +    $digits = $this->is->charsWhile('0123456789');
    +    return $digits;
    +  }
    +
    +  /**
    +   * Consume the run of whitespace at the pointer.
    +   *
    +   * The characters consumed are newline, tab, formfeed and space.
    +   *
    +   * @return mixed
    +   *   Whatever the wrapped input's charsWhile() returns for that run.
    +   */
    +  public function whitespace() {
    +    $skipped = $this->is->charsWhile("\n\t\f ");
    +    return $skipped;
    +  }
    +
    +  /**
    +   * Get the line that is currently being consumed.
    +   *
    +   * @return int
    +   *   The current line number, as given by the wrapped input.
    +   */
    +  public function currentLine() {
    +    $line = $this->is->currentLine();
    +    return $line;
    +  }
    +
    +  /**
    +   * Read chars until something in the mask is encountered.
    +   */
    +  public function charsUntil($mask) {
    +    // Forwarded unchanged to the input stream.
    +    $consumed = $this->is->charsUntil($mask);
    +    return $consumed;
    +  }
    +  /**
    +   * Read chars as long as the mask matches.
    +   */
    +  public function charsWhile($mask) {
    +    // Forwarded unchanged to the input stream.
    +    $consumed = $this->is->charsWhile($mask);
    +    return $consumed;
    +  }
    +
    +  /**
    +   * Returns the current column of the current line that the tokenizer is at.
    +   *
    +   * Newlines are column 0. The first char after a newline is column 1.
    +   *
    +   * @return int
    +   *   The column number.
    +   */
    +  public function columnOffset() {
    +    // Column tracking lives in the input stream; just forward the request.
    +    $column = $this->is->columnOffset();
    +    return $column;
    +  }
    +
    +  /**
    +   * Get all characters until EOF.
    +   *
    +   * This consumes characters until the EOF.
    +   *
    +   * @return string
    +   *   The remaining characters, as returned by the underlying input stream.
    +   */
    +  public function remainingChars() {
    +    // Forwarded to the input stream, which does the actual consuming.
    +    $rest = $this->is->remainingChars();
    +    return $rest;
    +  }
    +}
    diff --git a/libraries/html5php/HTML5/Parser/StringInputStream.php b/libraries/html5php/HTML5/Parser/StringInputStream.php
    new file mode 100644
    index 0000000..ca5fee0
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Parser/StringInputStream.php
    @@ -0,0 +1,315 @@
    +
    +
    +Permission is hereby granted, free of charge, to any person obtaining a
    +copy of this software and associated documentation files (the
    +"Software"), to deal in the Software without restriction, including
    +without limitation the rights to use, copy, modify, merge, publish,
    +distribute, sublicense, and/or sell copies of the Software, and to
    +permit persons to whom the Software is furnished to do so, subject to
    +the following conditions:
    +
    +The above copyright notice and this permission notice shall be included
    +in all copies or substantial portions of the Software.
    +
    +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    +
    +*/
    +
    +// Some conventions:
    +// - /* */ indicates verbatim text from the HTML 5 specification
    +//   MPB: Not sure which version of the spec. Moving from HTML5lib to 
    +//   HTML5-PHP, I have been using this version:
    +//   http://www.w3.org/TR/2012/CR-html5-20121217/Overview.html#contents
    +//
    +// - // indicates regular comments
    +
    +class StringInputStream implements InputStream {
    +  /**
    +   * The string data we're parsing.
    +   */
    +  private $data;
    +
    +  /**
    +   * The current integer byte position we are in $data
    +   */
    +  private $char;
    +
    +  /**
    +   * Length of $data; when $char === $EOF, we are at the end-of-file.
    +   */
    +  private $EOF;
    +
    +  /**
    +   * Parse errors.
    +   */
    +  public $errors = array();
    +
    +  /**
    +   * Create a new InputStream wrapper.
    +   *
    +   * @param string $data
    +   *   Data to parse.
    +   * @param string $encoding
    +   *   Encoding of $data; passed to UTF8Utils::convertToUTF8().
    +   * @param string $debug
    +   *   Optional fprintf() format string. When non-empty, the converted data
    +   *   and its byte length are written to STDOUT using this format.
    +   */
    +  public function __construct($data, $encoding = 'UTF-8', $debug = '') {
    +
    +    $data = UTF8Utils::convertToUTF8($data, $encoding);
    +    if ($debug) fprintf(STDOUT, $debug, $data, strlen($data));
    +
    +    // There is good reason to question whether it makes sense to
    +    // do this here, since most of these checks are done during
    +    // parsing, and since this check doesn't actually *do* anything
    +    // beyond recording the problems in $this->errors.
    +    $this->errors = UTF8Utils::checkForIllegalCodepoints($data);
    +    //if (!empty($e)) {
    +    //  throw new ParseError("UTF-8 encoding issues: " . implode(', ', $e));
    +    //}
    +
    +    $data = $this->replaceLinefeeds($data);
    +
    +    $this->data = $data;
    +    $this->char = 0;
    +    $this->EOF  = strlen($data);
    +  }
    +
    +  /**
    +   * Replace linefeed characters according to the spec.
    +   *
    +   * NUL bytes are also replaced here, with the UTF-8 encoding of U+FFFD.
    +   *
    +   * @param string $data
    +   *   The raw (already UTF-8) input.
    +   * @return string
    +   *   The input with CRLF and lone CR turned into LF.
    +   */
    +  protected function replaceLinefeeds($data) {
    +    /* U+000D CARRIAGE RETURN (CR) characters and U+000A LINE FEED
    +    (LF) characters are treated specially. Any CR characters
    +    that are followed by LF characters must be removed, and any
    +    CR characters not followed by LF characters must be converted
    +    to LF characters. Thus, newlines in HTML DOMs are represented
    +    by LF characters, and there are never any CR characters in the
    +    input to the tokenization stage. */
    +    $crlfTable = array(
    +        "\0" =>  "\xEF\xBF\xBD",
    +        "\r\n" => "\n",
    +        "\r" => "\n",
    +    );
    +    return strtr($data, $crlfTable);
    +  }
    +
    +  /**
    +   * Returns the current line that the tokenizer is at.
    +   *
    +   * @return int
    +   *   The 1-based line number of the current position.
    +   */
    +  public function currentLine() {
    +    if (empty($this->EOF) || $this->char == 0) {
    +      return 1;
    +    }
    +    // Count the newlines before the cursor (clamped to the data length)
    +    // and add one, because line numbers start at 1.
    +    return substr_count($this->data, "\n", 0, min($this->char, $this->EOF)) + 1;
    +  }
    +
    +  /**
    +   * @deprecated Use currentLine() instead.
    +   */
    +  public function getCurrentLine() {
    +    // Must be called on $this: there is no global currentLine() function.
    +    return $this->currentLine();
    +  }
    +
    +  /**
    +   * Returns the current column of the current line that the tokenizer is at.
    +   *
    +   * Newlines are column 0. The first char after a newline is column 1.
    +   *
    +   * @return int
    +   *   The column number.
    +   */
    +  public function columnOffset() {
    +
    +    // Short circuit for the first char.
    +    if ($this->char == 0) {
    +      return 0;
    +    }
    +    // strrpos is weird, and the offset needs to be negative for what we
    +    // want (i.e., the last \n before $this->char). This needs to not have
    +    // one (to make it point to the next character, the one we want the
    +    // position of) added to it because strrpos's behaviour includes the
    +    // final offset byte.
    +    $backwardFrom = $this->char - 1 - strlen($this->data);
    +    $lastLine = strrpos($this->data, "\n", $backwardFrom);
    +
    +    // Take the bytes between the last newline (exclusive) and the cursor
    +    // (exclusive); their character count is the column.
    +    if ($lastLine !== FALSE) {
    +      $findLengthOf = substr($this->data, $lastLine + 1, $this->char - 1 - $lastLine);
    +    }
    +    else {
    +      // No newline precedes the cursor: measure from the start of the data.
    +      $findLengthOf = substr($this->data, 0, $this->char);
    +    }
    +
    +    // Columns are counted in characters, not bytes.
    +    return UTF8Utils::countChars($findLengthOf);
    +  }
    +
    +  /**
    +   * @deprecated Use columnOffset() instead.
    +   */
    +  public function getColumnOffset() {
    +    return $this->columnOffset();
    +  }
    +
    +  /**
    +   * Get the current character.
    +   *
    +   * No bounds check is performed here; callers are expected to consult
    +   * valid() first.
    +   *
    +   * @return string
    +   *   The current character (a single byte of $data).
    +   */
    +  public function current() {
    +    return $this->data[$this->char];
    +  }
    +
    +  /**
    +   * Advance the pointer. This is part of the Iterator interface.
    +   */
    +  public function next() {
    +    $this->char++;
    +  }
    +
    +  /**
    +   * Rewind to the start of the string.
    +   */
    +  public function rewind() {
    +    $this->char = 0;
    +  }
    +
    +  /**
    +   * Is the current pointer location valid.
    +   *
    +   * @return bool
    +   *   TRUE while the pointer is before the end of the data.
    +   */
    +  public function valid() {
    +    return $this->char < $this->EOF;
    +  }
    +
    +  /**
    +   * Get all characters until EOF.
    +   *
    +   * This reads to the end of the file, and sets the read marker at the
    +   * end of the file.
    +   *
    +   * @note This performs bounds checking
    +   *
    +   * @return string
    +   *   Returns the remaining text. If called when the InputStream is
    +   *   already exhausted, it returns an empty string.
    +   */
    +  public function remainingChars() {
    +    if ($this->char < $this->EOF) {
    +      $data = substr($this->data, $this->char);
    +      $this->char = $this->EOF;
    +      return $data;
    +    }
    +    return '';
    +  }
    +
    +  /**
    +   * Read to a particular match (or until $max bytes are consumed).
    +   *
    +   * This operates on byte sequences, not characters.
    +   *
    +   * Matches as far as possible until we reach a certain set of bytes
    +   * and returns the matched substring.
    +   *
    +   * @param string $bytes
    +   *   Bytes to match.
    +   * @param int $max
    +   *   Maximum number of bytes to scan.
    +   * @return mixed
    +   *   The consumed substring (possibly empty), or FALSE if the stream is
    +   *   already at EOF. Use strict comparison to tell the two apart.
    +   */
    +  public function charsUntil($bytes, $max = null) {
    +    if ($this->char >= $this->EOF) {
    +      return FALSE;
    +    }
    +
    +    // A $max of 0 is a real limit; only NULL (or another falsy non-zero
    +    // value) means "no limit".
    +    if ($max === 0 || $max) {
    +      $len = strcspn($this->data, $bytes, $this->char, $max);
    +    }
    +    else {
    +      $len = strcspn($this->data, $bytes, $this->char);
    +    }
    +
    +    $string = (string) substr($this->data, $this->char, $len);
    +    $this->char += $len;
    +    return $string;
    +  }
    +
    +  /**
    +   * Returns the string so long as $bytes matches.
    +   *
    +   * Matches as far as possible with a certain set of bytes
    +   * and returns the matched substring.
    +   *
    +   * @param string $bytes
    +   *   A mask of bytes to match. If ANY byte in this mask matches the
    +   *   current char, the pointer advances and the char is part of the
    +   *   substring.
    +   * @param int $max
    +   *   The max number of chars to read.
    +   * @return mixed
    +   *   The consumed substring (possibly empty), or FALSE if the stream is
    +   *   already at EOF.
    +   */
    +  public function charsWhile($bytes, $max = null) {
    +    if ($this->char >= $this->EOF) {
    +      return FALSE;
    +    }
    +
    +    // A $max of 0 is a real limit; see charsUntil().
    +    if ($max === 0 || $max) {
    +      $len = strspn($this->data, $bytes, $this->char, $max);
    +    }
    +    else {
    +      $len = strspn($this->data, $bytes, $this->char);
    +    }
    +    $string = (string) substr($this->data, $this->char, $len);
    +    $this->char += $len;
    +    return $string;
    +  }
    +
    +  /**
    +   * Unconsume characters.
    +   *
    +   * The request is ignored when it would move the pointer before the start
    +   * of the data.
    +   *
    +   * @param int $howMany
    +   *   The number of characters to unconsume.
    +   */
    +  public function unconsume($howMany = 1) {
    +    if (($this->char - $howMany) >= 0) {
    +      $this->char = $this->char - $howMany;
    +    }
    +  }
    +
    +  /**
    +   * Look ahead without moving cursor.
    +   *
    +   * @return mixed
    +   *   The byte after the current one, or FALSE when there is none.
    +   */
    +  public function peek() {
    +    // Strictly less than: index $EOF is one past the last byte.
    +    if (($this->char + 1) < $this->EOF) {
    +      return $this->data[$this->char + 1];
    +    }
    +
    +    return FALSE;
    +  }
    +
    +  /**
    +   * Returns the current byte position (Iterator key).
    +   */
    +  public function key() {
    +    return $this->char;
    +  }
    +}
    diff --git a/libraries/html5php/HTML5/Parser/Tokenizer.php b/libraries/html5php/HTML5/Parser/Tokenizer.php
    new file mode 100644
    index 0000000..a78cf23
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Parser/Tokenizer.php
    @@ -0,0 +1,1058 @@
    +scanner = $scanner;
    +    $this->events = $eventHandler;
    +  }
    +
    +  /**
    +   * Begin parsing.
    +   *
    +   * This will begin scanning the document, tokenizing as it goes.
    +   * Tokens are emitted into the event handler.
    +   *
    +   * Tokenizing will continue until the document is completely
    +   * read. Errors are emitted into the event handler, but
    +   * the parser will attempt to continue parsing until the
    +   * entire input stream is read.
    +   */
    +  public function parse() {
    +    // The body always runs at least once, then repeats for as long as
    +    // carryOn stays set (eof() is what clears it).
    +    while (TRUE) {
    +      // Cursor position before this pass; recorded but not used yet.
    +      $positionBefore = $this->scanner->position();
    +      $this->consumeData();
    +
    +      // FIXME: Add infinite loop protection.
    +      if (!$this->carryOn) {
    +        break;
    +      }
    +    }
    +  }
    +
    +  /**
    +   * Set the text mode for the character data reader.
    +   *
    +   * HTML5 defines three different modes for reading text:
    +   * - Normal: Read until a tag is encountered.
    +   * - RCDATA: Read until a tag is encountered, but skip a few otherwise-
    +   *   special characters.
    +   * - Raw: Read until a special closing tag is encountered (viz. pre, script)
    +   *
    +   * This allows those modes to be set.
    +   *
    +   * Normally, setting is done by the event handler via a special return code on
    +   * startTag(), but it can also be set manually using this function.
    +   *
    +   * @param integer $textmode
    +   *   One of Elements::TEXT_*
    +   * @param string $untilTag
    +   *   The tag that should stop RAW or RCDATA mode. Normal mode does not
    +   *   use this indicator.
    +   */
    +  public function setTextMode($textmode, $untilTag = NULL) {
    +    // Only the RAW and RCDATA bits survive; every other bit in $textmode
    +    // is masked off.
    +    $allowedModes = Elements::TEXT_RAW | Elements::TEXT_RCDATA;
    +    $this->untilTag = $untilTag;
    +    $this->textMode = $textmode & $allowedModes;
    +  }
    +
    +  /**
    +   * Consume a character and make a move.
    +   * HTML5 8.2.4.1
    +   */
    +  protected function consumeData() {
    +    // All four handlers run on every pass, in this fixed order. Each one
    +    // inspects the scanner itself and returns early when it does not apply;
    +    // their return values are not inspected here.
    +    $this->characterReference();
    +    $this->tagOpen();
    +    $this->eof();
    +    $this->characterData();
    +
    +    // Report whether the caller should keep looping.
    +    $keepGoing = $this->carryOn;
    +    return $keepGoing;
    +  }
    +
    +  /**
    +   * Parse anything that looks like character data.
    +   *
    +   * Different rules apply based on the current text mode.
    +   *
    +   * @see Elements::TEXT_RAW Elements::TEXT_RCDATA.
    +   */
    +  protected function characterData() {
    +    // Nothing to read at EOF.
    +    if ($this->scanner->current() === FALSE) {
    +      return FALSE;
    +    }
    +
    +    // RAW and RCDATA share the same reader.
    +    $mode = $this->textMode;
    +    if ($mode == Elements::TEXT_RAW || $mode == Elements::TEXT_RCDATA) {
    +      return $this->rawText();
    +    }
    +
    +    // Any other mode: '<' and '&' are left for the tag and
    +    // character-reference handlers.
    +    $tok = $this->scanner->current();
    +    $startsMarkup = strspn($tok, "<&");
    +    if ($startsMarkup) {
    +      return FALSE;
    +    }
    +    return $this->text();
    +  }
    +
    +  /**
    +   * This buffers the current token as character data.
    +   */
    +  protected function text() {
    +    $char = $this->scanner->current();
    +
    +    // EOF guard; callers are expected to have checked already.
    +    if (FALSE === $char) {
    +      return FALSE;
    +    }
    +
    +    // A NUL is reported as a parse error but still buffered below.
    +    if ("\00" === $char) {
    +      $this->parseError("Received NULL character.");
    +    }
    +
    +    // Append to the pending text and step past the character.
    +    $this->buffer($char);
    +    $this->scanner->next();
    +    return TRUE;
    +  }
    +
    +  /**
    +   * Read text in RAW mode.
    +   */
    +  protected function rawText() {
    +    // Without a terminating tag there is nothing to scan for; fall back to
    +    // buffering a single character.
    +    if (is_null($this->untilTag)) {
    +      return $this->text();
    +    }
    +    // Everything up to the closing tag of the element that switched us into
    +    // this mode is emitted as one text event.
    +    $sequence = '</' . $this->untilTag . '>';
    +    $txt =  $this->readUntilSequence($sequence);
    +    $this->events->text($txt);
    +    // Back to normal text mode before the closing tag itself is handled.
    +    $this->setTextMode(0);
    +    return $this->endTag();
    +  }
    +
    +  /**
    +   * If the document is read, emit an EOF event.
    +   */
    +  protected function eof() {
    +    // Still input left: not our business.
    +    if ($this->scanner->current() !== FALSE) {
    +      return FALSE;
    +    }
    +
    +    // Emit any pending text, announce EOF, and stop the parse loop.
    +    $this->flushBuffer();
    +    $this->events->eof();
    +    $this->carryOn = FALSE;
    +    return TRUE;
    +  }
    +
    +  /**
    +   * Handle character references (aka entities).
    +   *
    +   * This version is specific to PCDATA, as it buffers data into the
    +   * text buffer. For a generic version, see decodeCharacterReference().
    +   *
    +   * HTML5 8.2.4.2
    +   */
    +  protected function characterReference() {
    +    $decoded = $this->decodeCharacterReference();
    +    // FALSE means the scanner was not positioned on a reference at all.
    +    if ($decoded === FALSE) {
    +      return FALSE;
    +    }
    +    $this->buffer($decoded);
    +    return TRUE;
    +  }
    +
    +
    +  /**
    +   * Emit a tagStart event on encountering a tag.
    +   *
    +   * 8.2.4.8
    +   */
    +  protected function tagOpen() {
    +    if ($this->scanner->current() != '<') {
    +      return FALSE;
    +    }
    +
    +    // Any buffered text data can go out now.
    +    $this->flushBuffer();
    +
    +    // Step past the '<'.
    +    $this->scanner->next();
    +
    +    // Try each construct in turn; the first one that succeeds wins.
    +    if ($this->markupDeclaration()) {
    +      return TRUE;
    +    }
    +    if ($this->endTag()) {
    +      return TRUE;
    +    }
    +    if ($this->processingInstruction()) {
    +      return TRUE;
    +    }
    +    if ($this->tagName()) {
    +      return TRUE;
    +    }
    +    // Nothing matched: report it (parseError() returns FALSE) and then
    +    // treat what follows as character data.
    +    if ($this->parseError("Illegal tag opening")) {
    +      return TRUE;
    +    }
    +    return (bool) $this->characterData();
    +  }
    +
    +  /**
    +   * Look for markup.
    +   */
    +  protected function markupDeclaration() {
    +    if ($this->scanner->current() != '!') {
    +      return FALSE;
    +    }
    +
    +    $tok = $this->scanner->next();
    +
    +    // Comment:
    +    if ($tok == '-' && $this->scanner->peek() == '-') {
    +      $this->scanner->next(); // Consume the other '-'
    +      $this->scanner->next(); // Next char.
    +      return $this->comment();
    +    }
    +    // Doctype
    +    elseif($tok == 'D' || $tok == 'd') {
    +      return $this->doctype('');
    +    }
    +    // CDATA section
    +    elseif($tok == '[') {
    +      return $this->cdataSection();
    +    }
    +
    +    // FINISH
    +    $this->parseError("Expected . Emit an empty comment because 8.2.4.46 says to.
    +    if ($tok == '>') {
    +      // Parse error. Emit the comment token.
    +      $this->parseError("Expected comment data, got '>'");
    +      $this->events->comment('');
    +      $this->scanner->next();
    +      return TRUE;
    +    }
    +
    +    // Replace NULL with the replacement char.
    +    if ($tok == "\0") {
    +      $tok = UTF8Utils::FFFD;
    +    }
    +    while (!$this->isCommentEnd()) {
    +      $comment .= $tok;
    +      $tok = $this->scanner->next();
    +    }
    +
    +    $this->events->comment($comment);
    +    $this->scanner->next();
    +    return TRUE;
    +  }
    +
    +  /**
    +   * Check if the scanner has reached the end of a comment.
    +   */
    +  protected function isCommentEnd() {
    +    $here = $this->scanner->current();
    +
    +    // Running out of input also ends the comment, with a parse error.
    +    if ($here === FALSE) {
    +      $this->parseError("Unexpected EOF in a comment.");
    +      return TRUE;
    +    }
    +
    +    // A terminator has to begin with '-'.
    +    if ($here != '-') {
    +      return FALSE;
    +    }
    +
    +    // Step forward and look for the remaining '->'.
    +    $second = $this->scanner->next();
    +    if ($second == '-') {
    +      if ($this->scanner->peek() == '>') {
    +        $this->scanner->next(); // Consume the last '>'
    +        return TRUE;
    +      }
    +    }
    +
    +    // Not a terminator after all: put the character back.
    +    $this->scanner->unconsume(1);
    +    return FALSE;
    +  }
    +
    +  /**
    +   * Parse a DOCTYPE.
    +   *
    +   * Parse a DOCTYPE declaration. This method has strong bearing on whether or
    +   * not Quirksmode is enabled on the event handler.
    +   *
    +   * @todo This method is a little long. Should probably refactor.
    +   */
    +  protected function doctype() {
    +    if (strcasecmp($this->scanner->current(), 'D')) {
    +      return FALSE;
    +    }
    +    // Check that string is DOCTYPE.
    +    $chars = $this->scanner->charsWhile("DOCTYPEdoctype");
    +    if (strcasecmp($chars, 'DOCTYPE')) {
    +      $this->parseError('Expected DOCTYPE, got %s', $chars);
    +      return $this->bogusComment('scanner->whitespace();
    +    $tok = $this->scanner->current();
    +
    +    // EOF: die.
    +    if ($tok === FALSE) {
    +      $this->events->doctype('html5',EventHandler::DOCTYPE_NONE,'', TRUE);
    +      return $this->eof();
    +    }
    +
    +    $doctypeName = '';
    +
    +    // NULL char: convert.
    +    if ($tok === "\0") {
    +      $this->parseError("Unexpected NULL character in DOCTYPE.");
    +      $doctypeName .= UTF8::FFFD;
    +      $tok = $this->scanner->next();
    +    }
    +
    +    $stop = " \n\f>";
    +    $doctypeName = $this->scanner->charsUntil($stop);
    +    // Lowercase ASCII, replace \0 with FFFD
    +    $doctypeName = strtolower(strtr($doctypeName, "\0", UTF8Utils::FFFD));
    +
    +    $tok = $this->scanner->current();
    +
    +    // If FALSE, emit a parse error, DOCTYPE, and return.
    +    if ($tok === FALSE) {
    +      $this->parseError('Unexpected EOF in DOCTYPE declaration.');
    +      $this->events->doctype($doctypeName, EventHandler::DOCTYPE_NONE, NULL, TRUE);
    +      return TRUE;
    +    }
    +
    +    // Short DOCTYPE, like <!DOCTYPE html>
    +    if ($tok == '>') {
    +      // DOCTYPE without a name.
    +      if (strlen($doctypeName) == 0) {
    +        $this->parseError("Expected a DOCTYPE name. Got nothing.");
    +        $this->events->doctype($doctypeName, 0, NULL, TRUE);
    +        $this->scanner->next();
    +        return TRUE;
    +      }
    +      $this->events->doctype($doctypeName);
    +      $this->scanner->next();
    +      return TRUE;
    +    }
    +    $this->scanner->whitespace();
    +
    +    $pub = strtoupper($this->scanner->getAsciiAlpha());
    +    $white = strlen($this->scanner->whitespace());
    +    $tok = $this->scanner->current();
    +
    +    // Get ID, and flag it as pub or system.
    +    if (($pub == 'PUBLIC' || $pub == 'SYSTEM') && $white > 0) {
    +      // Get the sys ID.
    +      $type = $pub == 'PUBLIC' ? EventHandler::DOCTYPE_PUBLIC : EventHandler::DOCTYPE_SYSTEM;
    +      $id = $this->quotedString("\0>");
    +      if ($id === FALSE) {
    +        $this->events->doctype($doctypeName, $type, $pub, FALSE);
    +        return FALSE;
    +      }
    +
    +      // Premature EOF.
    +      if ($this->scanner->current() === FALSE) {
    +        $this->parseError("Unexpected EOF in DOCTYPE");
    +        $this->events->doctype($doctypeName, $type, $id, TRUE);
    +        return TRUE;
    +      }
    +
    +      // Well-formed complete DOCTYPE.
    +      $this->scanner->whitespace();
    +      if ($this->scanner->current() == '>') {
    +        $this->events->doctype($doctypeName, $type, $id, FALSE);
    +        $this->scanner->next();
    +        return TRUE;
    +      }
    +
    +      // If we get here, we have scanner->charsUntil(">");
    +      $this->parseError("Malformed DOCTYPE.");
    +      $this->events->doctype($doctypeName, $type, $id, TRUE);
    +      $this->scanner->next();
    +      return TRUE;
    +    }
    +
    +    // Else it's a bogus DOCTYPE.
    +    // Consume to > and trash.
    +    $this->scanner->charsUntil('>');
    +
    +    $this->parseError("Expected PUBLIC or SYSTEM. Got %s.", $pub);
    +    $this->events->doctype($doctypeName, 0, NULL, TRUE);
    +    $this->scanner->next();
    +    return TRUE;
    +
    +  }
    +
    +  /**
    +   * Utility for reading a quoted string.
    +   *
    +   * @param string $stopchars
    +   *   Characters (in addition to a close-quote) that should stop the string.
    +   *   E.g. sometimes '>' is higher precedence than '"' or "'".
    +   * @return mixed
    +   *   String if one is found (quotations omitted)
    +   */
    +  protected function quotedString($stopchars) {
    +    $quote = $this->scanner->current();
    +
    +    // Not positioned on an opening quote: nothing to read.
    +    if ($quote != '"' && $quote != "'") {
    +      return FALSE;
    +    }
    +
    +    // Skip the opening quote, then read up to the matching quote or any of
    +    // the caller's stop characters, whichever comes first.
    +    $this->scanner->next();
    +    $value = $this->scanner->charsUntil($quote . $stopchars);
    +
    +    if ($this->scanner->current() != $quote) {
    +      // Parse error because no close quote.
    +      $this->parseError("Expected %s, got %s", $quote, $this->scanner->current());
    +    }
    +    else {
    +      // Step past the closing quote.
    +      $this->scanner->next();
    +    }
    +    return $value;
    +  }
    +
    +
    +  /**
    +   * Handle a CDATA section.
    +   */
    +  protected function cdataSection() {
    +    if ($this->scanner->current() != '[') {
    +      return FALSE;
    +    }
    +    $cdata = '';
    +    $this->scanner->next();
    +
    +    $chars = $this->scanner->charsWhile('CDAT');
    +    if ($chars != 'CDATA' || $this->scanner->current() != '[') {
    +      $this->parseError('Expected [CDATA[, got %s', $chars);
    +      return $this->bogusComment('scanner->next();
    +    do {
    +      if ($tok === FALSE) {
    +        $this->parseError('Unexpected EOF inside CDATA.');
    +        $this->bogusComment('scanner->next();
    +    }
    +    while (!$this->sequenceMatches(']]>'));
    +
    +    // Consume ]]>
    +    $this->scanner->consume(3);
    +
    +    $this->events->cdata($cdata);
    +    return TRUE;
    +
    +  }
    +
    +  // ================================================================
    +  // Non-HTML5
    +  // ================================================================
    +  /**
    +   * Handle a processing instruction.
    +   *
    +   * XML processing instructions are supposed to be ignored in HTML5,
    +   * treated as "bogus comments". However, since we're not a user
    +   * agent, we allow them. We consume until ?> and then issue a
    +   * EventListener::processingInstruction() event.
    +   */
    +  protected function processingInstruction() {
    +    if ($this->scanner->current() != '?') {
    +      return FALSE;
    +    }
    +
    +    $tok = $this->scanner->next();
    +    $procName = $this->scanner->getAsciiAlpha();
    +    $white = strlen($this->scanner->whitespace());
    +
    +    // If not a PI, send to bogusComment.
    +    if (strlen($procName) == 0 || $white == 0 || $this->scanner->current() == FALSE) {
    +      $this->parseError("Expected processing instruction name, got $tok");
    +      $this->bogusComment('.
    +    while (!($this->scanner->current() == '?' && $this->scanner->peek() == '>')) {
    +      $data .= $this->scanner->current();
    +
    +      $tok = $this->scanner->next();
    +      if ($tok === FALSE) {
    +        $this->parseError("Unexpected EOF in processing instruction.");
    +        $this->events->processingInstruction($procName, $data);
    +        return TRUE;
    +      }
    +
    +    }
    +
    +    $this->scanner->next(); // >
    +    $this->scanner->next(); // Next token.
    +    $this->events->processingInstruction($procName, $data);
    +    return TRUE;
    +  }
    +
    +
    +  // ================================================================
    +  // UTILITY FUNCTIONS
    +  // ================================================================
    +
    +  /**
    +   * Read from the input stream until we get to the desired sequence
    +   * or hit the end of the input stream.
    +   */
    +  protected function readUntilSequence($sequence) {
    +    $collected = '';
    +
    +    // Jump straight to the next occurrence of the sequence's first byte
    +    // instead of testing every position.
    +    $lead = substr($sequence, 0, 1);
    +    // The sequence is also accepted in its upper-cased form.
    +    $upperSequence = strtoupper($sequence);
    +
    +    while ($this->scanner->current() !== FALSE) {
    +      $collected .= $this->scanner->charsUntil($lead);
    +
    +      // Stop as soon as we hit the stopping condition.
    +      $atSequence = $this->sequenceMatches($sequence)
    +        || $this->sequenceMatches($upperSequence);
    +      if ($atSequence) {
    +        return $collected;
    +      }
    +
    +      // False alarm: keep the character and move on.
    +      $collected .= $this->scanner->current();
    +      $this->scanner->next();
    +    }
    +
    +    // If we get here, we hit the EOF.
    +    $this->parseError("Unexpected EOF during text read.");
    +    return $collected;
    +  }
    +
    +  /**
    +   * Check if upcoming chars match the given sequence.
    +   *
    +   * This will read the stream for the $sequence. If it's
    +   * found, this will return TRUE. If not, return FALSE.
    +   * Since this unconsumes any chars it reads, the caller
    +   * will still need to read the next sequence, even if
    +   * this returns TRUE.
    +   *
    +   * Example: $this->sequenceMatches('</script>') will
    +   * see if the input stream is at the start of a
    +   * '</script>' string.
    +   */
    +  protected function sequenceMatches($sequence) {
    +    $needed = strlen($sequence);
    +    $seen = '';
    +    $consumed = 0;
    +
    +    // Read exactly as many characters as the sequence is long.
    +    while ($consumed < $needed) {
    +      $char = $this->scanner->current();
    +
    +      // EOF. Rewind what was read and let the caller handle it.
    +      if ($char === FALSE) {
    +        $this->scanner->unconsume($consumed);
    +        return FALSE;
    +      }
    +
    +      $seen .= $char;
    +      $this->scanner->next();
    +      ++$consumed;
    +    }
    +
    +    // Always put everything back; this is only a look-ahead.
    +    $this->scanner->unconsume($needed);
    +    return $seen == $sequence;
    +  }
    +
    +  /**
    +   * Send a TEXT event with the contents of the text buffer.
    +   *
    +   * This emits an EventHandler::text() event with the current contents of the
    +   * temporary text buffer. (The buffer is used to group as much PCDATA
    +   * as we can instead of emitting lots and lots of TEXT events.)
    +   */
    +  protected function flushBuffer() {
    +    // Only a truly empty buffer is skipped. empty() must not be used here:
    +    // it treats the text "0" as empty, which would hold that text back and
    +    // merge it into a later text event.
    +    if ((string) $this->text === '') {
    +      return;
    +    }
    +    $this->events->text($this->text);
    +    $this->text = '';
    +  }
    +
    +  /**
    +   * Add text to the temporary buffer.
    +   *
    +   * @see flushBuffer()
    +   */
    +  protected function buffer($str) {
    +    // Append to the pending text; flushBuffer() emits and resets it.
    +    $this->text = $this->text . $str;
    +  }
    +
    +  /**
    +   * Emit a parse error.
    +   *
    +   * A parse error always returns FALSE because it never consumes any
    +   * characters.
    +   */
    +  protected function parseError($msg) {
    +    // Anything passed after $msg is treated as sprintf-style arguments.
    +    $formatArgs = array_slice(func_get_args(), 1);
    +    if (count($formatArgs) > 0) {
    +      $msg = vsprintf($msg, $formatArgs);
    +    }
    +
    +    // Report the error together with the scanner's current line and column.
    +    $this->events->parseError(
    +      $msg,
    +      $this->scanner->currentLine(),
    +      $this->scanner->columnOffset()
    +    );
    +    return FALSE;
    +  }
    +
    +  /**
    +   * Decode a character reference and return the string.
    +   *
    +   * Returns FALSE if the entity could not be found. If $inAttribute is set
    +   * to TRUE, a bare & will be returned as-is.
    +   *
    +   * @param boolean $inAttribute
    +   *   Set to TRUE if the text is inside of an attribute value.
    +   *   FALSE otherwise.
    +   */
    +  protected function decodeCharacterReference($inAttribute = FALSE) {
    +
    +    // If it fails this, it's definitely not an entity.
    +    if ($this->scanner->current() != '&') {
    +      return FALSE;
    +    }
    +
    +    // Next char after &.
    +    $tok = $this->scanner->next();
    +    $entity = '';
    +    // Position of the first character after the '&'. Rewinding to here
    +    // leaves only the '&' itself consumed.
    +    $start = $this->scanner->position();
    +
    +    // EOF directly after the '&': it is a literal ampersand.
    +    if ($tok === FALSE) {
    +      return '&';
    +    }
    +
    +    // These indicate not an entity. We return just
    +    // the &.
    +    if (strspn($tok, static::WHITE . "&<") == 1) {
    +      //$this->scanner->next();
    +      return '&';
    +    }
    +
    +    // Numeric entity
    +    if ($tok == '#') {
    +      $tok = $this->scanner->next();
    +
    +      // Hexadecimal encoding.
    +      // X[0-9a-fA-F]+;
    +      // x[0-9a-fA-F]+;
    +      if ($tok == 'x' || $tok == 'X') {
    +        $tok = $this->scanner->next(); // Consume x
    +
    +        // Convert from hex code to char.
    +        $hex = $this->scanner->getHex();
    +        if (empty($hex)) {
    +          $this->parseError("Expected &#xHEX;, got &#x%s", $tok);
    +          // We unconsume because we don't know what parser rules might
    +          // be in effect for the remaining chars. For example. '&#>'
    +          // might result in a specific parsing rule inside of tag
    +          // contexts, while not inside of pcdata context.
    +          $this->scanner->unconsume(2);
    +          return '&';
    +        }
    +        $entity = CharacterReference::lookupHex($hex);
    +      }
    +      // Decimal encoding.
    +      // [0-9]+;
    +      else {
    +        // Convert from decimal to char.
    +        $numeric = $this->scanner->getNumeric();
    +        // FALSE means EOF; an empty string means no digit followed '&#'.
    +        if ($numeric === FALSE || $numeric === '') {
    +          $this->parseError("Expected &#DIGITS;, got &#%s", $tok);
    +          // Rewind to just after the '&' so the '#' is re-read as ordinary
    +          // data. (A fixed unconsume(2) would land back on the '&' itself,
    +          // because no 'x' was consumed in this branch.)
    +          $this->scanner->unconsume($this->scanner->position() - $start);
    +          return '&';
    +        }
    +        $entity = CharacterReference::lookupDecimal($numeric);
    +      }
    +    }
    +    // String entity.
    +    else {
    +      // Attempt to consume a string up to a ';'.
    +      // [a-zA-Z0-9]+;
    +      // Digits are allowed so that names such as "frac12" are read whole.
    +      $cname = $this->scanner->getAsciiAlphaNum();
    +      $entity = CharacterReference::lookupName($cname);
    +      if ($entity == NULL) {
    +        // Unknown name: report it, put the name back and emit a literal
    +        // '&' so the text that followed is not lost.
    +        $this->parseError("No match in entity table for '%s'", $cname);
    +        $this->scanner->unconsume($this->scanner->position() - $start);
    +        return '&';
    +      }
    +    }
    +
    +    // The scanner has advanced the cursor for us.
    +    $tok = $this->scanner->current();
    +
    +    // We have an entity. We're done here.
    +    if ($tok == ';') {
    +      $this->scanner->next();
    +      return $entity;
    +    }
    +
    +    // If in an attribute, then failing to match ; means unconsume the
    +    // entire string. Otherwise, failure to match is an error.
    +    if ($inAttribute) {
    +      $this->scanner->unconsume($this->scanner->position() - $start);
    +      return '&';
    +    }
    +
    +    $this->parseError("Expected &ENTITY;, got &ENTITY%s (no trailing ;) ", $tok);
    +    return '&' . $entity;
    +
    +  }
    +
    +}
    diff --git a/libraries/html5php/HTML5/Parser/TreeBuildingRules.php b/libraries/html5php/HTML5/Parser/TreeBuildingRules.php
    new file mode 100644
    index 0000000..b87c6b5
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Parser/TreeBuildingRules.php
    @@ -0,0 +1,114 @@
    + 1,
    +    'dd' => 1,
    +    'dt' => 1,
    +    'rt' => 1,
    +    'rp' => 1,
    +    'tr' => 1,
    +    'th' => 1,
    +    'td' => 1,
    +    'thead' => 1,
    +    'tfoot' => 1,
    +    'tbody' => 1,
    +    'table' => 1,
    +    'optgroup' => 1,
    +    'option' => 1,
    +  );
    +
    +  /**
    +   * Build a new rules engine; the document is kept on $this->doc.
    +   *
    +   * @param \DOMDocument $doc
    +   *   The DOM document to use for evaluation and modification.
    +   */
    +  public function __construct($doc) {
    +    $this->doc = $doc;
    +  }
    +
    +  /**
    +   * Returns TRUE if $tagname is a key of static::$tags, i.e. has special rules.
    +   */
    +  public function hasRules($tagname) {
    +    return isset(static::$tags[$tagname]);
    +  }
    +
    +  /**
    +   * Run the tree-building rule registered for the new element's tag name.
    +   *
    +   * The DOM may be modified; without a matching rule nothing is changed.
    +   *
    +   * @return \DOMElement
    +   *   The element to treat as current: a rule's result, else $current.
    +   */
    +  public function evaluate($new, $current) {
    +    $tag = $new->tagName;
    +    if ($tag == 'li') {
    +      return $this->handleLI($new, $current);
    +    }
    +    if (in_array($tag, array('dt', 'dd'))) {
    +      return $this->handleDT($new, $current);
    +    }
    +    if (in_array($tag, array('rt', 'rp'))) {
    +      return $this->handleRT($new, $current);
    +    }
    +    $closes = NULL;
    +    if ($tag == 'optgroup') {
    +      $closes = array('optgroup');
    +    }
    +    elseif ($tag == 'option') {
    +      $closes = array('option', 'optgroup');
    +    }
    +    elseif ($tag == 'tr') {
    +      $closes = array('tr');
    +    }
    +    elseif (in_array($tag, array('td', 'th'))) {
    +      $closes = array('th', 'td');
    +    }
    +    elseif (in_array($tag, array('tbody', 'thead', 'tfoot', 'table'))) { // 'table': spec isn't explicit, but it's necessary.
    +      $closes = array('thead', 'tfoot', 'tbody');
    +    }
    +    return $closes === NULL ? $current : $this->closeIfCurrentMatches($new, $current, $closes);
    +  }
    +
    +  // The three handlers below only fix the tag list given to closeIfCurrentMatches().
    +  protected function handleLI($ele, $current) {
    +    return $this->closeIfCurrentMatches($ele, $current, array('li'));
    +  }
    +  protected function handleDT($ele, $current) {
    +    return $this->closeIfCurrentMatches($ele, $current, array('dt','dd'));
    +  }
    +  protected function handleRT($ele, $current) {
    +    return $this->closeIfCurrentMatches($ele, $current, array('rt','rp'));
    +  }
    +
    +  // Append $ele as a sibling of $current when $current's tag name is listed
    +  // in $match, otherwise as a child of $current; $ele is returned either way.
    +  protected function closeIfCurrentMatches($ele, $current, $match) {
    +    if (in_array($current->tagName, $match, TRUE)) {
    +      $current->parentNode->appendChild($ele);
    +    }
    +    else {
    +      $current->appendChild($ele);
    +    }
    +    return $ele;
    +  }
    +}
    diff --git a/libraries/html5php/HTML5/Parser/UTF8Utils.php b/libraries/html5php/HTML5/Parser/UTF8Utils.php
    new file mode 100644
    index 0000000..974a670
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Parser/UTF8Utils.php
    @@ -0,0 +1,187 @@
    +
    +
    +Permission is hereby granted, free of charge, to any person obtaining a
    +copy of this software and associated documentation files (the
    +"Software"), to deal in the Software without restriction, including
    +without limitation the rights to use, copy, modify, merge, publish,
    +distribute, sublicense, and/or sell copies of the Software, and to
    +permit persons to whom the Software is furnished to do so, subject to
    +the following conditions:
    +
    +The above copyright notice and this permission notice shall be included
    +in all copies or substantial portions of the Software.
    +
    +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
    +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
    +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
    +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
    +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
    +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
    +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    +
    +*/
    +namespace HTML5\Parser;
    +/**
    + * UTF-8 Utilities
    + */
    +class UTF8Utils {
    +  /**
    +   * The Unicode replacement character (U+FFFD), UTF-8 encoded.
    +   */
    +  const FFFD = "\xEF\xBF\xBD";
    +  /**
    +   * Return the number of UTF-8 characters in a string.
    +   *
    +   * Uses the first available of iconv_strlen(), mb_strlen() and
    +   * strlen(utf8_decode()); otherwise counts lead bytes by hand.
    +   *
    +   * @todo Move this to a general utility class.
    +   */
    +  public static function countChars($string) {
    +    if (function_exists('iconv_strlen')) {
    +      return iconv_strlen($string, 'utf-8');
    +    }
    +    if (function_exists('mb_strlen')) {
    +      return mb_strlen($string, 'utf-8');
    +    }
    +    if (function_exists('utf8_decode')) {
    +      // utf8_decode() emits one byte per input character (unmappable ones
    +      // become '?'), so the byte length stands in for the character count.
    +      return strlen(utf8_decode($string));
    +    }
    +    // Fallback: count only the bytes that can start a character.
    +    $byteCounts = count_chars($string);
    +    // ASCII: 0x80 = 0x7F - 0x00 + 1 entries, starting at 0x00.
    +    $ascii = array_sum(array_slice($byteCounts, 0, 0x80));
    +    // Multi-byte lead bytes: 0x33 = 0xF4 - 0xC2 + 1 entries, starting at 0xC2.
    +    return $ascii + array_sum(array_slice($byteCounts, 0xC2, 0x33));
    +  }
    +
    +  /**
    +   * Convert data from the given encoding to UTF-8.
    +   *
    +   * This has not yet been tested with character sets other than UTF-8.
    +   * It should work with ISO-8859-1/-13 and standard Latin Win charsets.
    +   *
    +   * @param string $data
    +   *   The data to convert.
    +   * @param string $encoding
    +   *   A valid encoding. Examples: http://www.php.net/manual/en/mbstring.supported-encodings.php
    +   * @return string The converted data, minus one leading byte order mark.
    +   * @throws \Exception If mbstring is unavailable and iconv cannot be used.
    +   */
    +  public static function convertToUTF8($data, $encoding = 'UTF-8') {
    +    /*
    +     * From the HTML5 spec:
    +    Given an encoding, the bytes in the input stream must be
    +    converted to Unicode characters for the tokeniser, as
    +    described by the rules for that encoding, except that the
    +    leading U+FEFF BYTE ORDER MARK character, if any, must not
    +    be stripped by the encoding layer (it is stripped by the rule below).
    +
    +    Bytes or sequences of bytes in the original byte stream that
    +    could not be converted to Unicode characters must be converted
    +    to U+FFFD REPLACEMENT CHARACTER code points. */
    +
    +    // mb_convert_encoding is chosen over iconv because of a bug. The best
    +    // details for the bug are on http://us1.php.net/manual/en/function.iconv.php#108643
    +    // which contains links to the actual bug reports as well as work around
    +    // details.
    +    if (function_exists('mb_convert_encoding')) {
    +      // mb library has the following behaviors:
    +      // - UTF-16 surrogates result in FALSE.
    +      // - Overlongs and outside Plane 16 result in empty strings.
    +
    +      // Before we run mb_convert_encoding we need to tell it what to do with
    +      // characters it does not know. This could be different than the parent
    +      // application executing this library so we store the value, change it
    +      // to our needs, and then change it back when we are done. This feels
    +      // a little excessive and it would be great if there was a better way.
    +      $save = ini_get('mbstring.substitute_character');
    +      ini_set('mbstring.substitute_character', "none");
    +      $data = mb_convert_encoding($data, 'UTF-8', $encoding);
    +      ini_set('mbstring.substitute_character', $save);
    +    }
    +    // @todo Get iconv running in at least some environments if that is possible.
    +    elseif (function_exists('iconv') && $encoding != 'auto') {
    +      // iconv has the following behaviors:
    +      // - Overlong representations are ignored.
    +      // - Beyond Plane 16 is replaced with a lower char.
    +      // - Incomplete sequences generate a warning.
    +      $data = @iconv($encoding, 'UTF-8//IGNORE', $data);
    +    }
    +    else {
    +      // we can make a conforming native implementation
    +      throw new \Exception('Not implemented, please install mbstring or iconv');
    +    }
    +
    +    /* One leading U+FEFF BYTE ORDER MARK character must be
    +    ignored if any are present. */
    +    if (substr($data, 0, 3) === "\xEF\xBB\xBF") {
    +      $data = substr($data, 3);
    +    }
    +    return $data;
    +  }
    +
    +  /**
    +   * Checks for Unicode code points that are not valid in a document.
    +   *
    +   * @param string $data
    +   *   A string to analyze.
    +   * @return array
    +   *   An array of (string) error messages produced by the scanning.
    +   * @throws \Exception If preg_match_all() (PCRE) is not available.
    +   */
    +  public static function checkForIllegalCodepoints($data) {
    +    if (!function_exists('preg_match_all')) {
    +      throw new \Exception('The PCRE library is not loaded or is not available.');
    +    }
    +
    +    // Vestigial error handling: one message is appended per occurrence found.
    +    $errors = array();
    +
    +    /* All U+0000 NULL characters in the input must be replaced
    +    by U+FFFD REPLACEMENT CHARACTERs. Any occurrences of such
    +    characters is a parse error. */
    +    for ($i = 0, $count = substr_count($data, "\0"); $i < $count; $i++) {
    +      $errors[] = 'null-character';
    +    }
    +
    +    /* Any occurrences of any characters in the ranges U+0001 to
    +    U+0008, U+000B,  U+000E to U+001F,  U+007F  to U+009F,
    +    U+D800 to U+DFFF , U+FDD0 to U+FDEF, and
    +    characters U+FFFE, U+FFFF, U+1FFFE, U+1FFFF, U+2FFFE, U+2FFFF,
    +    U+3FFFE, U+3FFFF, U+4FFFE, U+4FFFF, U+5FFFE, U+5FFFF, U+6FFFE,
    +    U+6FFFF, U+7FFFE, U+7FFFF, U+8FFFE, U+8FFFF, U+9FFFE, U+9FFFF,
    +    U+AFFFE, U+AFFFF, U+BFFFE, U+BFFFF, U+CFFFE, U+CFFFF, U+DFFFE,
    +    U+DFFFF, U+EFFFE, U+EFFFF, U+FFFFE, U+FFFFF, U+10FFFE, and
    +    U+10FFFF are parse errors. (These are all control characters
    +    or permanently undefined Unicode characters.) */
    +    $count = preg_match_all(
    +      '/(?:
    +        [\x01-\x08\x0B\x0E-\x1F\x7F] # U+0001 to U+0008, U+000B,  U+000E to U+001F and U+007F
    +      |
    +        \xC2[\x80-\x9F] # U+0080 to U+009F
    +      |
    +        \xED(?:\xA0[\x80-\xFF]|[\xA1-\xBE][\x00-\xFF]|\xBF[\x00-\xBF]) # U+D800 to U+DFFFF
    +      |
    +        \xEF\xB7[\x90-\xAF] # U+FDD0 to U+FDEF
    +      |
    +        \xEF\xBF[\xBE\xBF] # U+FFFE and U+FFFF
    +      |
    +        [\xF0-\xF4][\x8F-\xBF]\xBF[\xBE\xBF] # U+nFFFE and U+nFFFF (1 <= n <= 10_{16})
    +      )/x',
    +      $data,
    +      $matches
    +    );
    +    for ($i = 0; $i < $count; $i++) {
    +      $errors[] =  'invalid-codepoint';
    +    }
    +    return $errors;
    +  }
    +}
    diff --git a/libraries/html5php/HTML5/Serializer/HTML5Entities.php b/libraries/html5php/HTML5/Serializer/HTML5Entities.php
    new file mode 100644
    index 0000000..5717002
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Serializer/HTML5Entities.php
    @@ -0,0 +1,1530 @@
    + '	',
    +    "\n" => '
    ',
    +    '!' => '!',
    +    '"' => '"',
    +    '#' => '#',
    +    '$' => '$',
    +    '%' => '%',
    +    '&' => '&',
    +    '\'' => ''',
    +    '(' => '(',
    +    ')' => ')',
    +    '*' => '*',
    +    '+' => '+',
    +    ',' => ',',
    +    '.' => '.',
    +    '/' => '/',
    +    ':' => ':',
    +    ';' => ';',
    +    '<' => '<',
    +    '<⃒' => '&nvlt',
    +    '=' => '=',
    +    '=⃥' => '&bne',
    +    '>' => '>',
    +    '>⃒' => '&nvgt',
    +    '?' => '?',
    +    '@' => '@',
    +    '[' => '[',
    +    '\\' => '\',
    +    ']' => ']',
    +    '^' => '^',
    +    '_' => '_',
    +    '`' => '`',
    +    'fj' => '&fjlig',
    +    '{' => '{',
    +    '|' => '|',
    +    '}' => '}',
    +    ' ' => ' ',
    +    '¡' => '¡',
    +    '¢' => '¢',
    +    '£' => '£',
    +    '¤' => '¤',
    +    '¥' => '¥',
    +    '¦' => '¦',
    +    '§' => '§',
    +    '¨' => '¨',
    +    '©' => '©',
    +    'ª' => 'ª',
    +    '«' => '«',
    +    '¬' => '¬',
    +    '­' => '­',
    +    '®' => '®',
    +    '¯' => '¯',
    +    '°' => '°',
    +    '±' => '±',
    +    '²' => '²',
    +    '³' => '³',
    +    '´' => '´',
    +    'µ' => 'µ',
    +    '¶' => '¶',
    +    '·' => '·',
    +    '¸' => '¸',
    +    '¹' => '¹',
    +    'º' => 'º',
    +    '»' => '»',
    +    '¼' => '¼',
    +    '½' => '½',
    +    '¾' => '¾',
    +    '¿' => '¿',
    +    'À' => 'À',
    +    'Á' => 'Á',
    +    'Â' => 'Â',
    +    'Ã' => 'Ã',
    +    'Ä' => 'Ä',
    +    'Å' => 'Å',
    +    'Æ' => 'Æ',
    +    'Ç' => 'Ç',
    +    'È' => 'È',
    +    'É' => 'É',
    +    'Ê' => 'Ê',
    +    'Ë' => 'Ë',
    +    'Ì' => 'Ì',
    +    'Í' => 'Í',
    +    'Î' => 'Î',
    +    'Ï' => 'Ï',
    +    'Ð' => 'Ð',
    +    'Ñ' => 'Ñ',
    +    'Ò' => 'Ò',
    +    'Ó' => 'Ó',
    +    'Ô' => 'Ô',
    +    'Õ' => 'Õ',
    +    'Ö' => 'Ö',
    +    '×' => '×',
    +    'Ø' => 'Ø',
    +    'Ù' => 'Ù',
    +    'Ú' => 'Ú',
    +    'Û' => 'Û',
    +    'Ü' => 'Ü',
    +    'Ý' => 'Ý',
    +    'Þ' => 'Þ',
    +    'ß' => 'ß',
    +    'à' => 'à',
    +    'á' => 'á',
    +    'â' => 'â',
    +    'ã' => 'ã',
    +    'ä' => 'ä',
    +    'å' => 'å',
    +    'æ' => 'æ',
    +    'ç' => 'ç',
    +    'è' => 'è',
    +    'é' => 'é',
    +    'ê' => 'ê',
    +    'ë' => 'ë',
    +    'ì' => 'ì',
    +    'í' => 'í',
    +    'î' => 'î',
    +    'ï' => 'ï',
    +    'ð' => 'ð',
    +    'ñ' => 'ñ',
    +    'ò' => 'ò',
    +    'ó' => 'ó',
    +    'ô' => 'ô',
    +    'õ' => 'õ',
    +    'ö' => 'ö',
    +    '÷' => '÷',
    +    'ø' => 'ø',
    +    'ù' => 'ù',
    +    'ú' => 'ú',
    +    'û' => 'û',
    +    'ü' => 'ü',
    +    'ý' => 'ý',
    +    'þ' => 'þ',
    +    'ÿ' => 'ÿ',
    +    'Ā' => 'Ā',
    +    'ā' => 'ā',
    +    'Ă' => 'Ă',
    +    'ă' => 'ă',
    +    'Ą' => 'Ą',
    +    'ą' => 'ą',
    +    'Ć' => 'Ć',
    +    'ć' => 'ć',
    +    'Ĉ' => 'Ĉ',
    +    'ĉ' => 'ĉ',
    +    'Ċ' => 'Ċ',
    +    'ċ' => 'ċ',
    +    'Č' => 'Č',
    +    'č' => 'č',
    +    'Ď' => 'Ď',
    +    'ď' => 'ď',
    +    'Đ' => 'Đ',
    +    'đ' => 'đ',
    +    'Ē' => 'Ē',
    +    'ē' => 'ē',
    +    'Ė' => 'Ė',
    +    'ė' => 'ė',
    +    'Ę' => 'Ę',
    +    'ę' => 'ę',
    +    'Ě' => 'Ě',
    +    'ě' => 'ě',
    +    'Ĝ' => 'Ĝ',
    +    'ĝ' => 'ĝ',
    +    'Ğ' => 'Ğ',
    +    'ğ' => 'ğ',
    +    'Ġ' => 'Ġ',
    +    'ġ' => 'ġ',
    +    'Ģ' => 'Ģ',
    +    'Ĥ' => 'Ĥ',
    +    'ĥ' => 'ĥ',
    +    'Ħ' => 'Ħ',
    +    'ħ' => 'ħ',
    +    'Ĩ' => 'Ĩ',
    +    'ĩ' => 'ĩ',
    +    'Ī' => 'Ī',
    +    'ī' => 'ī',
    +    'Į' => 'Į',
    +    'į' => 'į',
    +    'İ' => 'İ',
    +    'ı' => 'ı',
    +    'IJ' => 'IJ',
    +    'ij' => 'ij',
    +    'Ĵ' => 'Ĵ',
    +    'ĵ' => 'ĵ',
    +    'Ķ' => 'Ķ',
    +    'ķ' => 'ķ',
    +    'ĸ' => 'ĸ',
    +    'Ĺ' => 'Ĺ',
    +    'ĺ' => 'ĺ',
    +    'Ļ' => 'Ļ',
    +    'ļ' => 'ļ',
    +    'Ľ' => 'Ľ',
    +    'ľ' => 'ľ',
    +    'Ŀ' => 'Ŀ',
    +    'ŀ' => 'ŀ',
    +    'Ł' => 'Ł',
    +    'ł' => 'ł',
    +    'Ń' => 'Ń',
    +    'ń' => 'ń',
    +    'Ņ' => 'Ņ',
    +    'ņ' => 'ņ',
    +    'Ň' => 'Ň',
    +    'ň' => 'ň',
    +    'ʼn' => 'ʼn',
    +    'Ŋ' => 'Ŋ',
    +    'ŋ' => 'ŋ',
    +    'Ō' => 'Ō',
    +    'ō' => 'ō',
    +    'Ő' => 'Ő',
    +    'ő' => 'ő',
    +    'Œ' => 'Œ',
    +    'œ' => 'œ',
    +    'Ŕ' => 'Ŕ',
    +    'ŕ' => 'ŕ',
    +    'Ŗ' => 'Ŗ',
    +    'ŗ' => 'ŗ',
    +    'Ř' => 'Ř',
    +    'ř' => 'ř',
    +    'Ś' => 'Ś',
    +    'ś' => 'ś',
    +    'Ŝ' => 'Ŝ',
    +    'ŝ' => 'ŝ',
    +    'Ş' => 'Ş',
    +    'ş' => 'ş',
    +    'Š' => 'Š',
    +    'š' => 'š',
    +    'Ţ' => 'Ţ',
    +    'ţ' => 'ţ',
    +    'Ť' => 'Ť',
    +    'ť' => 'ť',
    +    'Ŧ' => 'Ŧ',
    +    'ŧ' => 'ŧ',
    +    'Ũ' => 'Ũ',
    +    'ũ' => 'ũ',
    +    'Ū' => 'Ū',
    +    'ū' => 'ū',
    +    'Ŭ' => 'Ŭ',
    +    'ŭ' => 'ŭ',
    +    'Ů' => 'Ů',
    +    'ů' => 'ů',
    +    'Ű' => 'Ű',
    +    'ű' => 'ű',
    +    'Ų' => 'Ų',
    +    'ų' => 'ų',
    +    'Ŵ' => 'Ŵ',
    +    'ŵ' => 'ŵ',
    +    'Ŷ' => 'Ŷ',
    +    'ŷ' => 'ŷ',
    +    'Ÿ' => 'Ÿ',
    +    'Ź' => 'Ź',
    +    'ź' => 'ź',
    +    'Ż' => 'Ż',
    +    'ż' => 'ż',
    +    'Ž' => 'Ž',
    +    'ž' => 'ž',
    +    'ƒ' => 'ƒ',
    +    'Ƶ' => 'Ƶ',
    +    'ǵ' => 'ǵ',
    +    'ȷ' => 'ȷ',
    +    'ˆ' => 'ˆ',
    +    'ˇ' => 'ˇ',
    +    '˘' => '˘',
    +    '˙' => '˙',
    +    '˚' => '˚',
    +    '˛' => '˛',
    +    '˜' => '˜',
    +    '˝' => '˝',
    +    '̑' => '̑',
    +    'Α' => 'Α',
    +    'Β' => 'Β',
    +    'Γ' => 'Γ',
    +    'Δ' => 'Δ',
    +    'Ε' => 'Ε',
    +    'Ζ' => 'Ζ',
    +    'Η' => 'Η',
    +    'Θ' => 'Θ',
    +    'Ι' => 'Ι',
    +    'Κ' => 'Κ',
    +    'Λ' => 'Λ',
    +    'Μ' => 'Μ',
    +    'Ν' => 'Ν',
    +    'Ξ' => 'Ξ',
    +    'Ο' => 'Ο',
    +    'Π' => 'Π',
    +    'Ρ' => 'Ρ',
    +    'Σ' => 'Σ',
    +    'Τ' => 'Τ',
    +    'Υ' => 'Υ',
    +    'Φ' => 'Φ',
    +    'Χ' => 'Χ',
    +    'Ψ' => 'Ψ',
    +    'Ω' => 'Ω',
    +    'α' => 'α',
    +    'β' => 'β',
    +    'γ' => 'γ',
    +    'δ' => 'δ',
    +    'ε' => 'ε',
    +    'ζ' => 'ζ',
    +    'η' => 'η',
    +    'θ' => 'θ',
    +    'ι' => 'ι',
    +    'κ' => 'κ',
    +    'λ' => 'λ',
    +    'μ' => 'μ',
    +    'ν' => 'ν',
    +    'ξ' => 'ξ',
    +    'ο' => 'ο',
    +    'π' => 'π',
    +    'ρ' => 'ρ',
    +    'ς' => 'ς',
    +    'σ' => 'σ',
    +    'τ' => 'τ',
    +    'υ' => 'υ',
    +    'φ' => 'φ',
    +    'χ' => 'χ',
    +    'ψ' => 'ψ',
    +    'ω' => 'ω',
    +    'ϑ' => 'ϑ',
    +    'ϒ' => 'ϒ',
    +    'ϕ' => 'ϕ',
    +    'ϖ' => 'ϖ',
    +    'Ϝ' => 'Ϝ',
    +    'ϝ' => 'ϝ',
    +    'ϰ' => 'ϰ',
    +    'ϱ' => 'ϱ',
    +    'ϵ' => 'ϵ',
    +    '϶' => '϶',
    +    'Ё' => 'Ё',
    +    'Ђ' => 'Ђ',
    +    'Ѓ' => 'Ѓ',
    +    'Є' => 'Є',
    +    'Ѕ' => 'Ѕ',
    +    'І' => 'І',
    +    'Ї' => 'Ї',
    +    'Ј' => 'Ј',
    +    'Љ' => 'Љ',
    +    'Њ' => 'Њ',
    +    'Ћ' => 'Ћ',
    +    'Ќ' => 'Ќ',
    +    'Ў' => 'Ў',
    +    'Џ' => 'Џ',
    +    'А' => 'А',
    +    'Б' => 'Б',
    +    'В' => 'В',
    +    'Г' => 'Г',
    +    'Д' => 'Д',
    +    'Е' => 'Е',
    +    'Ж' => 'Ж',
    +    'З' => 'З',
    +    'И' => 'И',
    +    'Й' => 'Й',
    +    'К' => 'К',
    +    'Л' => 'Л',
    +    'М' => 'М',
    +    'Н' => 'Н',
    +    'О' => 'О',
    +    'П' => 'П',
    +    'Р' => 'Р',
    +    'С' => 'С',
    +    'Т' => 'Т',
    +    'У' => 'У',
    +    'Ф' => 'Ф',
    +    'Х' => 'Х',
    +    'Ц' => 'Ц',
    +    'Ч' => 'Ч',
    +    'Ш' => 'Ш',
    +    'Щ' => 'Щ',
    +    'Ъ' => 'Ъ',
    +    'Ы' => 'Ы',
    +    'Ь' => 'Ь',
    +    'Э' => 'Э',
    +    'Ю' => 'Ю',
    +    'Я' => 'Я',
    +    'а' => 'а',
    +    'б' => 'б',
    +    'в' => 'в',
    +    'г' => 'г',
    +    'д' => 'д',
    +    'е' => 'е',
    +    'ж' => 'ж',
    +    'з' => 'з',
    +    'и' => 'и',
    +    'й' => 'й',
    +    'к' => 'к',
    +    'л' => 'л',
    +    'м' => 'м',
    +    'н' => 'н',
    +    'о' => 'о',
    +    'п' => 'п',
    +    'р' => 'р',
    +    'с' => 'с',
    +    'т' => 'т',
    +    'у' => 'у',
    +    'ф' => 'ф',
    +    'х' => 'х',
    +    'ц' => 'ц',
    +    'ч' => 'ч',
    +    'ш' => 'ш',
    +    'щ' => 'щ',
    +    'ъ' => 'ъ',
    +    'ы' => 'ы',
    +    'ь' => 'ь',
    +    'э' => 'э',
    +    'ю' => 'ю',
    +    'я' => 'я',
    +    'ё' => 'ё',
    +    'ђ' => 'ђ',
    +    'ѓ' => 'ѓ',
    +    'є' => 'є',
    +    'ѕ' => 'ѕ',
    +    'і' => 'і',
    +    'ї' => 'ї',
    +    'ј' => 'ј',
    +    'љ' => 'љ',
    +    'њ' => 'њ',
    +    'ћ' => 'ћ',
    +    'ќ' => 'ќ',
    +    'ў' => 'ў',
    +    'џ' => 'џ',
    +    ' ' => ' ',
    +    ' ' => ' ',
    +    ' ' => ' ',
    +    ' ' => ' ',
    +    ' ' => ' ',
    +    ' ' => ' ',
    +    ' ' => ' ',
    +    ' ' => ' ',
    +    '​' => '​',
    +    '‌' => '‌',
    +    '‍' => '‍',
    +    '‎' => '‎',
    +    '‏' => '‏',
    +    '‐' => '‐',
    +    '–' => '–',
    +    '—' => '—',
    +    '―' => '―',
    +    '‖' => '‖',
    +    '‘' => '‘',
    +    '’' => '’',
    +    '‚' => '‚',
    +    '“' => '“',
    +    '”' => '”',
    +    '„' => '„',
    +    '†' => '†',
    +    '‡' => '‡',
    +    '•' => '•',
    +    '‥' => '‥',
    +    '…' => '…',
    +    '‰' => '‰',
    +    '‱' => '‱',
    +    '′' => '′',
    +    '″' => '″',
    +    '‴' => '‴',
    +    '‵' => '‵',
    +    '‹' => '‹',
    +    '›' => '›',
    +    '‾' => '‾',
    +    '⁁' => '⁁',
    +    '⁃' => '⁃',
    +    '⁄' => '⁄',
    +    '⁏' => '⁏',
    +    '⁗' => '⁗',
    +    ' ' => ' ',
    +    '  ' => '&ThickSpace',
    +    '⁠' => '⁠',
    +    '⁡' => '⁡',
    +    '⁢' => '⁢',
    +    '⁣' => '⁣',
    +    '€' => '€',
    +    '⃛' => '⃛',
    +    '⃜' => '⃜',
    +    'ℂ' => 'ℂ',
    +    '℅' => '℅',
    +    'ℊ' => 'ℊ',
    +    'ℋ' => 'ℋ',
    +    'ℌ' => 'ℌ',
    +    'ℍ' => 'ℍ',
    +    'ℎ' => 'ℎ',
    +    'ℏ' => 'ℏ',
    +    'ℐ' => 'ℐ',
    +    'ℑ' => 'ℑ',
    +    'ℒ' => 'ℒ',
    +    'ℓ' => 'ℓ',
    +    'ℕ' => 'ℕ',
    +    '№' => '№',
    +    '℗' => '℗',
    +    '℘' => '℘',
    +    'ℙ' => 'ℙ',
    +    'ℚ' => 'ℚ',
    +    'ℛ' => 'ℛ',
    +    'ℜ' => 'ℜ',
    +    'ℝ' => 'ℝ',
    +    '℞' => '℞',
    +    '™' => '™',
    +    'ℤ' => 'ℤ',
    +    '℧' => '℧',
    +    'ℨ' => 'ℨ',
    +    '℩' => '℩',
    +    'ℬ' => 'ℬ',
    +    'ℭ' => 'ℭ',
    +    'ℯ' => 'ℯ',
    +    'ℰ' => 'ℰ',
    +    'ℱ' => 'ℱ',
    +    'ℳ' => 'ℳ',
    +    'ℴ' => 'ℴ',
    +    'ℵ' => 'ℵ',
    +    'ℶ' => 'ℶ',
    +    'ℷ' => 'ℷ',
    +    'ℸ' => 'ℸ',
    +    'ⅅ' => 'ⅅ',
    +    'ⅆ' => 'ⅆ',
    +    'ⅇ' => 'ⅇ',
    +    'ⅈ' => 'ⅈ',
    +    '⅓' => '⅓',
    +    '⅔' => '⅔',
    +    '⅕' => '⅕',
    +    '⅖' => '⅖',
    +    '⅗' => '⅗',
    +    '⅘' => '⅘',
    +    '⅙' => '⅙',
    +    '⅚' => '⅚',
    +    '⅛' => '⅛',
    +    '⅜' => '⅜',
    +    '⅝' => '⅝',
    +    '⅞' => '⅞',
    +    '←' => '←',
    +    '↑' => '↑',
    +    '→' => '→',
    +    '↓' => '↓',
    +    '↔' => '↔',
    +    '↕' => '↕',
    +    '↖' => '↖',
    +    '↗' => '↗',
    +    '↘' => '↘',
    +    '↙' => '↙',
    +    '↚' => '↚',
    +    '↛' => '↛',
    +    '↝' => '↝',
    +    '↝̸' => '&nrarrw',
    +    '↞' => '↞',
    +    '↟' => '↟',
    +    '↠' => '↠',
    +    '↡' => '↡',
    +    '↢' => '↢',
    +    '↣' => '↣',
    +    '↤' => '↤',
    +    '↥' => '↥',
    +    '↦' => '↦',
    +    '↧' => '↧',
    +    '↩' => '↩',
    +    '↪' => '↪',
    +    '↫' => '↫',
    +    '↬' => '↬',
    +    '↭' => '↭',
    +    '↮' => '↮',
    +    '↰' => '↰',
    +    '↱' => '↱',
    +    '↲' => '↲',
    +    '↳' => '↳',
    +    '↵' => '↵',
    +    '↶' => '↶',
    +    '↷' => '↷',
    +    '↺' => '↺',
    +    '↻' => '↻',
    +    '↼' => '↼',
    +    '↽' => '↽',
    +    '↾' => '↾',
    +    '↿' => '↿',
    +    '⇀' => '⇀',
    +    '⇁' => '⇁',
    +    '⇂' => '⇂',
    +    '⇃' => '⇃',
    +    '⇄' => '⇄',
    +    '⇅' => '⇅',
    +    '⇆' => '⇆',
    +    '⇇' => '⇇',
    +    '⇈' => '⇈',
    +    '⇉' => '⇉',
    +    '⇊' => '⇊',
    +    '⇋' => '⇋',
    +    '⇌' => '⇌',
    +    '⇍' => '⇍',
    +    '⇎' => '⇎',
    +    '⇏' => '⇏',
    +    '⇐' => '⇐',
    +    '⇑' => '⇑',
    +    '⇒' => '⇒',
    +    '⇓' => '⇓',
    +    '⇔' => '⇔',
    +    '⇕' => '⇕',
    +    '⇖' => '⇖',
    +    '⇗' => '⇗',
    +    '⇘' => '⇘',
    +    '⇙' => '⇙',
    +    '⇚' => '⇚',
    +    '⇛' => '⇛',
    +    '⇝' => '⇝',
    +    '⇤' => '⇤',
    +    '⇥' => '⇥',
    +    '⇵' => '⇵',
    +    '⇽' => '⇽',
    +    '⇾' => '⇾',
    +    '⇿' => '⇿',
    +    '∀' => '∀',
    +    '∁' => '∁',
    +    '∂' => '∂',
    +    '∂̸' => '&npart',
    +    '∃' => '∃',
    +    '∄' => '∄',
    +    '∅' => '∅',
    +    '∇' => '∇',
    +    '∈' => '∈',
    +    '∉' => '∉',
    +    '∋' => '∋',
    +    '∌' => '∌',
    +    '∏' => '∏',
    +    '∐' => '∐',
    +    '∑' => '∑',
    +    '−' => '−',
    +    '∓' => '∓',
    +    '∔' => '∔',
    +    '∖' => '∖',
    +    '∗' => '∗',
    +    '∘' => '∘',
    +    '√' => '√',
    +    '∝' => '∝',
    +    '∞' => '∞',
    +    '∟' => '∟',
    +    '∠' => '∠',
    +    '∠⃒' => '&nang',
    +    '∡' => '∡',
    +    '∢' => '∢',
    +    '∣' => '∣',
    +    '∤' => '∤',
    +    '∥' => '∥',
    +    '∦' => '∦',
    +    '∧' => '∧',
    +    '∨' => '∨',
    +    '∩' => '∩',
    +    '∩︀' => '&caps',
    +    '∪' => '∪',
    +    '∪︀' => '&cups',
    +    '∫' => '∫',
    +    '∬' => '∬',
    +    '∭' => '∭',
    +    '∮' => '∮',
    +    '∯' => '∯',
    +    '∰' => '∰',
    +    '∱' => '∱',
    +    '∲' => '∲',
    +    '∳' => '∳',
    +    '∴' => '∴',
    +    '∵' => '∵',
    +    '∶' => '∶',
    +    '∷' => '∷',
    +    '∸' => '∸',
    +    '∺' => '∺',
    +    '∻' => '∻',
    +    '∼' => '∼',
    +    '∼⃒' => '&nvsim',
    +    '∽' => '∽',
    +    '∽̱' => '&race',
    +    '∾' => '∾',
    +    '∾̳' => '&acE',
    +    '∿' => '∿',
    +    '≀' => '≀',
    +    '≁' => '≁',
    +    '≂' => '≂',
    +    '≂̸' => '&nesim',
    +    '≃' => '≃',
    +    '≄' => '≄',
    +    '≅' => '≅',
    +    '≆' => '≆',
    +    '≇' => '≇',
    +    '≈' => '≈',
    +    '≉' => '≉',
    +    '≊' => '≊',
    +    '≋' => '≋',
    +    '≋̸' => '&napid',
    +    '≌' => '≌',
    +    '≍' => '≍',
    +    '≍⃒' => '&nvap',
    +    '≎' => '≎',
    +    '≎̸' => '&nbump',
    +    '≏' => '≏',
    +    '≏̸' => '&nbumpe',
    +    '≐' => '≐',
    +    '≐̸' => '&nedot',
    +    '≑' => '≑',
    +    '≒' => '≒',
    +    '≓' => '≓',
    +    '≔' => '≔',
    +    '≕' => '≕',
    +    '≖' => '≖',
    +    '≗' => '≗',
    +    '≙' => '≙',
    +    '≚' => '≚',
    +    '≜' => '≜',
    +    '≟' => '≟',
    +    '≠' => '≠',
    +    '≡' => '≡',
    +    '≡⃥' => '&bnequiv',
    +    '≢' => '≢',
    +    '≤' => '≤',
    +    '≤⃒' => '&nvle',
    +    '≥' => '≥',
    +    '≥⃒' => '&nvge',
    +    '≦' => '≦',
    +    '≦̸' => '&nlE',
    +    '≧' => '≧',
    +    '≧̸' => '&NotGreaterFullEqual',
    +    '≨' => '≨',
    +    '≨︀' => '&lvertneqq',
    +    '≩' => '≩',
    +    '≩︀' => '&gvertneqq',
    +    '≪' => '≪',
    +    '≪̸' => '&nLtv',
    +    '≪⃒' => '&nLt',
    +    '≫' => '≫',
    +    '≫̸' => '&NotGreaterGreater',
    +    '≫⃒' => '&nGt',
    +    '≬' => '≬',
    +    '≭' => '≭',
    +    '≮' => '≮',
    +    '≯' => '≯',
    +    '≰' => '≰',
    +    '≱' => '≱',
    +    '≲' => '≲',
    +    '≳' => '≳',
    +    '≴' => '≴',
    +    '≵' => '≵',
    +    '≶' => '≶',
    +    '≷' => '≷',
    +    '≸' => '≸',
    +    '≹' => '≹',
    +    '≺' => '≺',
    +    '≻' => '≻',
    +    '≼' => '≼',
    +    '≽' => '≽',
    +    '≾' => '≾',
    +    '≿' => '≿',
    +    '≿̸' => '&NotSucceedsTilde',
    +    '⊀' => '⊀',
    +    '⊁' => '⊁',
    +    '⊂' => '⊂',
    +    '⊂⃒' => '&vnsub',
    +    '⊃' => '⊃',
    +    '⊃⃒' => '&nsupset',
    +    '⊄' => '⊄',
    +    '⊅' => '⊅',
    +    '⊆' => '⊆',
    +    '⊇' => '⊇',
    +    '⊈' => '⊈',
    +    '⊉' => '⊉',
    +    '⊊' => '⊊',
    +    '⊊︀' => '&vsubne',
    +    '⊋' => '⊋',
    +    '⊋︀' => '&vsupne',
    +    '⊍' => '⊍',
    +    '⊎' => '⊎',
    +    '⊏' => '⊏',
    +    '⊏̸' => '&NotSquareSubset',
    +    '⊐' => '⊐',
    +    '⊐̸' => '&NotSquareSuperset',
    +    '⊑' => '⊑',
    +    '⊒' => '⊒',
    +    '⊓' => '⊓',
    +    '⊓︀' => '&sqcaps',
    +    '⊔' => '⊔',
    +    '⊔︀' => '&sqcups',
    +    '⊕' => '⊕',
    +    '⊖' => '⊖',
    +    '⊗' => '⊗',
    +    '⊘' => '⊘',
    +    '⊙' => '⊙',
    +    '⊚' => '⊚',
    +    '⊛' => '⊛',
    +    '⊝' => '⊝',
    +    '⊞' => '⊞',
    +    '⊟' => '⊟',
    +    '⊠' => '⊠',
    +    '⊡' => '⊡',
    +    '⊢' => '⊢',
    +    '⊣' => '⊣',
    +    '⊤' => '⊤',
    +    '⊥' => '⊥',
    +    '⊧' => '⊧',
    +    '⊨' => '⊨',
    +    '⊩' => '⊩',
    +    '⊪' => '⊪',
    +    '⊫' => '⊫',
    +    '⊬' => '⊬',
    +    '⊭' => '⊭',
    +    '⊮' => '⊮',
    +    '⊯' => '⊯',
    +    '⊰' => '⊰',
    +    '⊲' => '⊲',
    +    '⊳' => '⊳',
    +    '⊴' => '⊴',
    +    '⊴⃒' => '&nvltrie',
    +    '⊵' => '⊵',
    +    '⊵⃒' => '&nvrtrie',
    +    '⊶' => '⊶',
    +    '⊷' => '⊷',
    +    '⊸' => '⊸',
    +    '⊹' => '⊹',
    +    '⊺' => '⊺',
    +    '⊻' => '⊻',
    +    '⊽' => '⊽',
    +    '⊾' => '⊾',
    +    '⊿' => '⊿',
    +    '⋀' => '⋀',
    +    '⋁' => '⋁',
    +    '⋂' => '⋂',
    +    '⋃' => '⋃',
    +    '⋄' => '⋄',
    +    '⋅' => '⋅',
    +    '⋆' => '⋆',
    +    '⋇' => '⋇',
    +    '⋈' => '⋈',
    +    '⋉' => '⋉',
    +    '⋊' => '⋊',
    +    '⋋' => '⋋',
    +    '⋌' => '⋌',
    +    '⋍' => '⋍',
    +    '⋎' => '⋎',
    +    '⋏' => '⋏',
    +    '⋐' => '⋐',
    +    '⋑' => '⋑',
    +    '⋒' => '⋒',
    +    '⋓' => '⋓',
    +    '⋔' => '⋔',
    +    '⋕' => '⋕',
    +    '⋖' => '⋖',
    +    '⋗' => '⋗',
    +    '⋘' => '⋘',
    +    '⋘̸' => '&nLl',
    +    '⋙' => '⋙',
    +    '⋙̸' => '&nGg',
    +    '⋚' => '⋚',
    +    '⋚︀' => '&lesg',
    +    '⋛' => '⋛',
    +    '⋛︀' => '&gesl',
    +    '⋞' => '⋞',
    +    '⋟' => '⋟',
    +    '⋠' => '⋠',
    +    '⋡' => '⋡',
    +    '⋢' => '⋢',
    +    '⋣' => '⋣',
    +    '⋦' => '⋦',
    +    '⋧' => '⋧',
    +    '⋨' => '⋨',
    +    '⋩' => '⋩',
    +    '⋪' => '⋪',
    +    '⋫' => '⋫',
    +    '⋬' => '⋬',
    +    '⋭' => '⋭',
    +    '⋮' => '⋮',
    +    '⋯' => '⋯',
    +    '⋰' => '⋰',
    +    '⋱' => '⋱',
    +    '⋲' => '⋲',
    +    '⋳' => '⋳',
    +    '⋴' => '⋴',
    +    '⋵' => '⋵',
    +    '⋵̸' => '¬indot',
    +    '⋶' => '⋶',
    +    '⋷' => '⋷',
    +    '⋹' => '⋹',
    +    '⋹̸' => '¬inE',
    +    '⋺' => '⋺',
    +    '⋻' => '⋻',
    +    '⋼' => '⋼',
    +    '⋽' => '⋽',
    +    '⋾' => '⋾',
    +    '⌅' => '⌅',
    +    '⌆' => '⌆',
    +    '⌈' => '⌈',
    +    '⌉' => '⌉',
    +    '⌊' => '⌊',
    +    '⌋' => '⌋',
    +    '⌌' => '⌌',
    +    '⌍' => '⌍',
    +    '⌎' => '⌎',
    +    '⌏' => '⌏',
    +    '⌐' => '⌐',
    +    '⌒' => '⌒',
    +    '⌓' => '⌓',
    +    '⌕' => '⌕',
    +    '⌖' => '⌖',
    +    '⌜' => '⌜',
    +    '⌝' => '⌝',
    +    '⌞' => '⌞',
    +    '⌟' => '⌟',
    +    '⌢' => '⌢',
    +    '⌣' => '⌣',
    +    '⌭' => '⌭',
    +    '⌮' => '⌮',
    +    '⌶' => '⌶',
    +    '⌽' => '⌽',
    +    '⌿' => '⌿',
    +    '⍼' => '⍼',
    +    '⎰' => '⎰',
    +    '⎱' => '⎱',
    +    '⎴' => '⎴',
    +    '⎵' => '⎵',
    +    '⎶' => '⎶',
    +    '⏜' => '⏜',
    +    '⏝' => '⏝',
    +    '⏞' => '⏞',
    +    '⏟' => '⏟',
    +    '⏢' => '⏢',
    +    '⏧' => '⏧',
    +    '␣' => '␣',
    +    'Ⓢ' => 'Ⓢ',
    +    '─' => '─',
    +    '│' => '│',
    +    '┌' => '┌',
    +    '┐' => '┐',
    +    '└' => '└',
    +    '┘' => '┘',
    +    '├' => '├',
    +    '┤' => '┤',
    +    '┬' => '┬',
    +    '┴' => '┴',
    +    '┼' => '┼',
    +    '═' => '═',
    +    '║' => '║',
    +    '╒' => '╒',
    +    '╓' => '╓',
    +    '╔' => '╔',
    +    '╕' => '╕',
    +    '╖' => '╖',
    +    '╗' => '╗',
    +    '╘' => '╘',
    +    '╙' => '╙',
    +    '╚' => '╚',
    +    '╛' => '╛',
    +    '╜' => '╜',
    +    '╝' => '╝',
    +    '╞' => '╞',
    +    '╟' => '╟',
    +    '╠' => '╠',
    +    '╡' => '╡',
    +    '╢' => '╢',
    +    '╣' => '╣',
    +    '╤' => '╤',
    +    '╥' => '╥',
    +    '╦' => '╦',
    +    '╧' => '╧',
    +    '╨' => '╨',
    +    '╩' => '╩',
    +    '╪' => '╪',
    +    '╫' => '╫',
    +    '╬' => '╬',
    +    '▀' => '▀',
    +    '▄' => '▄',
    +    '█' => '█',
    +    '░' => '░',
    +    '▒' => '▒',
    +    '▓' => '▓',
    +    '□' => '□',
    +    '▪' => '▪',
    +    '▫' => '▫',
    +    '▭' => '▭',
    +    '▮' => '▮',
    +    '▱' => '▱',
    +    '△' => '△',
    +    '▴' => '▴',
    +    '▵' => '▵',
    +    '▸' => '▸',
    +    '▹' => '▹',
    +    '▽' => '▽',
    +    '▾' => '▾',
    +    '▿' => '▿',
    +    '◂' => '◂',
    +    '◃' => '◃',
    +    '◊' => '◊',
    +    '○' => '○',
    +    '◬' => '◬',
    +    '◯' => '◯',
    +    '◸' => '◸',
    +    '◹' => '◹',
    +    '◺' => '◺',
    +    '◻' => '◻',
    +    '◼' => '◼',
    +    '★' => '★',
    +    '☆' => '☆',
    +    '☎' => '☎',
    +    '♀' => '♀',
    +    '♂' => '♂',
    +    '♠' => '♠',
    +    '♣' => '♣',
    +    '♥' => '♥',
    +    '♦' => '♦',
    +    '♪' => '♪',
    +    '♭' => '♭',
    +    '♮' => '♮',
    +    '♯' => '♯',
    +    '✓' => '✓',
    +    '✗' => '✗',
    +    '✠' => '✠',
    +    '✶' => '✶',
    +    '❘' => '❘',
    +    '❲' => '❲',
    +    '❳' => '❳',
    +    '⟈' => '⟈',
    +    '⟉' => '⟉',
    +    '⟦' => '⟦',
    +    '⟧' => '⟧',
    +    '⟨' => '⟨',
    +    '⟩' => '⟩',
    +    '⟪' => '⟪',
    +    '⟫' => '⟫',
    +    '⟬' => '⟬',
    +    '⟭' => '⟭',
    +    '⟵' => '⟵',
    +    '⟶' => '⟶',
    +    '⟷' => '⟷',
    +    '⟸' => '⟸',
    +    '⟹' => '⟹',
    +    '⟺' => '⟺',
    +    '⟼' => '⟼',
    +    '⟿' => '⟿',
    +    '⤂' => '⤂',
    +    '⤃' => '⤃',
    +    '⤄' => '⤄',
    +    '⤅' => '⤅',
    +    '⤌' => '⤌',
    +    '⤍' => '⤍',
    +    '⤎' => '⤎',
    +    '⤏' => '⤏',
    +    '⤐' => '⤐',
    +    '⤑' => '⤑',
    +    '⤒' => '⤒',
    +    '⤓' => '⤓',
    +    '⤖' => '⤖',
    +    '⤙' => '⤙',
    +    '⤚' => '⤚',
    +    '⤛' => '⤛',
    +    '⤜' => '⤜',
    +    '⤝' => '⤝',
    +    '⤞' => '⤞',
    +    '⤟' => '⤟',
    +    '⤠' => '⤠',
    +    '⤣' => '⤣',
    +    '⤤' => '⤤',
    +    '⤥' => '⤥',
    +    '⤦' => '⤦',
    +    '⤧' => '⤧',
    +    '⤨' => '⤨',
    +    '⤩' => '⤩',
    +    '⤪' => '⤪',
    +    '⤳' => '⤳',
    +    '⤳̸' => '&nrarrc',
    +    '⤵' => '⤵',
    +    '⤶' => '⤶',
    +    '⤷' => '⤷',
    +    '⤸' => '⤸',
    +    '⤹' => '⤹',
    +    '⤼' => '⤼',
    +    '⤽' => '⤽',
    +    '⥅' => '⥅',
    +    '⥈' => '⥈',
    +    '⥉' => '⥉',
    +    '⥊' => '⥊',
    +    '⥋' => '⥋',
    +    '⥎' => '⥎',
    +    '⥏' => '⥏',
    +    '⥐' => '⥐',
    +    '⥑' => '⥑',
    +    '⥒' => '⥒',
    +    '⥓' => '⥓',
    +    '⥔' => '⥔',
    +    '⥕' => '⥕',
    +    '⥖' => '⥖',
    +    '⥗' => '⥗',
    +    '⥘' => '⥘',
    +    '⥙' => '⥙',
    +    '⥚' => '⥚',
    +    '⥛' => '⥛',
    +    '⥜' => '⥜',
    +    '⥝' => '⥝',
    +    '⥞' => '⥞',
    +    '⥟' => '⥟',
    +    '⥠' => '⥠',
    +    '⥡' => '⥡',
    +    '⥢' => '⥢',
    +    '⥣' => '⥣',
    +    '⥤' => '⥤',
    +    '⥥' => '⥥',
    +    '⥦' => '⥦',
    +    '⥧' => '⥧',
    +    '⥨' => '⥨',
    +    '⥩' => '⥩',
    +    '⥪' => '⥪',
    +    '⥫' => '⥫',
    +    '⥬' => '⥬',
    +    '⥭' => '⥭',
    +    '⥮' => '⥮',
    +    '⥯' => '⥯',
    +    '⥰' => '⥰',
    +    '⥱' => '⥱',
    +    '⥲' => '⥲',
    +    '⥳' => '⥳',
    +    '⥴' => '⥴',
    +    '⥵' => '⥵',
    +    '⥶' => '⥶',
    +    '⥸' => '⥸',
    +    '⥹' => '⥹',
    +    '⥻' => '⥻',
    +    '⥼' => '⥼',
    +    '⥽' => '⥽',
    +    '⥾' => '⥾',
    +    '⥿' => '⥿',
    +    '⦅' => '⦅',
    +    '⦆' => '⦆',
    +    '⦋' => '⦋',
    +    '⦌' => '⦌',
    +    '⦍' => '⦍',
    +    '⦎' => '⦎',
    +    '⦏' => '⦏',
    +    '⦐' => '⦐',
    +    '⦑' => '⦑',
    +    '⦒' => '⦒',
    +    '⦓' => '⦓',
    +    '⦔' => '⦔',
    +    '⦕' => '⦕',
    +    '⦖' => '⦖',
    +    '⦚' => '⦚',
    +    '⦜' => '⦜',
    +    '⦝' => '⦝',
    +    '⦤' => '⦤',
    +    '⦥' => '⦥',
    +    '⦦' => '⦦',
    +    '⦧' => '⦧',
    +    '⦨' => '⦨',
    +    '⦩' => '⦩',
    +    '⦪' => '⦪',
    +    '⦫' => '⦫',
    +    '⦬' => '⦬',
    +    '⦭' => '⦭',
    +    '⦮' => '⦮',
    +    '⦯' => '⦯',
    +    '⦰' => '⦰',
    +    '⦱' => '⦱',
    +    '⦲' => '⦲',
    +    '⦳' => '⦳',
    +    '⦴' => '⦴',
    +    '⦵' => '⦵',
    +    '⦶' => '⦶',
    +    '⦷' => '⦷',
    +    '⦹' => '⦹',
    +    '⦻' => '⦻',
    +    '⦼' => '⦼',
    +    '⦾' => '⦾',
    +    '⦿' => '⦿',
    +    '⧀' => '⧀',
    +    '⧁' => '⧁',
    +    '⧂' => '⧂',
    +    '⧃' => '⧃',
    +    '⧄' => '⧄',
    +    '⧅' => '⧅',
    +    '⧉' => '⧉',
    +    '⧍' => '⧍',
    +    '⧎' => '⧎',
    +    '⧏' => '⧏',
    +    '⧏̸' => '&NotLeftTriangleBar',
    +    '⧐' => '⧐',
    +    '⧐̸' => '&NotRightTriangleBar',
    +    '⧜' => '⧜',
    +    '⧝' => '⧝',
    +    '⧞' => '⧞',
    +    '⧣' => '⧣',
    +    '⧤' => '⧤',
    +    '⧥' => '⧥',
    +    '⧫' => '⧫',
    +    '⧴' => '⧴',
    +    '⧶' => '⧶',
    +    '⨀' => '⨀',
    +    '⨁' => '⨁',
    +    '⨂' => '⨂',
    +    '⨄' => '⨄',
    +    '⨆' => '⨆',
    +    '⨌' => '⨌',
    +    '⨍' => '⨍',
    +    '⨐' => '⨐',
    +    '⨑' => '⨑',
    +    '⨒' => '⨒',
    +    '⨓' => '⨓',
    +    '⨔' => '⨔',
    +    '⨕' => '⨕',
    +    '⨖' => '⨖',
    +    '⨗' => '⨗',
    +    '⨢' => '⨢',
    +    '⨣' => '⨣',
    +    '⨤' => '⨤',
    +    '⨥' => '⨥',
    +    '⨦' => '⨦',
    +    '⨧' => '⨧',
    +    '⨩' => '⨩',
    +    '⨪' => '⨪',
    +    '⨭' => '⨭',
    +    '⨮' => '⨮',
    +    '⨯' => '⨯',
    +    '⨰' => '⨰',
    +    '⨱' => '⨱',
    +    '⨳' => '⨳',
    +    '⨴' => '⨴',
    +    '⨵' => '⨵',
    +    '⨶' => '⨶',
    +    '⨷' => '⨷',
    +    '⨸' => '⨸',
    +    '⨹' => '⨹',
    +    '⨺' => '⨺',
    +    '⨻' => '⨻',
    +    '⨼' => '⨼',
    +    '⨿' => '⨿',
    +    '⩀' => '⩀',
    +    '⩂' => '⩂',
    +    '⩃' => '⩃',
    +    '⩄' => '⩄',
    +    '⩅' => '⩅',
    +    '⩆' => '⩆',
    +    '⩇' => '⩇',
    +    '⩈' => '⩈',
    +    '⩉' => '⩉',
    +    '⩊' => '⩊',
    +    '⩋' => '⩋',
    +    '⩌' => '⩌',
    +    '⩍' => '⩍',
    +    '⩐' => '⩐',
    +    '⩓' => '⩓',
    +    '⩔' => '⩔',
    +    '⩕' => '⩕',
    +    '⩖' => '⩖',
    +    '⩗' => '⩗',
    +    '⩘' => '⩘',
    +    '⩚' => '⩚',
    +    '⩛' => '⩛',
    +    '⩜' => '⩜',
    +    '⩝' => '⩝',
    +    '⩟' => '⩟',
    +    '⩦' => '⩦',
    +    '⩪' => '⩪',
    +    '⩭' => '⩭',
    +    '⩭̸' => '&ncongdot',
    +    '⩮' => '⩮',
    +    '⩯' => '⩯',
    +    '⩰' => '⩰',
    +    '⩰̸' => '&napE',
    +    '⩱' => '⩱',
    +    '⩲' => '⩲',
    +    '⩳' => '⩳',
    +    '⩴' => '⩴',
    +    '⩵' => '⩵',
    +    '⩷' => '⩷',
    +    '⩸' => '⩸',
    +    '⩹' => '⩹',
    +    '⩺' => '⩺',
    +    '⩻' => '⩻',
    +    '⩼' => '⩼',
    +    '⩽' => '⩽',
    +    '⩽̸' => '&nles',
    +    '⩾' => '⩾',
    +    '⩾̸' => '&nges',
    +    '⩿' => '⩿',
    +    '⪀' => '⪀',
    +    '⪁' => '⪁',
    +    '⪂' => '⪂',
    +    '⪃' => '⪃',
    +    '⪄' => '⪄',
    +    '⪅' => '⪅',
    +    '⪆' => '⪆',
    +    '⪇' => '⪇',
    +    '⪈' => '⪈',
    +    '⪉' => '⪉',
    +    '⪊' => '⪊',
    +    '⪋' => '⪋',
    +    '⪌' => '⪌',
    +    '⪍' => '⪍',
    +    '⪎' => '⪎',
    +    '⪏' => '⪏',
    +    '⪐' => '⪐',
    +    '⪑' => '⪑',
    +    '⪒' => '⪒',
    +    '⪓' => '⪓',
    +    '⪔' => '⪔',
    +    '⪕' => '⪕',
    +    '⪖' => '⪖',
    +    '⪗' => '⪗',
    +    '⪘' => '⪘',
    +    '⪙' => '⪙',
    +    '⪚' => '⪚',
    +    '⪝' => '⪝',
    +    '⪞' => '⪞',
    +    '⪟' => '⪟',
    +    '⪠' => '⪠',
    +    '⪡' => '⪡',
    +    '⪡̸' => '&NotNestedLessLess',
    +    '⪢' => '⪢',
    +    '⪢̸' => '&NotNestedGreaterGreater',
    +    '⪤' => '⪤',
    +    '⪥' => '⪥',
    +    '⪦' => '⪦',
    +    '⪧' => '⪧',
    +    '⪨' => '⪨',
    +    '⪩' => '⪩',
    +    '⪪' => '⪪',
    +    '⪫' => '⪫',
    +    '⪬' => '⪬',
    +    '⪬︀' => '&smtes',
    +    '⪭' => '⪭',
    +    '⪭︀' => '&lates',
    +    '⪮' => '⪮',
    +    '⪯' => '⪯',
    +    '⪯̸' => '&NotPrecedesEqual',
    +    '⪰' => '⪰',
    +    '⪰̸' => '&NotSucceedsEqual',
    +    '⪳' => '⪳',
    +    '⪴' => '⪴',
    +    '⪵' => '⪵',
    +    '⪶' => '⪶',
    +    '⪷' => '⪷',
    +    '⪸' => '⪸',
    +    '⪹' => '⪹',
    +    '⪺' => '⪺',
    +    '⪻' => '⪻',
    +    '⪼' => '⪼',
    +    '⪽' => '⪽',
    +    '⪾' => '⪾',
    +    '⪿' => '⪿',
    +    '⫀' => '⫀',
    +    '⫁' => '⫁',
    +    '⫂' => '⫂',
    +    '⫃' => '⫃',
    +    '⫄' => '⫄',
    +    '⫅' => '⫅',
    +    '⫅̸' => '&nsubE',
    +    '⫆' => '⫆',
    +    '⫆̸' => '&nsupseteqq',
    +    '⫇' => '⫇',
    +    '⫈' => '⫈',
    +    '⫋' => '⫋',
    +    '⫋︀' => '&vsubnE',
    +    '⫌' => '⫌',
    +    '⫌︀' => '&varsupsetneqq',
    +    '⫏' => '⫏',
    +    '⫐' => '⫐',
    +    '⫑' => '⫑',
    +    '⫒' => '⫒',
    +    '⫓' => '⫓',
    +    '⫔' => '⫔',
    +    '⫕' => '⫕',
    +    '⫖' => '⫖',
    +    '⫗' => '⫗',
    +    '⫘' => '⫘',
    +    '⫙' => '⫙',
    +    '⫚' => '⫚',
    +    '⫛' => '⫛',
    +    '⫤' => '⫤',
    +    '⫦' => '⫦',
    +    '⫧' => '⫧',
    +    '⫨' => '⫨',
    +    '⫩' => '⫩',
    +    '⫫' => '⫫',
    +    '⫬' => '⫬',
    +    '⫭' => '⫭',
    +    '⫮' => '⫮',
    +    '⫯' => '⫯',
    +    '⫰' => '⫰',
    +    '⫱' => '⫱',
    +    '⫲' => '⫲',
    +    '⫳' => '⫳',
    +    '⫽︀' => '&varsupsetneqq',
    +    'ff' => 'ff',
    +    'fi' => 'fi',
    +    'fl' => 'fl',
    +    'ffi' => 'ffi',
    +    'ffl' => 'ffl',
    +    '𝒜' => '𝒜',
    +    '𝒞' => '𝒞',
    +    '𝒟' => '𝒟',
    +    '𝒢' => '𝒢',
    +    '𝒥' => '𝒥',
    +    '𝒦' => '𝒦',
    +    '𝒩' => '𝒩',
    +    '𝒪' => '𝒪',
    +    '𝒫' => '𝒫',
    +    '𝒬' => '𝒬',
    +    '𝒮' => '𝒮',
    +    '𝒯' => '𝒯',
    +    '𝒰' => '𝒰',
    +    '𝒱' => '𝒱',
    +    '𝒲' => '𝒲',
    +    '𝒳' => '𝒳',
    +    '𝒴' => '𝒴',
    +    '𝒵' => '𝒵',
    +    '𝒶' => '𝒶',
    +    '𝒷' => '𝒷',
    +    '𝒸' => '𝒸',
    +    '𝒹' => '𝒹',
    +    '𝒻' => '𝒻',
    +    '𝒽' => '𝒽',
    +    '𝒾' => '𝒾',
    +    '𝒿' => '𝒿',
    +    '𝓀' => '𝓀',
    +    '𝓁' => '𝓁',
    +    '𝓂' => '𝓂',
    +    '𝓃' => '𝓃',
    +    '𝓅' => '𝓅',
    +    '𝓆' => '𝓆',
    +    '𝓇' => '𝓇',
    +    '𝓈' => '𝓈',
    +    '𝓉' => '𝓉',
    +    '𝓊' => '𝓊',
    +    '𝓋' => '𝓋',
    +    '𝓌' => '𝓌',
    +    '𝓍' => '𝓍',
    +    '𝓎' => '𝓎',
    +    '𝓏' => '𝓏',
    +    '𝔄' => '𝔄',
    +    '𝔅' => '𝔅',
    +    '𝔇' => '𝔇',
    +    '𝔈' => '𝔈',
    +    '𝔉' => '𝔉',
    +    '𝔊' => '𝔊',
    +    '𝔍' => '𝔍',
    +    '𝔎' => '𝔎',
    +    '𝔏' => '𝔏',
    +    '𝔐' => '𝔐',
    +    '𝔑' => '𝔑',
    +    '𝔒' => '𝔒',
    +    '𝔓' => '𝔓',
    +    '𝔔' => '𝔔',
    +    '𝔖' => '𝔖',
    +    '𝔗' => '𝔗',
    +    '𝔘' => '𝔘',
    +    '𝔙' => '𝔙',
    +    '𝔚' => '𝔚',
    +    '𝔛' => '𝔛',
    +    '𝔜' => '𝔜',
    +    '𝔞' => '𝔞',
    +    '𝔟' => '𝔟',
    +    '𝔠' => '𝔠',
    +    '𝔡' => '𝔡',
    +    '𝔢' => '𝔢',
    +    '𝔣' => '𝔣',
    +    '𝔤' => '𝔤',
    +    '𝔥' => '𝔥',
    +    '𝔦' => '𝔦',
    +    '𝔧' => '𝔧',
    +    '𝔨' => '𝔨',
    +    '𝔩' => '𝔩',
    +    '𝔪' => '𝔪',
    +    '𝔫' => '𝔫',
    +    '𝔬' => '𝔬',
    +    '𝔭' => '𝔭',
    +    '𝔮' => '𝔮',
    +    '𝔯' => '𝔯',
    +    '𝔰' => '𝔰',
    +    '𝔱' => '𝔱',
    +    '𝔲' => '𝔲',
    +    '𝔳' => '𝔳',
    +    '𝔴' => '𝔴',
    +    '𝔵' => '𝔵',
    +    '𝔶' => '𝔶',
    +    '𝔷' => '𝔷',
    +    '𝔸' => '𝔸',
    +    '𝔹' => '𝔹',
    +    '𝔻' => '𝔻',
    +    '𝔼' => '𝔼',
    +    '𝔽' => '𝔽',
    +    '𝔾' => '𝔾',
    +    '𝕀' => '𝕀',
    +    '𝕁' => '𝕁',
    +    '𝕂' => '𝕂',
    +    '𝕃' => '𝕃',
    +    '𝕄' => '𝕄',
    +    '𝕆' => '𝕆',
    +    '𝕊' => '𝕊',
    +    '𝕋' => '𝕋',
    +    '𝕌' => '𝕌',
    +    '𝕍' => '𝕍',
    +    '𝕎' => '𝕎',
    +    '𝕏' => '𝕏',
    +    '𝕐' => '𝕐',
    +    '𝕒' => '𝕒',
    +    '𝕓' => '𝕓',
    +    '𝕔' => '𝕔',
    +    '𝕕' => '𝕕',
    +    '𝕖' => '𝕖',
    +    '𝕗' => '𝕗',
    +    '𝕘' => '𝕘',
    +    '𝕙' => '𝕙',
    +    '𝕚' => '𝕚',
    +    '𝕛' => '𝕛',
    +    '𝕜' => '𝕜',
    +    '𝕝' => '𝕝',
    +    '𝕞' => '𝕞',
    +    '𝕟' => '𝕟',
    +    '𝕠' => '𝕠',
    +    '𝕡' => '𝕡',
    +    '𝕢' => '𝕢',
    +    '𝕣' => '𝕣',
    +    '𝕤' => '𝕤',
    +    '𝕥' => '𝕥',
    +    '𝕦' => '𝕦',
    +    '𝕧' => '𝕧',
    +    '𝕨' => '𝕨',
    +    '𝕩' => '𝕩',
    +    '𝕪' => '𝕪',
    +    '𝕫' => '𝕫',
    +  );
    +}
    diff --git a/libraries/html5php/HTML5/Serializer/OutputRules.php b/libraries/html5php/HTML5/Serializer/OutputRules.php
    new file mode 100644
    index 0000000..3af1cde
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Serializer/OutputRules.php
    @@ -0,0 +1,314 @@
    +';
    +
    +  /**
    +   * Set up the output rules around a writable stream.
    +   *
    +   * @param resource $output
    +   *   The stream everything is written to (it is handed to fwrite()).
    +   * @param array $options
    +   *   Optional settings. Only 'encode_entities' is read here: when present,
    +   *   its value replaces the default encode flag consulted by enc().
    +   */
    +  public function __construct($output, $options = array()) {
    +    $this->out = $output;
    +
    +    // Serialization always starts out in plain HTML mode.
    +    $this->outputMode = static::IM_IN_HTML;
    +
    +    // Leave the default encode flag alone unless the caller supplied one.
    +    if (!isset($options['encode_entities'])) {
    +      return;
    +    }
    +    $this->encode = $options['encode_entities'];
    +  }
    +
    +  /**
    +   * Register the traverser these rules hand child nodes back to.
    +   *
    +   * @param \HTML5\Serializer\Traverser $traverser
    +   *   The traverser driving the serialization.
    +   *
    +   * @return $this
    +   *   The current object, so calls can be chained.
    +   */
    +  public function setTraverser(\HTML5\Serializer\Traverser $traverser) {
    +    $this->traverser = $traverser;
    +    return $this;
    +  }
    +
    +  /**
    +   * Write a whole document: doctype, the root element tree, then a newline.
    +   *
    +   * @param mixed $dom
    +   *   The document to write; only its documentElement is read here.
    +   */
    +  public function document($dom) {
    +    $this->doctype();
    +
    +    // The traverser dispatches the root element back into these rules.
    +    $root = $dom->documentElement;
    +    $this->traverser->node($root);
    +
    +    $this->nl();
    +  }
    +
    +  /**
    +   * Write the DOCTYPE constant followed by a newline.
    +   */
    +  protected function doctype() {
    +    // wr() returns $this, so the newline can be chained directly.
    +    $this->wr(static::DOCTYPE)->nl();
    +  }
    +
    +  /**
    +   * Write an element: opening tag, children and, unless void, closing tag.
    +   *
    +   * Entering an svg or math element switches the output mode used while its
    +   * subtree is written; the mode is set back to HTML once the children are
    +   * done.
    +   *
    +   * @param mixed $ele
    +   *   The element node to write.
    +   */
    +  public function element($ele) {
    +    // Per spec: an element with a declared namespace in the HTML, MathML or
    +    // SVG namespaces is identified by its local name instead of the tagName.
    +    $tag = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
    +
    +    // Foreign content gets special handling. if/elseif is kept on purpose:
    +    // it was chosen over switch for speed.
    +    if ($tag == 'svg') {
    +      $this->outputMode = static::IM_IN_SVG;
    +      $tag = Elements::normalizeSvgElement($tag);
    +    }
    +    elseif ($tag == 'math') {
    +      $this->outputMode = static::IM_IN_MATHML;
    +    }
    +
    +    $this->openTag($ele);
    +
    +    // Descend into the subtree, if there is one.
    +    if ($ele->hasChildNodes()) {
    +      $this->traverser->children($ele->childNodes);
    +    }
    +
    +    // Leaving an svg/math subtree puts us back in plain HTML output.
    +    if ($tag == 'svg' || $tag == 'math') {
    +      $this->outputMode = static::IM_IN_HTML;
    +    }
    +
    +    // Void (unary) elements never get a closing tag.
    +    if (Elements::isA($tag, Elements::VOID_TAG)) {
    +      return;
    +    }
    +    $this->closeTag($ele);
    +  }
    +
    +  /**
    +   * Write a text node.
    +   *
    +   * Text whose parent element is flagged Elements::TEXT_RAW is written
    +   * verbatim; all other text goes through enc() first.
    +   *
    +   * @param \DOMText $ele
    +   *   The text node to write.
    +   */
    +  public function text($ele) {
    +    $parent = isset($ele->parentNode) ? $ele->parentNode : NULL;
    +
    +    // Only a parent that exposes a tagName can be a raw-text element.
    +    $isRaw = $parent !== NULL
    +      && isset($parent->tagName)
    +      && Elements::isA($parent->tagName, Elements::TEXT_RAW);
    +
    +    // FIXME: This probably needs some flags set.
    +    $this->wr($isRaw ? $ele->data : $this->enc($ele->data));
    +  }
    +
    +  /**
    +   * Write a CDATA section.
    +   *
    +   * @param mixed $ele
    +   *   The CDATA node to write.
    +   */
    +  public function cdata($ele) {
    +    // Let the owner document serialize the CDATA node as XML.
    +    $xml = $ele->ownerDocument->saveXML($ele);
    +    $this->wr($xml);
    +  }
    +
    +  /**
    +   * Write a comment node.
    +   *
    +   * @param mixed $ele
    +   *   The comment node to write.
    +   */
    +  public function comment($ele) {
    +    // Serializing the node through its owner document produces the same
    +    // output as writing the comment delimiters around the data by hand.
    +    $xml = $ele->ownerDocument->saveXML($ele);
    +    $this->wr($xml);
    +  }
    +
    +  /**
    +   * Write a processing instruction.
    +   *
    +   * The instruction is written as the opening delimiter, the target, a single
    +   * space, the data and the closing delimiter.
    +   *
    +   * @param mixed $ele
    +   *   The processing instruction node; its target and data are written.
    +   */
    +  public function processorInstruction($ele) {
    +    $this->wr('<?')->wr($ele->target)->wr(' ')->wr($ele->data)->wr('?>');
    +  }
    +
    +  /**
    +   * Write the opening tag, including attributes.
    +   *
    +   * The tag is written with the element's tagName. In HTML mode the tag is
    +   * always closed with '>'. Outside HTML mode (SVG, MathML or embedded XML)
    +   * an element without children is written as self-closing instead.
    +   *
    +   * @param \DOMNode $ele
    +   *   The element being written.
    +   */
    +  protected function openTag($ele) {
    +    $this->wr('<')->wr($ele->tagName);
    +    $this->attrs($ele);
    +
    +    // Only childless elements in foreign content self-close. The child check
    +    // is skipped entirely while in HTML mode.
    +    $selfClosing = $this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes();
    +
    +    $this->wr($selfClosing ? ' />' : '>');
    +  }
    +
    +  /**
    +   * Write the attributes of an element.
    +   *
    +   * Each attribute is written as a space followed by its name. The value,
    +   * encoded in attribute mode, is appended as ="value" only when it is not
    +   * empty, so empty attributes are written value-less.
    +   *
    +   * @param mixed $ele
    +   *   The element whose attributes are written.
    +   *
    +   * @return $this
    +   *   The current object, on every path, so calls can be chained.
    +   */
    +  protected function attrs($ele) {
    +    // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
    +    if (!$ele->hasAttributes()) {
    +      return $this;
    +    }
    +
    +    $map = $ele->attributes;
    +    $len = $map->length;
    +    for ($i = 0; $i < $len; ++$i) {
    +      $node = $map->item($i);
    +      $val = $this->enc($node->value, TRUE);
    +
    +      // XXX: The spec says that we need to ensure that anything in
    +      // the XML, XMLNS, or XLink NS's should use the canonical
    +      // prefix. It seems that DOM does this for us already, but there
    +      // may be exceptions.
    +      $name = $node->name;
    +
    +      // Special handling for attributes in SVG and MathML.
    +      // Using if/elseif instead of switch because it's faster in PHP.
    +      if ($this->outputMode == static::IM_IN_SVG) {
    +        $name = Elements::normalizeSvgAttribute($name);
    +      }
    +      elseif ($this->outputMode == static::IM_IN_MATHML) {
    +        $name = Elements::normalizeMathMlAttribute($name);
    +      }
    +
    +      $this->wr(' ')->wr($name);
    +
    +      // An empty value is omitted, leaving a value-less attribute.
    +      if (isset($val) && $val !== '') {
    +        $this->wr('="')->wr($val)->wr('"');
    +      }
    +    }
    +
    +    // Match the early-exit path above, which already returns $this.
    +    return $this;
    +  }
    +
    +  /**
    +   * Write the closing tag.
    +   *
    +   * The tag is written with the element's tagName. Outside HTML mode an
    +   * element without children gets no closing tag, because openTag() already
    +   * wrote it as self-closing.
    +   *
    +   * @param \DOMNode $ele
    +   *   The element being written.
    +   */
    +  protected function closeTag($ele) {
    +    if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
    +      $this->wr('</')->wr($ele->tagName)->wr('>');
    +    }
    +  }
    +
    +  /**
    +   * Write a string to the output stream.
    +   *
    +   * The text is passed to fwrite() unchanged, so any escaping must already
    +   * have been applied by the caller.
    +   *
    +   * @param string $text
    +   *   The string to put into the output.
    +   *
    +   * @return $this
    +   *   The current object, so calls can be chained.
    +   */
    +  protected function wr($text) {
    +    fwrite($this->out, $text);
    +    return $this;
    +  }
    +
    +  /**
    +   * Write a new line (PHP_EOL) to the output stream.
    +   *
    +   * @return $this
    +   *   The current object, so calls can be chained.
    +   */
    +  protected function nl() {
    +    fwrite($this->out, PHP_EOL);
    +    return $this;
    +  }
    +
    +  /**
    +   * Encode text for output.
    +   *
    +   * With the encode flag off (the default) the text is only escaped, as
    +   * described on escape(), following section 8.3 of the html5 spec.
    +   *
    +   * With the encode flag on, characters are converted to named character
    +   * references (html5 spec sections 8.1.4 and 8.5). This touches many common
    +   * characters, not just the handful that escaping covers. On PHP 5.4+ the
    +   * native html5 entity support is used; on older versions the conversion
    +   * falls back to the HTML5Entities map.
    +   *
    +   * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
    +   *
    +   * @todo Use the Entities class in php 5.3 to have html5 entities.
    +   *
    +   * @param string $text
    +   *   Text to encode.
    +   * @param boolean $attribute
    +   *   True if we are encoding an attribute, false otherwise. Only the
    +   *   escaping path makes use of it.
    +   *
    +   * @return string
    +   *   The encoded text.
    +   */
    +  protected function enc($text, $attribute = FALSE) {
    +    // Default path: escape rather than convert to named character references.
    +    if (!$this->encode) {
    +      return $this->escape($text, $attribute);
    +    }
    +
    +    // Before PHP 5.4 html5 entities are not handled natively, so translate
    +    // through the bundled entity map instead.
    +    if (!defined('ENT_HTML5')) {
    +      return strtr($text, \HTML5\Serializer\HTML5Entities::$map);
    +    }
    +
    +    // PHP 5.4+: the native html5 entity table does the conversion.
    +    return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', FALSE);
    +  }
    +
    +  /**
    +   * Escape text.
    +   *
    +   * According to the html5 spec section 8.3 Serializing HTML fragments, text
    +   * within tags that are not style, script, xmp, iframe, noembed, and noframes
    +   * needs to be properly escaped.
    +   *
    +   * The & is converted to &amp; and the no-break space character (U+00A0) to
    +   * &nbsp;. In attribute mode the " is additionally converted to &quot;. When
    +   * not in attribute mode the < and > are converted to &lt; and &gt; instead.
    +   *
    +   * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
    +   *
    +   * @param string $text
    +   *   Text to escape.
    +   * @param boolean $attribute
    +   *   True if we are escaping an attribute, false otherwise.
    +   *
    +   * @return string
    +   *   The escaped text.
    +   */
    +  protected function escape($text, $attribute = FALSE) {
    +
    +    // Not using htmlspecialchars because, while it does escaping, it doesn't
    +    // match the requirements of section 8.3. For example, it doesn't handle
    +    // non-breaking spaces.
    +    if ($attribute) {
    +      $replace = array('"'=>'&quot;', '&'=>'&amp;', "\xc2\xa0"=>'&nbsp;');
    +    }
    +    else {
    +      $replace = array('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', "\xc2\xa0"=>'&nbsp;');
    +    }
    +
    +    // strtr() with an array never re-scans its own replacements, so the
    +    // ampersands it inserts are not escaped a second time.
    +    return strtr($text, $replace);
    +  }
    +}
    diff --git a/libraries/html5php/HTML5/Serializer/README.md b/libraries/html5php/HTML5/Serializer/README.md
    new file mode 100644
    index 0000000..849a47f
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Serializer/README.md
    @@ -0,0 +1,33 @@
    +# The Serializer (Writer) Model
    +
    +The serializer roughly follows sections _8.1 Writing HTML documents_ and section
    +_8.3 Serializing HTML fragments_ by converting DOMDocument, DOMDocumentFragment,
    +and DOMNodeList into HTML5.
    +
    +       [ HTML5 ]   // Interface for saving.
    +          ||
    +     [ Traverser ]   // Walk the DOM
    +          ||
    +       [ Rules ]     // Convert DOM elements into strings.
    +          ||
    +       [ HTML5 ]     // HTML5 document or fragment in text.
    +
    +
    +## HTML5 Class
    +
    +Provides the top level interface for saving.
    +
    +## The Traverser
    +
    +Walks the DOM finding each element and passing it off to the output rules to
    +convert to HTML5.
    +
    +## Output Rules
    +
    +The output rules are defined in the RulesInterface which can have multiple
    +implementations. Currently, the OutputRules is the default implementation that
    +converts a DOM as is into HTML5.
    +
    +## HTML5 String
    +
    +The output of the process is HTML5 as a string or saved to a file.
    \ No newline at end of file
    diff --git a/libraries/html5php/HTML5/Serializer/RulesInterface.php b/libraries/html5php/HTML5/Serializer/RulesInterface.php
    new file mode 100644
    index 0000000..18ac8ca
    --- /dev/null
    +++ b/libraries/html5php/HTML5/Serializer/RulesInterface.php
    @@ -0,0 +1,102 @@
    + 'html',
    +    'http://www.w3.org/1998/Math/MathML' => 'math',
    +    'http://www.w3.org/2000/svg' => 'svg',
    +  );
    +
    +  protected $dom;
    +  protected $options;
    +  protected $encode = FALSE;
    +  protected $rules;
    +  protected $out;
    +
    +  /**
    +   * Create a traverser.
    +   *
    +   * @param DOMNode|DOMNodeList $dom
    +   *   The document or node to traverse.
    +   * @param resource $out
    +   *   A stream that allows writing. It is stored here and returned by walk().
    +   * @param RulesInterface $rules
    +   *   The output rules each node is dispatched to. The traverser registers
    +   *   itself with them through setTraverser().
    +   * @param array $options
    +   *   An array of options for the traverser as key/value pairs. The array is
    +   *   stored as given; the documented keys are:
    +   *   - encode_entities: A bool to specify if full encoding should happen for
    +   *     all named character references. Defaults to FALSE which escapes &'<>".
    +   *   - output_rules: The path to the class handling the output rules.
    +   */
    +  public function __construct($dom, $out, RulesInterface $rules, $options = array()) {
    +    $this->dom = $dom;
    +    $this->options = $options;
    +    $this->out = $out;
    +
    +    // Give the rules a way to call back into this traverser for child nodes.
    +    $this->rules = $rules;
    +    $rules->setTraverser($this);
    +  }
    +
    +  /**
    +   * Tell the traverser to walk the DOM.
    +   *
    +   * How the walk starts depends on what was passed to the constructor: a
    +   * document goes to the rules as a whole, a fragment or node list is walked
    +   * item by item, and anything else is treated as a single node.
    +   *
    +   * @return resource $out
    +   *   Returns the output stream.
    +   */
    +  public function walk() {
    +    $dom = $this->dom;
    +
    +    if ($dom instanceof \DOMDocument) {
    +      $this->rules->document($dom);
    +      return $this->out;
    +    }
    +
    +    // Document fragments are a special case. Only the children need to
    +    // be serialized.
    +    if ($dom instanceof \DOMDocumentFragment) {
    +      if ($dom->hasChildNodes()) {
    +        $this->children($dom->childNodes);
    +      }
    +      return $this->out;
    +    }
    +
    +    // A NodeList is looped over directly. If this is a NodeList of
    +    // DOMDocuments this will not work.
    +    if ($dom instanceof \DOMNodeList) {
    +      $this->children($dom);
    +      return $this->out;
    +    }
    +
    +    // Otherwise assume this is a DOMNode-like data structure.
    +    $this->node($dom);
    +    return $this->out;
    +  }
    +
    +  /**
    +   * Process a node in the DOM.
    +   *
    +   * The node is dispatched to the matching output rule based on its nodeType.
    +   * A listing of types is at http://php.net/manual/en/dom.constants.php
    +   *
    +   * @param mixed $node
    +   *   A node implementing \DOMNode.
    +   */
    +  public function node($node) {
    +    $type = $node->nodeType;
    +
    +    if ($type == XML_ELEMENT_NODE) {
    +      $this->rules->element($node);
    +    }
    +    elseif ($type == XML_TEXT_NODE) {
    +      $this->rules->text($node);
    +    }
    +    elseif ($type == XML_CDATA_SECTION_NODE) {
    +      $this->rules->cdata($node);
    +    }
    +    elseif ($type == XML_PI_NODE) {
    +      // FIXME: It appears that the parser doesn't do PI's.
    +      $this->rules->processorInstruction($node);
    +    }
    +    elseif ($type == XML_COMMENT_NODE) {
    +      $this->rules->comment($node);
    +    }
    +    else {
    +      // Currently we don't support embedding DTDs.
    +      // NOTE(review): this prints an empty string to standard output rather
    +      // than writing to the stream; the literal may have lost its content.
    +      // Confirm against upstream.
    +      print '';
    +    }
    +  }
    +
    +  /**
    +   * Process every node of a node list, in order.
    +   *
    +   * @param \DOMNodeList $nl
    +   *   A list of child elements to walk through.
    +   */
    +  public function children($nl) {
    +    foreach ($nl as $child) {
    +      // Each item is dispatched exactly like a node passed to node() directly.
    +      $this->node($child);
    +    }
    +  }
    +
    +  /**
    +   * Is an element local?
    +   *
    +   * An element counts as local when its namespace URI is one of the keys of
    +   * the $local_ns map. Elements without a namespace URI are never local.
    +   *
    +   * @param mixed $ele
    +   *   An element that implements \DOMNode.
    +   *
    +   * @return bool
    +   *   True if local and false otherwise.
    +   */
    +  public function isLocalElement($ele) {
    +    $uri = $ele->namespaceURI;
    +
    +    return !empty($uri) && isset(static::$local_ns[$uri]);
    +  }
    +}
    diff --git a/libraries/html5php/LICENSE.txt b/libraries/html5php/LICENSE.txt
    new file mode 100644
    index 0000000..6ecbf3e
    --- /dev/null
    +++ b/libraries/html5php/LICENSE.txt
    @@ -0,0 +1,65 @@
    +## HTML5-PHP License
    +
    +Copyright (c) 2013 The Authors of HTML5-PHP
    +
    +Matt Butcher - technosophos@gmail.com
    +Matt Farina - matt@mattfarina.com
    +
    +Permission is hereby granted, free of charge, to any person obtaining a copy of
    +this software and associated documentation files (the "Software"), to deal in 
    +the Software without restriction, including without limitation the rights to 
    +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
    +the Software, and to permit persons to whom the Software is furnished to do so,
    +subject to the following conditions:
    +
    +The above copyright notice and this permission notice shall be included in all
    +copies or substantial portions of the Software.
    +
    +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
    +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
    +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
    +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    +
    +## HTML5Lib License
    +
    +Portions of this are based on html5lib's PHP version, which was a
    +sub-project of html5lib. The following is the list of contributors from
    +html5lib:
    +
    +html5lib:
    +
    +Copyright (c) 2006-2009 The Authors
    +
    +Contributors:
    +James Graham - jg307@cam.ac.uk
    +Anne van Kesteren - annevankesteren@gmail.com
    +Lachlan Hunt - lachlan.hunt@lachy.id.au
    +Matt McDonald - kanashii@kanashii.ca
    +Sam Ruby - rubys@intertwingly.net
    +Ian Hickson (Google) - ian@hixie.ch
    +Thomas Broyer - t.broyer@ltgt.net
    +Jacques Distler - distler@golem.ph.utexas.edu
    +Henri Sivonen - hsivonen@iki.fi
    +Adam Barth - abarth@webkit.org
    +Eric Seidel - eric@webkit.org
    +The Mozilla Foundation (contributions from Henri Sivonen since 2008)
    +David Flanagan (Mozilla) - dflanagan@mozilla.com
    +
    +Permission is hereby granted, free of charge, to any person obtaining a copy of
    +this software and associated documentation files (the "Software"), to deal in 
    +the Software without restriction, including without limitation the rights to 
    +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
    +the Software, and to permit persons to whom the Software is furnished to do so,
    +subject to the following conditions:
    +
    +The above copyright notice and this permission notice shall be included in all
    +copies or substantial portions of the Software.
    +
    +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
    +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
    +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
    +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    diff --git a/libraries/html5php/README.md b/libraries/html5php/README.md
    new file mode 100644
    index 0000000..746ab4d
    --- /dev/null
    +++ b/libraries/html5php/README.md
    @@ -0,0 +1,182 @@
    +# HTML5-PHP
    +
    +The need for an HTML5 parser in PHP is clear. This project initially
    +began with the seemingly abandoned `html5lib` project [original source](https://code.google.com/p/html5lib/source/checkout).
    +But after some initial refactoring work, we began a new parser. It offers:
    +
    +- An HTML5 serializer
    +- Support for PHP namespaces
    +- Composer support
    +- Event-based (SAX-like) parser
    +- DOM tree builder
    +- Interoperability with QueryPath [[in progress](https://github.com/technosophos/querypath/issues/114)]
    +
    +[![Build Status](https://travis-ci.org/Masterminds/html5-php.png?branch=master)](https://travis-ci.org/Masterminds/html5-php) [![Latest Stable Version](https://poser.pugx.org/masterminds/html5/v/stable.png)](https://packagist.org/packages/masterminds/html5) [![Coverage Status](https://coveralls.io/repos/Masterminds/html5-php/badge.png?branch=master)](https://coveralls.io/r/Masterminds/html5-php?branch=master)
    +
    +## Installation
    +
    +Install HTML5-PHP using [composer](http://getcomposer.org/).
    +
    +To install, add `masterminds/html5` to your `composer.json` file:
    +
    +```
    +{
    +  "require" : {
    +    "masterminds/html5": "1.*"
    +  }
    +}
    +```
    +
    +(You may replace `1.*` with a more specific release tag, of
    +course.)
    +
    +From there, use the `composer install` or `composer update` commands to
    +install.
    +
    +## Basic Usage
    +
    +HTML5-PHP has a high-level API and a low-level API. 
    +
    +Here is how you use the high-level `HTML5` library API:
    +
    +```php
    +<?php
    +// Assuming you installed from Composer:
    +require "vendor/autoload.php";
    +
    +// An example HTML document:
    +$html = <<< 'HERE'
    +  <html>
    +  <head>
    +    <title>TEST</title>
    +  </head>
    +  <body id='foo'>
    +    <h1>Hello World</h1>
    +    <p>This is a test of the HTML5 parser.</p>
    +  </body>
    +  </html>
    +HERE; + +// Parse the document. $dom is a DOMDocument. +$dom = HTML5::loadHTML($html); + +// Render it as HTML5: +print HTML5::saveHTML($dom); + +// Or save it to a file: +HTML5::save($dom, 'out.html'); + +?> +``` + +The `$dom` created by the parser is a full `DOMDocument` object. And the +`save()` and `saveHTML()` methods will take any DOMDocument. + + +## The Low-Level API + +This library provides the following low-level APIs that you can use to +create more customized HTML5 tools: + +- An `InputStream` abstraction that can work with different kinds of +input source (not just files and strings). +- A SAX-like event-based parser that you can hook into for special kinds +of parsing. +- A flexible error-reporting mechanism that can be tuned to document +syntax checking. +- A DOM implementation that uses PHP's built-in DOM library. + +The unit tests exercise each piece of the API, and every public function +is well-documented. + +### Parser Design + +The parser is designed as follows: + +- The `InputStream` portion handles direct I/O. +- The `Scanner` handles scanning on behalf of the parser. +- The `Tokenizer` requests data off of the scanner, parses it, classifies +it, and sends it to an `EventHandler`. It is a *recursive descent parser.* +- The `EventHandler` receives notifications and data for each specific +semantic event that occurs during tokenization. +- The `DOMBuilder` is an `EventHandler` that listens for tokenizing +events and builds a document tree (`DOMDocument`) based on the events. + +### Serializer Design + +The serializer takes a data structure (the `DOMDocument`) and transforms +it into a character representation -- an HTML5 document. + +The serializer is broken into three parts: + +- The `OutputRules` contain the rules to turn DOM elements into strings. The +rules are an implementation of the interface `RulesInterface` allowing for +different rule sets to be used. +- The `Traverser`, which is a special-purpose tree walker. 
It visits +each node in the tree and uses the `OutputRules` to transform the node +into a string. +- `\HTML5` manages the `Traverser` and stores the resultant data +in the correct place. + +The serializer (`save()`, `saveHTML()`) follows the +[section 8.9 of the HTML 5.0 spec](http://www.w3.org/TR/2012/CR-html5-20121217/syntax.html#serializing-html-fragments). +So tags are serialized according to these rules: + +- A tag with children: <foo>CHILDREN</foo> +- A tag that cannot have content: <foo> (no closing tag) +- A tag that could have content, but doesn't: <foo></foo> + +## Known Issues (Or, Things We Designed Against the Spec) + +Please check the issue queue for a full list, but the following are +known issues that are not presently on the roadmap: + +- Namespaces: HTML5 only [supports a selected list of namespaces](http://www.w3.org/TR/html5/infrastructure.html#namespaces) + and they do not operate in the same way as XML namespaces. A `:` has no special + meaning. The parser does not support XML style namespaces via `:`. +- Scripts: This parser does not contain a JavaScript or a CSS + interpreter. While one may be supplied, not all features will be + supported. +- Reentrance: The current parser is not re-entrant. (Thus you can't pause + the parser to modify the HTML string mid-parse.) +- Validation: The current tree builder is **not** a validating parser. + While it will correct some HTML, it does not check that the HTML + conforms to the standard. (Should you wish, you can build a validating + parser by extending DOMTree or building your own EventHandler + implementation.) + * There is limited support for insertion modes. + * Some autocorrection is done automatically. + * Per the spec, many legacy tags are admitted and correctly handled, + even though they are technically not part of HTML5. 
+- Attribute names and values: Due to the implementation details of the + PHP implementation of DOM, attribute names that do not follow the + XML 1.0 standard are not inserted into the DOM. (Effectively, they + are ignored.) If you've got a clever fix for this, jump in! +- Processor Instructions: The HTML5 spec does not allow processor + instructions. We do. Since this is a server-side library, we think + this is useful. And that means, dear reader, that in some cases you + can parse the HTML from a mixed PHP/HTML document. This, however, + is an incidental feature, not a core feature. +- HTML manifests: Unsupported. +- PLAINTEXT: Unsupported. +- Adoption Agency Algorithm: Not yet implemented. (8.2.5.4.7) + +## Thanks to... + +We owe a huge debt of gratitude to the original authors of html5lib. + +While not much of the original parser remains, we learned a lot from +reading the html5lib library. And some pieces remain here. In +particular, much of the UTF-8 and Unicode handling is derived from the +html5lib project. + +## License + +This software is released under the MIT license. The original html5lib +library was also released under the MIT license. + +See LICENSE.txt + +Certain files contain copyright assertions by specific individuals +involved with html5lib. Those have been retained where appropriate. diff --git a/libraries/html5php/RELEASE.md b/libraries/html5php/RELEASE.md new file mode 100644 index 0000000..56e0cf0 --- /dev/null +++ b/libraries/html5php/RELEASE.md @@ -0,0 +1,26 @@ +# Release Notes + +1.0.4 (2014-04-29) +- #30/#31 Don't throw an exception for invalid tag names. + +1.0.3 (2014-02-28) +- #23 and #29: Ignore attributes with illegal chars in name for the PHP DOM. + +1.0.2 (2014-02-12) +- #23: Handle missing tag close in attribute list. +- #25: Fixed text escaping in the serializer (HTML5 8.3). +- #27: Fixed tests on Windows: changed "\n" -> PHP_EOL. +- #28: Fixed infinite loop for char "&" in unquoted attribute in parser. 
+- #26: Updated tag name case handling to deal with uppercase usage. +- #24: Newlines and tabs are allowed inside quoted attributes (HTML5 8.2.4). +- Fixed Travis CI testing. + +1.0.1 (2013-11-07) +- CDATA encoding is improved. (Non-standard; Issue #19) +- Some parser rules were not returning the new current element. (Issue #20) +- Added, to the README, details on code test coverage and to packagist version. +- Fixed processor instructions. +- Improved test coverage and documentation coverage. + +1.0.0 (2013-10-02) +- Initial release. diff --git a/libraries/html5php/autoloader.php b/libraries/html5php/autoloader.php new file mode 100644 index 0000000..559f343 --- /dev/null +++ b/libraries/html5php/autoloader.php @@ -0,0 +1,35 @@ +path = dirname(__FILE__); + } + + /** + * Autoloader + * + * @param string $class The name of the class to attempt to load. + */ + public function autoload($class) + { + // Only load the class if it starts with "HTML5" + if (strpos($class, 'HTML5') !== 0) + { + return; + } + //die($class); + + $filename = $this->path . DIRECTORY_SEPARATOR . str_replace('\\', DIRECTORY_SEPARATOR, $class) . 
'.php'; + include $filename; + } +} \ No newline at end of file diff --git a/libraries/humble-http-agent/HumbleHttpAgent.php b/libraries/humble-http-agent/HumbleHttpAgent.php index 963f0c0..7028113 100644 --- a/libraries/humble-http-agent/HumbleHttpAgent.php +++ b/libraries/humble-http-agent/HumbleHttpAgent.php @@ -7,11 +7,11 @@ * For environments which do not have these options, it reverts to standard sequential * requests (using file_get_contents()) * - * @version 1.4 - * @date 2013-05-10 + * @version 1.5 + * @date 2014-03-28 * @see http://php.net/HttpRequestPool * @author Keyvan Minoukadeh - * @copyright 2011-2013 Keyvan Minoukadeh + * @copyright 2011-2014 Keyvan Minoukadeh * @license http://www.gnu.org/licenses/agpl-3.0.html AGPL v3 */ @@ -31,6 +31,7 @@ class HumbleHttpAgent protected $maxParallelRequests = 5; protected $cache = null; //TODO protected $httpContext; + protected $curlOptions; protected $minimiseMemoryUse = false; //TODO protected $method; protected $cookieJar; @@ -80,6 +81,7 @@ class HumbleHttpAgent // create cookie jar $this->cookieJar = new CookieJar(); // set request options (redirect must be 0) + // HTTP PECL (http://php.net/manual/en/http.request.options.php) $this->requestOptions = array( 'timeout' => 15, 'connecttimeout' => 15, @@ -90,6 +92,7 @@ class HumbleHttpAgent if (is_array($requestOptions)) { $this->requestOptions = array_merge($this->requestOptions, $requestOptions); } + // HTTP file_get_contents $this->httpContext = array( 'http' => array( 'ignore_errors' => true, @@ -98,6 +101,23 @@ class HumbleHttpAgent 'header' => "Accept: */*\r\n" ) ); + // HTTP cURL + $this->curlOptions = array( + CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], + CURLOPT_TIMEOUT => $this->requestOptions['timeout'] + ); + // Use proxy? 
+ if ($this->requestOptions['proxyhost']) { + // For file_get_contents (see http://stackoverflow.com/a/1336419/407938) + $this->httpContext['http']['proxy'] = 'tcp://'.$this->requestOptions['proxyhost']; + $this->httpContext['http']['request_fulluri'] = true; + // For cURL (see http://stackoverflow.com/a/9247672/407938) + $this->curlOptions[CURLOPT_PROXY] = $this->requestOptions['proxyhost']; + if (isset($this->requestOptions['proxyauth'])) { + $this->httpContext['http']['header'] .= "Proxy-Authorization: Basic ".base64_encode($this->requestOptions['proxyauth'])."\r\n"; + $this->curlOptions[CURLOPT_PROXYUSERPWD] = $this->requestOptions['proxyauth']; + } + } } protected function debug($msg) { @@ -168,7 +188,7 @@ class HumbleHttpAgent public function getMetaRefreshURL($url, $html) { if ($html == '') return false; // - if (!preg_match('!]+)["\']*>!i', $html, $match)) { + if (!preg_match('!]+)["\']?!i', $html, $match)) { return false; } $redirect_url = $match[1]; @@ -443,10 +463,7 @@ class HumbleHttpAgent $this->debug("......sending cookies: $cookies"); $headers[] = 'Cookie: '.$cookies; } - $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, array( - CURLOPT_CONNECTTIMEOUT => $this->requestOptions['timeout'], - CURLOPT_TIMEOUT => $this->requestOptions['timeout'] - )); + $httpRequest = new RollingCurlRequest($req_url, $_meth, null, $headers, $this->curlOptions); $httpRequest->set_original_url($orig); $this->requests[$orig] = array('headers'=>null, 'body'=>null, 'httpRequest'=>$httpRequest); $this->requests[$orig]['original_url'] = $orig; // TODO: is this needed anymore? 
@@ -661,7 +678,7 @@ class HumbleHttpAgent */ if ($remove && $response) unset($this->requests[$url]); if ($gzdecode && stripos($response['headers'], 'Content-Encoding: gzip')) { - if ($html = gzdecode($response['body'])) { + if ($html = @gzdecode($response['body'])) { $response['body'] = $html; } } diff --git a/libraries/humble-http-agent/HumbleHttpAgentDummy.php b/libraries/humble-http-agent/HumbleHttpAgentDummy.php new file mode 100644 index 0000000..f2a93e9 --- /dev/null +++ b/libraries/humble-http-agent/HumbleHttpAgentDummy.php @@ -0,0 +1,106 @@ +body = $body; + if (isset($headers)) $this->headers = $headers; + } + + public function rewriteHashbangFragment($url) { + return $url; + } + + public function getRedirectURLfromHTML($url, $html) { + return false; + } + + public function getMetaRefreshURL($url, $html) { + return false; + } + + public function getUglyURL($url, $html) { + return false; + } + + public function removeFragment($url) { + return $url; + } + + public function rewriteUrls($url) { + return $url; + } + + public function enableDebug($bool=true) { + return; + } + + public function minimiseMemoryUse($bool = true) { + return; + } + + public function setMaxParallelRequests($max) { + return; + } + + public function validateUrl($url) { + $url = filter_var($url, FILTER_SANITIZE_URL); + $test = filter_var($url, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); + // deal with bug http://bugs.php.net/51192 (present in PHP 5.2.13 and PHP 5.3.2) + if ($test === false) { + $test = filter_var(strtr($url, '-', '_'), FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED); + } + if ($test !== false && $test !== null && preg_match('!^https?://!', $url)) { + return $url; + } else { + return false; + } + } + + public function fetchAll(array $urls) { + return; + } + + // fetch all URLs without following redirects + public function fetchAllOnce(array $urls, $isRedirect=false) { + return; + } + + public function get($url, $remove=false, $gzdecode=true) { + return array( + 
'body' => $this->body, + 'headers' => $this->headers, + 'status_code' => 200, + 'effective_url' => $url + ); + } + + public function parallelSupport() { + return false; + } +} \ No newline at end of file diff --git a/libraries/readability/Readability.php b/libraries/readability/Readability.php index d0f09d7..fc4cce9 100644 --- a/libraries/readability/Readability.php +++ b/libraries/readability/Readability.php @@ -12,7 +12,7 @@ * More information: http://fivefilters.org/content-only/ * License: Apache License, Version 2.0 * Requires: PHP5 -* Date: 2012-09-19 +* Date: 2014-03-27 * * Differences between the PHP port and the original * ------------------------------------------------------ @@ -71,7 +71,7 @@ class Readability public $revertForcedParagraphElements = true; public $articleTitle; public $articleContent; - public $dom; + public $dom = null; public $url = null; // optional - URL where HTML was retrieved public $debug = false; public $lightClean = true; // preserves more content (experimental) added 2012-09-19 @@ -95,7 +95,7 @@ class Readability // 'trimRe' => '/^\s+|\s+$/g', // PHP has trim() 'normalize' => '/\s{2,}/', 'killBreaks' => '/((\s| ?)*){1,}/', - 'video' => '!//(player\.|www\.)?(youtube|vimeo|viddler)\.com!i', + 'video' => '!//(player\.|www\.)?(youtube\.com|vimeo\.com|viddler\.com|twitch\.tv)!i', 'skipFootnoteLink' => '/^\s*(\[?[a-z0-9]{1,2}\]?|^|edit|citation needed)\s*$/i' ); @@ -118,9 +118,12 @@ class Readability $html = preg_replace($this->regexps['replaceFonts'], '<$1span>', $html); $html = mb_convert_encoding($html, 'HTML-ENTITIES', "UTF-8"); if (trim($html) == '') $html = ''; - if ($parser=='html5lib' && ($this->dom = HTML5_Parser::parse($html))) { - // all good - } else { + if ($parser=='html5lib' || $parser=='html5php') { + if (version_compare(PHP_VERSION, '5.3.0') >= 0) { + $this->dom = HTML5::loadHTML($html); + } + } + if ($this->dom === null) { $this->dom = new DOMDocument(); $this->dom->preserveWhiteSpace = false; 
@$this->dom->loadHTML($html); diff --git a/makefulltextfeed.php b/makefulltextfeed.php index ac4c81b..29642df 100644 --- a/makefulltextfeed.php +++ b/makefulltextfeed.php @@ -1,10 +1,10 @@ $v) { unset($process[$key][$k]); @@ -68,12 +76,13 @@ function autoload($class_name) { 'HumbleHttpAgent' => 'humble-http-agent/HumbleHttpAgent.php', 'SimplePie_HumbleHttpAgent' => 'humble-http-agent/SimplePie_HumbleHttpAgent.php', 'CookieJar' => 'humble-http-agent/CookieJar.php', + 'HumbleHttpAgentDummy' => 'humble-http-agent/HumbleHttpAgentDummy.php', // Include Zend Cache to improve performance (cache results) 'Zend_Cache' => 'Zend/Cache.php', // Language detect 'Text_LanguageDetect' => 'language-detect/LanguageDetect.php', - // HTML5 Lib - 'HTML5_Parser' => 'html5/Parser.php', + // HTML5 PHP (can't be used unless PHP version is >= 5.3) + 'HTML5' => 'html5php/HTML5.php', // htmLawed - used if XSS filter is enabled (xss_filter) 'htmLawed' => 'htmLawed/htmLawed.php' ); @@ -87,6 +96,7 @@ function autoload($class_name) { } spl_autoload_register('autoload'); require dirname(__FILE__).'/libraries/simplepie/autoloader.php'; +require dirname(__FILE__).'/libraries/html5php/autoloader.php'; //////////////////////////////// // Load config file @@ -103,6 +113,11 @@ require dirname(__FILE__).'/config.php'; //////////////////////////////// header('X-Robots-Tag: noindex, nofollow'); +//////////////////////////////// +// Content security headers +//////////////////////////////// +header("Content-Security-Policy: script-src 'self'; connect-src 'none'; font-src 'none'; style-src 'self'"); + //////////////////////////////// // Check if service is enabled //////////////////////////////// @@ -115,7 +130,9 @@ if (!$options->enabled) { // See the config file for debug options. 
//////////////////////////////// $debug_mode = false; -if (isset($_GET['debug'])) { +$debug_show_raw_html = false; +$debug_show_parsed_html = false; +if (isset($_REQUEST['debug'])) { if ($options->debug === true || $options->debug == 'user') { $debug_mode = true; } elseif ($options->debug == 'admin') { @@ -124,6 +141,8 @@ if (isset($_GET['debug'])) { } if ($debug_mode) { header('Content-Type: text/plain; charset=utf-8'); + $debug_show_raw_html = ($_REQUEST['debug'] === 'rawhtml'); + $debug_show_parsed_html = ($_REQUEST['debug'] === 'parsedhtml'); } else { if ($options->debug == 'admin') { die('You must be logged in to the admin area to see debug output.'); @@ -151,10 +170,10 @@ $options->smart_cache = $options->smart_cache && function_exists('apc_inc'); //////////////////////////////// // Check for feed URL //////////////////////////////// -if (!isset($_GET['url'])) { +if (!isset($_REQUEST['url'])) { die('No URL supplied'); } -$url = trim($_GET['url']); +$url = trim($_REQUEST['url']); if (strtolower(substr($url, 0, 7)) == 'feed://') { $url = 'http://'.substr($url, 7); } @@ -178,26 +197,30 @@ debug("Supplied URL: $url"); ///////////////////////////////// // Redirect to hide API key +// (if in 'full' mode) ///////////////////////////////// -if (isset($_GET['key']) && ($key_index = array_search($_GET['key'], $options->api_keys)) !== false) { +if ((_FF_FTR_MODE == 'full') && isset($_REQUEST['key']) && ($key_index = array_search($_REQUEST['key'], $options->api_keys)) !== false) { $host = $_SERVER['HTTP_HOST']; $path = rtrim(dirname($_SERVER['SCRIPT_NAME']), '/\\'); $_qs_url = (strtolower(substr($url, 0, 7)) == 'http://') ? 
substr($url, 7) : $url; $redirect = 'http://'.htmlspecialchars($host.$path).'/makefulltextfeed.php?url='.urlencode($_qs_url); $redirect .= '&key='.$key_index; - $redirect .= '&hash='.urlencode(sha1($_GET['key'].$url)); - if (isset($_GET['html'])) $redirect .= '&html='.urlencode($_GET['html']); - if (isset($_GET['max'])) $redirect .= '&max='.(int)$_GET['max']; - if (isset($_GET['links'])) $redirect .= '&links='.urlencode($_GET['links']); - if (isset($_GET['exc'])) $redirect .= '&exc='.urlencode($_GET['exc']); - if (isset($_GET['format'])) $redirect .= '&format='.urlencode($_GET['format']); - if (isset($_GET['callback'])) $redirect .= '&callback='.urlencode($_GET['callback']); - if (isset($_GET['l'])) $redirect .= '&l='.urlencode($_GET['l']); - if (isset($_GET['xss'])) $redirect .= '&xss'; - if (isset($_GET['use_extracted_title'])) $redirect .= '&use_extracted_title'; - if (isset($_GET['content'])) $redirect .= '&content='.urlencode($_GET['content']); - if (isset($_GET['summary'])) $redirect .= '&summary='.urlencode($_GET['summary']); - if (isset($_GET['debug'])) $redirect .= '&debug'; + $redirect .= '&hash='.urlencode(sha1($_REQUEST['key'].$url)); + if (isset($_REQUEST['html'])) $redirect .= '&html='.urlencode($_REQUEST['html']); + if (isset($_REQUEST['max'])) $redirect .= '&max='.(int)$_REQUEST['max']; + if (isset($_REQUEST['links'])) $redirect .= '&links='.urlencode($_REQUEST['links']); + if (isset($_REQUEST['exc'])) $redirect .= '&exc='.urlencode($_REQUEST['exc']); + if (isset($_REQUEST['format'])) $redirect .= '&format='.urlencode($_REQUEST['format']); + if (isset($_REQUEST['callback'])) $redirect .= '&callback='.urlencode($_REQUEST['callback']); + if (isset($_REQUEST['l'])) $redirect .= '&l='.urlencode($_REQUEST['l']); + if (isset($_REQUEST['lang'])) $redirect .= '&lang='.urlencode($_REQUEST['lang']); + if (isset($_REQUEST['xss'])) $redirect .= '&xss'; + if (isset($_REQUEST['use_extracted_title'])) $redirect .= '&use_extracted_title'; + if 
(isset($_REQUEST['content'])) $redirect .= '&content='.urlencode($_REQUEST['content']); + if (isset($_REQUEST['summary'])) $redirect .= '&summary='.urlencode($_REQUEST['summary']); + if (isset($_REQUEST['debug'])) $redirect .= '&debug'; + if (isset($_REQUEST['parser'])) $redirect .= '&parser='.urlencode($_REQUEST['parser']); + if (isset($_REQUEST['proxy'])) $redirect .= '&proxy='.urlencode($_REQUEST['proxy']); if ($debug_mode) { debug('Redirecting to hide access key, follow URL below to continue'); debug("Location: $redirect"); @@ -220,20 +243,27 @@ if (!ini_get('date.timezone') || !@date_default_timezone_set(ini_get('date.timez /////////////////////////////////////////////// // Check if the request is explicitly for an HTML page /////////////////////////////////////////////// -$html_only = (isset($_GET['html']) && ($_GET['html'] == '1' || $_GET['html'] == 'true')); +$html_only = (isset($_REQUEST['html']) && ($_REQUEST['html'] == '1' || $_REQUEST['html'] == 'true')); /////////////////////////////////////////////// // Check if valid key supplied /////////////////////////////////////////////// $valid_key = false; -if (isset($_GET['key']) && isset($_GET['hash']) && isset($options->api_keys[(int)$_GET['key']])) { - $valid_key = ($_GET['hash'] == sha1($options->api_keys[(int)$_GET['key']].$url)); +$key_index = false; +// first check for hidden key using hash (key (int) + hash parameters) (can appear in both simple and full modes) +if (isset($_REQUEST['key']) && isset($_REQUEST['hash']) && isset($options->api_keys[(int)$_REQUEST['key']])) { + $valid_key = ($_REQUEST['hash'] == sha1($options->api_keys[(int)$_REQUEST['key']].$url)); + if ($valid_key) $key_index = (int)$_REQUEST['key']; +} +// next check for full key (string) passed in request (only simple mode) +if (!$valid_key && _FF_FTR_MODE === 'simple' && isset($_REQUEST['key'])) { + $key_index = array_search($_REQUEST['key'], $options->api_keys); + if ($key_index !== false) $valid_key = true; } -$key_index = 
($valid_key) ? (int)$_GET['key'] : 0; if (!$valid_key && $options->key_required) { die('A valid key must be supplied'); } -if (!$valid_key && isset($_GET['key']) && $_GET['key'] != '') { +if (!$valid_key && isset($_REQUEST['key']) && $_REQUEST['key'] != '') { die('The entered key is invalid'); } @@ -248,8 +278,8 @@ if (!url_allowed($url)) die('URL blocked'); // Max entries // see config.php to find these values /////////////////////////////////////////////// -if (isset($_GET['max'])) { - $max = (int)$_GET['max']; +if (isset($_REQUEST['max'])) { + $max = (int)$_REQUEST['max']; if ($valid_key) { $max = min($max, $options->max_entries_with_key); } else { @@ -266,8 +296,8 @@ if (isset($_GET['max'])) { /////////////////////////////////////////////// // Link handling /////////////////////////////////////////////// -if (isset($_GET['links']) && in_array($_GET['links'], array('preserve', 'footnotes', 'remove'))) { - $links = $_GET['links']; +if (isset($_REQUEST['links']) && in_array($_REQUEST['links'], array('preserve', 'footnotes', 'remove'))) { + $links = $_REQUEST['links']; } else { $links = 'preserve'; } @@ -277,7 +307,7 @@ if (isset($_GET['links']) && in_array($_GET['links'], array('preserve', 'footnot /////////////////////////////////////////////// $favour_feed_titles = true; if ($options->favour_feed_titles == 'user') { - $favour_feed_titles = !isset($_GET['use_extracted_title']); + $favour_feed_titles = !isset($_REQUEST['use_extracted_title']); } else { $favour_feed_titles = $options->favour_feed_titles; } @@ -286,7 +316,7 @@ if ($options->favour_feed_titles == 'user') { // Include full content in output? /////////////////////////////////////////////// if ($options->content === 'user') { - if (isset($_GET['content']) && $_GET['content'] === '0') { + if (isset($_REQUEST['content']) && $_REQUEST['content'] === '0') { $options->content = false; } else { $options->content = true; @@ -297,7 +327,7 @@ if ($options->content === 'user') { // Include summaries in output? 
/////////////////////////////////////////////// if ($options->summary === 'user') { - if (isset($_GET['summary']) && $_GET['summary'] === '1') { + if (isset($_REQUEST['summary']) && $_REQUEST['summary'] === '1') { $options->summary = true; } else { $options->summary = false; @@ -308,7 +338,7 @@ if ($options->summary === 'user') { // Exclude items if extraction fails /////////////////////////////////////////////// if ($options->exclude_items_on_fail === 'user') { - $exclude_on_fail = (isset($_GET['exc']) && ($_GET['exc'] == '1')); + $exclude_on_fail = (isset($_REQUEST['exc']) && ($_REQUEST['exc'] == '1')); } else { $exclude_on_fail = $options->exclude_items_on_fail; } @@ -317,8 +347,9 @@ if ($options->exclude_items_on_fail === 'user') { // Detect language /////////////////////////////////////////////// if ($options->detect_language === 'user') { - if (isset($_GET['l'])) { - $detect_language = (int)$_GET['l']; + if (isset($_REQUEST['lang'])) $_REQUEST['l'] = $_REQUEST['lang']; + if (isset($_REQUEST['l'])) { + $detect_language = (int)$_REQUEST['l']; } else { $detect_language = 1; } @@ -332,7 +363,7 @@ $use_cld = extension_loaded('cld') && (version_compare(PHP_VERSION, '5.3.0') >= // Check for valid format // (stick to RSS (or RSS as JSON) for the time being) ///////////////////////////////////// -if (isset($_GET['format']) && $_GET['format'] == 'json') { +if (isset($_REQUEST['format']) && $_REQUEST['format'] == 'json') { $format = 'json'; } else { $format = 'rss'; @@ -342,11 +373,11 @@ if (isset($_GET['format']) && $_GET['format'] == 'json') { // Should we do XSS filtering? 
///////////////////////////////////// if ($options->xss_filter === 'user') { - $xss_filter = isset($_GET['xss']); + $xss_filter = isset($_REQUEST['xss']) && $_REQUEST['xss'] !== '0'; } else { $xss_filter = $options->xss_filter; } -if (!$xss_filter && isset($_GET['xss'])) { +if (!$xss_filter && (isset($_REQUEST['xss']) && $_REQUEST['xss'] !== '0')) { die('XSS filtering is disabled in config'); } @@ -355,8 +386,8 @@ if (!$xss_filter && isset($_GET['xss'])) { // Regex from https://gist.github.com/1217080 ///////////////////////////////////// $callback = null; -if ($format =='json' && isset($_GET['callback'])) { - $callback = trim($_GET['callback']); +if ($format =='json' && isset($_REQUEST['callback'])) { + $callback = trim($_REQUEST['callback']); foreach (explode('.', $callback) as $_identifier) { if (!preg_match('/^[a-zA-Z_$][0-9a-zA-Z_$]*(?:\[(?:".+"|\'.+\'|\d+)\])*?$/', $_identifier)) { die('Invalid JSONP callback'); @@ -365,20 +396,78 @@ if ($format =='json' && isset($_GET['callback'])) { debug("JSONP callback: $callback"); } +/////////////////////////////////////////////// +// Override default HTML parser? +/////////////////////////////////////////////// +$parser = null; +if ($options->allow_parser_override && isset($_REQUEST['parser']) && in_array($_REQUEST['parser'], $options->allowed_parsers)) { + $parser = $_REQUEST['parser']; +} + +/////////////////////////////////////////////// +// Use proxy? +/////////////////////////////////////////////// +$proxy = false; +if (!empty($options->proxy_servers)) { + if (isset($_REQUEST['proxy'])) { + // We're choosing proxy based on &proxy value (unless it's not allowed...) 
+ if (!$options->allow_proxy_override) die('Proxy overriding is disabled.'); + $proxy = $_REQUEST['proxy']; + if ($proxy === '0') { + $proxy = false; + } elseif ($proxy === '1') { + $proxy = true; // random + } + } else { + // We'll use proxy based on config setting + $proxy = $options->proxy; + } + // Is it a valid value (false, true, or one of the proxies in config) + if ($proxy !== false && $proxy !== true && !in_array($proxy, array_keys($options->proxy_servers))) { + die('Proxy not recognised.'); + } + if ($proxy === false) { + debug('Proxy will not be used'); + } else { + if ($proxy === true) { + $proxy = array_rand($options->proxy_servers); + } + if (is_string($options->proxy_servers[$proxy]) && $options->proxy_servers[$proxy] === 'direct') { + debug('Proxy will not be used'); + $proxy = false; + } else { + debug('Proxy '.$proxy.' will be used.'); + $proxy = $options->proxy_servers[$proxy]; + } + } +} + ////////////////////////////////// // Enable Cross-Origin Resource Sharing (CORS) ////////////////////////////////// if ($options->cors) header('Access-Control-Allow-Origin: *'); +////////////////////////////////// +// Has the HTML been given in the request? +////////////////////////////////// +if (isset($_REQUEST['inputhtml']) && _FF_FTR_MODE == 'simple') { + // disable multi-page processing (what we have is what we have) + $options->singlepage = false; + $options->multipage = false; + // disable disk caching + $options->caching = false; +} + ////////////////////////////////// // Check for cached copy ////////////////////////////////// if ($options->caching) { debug('Caching is enabled...'); - $cache_id = md5($max.$url.(int)$valid_key.$links.(int)$favour_feed_titles.(int)$options->content.(int)$options->summary.(int)$xss_filter.(int)$exclude_on_fail.$format.$detect_language.(int)isset($_GET['pubsub'])); + $cache_id = md5($max.$url.(int)$valid_key.$links.(int)$favour_feed_titles.(int)$options->content.(int)$options->summary. 
+ (int)$xss_filter.(int)$exclude_on_fail.$format.$detect_language.$parser._FF_FTR_MODE); $check_cache = true; if ($options->apc && $options->smart_cache) { - apc_add("cache.$cache_id", 0, 10*60); + apc_add("cache.$cache_id", 0, $options->cache_time*60); $apc_cache_hits = (int)apc_fetch("cache.$cache_id"); $check_cache = ($apc_cache_hits >= 2); apc_inc("cache.$cache_id"); @@ -417,20 +506,37 @@ if ($options->caching) { } ////////////////////////////////// -// Set Expires header +// Set cache header ////////////////////////////////// if (!$debug_mode) { - header('Expires: ' . gmdate('D, d M Y H:i:s', time()+(60*10)) . ' GMT'); + if ($options->cache_time) { + header('Cache-Control: public, max-age='.($options->cache_time*60)); + header('Expires: '.gmdate('D, d M Y H:i:s', time()+($options->cache_time*60)).' GMT'); + } } ////////////////////////////////// // Set up HTTP agent ////////////////////////////////// -$http = new HumbleHttpAgent(); -$http->debug = $debug_mode; -$http->userAgentMap = $options->user_agents; -$http->headerOnlyTypes = array_keys($options->content_type_exc); -$http->rewriteUrls = $options->rewrite_url; +if (isset($_REQUEST['inputhtml']) && _FF_FTR_MODE == 'simple') { + // the user has supplied the HTML, so we use the Dummy agent with + // the given HTML (it will always return this HTML) + $http = new HumbleHttpAgentDummy($_REQUEST['inputhtml']); +} else { + $_req_options = null; + if ($proxy !== false) { + $_req_options = array('proxyhost' => $proxy['host']); + if (isset($proxy['auth'])) { + $_req_options['proxyauth'] = $proxy['auth']; + } + } + $http = new HumbleHttpAgent($_req_options); + $http->debug = $debug_mode; + $http->userAgentMap = $options->user_agents; + $http->headerOnlyTypes = array_keys($options->content_type_exc); + $http->rewriteUrls = $options->rewrite_url; + unset($_req_options); +} ////////////////////////////////// // Set up Content Extractor @@ -441,6 +547,7 @@ SiteConfig::$debug = $debug_mode; 
SiteConfig::use_apc($options->apc); $extractor->fingerprints = $options->fingerprints; $extractor->allowedParsers = $options->allowed_parsers; +$extractor->parserOverride = $parser; //////////////////////////////// // Get RSS/Atom feed @@ -497,6 +604,7 @@ if ($html_only || !$result) { public function get_language() { return false; } public function get_image_url() { return false; } public function get_items($start=0, $max=1) { return array(0=>$this->item); } + public function get_channel_tags($namespace='', $tag='') { return null; } } class DummySingleItem { public $url; @@ -518,14 +626,16 @@ if ($html_only || !$result) { // Create full-text feed //////////////////////////////////////////// $output = new FeedWriter(); +if (_FF_FTR_MODE === 'simple') $output->enableSimpleJson(); $output->setTitle(strip_tags($feed->get_title())); $output->setDescription(strip_tags($feed->get_description())); $output->setXsl('css/feed.xsl'); // Chrome uses this, most browsers ignore it -if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment - $output->addHub('http://fivefilters.superfeedr.com/'); - $output->addHub('http://pubsubhubbub.appspot.com/'); - $output->setSelf('http://'.$_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI']); +$ttl = $feed->get_channel_tags(SIMPLEPIE_NAMESPACE_RSS_20, 'ttl'); +if ($ttl !== null) { + $ttl = (int)$ttl[0]['data']; + $output->setTtl($ttl); } +//$output->setSelf('http://'.$_SERVER['HTTP_HOST'].$_SERVER['REQUEST_URI']); $output->setLink($feed->get_link()); // Google Reader uses this for pulling in favicons if ($img_url = $feed->get_image_url()) { $output->setImage($feed->get_title(), $feed->get_link(), $img_url); @@ -573,23 +683,19 @@ foreach ($items as $key => $item) { } $newitem = $output->createNewItem(); $newitem->setTitle($feed_item_title); - if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment - if ($permalink !== false) { - 
$newitem->setLink('http://fivefilters.org/content-only/redirect.php?url='.urlencode($permalink)); - } else { - $newitem->setLink('http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink())); - } + if ($permalink !== false) { + $newitem->setLink($permalink); } else { - if ($permalink !== false) { - $newitem->setLink($permalink); - } else { - $newitem->setLink($item->get_permalink()); - } + $newitem->setLink($item->get_permalink()); } - //if ($permalink && ($response = $http->get($permalink, true)) && $response['status_code'] < 300) { - // Allowing error codes - some sites return correct content with error status - // e.g. prospectmagazine.co.uk returns 403 - if ($permalink && ($response = $http->get($permalink, true)) && ($response['status_code'] < 300 || $response['status_code'] > 400)) { + // Status codes to accept (200 range) + // Some sites might return correct content with error status codes + // e.g. prospectmagazine.co.uk returns 403 - in some earlier versions of FTR we accepted a wider range of status codes + // to allow for such cases: + //if ($permalink && ($response = $http->get($permalink, true)) && ($response['status_code'] < 300 || $response['status_code'] > 400)) { + // With the introduction of proxy support in 3.3, we're limiting range of acceptable status codes to avoid proxy + // errors being treated as valid responses. + if ($permalink && ($response = $http->get($permalink, true)) && ($response['status_code'] < 300)) { $effective_url = $response['effective_url']; if (!url_allowed($effective_url)) continue; // check if action defined for returned Content-Type @@ -612,9 +718,16 @@ foreach ($items as $key => $item) { // remove strange things $html = str_replace('', '', $html); $html = convert_to_utf8($html, $response['headers']); + // if user has asked to see raw HTML from remote server, show it and exit. 
+ if ($debug_show_raw_html) { + debug("Here are the HTTP response headers from the remote server:"); + echo $response['headers']; + debug("Here's the raw HTML (after attempted UTF-8 conversion):"); + die($html); + } // check site config for single page URL - fetch it if found $is_single_page = false; - if ($single_page_response = getSinglePage($item, $html, $effective_url)) { + if ($options->singlepage && ($single_page_response = getSinglePage($item, $html, $effective_url))) { $is_single_page = true; $effective_url = $single_page_response['effective_url']; // check if action defined for returned Content-Type @@ -647,6 +760,11 @@ foreach ($items as $key => $item) { debug('Attempting to extract content'); $extract_result = $extractor->process($html, $effective_url); $readability = $extractor->readability; + // if user has asked to see parsed HTML, show it and exit. + if ($debug_show_parsed_html) { + debug("Here's the full HTML after it's been parsed by Full-Text RSS:"); + die($readability->dom->saveXML($readability->dom->documentElement)); + } $content_block = ($extract_result) ? $extractor->getContent() : null; $extracted_title = ($extract_result) ? $extractor->getTitle() : ''; // Deal with multi-page articles @@ -731,6 +849,14 @@ foreach ($items as $key => $item) { if (($links == 'footnotes') && (strpos($effective_url, 'wikipedia.org') === false)) { $readability->addFootnotes($content_block); } + // normalise + $content_block->normalize(); + // remove empty text nodes + foreach ($content_block->childNodes as $_n) { + if ($_n->nodeType === XML_TEXT_NODE && trim($_n->textContent) == '') { + $content_block->removeChild($_n); + } + } // remove nesting:

    test

    =

    test

    while ($content_block->childNodes->length == 1 && $content_block->firstChild->nodeType === XML_ELEMENT_NODE) { // only follow these tag names @@ -740,8 +866,10 @@ foreach ($items as $key => $item) { } // convert content block to HTML string // Need to preserve things like body: //img[@id='feature'] - if (in_array(strtolower($content_block->tagName), array('div', 'article', 'section', 'header', 'footer'))) { + if (in_array(strtolower($content_block->tagName), array('div', 'article', 'section', 'header', 'footer', 'li', 'td'))) { $html = $content_block->innerHTML; + //} elseif (in_array(strtolower($content_block->tagName), array('td', 'li'))) { + // $html = '
    '.$content_block->innerHTML.'
    '; } else { $html = $content_block->ownerDocument->saveXML($content_block); // essentially outerHTML } @@ -758,11 +886,7 @@ foreach ($items as $key => $item) { } } - if ($valid_key && isset($_GET['pubsub'])) { // used only on fivefilters.org at the moment - $newitem->addElement('guid', 'http://fivefilters.org/content-only/redirect.php?url='.urlencode($item->get_permalink()), array('isPermaLink'=>'false')); - } else { - $newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true')); - } + $newitem->addElement('guid', $item->get_permalink(), array('isPermaLink'=>'true')); // filter xss? if ($xss_filter) { @@ -935,16 +1059,20 @@ if (!$debug_mode) { // apc purge code adapted from from http://www.thimbleopensource.com/tutorials-snippets/php-apc-expunge-script $_apc_data = apc_cache_info('user'); foreach ($_apc_data['cache_list'] as $_apc_item) { - if ($_apc_item['ttl'] > 0 && ($_apc_item['ttl'] + $_apc_item['creation_time'] < time())) { - apc_delete($_apc_item['info']); - } + // APCu keys incompatible with original APC keys, apparently fixed in newer versions, but not in 4.0.4 + // So let's look for those keys and fix here (ctime -> creation_time, key -> info). 
+ if (isset($_apc_item['ctime'])) $_apc_item['creation_time'] = $_apc_item['ctime']; + if (isset($_apc_item['key'])) $_apc_item['info'] = $_apc_item['key']; + if ($_apc_item['ttl'] > 0 && ($_apc_item['ttl'] + $_apc_item['creation_time'] < time())) { + apc_delete($_apc_item['info']); + } } } } } if ($add_to_cache) { ob_start(); - $output->genarateFeed(); + $output->generateFeed(); $output = ob_get_contents(); ob_end_clean(); if ($html_only && $item_count == 0) { @@ -955,7 +1083,7 @@ if (!$debug_mode) { } echo $output; } else { - $output->genarateFeed(); + $output->generateFeed(); } if ($callback) echo ');'; } @@ -1022,8 +1150,7 @@ function url_allowed($url) { // (uses HTTP headers and HTML to find encoding) // adapted from http://stackoverflow.com/questions/910793/php-detect-encoding-and-make-everything-utf-8 ////////////////////////////////////////////// -function convert_to_utf8($html, $header=null) -{ +function convert_to_utf8($html, $header=null) { $encoding = null; if ($html || $header) { if (is_array($header)) $header = implode("\n", $header); @@ -1245,7 +1372,7 @@ function get_cache() { static $cache = null; if ($cache === null) { $frontendOptions = array( - 'lifetime' => 10*60, // cache lifetime of 10 minutes + 'lifetime' => $options->cache_time*60, // cache lifetime 'automatic_serialization' => false, 'write_control' => false, 'automatic_cleaning_factor' => $options->cache_cleanup, diff --git a/site_config/custom/index.php b/site_config/custom/index.php index a3d5f73..76ca8b3 100644 --- a/site_config/custom/index.php +++ b/site_config/custom/index.php @@ -1,3 +1,2 @@ \ No newline at end of file +// this is here to prevent directory listing over the web \ No newline at end of file diff --git a/site_config/standard/.wikipedia.org.txt b/site_config/standard/.wikipedia.org.txt index 8b98ae4..13bccad 100644 --- a/site_config/standard/.wikipedia.org.txt +++ b/site_config/standard/.wikipedia.org.txt @@ -3,17 +3,19 @@ body: //div[@id = 'bodyContent'] 
strip_id_or_class: editsection #strip_id_or_class: toc strip_id_or_class: vertical-navbox -strip: //table[@id='toc'] +strip: //*[@id='toc'] strip: //div[@id='catlinks'] strip: //div[@id='jump-to-nav'] strip: //div[@class='thumbcaption']//div[@class='magnify'] strip: //table[@class='navbox'] -strip: //table[contains(@class, 'infobox')] +#strip: //table[contains(@class, 'infobox')] strip: //div[@class='dablink'] strip: //div[@id='contentSub'] strip: //table[contains(@class, 'metadata')] strip: //*[contains(@class, 'noprint')] -strip: //span[@title='pronunciation:'] +strip: //span[@class='noexcerpt'] prune: no tidy: no -test_url: http://en.wikipedia.org/wiki/Christopher_Lloyd \ No newline at end of file +test_url: http://en.wikipedia.org/wiki/Christopher_Lloyd +test_url: https://en.wikipedia.org/wiki/Ronnie_James_Dio +test_url: https://en.wikipedia.org/wiki/Metallica \ No newline at end of file diff --git a/site_config/standard/README.md b/site_config/standard/README.md new file mode 100644 index 0000000..d44ed4e --- /dev/null +++ b/site_config/standard/README.md @@ -0,0 +1,36 @@ +Full-Text RSS site config files +================ + +[Full-Text RSS](http://fivefilters.org/content-only/), our article extraction tool, makes use of site-specific extraction rules to improve results. Each time a URL is processed, it checks to see if there are extraction rules for the site being processed. If there are no site patterns, it tries to detect the content block automatically. + +This repository contains the site config files we use in Full-Text RSS. + +### Contributing changes + +We chose GitHub for this set of files because they offer one feature which we hope will make contributing changes easier: [file editing](https://github.com/blog/844-forking-with-the-edit-button) through the web interface. + +You can now make changes to any of our site config files and request that your changes be pulled into the main set we maintain. 
This is what GitHub calls the Fork and Pull model: + +> The Fork & Pull Model lets anyone fork an existing repository and push changes to their personal fork without requiring access be granted to the source repository. The changes must then be pulled into the source repository by the project maintainer. This model reduces the amount of friction for new contributors and is popular with open source projects because it allows people to work independently without upfront coordination. + +When we receive a pull request we'll review the changes and if everything's okay we'll update our copy. + +If a site is not in our set, you can create a file for it in the same way. See [Creating files on GitHub](https://github.com/blog/1327-creating-files-on-github). + +### How to write a site config file + +Please see our [help page](http://help.fivefilters.org/customer/portal/articles/223153-site-patterns) for a brief guide. We hope to have some tutorials up soon. + +### Instapaper + +When we introduced site patterns, we chose to adopt the [same format](http://blog.instapaper.com/post/730281947) used by Instapaper. This allows us to make use of the existing extraction rules contributed by Instapaper users. + +Marco, Instapaper's creator, graciously opened up the database of contributions to everyone: + +> And, recognizing that your efforts could be useful to a wide range of other tools and services, I'll make the list of all of these site-specific configurations available to the public, free, with no strings attached. + +Most of the extraction rules in our set are borrowed from Instapaper. You can see the list maintained by Instapaper at [instapaper.com/bodytext/](http://instapaper.com/bodytext/) (login required). + +### Testing site config files + +Currently you will have to have a copy of Full-Text RSS to test changes to the site config files. In the future we will try to make this process easier. 
diff --git a/site_config/standard/version.txt b/site_config/standard/version.txt index eaf01eb..8e686ee 100644 --- a/site_config/standard/version.txt +++ b/site_config/standard/version.txt @@ -1 +1 @@ -2013-05-12T22:53:07Z \ No newline at end of file +2014-05-05T08:36:15Z \ No newline at end of file