<?php
/**
* IRI parser / serialiser / normaliser
*
* Copyright (c) 2007-2010, Geoffrey Sneddon and Steve Minutillo.
* All rights reserved .
*
* Redistribution and use in source and binary forms , with or without
* modification , are permitted provided that the following conditions are met :
*
* * Redistributions of source code must retain the above copyright notice ,
* this list of conditions and the following disclaimer .
*
* * Redistributions in binary form must reproduce the above copyright notice ,
* this list of conditions and the following disclaimer in the documentation
* and / or other materials provided with the distribution .
*
* * Neither the name of the SimplePie Team nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission .
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS " AS IS "
* AND ANY EXPRESS OR IMPLIED WARRANTIES , INCLUDING , BUT NOT LIMITED TO , THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED . IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT , INDIRECT , INCIDENTAL , SPECIAL , EXEMPLARY , OR
* CONSEQUENTIAL DAMAGES ( INCLUDING , BUT NOT LIMITED TO , PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE , DATA , OR PROFITS ; OR BUSINESS
* INTERRUPTION ) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY , WHETHER IN
* CONTRACT , STRICT LIABILITY , OR TORT ( INCLUDING NEGLIGENCE OR OTHERWISE )
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE , EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE .
*
* @ package IRI
* @ author Geoffrey Sneddon
* @ author Steve Minutillo
* @ copyright 2007 - 2009 Geoffrey Sneddon and Steve Minutillo
* @ license http :// www . opensource . org / licenses / bsd - license . php
* @ link http :// hg . gsnedders . com / iri /
*
* @ todo Per - scheme validation
*/
class IRI
{
	/**
	 * Scheme (stored lowercased), or null when the IRI has none
	 *
	 * @var string|null
	 */
	private $scheme = null;

	/**
	 * User information, or null when absent
	 *
	 * @var string|null
	 */
	private $iuserinfo = null;

	/**
	 * Host, or null when absent
	 *
	 * @var string|null
	 */
	private $ihost = null;

	/**
	 * Port (stored as an integer by set_port()), or null when absent
	 *
	 * @var int|null
	 */
	private $port = null;

	/**
	 * Path; the empty string when absent (never null)
	 *
	 * @var string
	 */
	private $ipath = '';

	/**
	 * Query, or null when absent
	 *
	 * @var string|null
	 */
	private $iquery = null;

	/**
	 * Fragment, or null when absent
	 *
	 * @var string|null
	 */
	private $ifragment = null;

	/**
	 * Normalization database
	 *
	 * Each key is the scheme, each value is an array with each key as the IRI
	 * part and value as the default value for that part.
	 *
	 * @var array
	 */
	private $normalization = array(
		'acap' => array(
			'port' => 674
		),
		'dict' => array(
			'port' => 2628
		),
		'file' => array(
			'ihost' => 'localhost'
		),
		'http' => array(
			'port' => 80,
			'ipath' => '/'
		),
		'https' => array(
			'port' => 443,
			'ipath' => '/'
		),
	);

	/**
	 * Return the entire IRI when the object is used as a string
	 *
	 * get_iri() returns false for an invalid IRI; the cast guarantees that
	 * a string (the empty string in that case) is always returned, as PHP
	 * requires of __toString().
	 *
	 * @return string
	 */
	public function __toString()
	{
		return (string) $this->get_iri();
	}

	/**
	 * Overload __set() to provide access via properties
	 *
	 * Dispatches to the matching set_* method. The "i"-prefixed names
	 * (iauthority, iuserinfo, ihost, ipath, iquery, ifragment) are mapped to
	 * the setter without the prefix. Any other name is silently ignored.
	 *
	 * @param string $name Property name
	 * @param mixed $value Property value
	 * @return void
	 */
	public function __set($name, $value)
	{
		if (method_exists($this, 'set_' . $name))
		{
			call_user_func(array($this, 'set_' . $name), $value);
		}
		elseif (
			$name === 'iauthority'
			|| $name === 'iuserinfo'
			|| $name === 'ihost'
			|| $name === 'ipath'
			|| $name === 'iquery'
			|| $name === 'ifragment'
		)
		{
			call_user_func(array($this, 'set_' . substr($name, 1)), $value);
		}
	}

	/**
	 * Overload __get() to provide access via properties
	 *
	 * When the stored value is null and the current scheme has a default for
	 * that part in the normalization database, the default is returned.
	 *
	 * @param string $name Property name
	 * @return mixed
	 */
	public function __get($name)
	{
		if (
			$name === 'iri' ||
			$name === 'uri' ||
			$name === 'iauthority' ||
			$name === 'authority'
		)
		{
			$return = $this->{"get_$name"}();
		}
		// property_exists() rather than isset(): a declared part whose value
		// is null must not be reported as an undefined property.
		elseif (property_exists($this, $name))
		{
			$return = $this->$name;
		}
		else
		{
			trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
			$return = null;
		}

		if ($return === null && $this->scheme !== null && isset($this->normalization[$this->scheme][$name]))
		{
			return $this->normalization[$this->scheme][$name];
		}
		return $return;
	}

	/**
	 * Overload __isset() to provide access via properties
	 *
	 * @param string $name Property name
	 * @return bool True if a get_* method exists for the name or the
	 *              property holds a non-null value
	 */
	public function __isset($name)
	{
		return method_exists($this, 'get_' . $name) || isset($this->$name);
	}

	/**
	 * Overload __unset() to provide access via properties
	 *
	 * Calls the matching set_* method with an empty string; names without
	 * such a setter are ignored.
	 *
	 * @param string $name Property name
	 * @return void
	 */
	public function __unset($name)
	{
		if (method_exists($this, 'set_' . $name))
		{
			call_user_func(array($this, 'set_' . $name), '');
		}
	}

	/**
	 * Create a new IRI object, from a specified string
	 *
	 * @param string|null $iri IRI to parse; null leaves every part unset
	 */
	public function __construct($iri = null)
	{
		$this->set_iri($iri);
	}

	/**
	 * Create a new IRI object by resolving a relative IRI
	 *
	 * Returns false if $relative is invalid, or if $base is not absolute or
	 * is invalid; otherwise an IRI.
	 *
	 * @param IRI|string $base (Absolute) Base IRI
	 * @param IRI|string $relative Relative IRI
	 * @return IRI|false
	 */
	public static function absolutize($base, $relative)
	{
		if (!($relative instanceof IRI))
		{
			$relative = new IRI($relative);
		}
		if (!$relative->is_valid())
		{
			return false;
		}
		// A reference that already has a scheme is absolute: nothing to resolve.
		if ($relative->scheme !== null)
		{
			return clone $relative;
		}

		if (!($base instanceof IRI))
		{
			$base = new IRI($base);
		}
		if ($base->scheme === null || !$base->is_valid())
		{
			return false;
		}

		if ($relative->get_iri() === '')
		{
			// Empty reference: the base itself, minus its fragment.
			$target = clone $base;
			$target->ifragment = null;
		}
		elseif ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
		{
			// Network-path reference: only the scheme is inherited. The path
			// was stored while the reference had no scheme, so its dot
			// segments have not been removed yet (RFC 3986, section 5.2.2).
			$target = clone $relative;
			$target->scheme = $base->scheme;
			$target->ipath = $target->remove_dot_segments($target->ipath);
		}
		else
		{
			$target = new IRI;
			$target->scheme = $base->scheme;
			$target->iuserinfo = $base->iuserinfo;
			$target->ihost = $base->ihost;
			$target->port = $base->port;
			if ($relative->ipath !== '')
			{
				if ($relative->ipath[0] === '/')
				{
					$target->ipath = $relative->ipath;
				}
				elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '')
				{
					$target->ipath = '/' . $relative->ipath;
				}
				elseif (($last_segment = strrpos($base->ipath, '/')) !== false)
				{
					// Merge: everything up to the base's last "/" plus the reference path.
					$target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
				}
				else
				{
					$target->ipath = $relative->ipath;
				}
				$target->ipath = $target->remove_dot_segments($target->ipath);
				$target->iquery = $relative->iquery;
			}
			else
			{
				$target->ipath = $base->ipath;
				if ($relative->iquery !== null)
				{
					$target->iquery = $relative->iquery;
				}
				elseif ($base->iquery !== null)
				{
					$target->iquery = $base->iquery;
				}
			}
			$target->ifragment = $relative->ifragment;
		}

		$target->scheme_normalization();
		return $target;
	}

	/**
	 * Create a new IRI object by creating a relative IRI from two IRIs
	 *
	 * NOTE(review): not implemented - the body is empty, so calling this
	 * currently yields null rather than an IRI.
	 *
	 * @param IRI $base Base IRI
	 * @param IRI $destination Destination IRI
	 * @return IRI
	 */
	public static function build_relative(IRI $base, IRI $destination)
	{
	}

	/**
	 * Parse an IRI into scheme/authority/path/query/fragment segments
	 *
	 * Absent scheme, authority, query and fragment are reported as null; an
	 * absent path is reported as the empty string.
	 *
	 * @param string $iri
	 * @return array PCRE match array with the keys 'scheme', 'authority',
	 *               'path', 'query' and 'fragment' filled in
	 */
	private function parse_iri($iri)
	{
		$iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
		// The "s" modifier lets the fragment's ".*" span embedded newlines;
		// without it such input fails to match and reaches the fatal branch.
		if (preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/s', $iri, $match))
		{
			// Groups 1, 3, 6 and 8 include the delimiter (":", "//", "?",
			// "#"), which distinguishes "absent" from "present but empty".
			if ($match[1] === '')
			{
				$match['scheme'] = null;
			}
			if (!isset($match[3]) || $match[3] === '')
			{
				$match['authority'] = null;
			}
			if (!isset($match[5]))
			{
				$match['path'] = '';
			}
			if (!isset($match[6]) || $match[6] === '')
			{
				$match['query'] = null;
			}
			if (!isset($match[8]) || $match[8] === '')
			{
				$match['fragment'] = null;
			}
			return $match;
		}
		else
		{
			trigger_error('This should never happen', E_USER_ERROR);
			die;
		}
	}

	/**
	 * Remove dot segments from a path
	 *
	 * The lettered comments follow the steps of the remove_dot_segments
	 * algorithm of RFC 3986, section 5.2.4.
	 *
	 * @param string $input
	 * @return string
	 */
	private function remove_dot_segments($input)
	{
		$output = '';
		while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
		{
			// A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
			if (strpos($input, '../') === 0)
			{
				$input = substr($input, 3);
			}
			elseif (strpos($input, './') === 0)
			{
				$input = substr($input, 2);
			}
			// B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
			elseif (strpos($input, '/./') === 0)
			{
				$input = substr($input, 2);
			}
			elseif ($input === '/.')
			{
				$input = '/';
			}
			// C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
			elseif (strpos($input, '/../') === 0)
			{
				$input = substr($input, 3);
				// No "/" in the output means the whole output is dropped (offset 0).
				$output = substr_replace($output, '', (int) strrpos($output, '/'));
			}
			elseif ($input === '/..')
			{
				$input = '/';
				$output = substr_replace($output, '', (int) strrpos($output, '/'));
			}
			// D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
			elseif ($input === '.' || $input === '..')
			{
				$input = '';
			}
			// E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
			elseif (($pos = strpos($input, '/', 1)) !== false)
			{
				$output .= substr($input, 0, $pos);
				$input = substr_replace($input, '', 0, $pos);
			}
			else
			{
				$output .= $input;
				$input = '';
			}
		}
		return $output . $input;
	}

	/**
	 * Replace invalid characters with percent encoding
	 *
	 * @param string $string Input string
	 * @param string $extra_chars Valid characters not in iunreserved or
	 *                            iprivate (this is ASCII-only)
	 * @param bool $iprivate Allow iprivate
	 * @return string
	 */
	private function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
	{
		// Normalize as many pct-encoded sections as possible
		$string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

		// Replace invalid percent characters
		$string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

		// Add unreserved and % to $extra_chars (the latter is safe because all
		// pct-encoded sections are now valid).
		$extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

		// Now replace any bytes that aren't allowed with their pct-encoded versions
		$position = 0;
		$strlen = strlen($string);
		while (($position += strspn($string, $extra_chars, $position)) < $strlen)
		{
			$value = ord($string[$position]);

			// Start position
			$start = $position;

			// By default we are valid
			$valid = true;

			// No one byte sequences are valid due to the while.
			// Two byte sequence:
			if (($value & 0xE0) === 0xC0)
			{
				$character = ($value & 0x1F) << 6;
				$length = 2;
				$remaining = 1;
			}
			// Three byte sequence:
			elseif (($value & 0xF0) === 0xE0)
			{
				$character = ($value & 0x0F) << 12;
				$length = 3;
				$remaining = 2;
			}
			// Four byte sequence:
			elseif (($value & 0xF8) === 0xF0)
			{
				$character = ($value & 0x07) << 18;
				$length = 4;
				$remaining = 3;
			}
			// Invalid byte:
			else
			{
				$valid = false;
				$length = 1;
				$remaining = 0;
			}

			if ($remaining)
			{
				if ($position + $length <= $strlen)
				{
					for ($position++; $remaining; $position++)
					{
						$value = ord($string[$position]);

						// Check that the byte is valid, then add it to the character:
						if (($value & 0xC0) === 0x80)
						{
							$character |= ($value & 0x3F) << (--$remaining * 6);
						}
						// If it is invalid, count the sequence as invalid and reprocess the current byte:
						else
						{
							$valid = false;
							$position--;
							break;
						}
					}
				}
				else
				{
					// Truncated sequence at the end of the string: everything
					// from $start to the last byte is invalid.
					$position = $strlen - 1;
					$valid = false;
				}
			}

			// Percent encode anything invalid or not in ucschar
			if (
				// Invalid sequences
				!$valid
				// Non-shortest form sequences are invalid
				|| $length > 1 && $character <= 0x7F
				|| $length > 2 && $character <= 0x7FF
				|| $length > 3 && $character <= 0xFFFF
				// Outside of range of ucschar codepoints
				// Noncharacters
				|| ($character & 0xFFFE) === 0xFFFE
				|| $character >= 0xFDD0 && $character <= 0xFDEF
				|| (
					// Everything else not in ucschar
					$character > 0xD7FF && $character < 0xF900
					|| $character < 0xA0
					|| $character > 0xEFFFD
				)
				&& (
					// Everything not in iprivate, if it applies
					!$iprivate
					|| $character < 0xE000
					|| $character > 0x10FFFD
				)
			)
			{
				// If we were a character, pretend we weren't, but rather an error.
				// ($position is one past a fully decoded character; step back
				// so it names the character's last byte.)
				if ($valid)
				{
					$position--;
				}

				// Each byte grows from one character to three ("%XX"), so the
				// cursor, the end marker and the length all shift by two.
				for ($j = $start; $j <= $position; $j++)
				{
					$string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
					$j += 2;
					$position += 2;
					$strlen += 2;
				}
			}
		}

		return $string;
	}

	/**
	 * Callback function for preg_replace_callback.
	 *
	 * Removes sequences of percent encoded bytes that represent UTF-8
	 * encoded characters in iunreserved
	 *
	 * @param array $match PCRE match
	 * @return string Replacement
	 */
	private function remove_iunreserved_percent_encoded($match)
	{
		// As we just have valid percent encoded sequences we can just explode
		// and ignore the first member of the returned array (an empty string).
		$bytes = explode('%', $match[0]);

		// Initialize the new string (this is what will be returned) and that
		// there are no bytes remaining in the current sequence (unsurprising
		// at the first byte!).
		$string = '';
		$remaining = 0;

		// Loop over each and every byte, and set $value to its value
		for ($i = 1, $len = count($bytes); $i < $len; $i++)
		{
			$value = hexdec($bytes[$i]);

			// If we're the first byte of sequence:
			if (!$remaining)
			{
				// Start position
				$start = $i;

				// By default we are valid
				$valid = true;

				// One byte sequence:
				if ($value <= 0x7F)
				{
					$character = $value;
					$length = 1;
				}
				// Two byte sequence:
				elseif (($value & 0xE0) === 0xC0)
				{
					$character = ($value & 0x1F) << 6;
					$length = 2;
					$remaining = 1;
				}
				// Three byte sequence:
				elseif (($value & 0xF0) === 0xE0)
				{
					$character = ($value & 0x0F) << 12;
					$length = 3;
					$remaining = 2;
				}
				// Four byte sequence:
				elseif (($value & 0xF8) === 0xF0)
				{
					$character = ($value & 0x07) << 18;
					$length = 4;
					$remaining = 3;
				}
				// Invalid byte:
				else
				{
					$valid = false;
					$remaining = 0;
				}
			}
			// Continuation byte:
			else
			{
				// Check that the byte is valid, then add it to the character:
				if (($value & 0xC0) === 0x80)
				{
					$remaining--;
					$character |= ($value & 0x3F) << ($remaining * 6);
				}
				// If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
				else
				{
					$valid = false;
					$remaining = 0;
					$i--;
				}
			}

			// If we've reached the end of the current byte sequence, append it to the result
			if (!$remaining)
			{
				// Percent encode anything invalid or not in iunreserved
				if (
					// Invalid sequences
					!$valid
					// Non-shortest form sequences are invalid
					|| $length > 1 && $character <= 0x7F
					|| $length > 2 && $character <= 0x7FF
					|| $length > 3 && $character <= 0xFFFF
					// Outside of range of iunreserved codepoints
					|| $character < 0x2D
					|| $character > 0xEFFFD
					// Noncharacters
					|| ($character & 0xFFFE) === 0xFFFE
					|| $character >= 0xFDD0 && $character <= 0xFDEF
					// Everything else not in iunreserved (this is all BMP)
					|| $character === 0x2F
					|| $character > 0x39 && $character < 0x41
					|| $character > 0x5A && $character < 0x61
					|| $character > 0x7A && $character < 0x7E
					|| $character > 0x7E && $character < 0xA0
					|| $character > 0xD7FF && $character < 0xF900
				)
				{
					// Keep the escapes, normalizing the hex digits to uppercase.
					for ($j = $start; $j <= $i; $j++)
					{
						$string .= '%' . strtoupper($bytes[$j]);
					}
				}
				else
				{
					// Safe to decode: emit the raw bytes.
					for ($j = $start; $j <= $i; $j++)
					{
						$string .= chr(hexdec($bytes[$j]));
					}
				}
			}
		}

		// If we have any bytes left over they are invalid (i.e., we are
		// mid-way through a multi-byte sequence)
		if ($remaining)
		{
			for ($j = $start; $j < $len; $j++)
			{
				$string .= '%' . strtoupper($bytes[$j]);
			}
		}

		return $string;
	}

	/**
	 * Drop every part that equals the current scheme's default
	 *
	 * Parts listed in the normalization database for the current scheme are
	 * reset when they hold exactly the default value: the path is reset to
	 * the empty string, all other parts to null.
	 *
	 * @return void
	 */
	private function scheme_normalization()
	{
		if ($this->scheme === null || !isset($this->normalization[$this->scheme]))
		{
			return;
		}
		$defaults = $this->normalization[$this->scheme];

		foreach (array('iuserinfo', 'ihost', 'port', 'iquery', 'ifragment') as $part)
		{
			if (isset($defaults[$part]) && $this->$part === $defaults[$part])
			{
				$this->$part = null;
			}
		}
		if (isset($defaults['ipath']) && $this->ipath === $defaults['ipath'])
		{
			$this->ipath = '';
		}
	}

	/**
	 * Check if the object represents a valid IRI. This needs to be done on each
	 * call as some things change depending on another part of the IRI.
	 *
	 * @return bool
	 */
	public function is_valid()
	{
		if ($this->ipath === '')
		{
			return true;
		}

		$isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
		if ($isauthority)
		{
			// With an authority the path must begin with "/"
			// (RFC 3986, section 3.3).
			return $this->ipath[0] === '/';
		}

		// Without an authority the path cannot begin with "//": it would be
		// read back as an authority.
		if (substr($this->ipath, 0, 2) === '//')
		{
			return false;
		}

		// Without a scheme, a ":" in the first path segment would be read
		// back as a scheme delimiter.
		if ($this->scheme === null && strpos($this->ipath, ':') !== false)
		{
			$slash = strpos($this->ipath, '/');
			if ($slash === false || strpos($this->ipath, ':') < $slash)
			{
				return false;
			}
		}

		return true;
	}

	/**
	 * Set the entire IRI. Returns true on success, false on failure (if there
	 * are any invalid characters).
	 *
	 * Parts are set in order and processing stops at the first failing part.
	 * Passing null changes nothing and reports success.
	 *
	 * @param string $iri
	 * @return bool
	 */
	private function set_iri($iri)
	{
		if ($iri === null)
		{
			return true;
		}

		$parsed = $this->parse_iri((string) $iri);
		return $this->set_scheme($parsed['scheme'])
			&& $this->set_authority($parsed['authority'])
			&& $this->set_path($parsed['path'])
			&& $this->set_query($parsed['query'])
			&& $this->set_fragment($parsed['fragment']);
	}

	/**
	 * Set the scheme. Returns true on success, false on failure (if there are
	 * any invalid characters).
	 *
	 * @param string $scheme
	 * @return bool
	 */
	private function set_scheme($scheme)
	{
		if ($scheme === null)
		{
			$this->scheme = null;
		}
		elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme))
		{
			$this->scheme = null;
			return false;
		}
		else
		{
			$this->scheme = strtolower($scheme);
		}
		return true;
	}

	/**
	 * Set the authority. Returns true on success, false on failure (if there are
	 * any invalid characters).
	 *
	 * The authority is split at the last "@" into user information and the
	 * rest, and the rest at the first ":" after any "]" into host and port.
	 *
	 * @param string $authority
	 * @return bool
	 */
	private function set_authority($authority)
	{
		if ($authority === null)
		{
			$this->iuserinfo = null;
			$this->ihost = null;
			$this->port = null;
			return true;
		}

		$remaining = $authority;
		if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
		{
			$iuserinfo = substr($remaining, 0, $iuserinfo_end);
			$remaining = substr($remaining, $iuserinfo_end + 1);
		}
		else
		{
			$iuserinfo = null;
		}

		// Start looking for the port after "]" so the colons inside a
		// bracketed IP literal are skipped; no "]" means searching from 0.
		if (($port_start = strpos($remaining, ':', (int) strpos($remaining, ']'))) !== false)
		{
			// substr() yields false (before PHP 8) or '' (PHP 8+) when
			// nothing follows the colon; both mean "no port".
			$port = substr($remaining, $port_start + 1);
			if ($port === false || $port === '')
			{
				$port = null;
			}
			$remaining = substr($remaining, 0, $port_start);
		}
		else
		{
			$port = null;
		}

		return $this->set_userinfo($iuserinfo)
			&& $this->set_host($remaining)
			&& $this->set_port($port);
	}

	/**
	 * Set the iuserinfo.
	 *
	 * @param string $iuserinfo
	 * @return bool Always true
	 */
	private function set_userinfo($iuserinfo)
	{
		if ($iuserinfo === null)
		{
			$this->iuserinfo = null;
		}
		else
		{
			$this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
			$this->scheme_normalization();
		}
		return true;
	}

	/**
	 * Set the ihost. Returns true on success, false on failure (if there are
	 * any invalid characters).
	 *
	 * Bracketed hosts are checked and compressed through the external
	 * Net_IPv6 class; any other host is percent-encoded where needed and
	 * lowercased.
	 *
	 * @param string $ihost
	 * @return bool
	 */
	private function set_host($ihost)
	{
		if ($ihost === null)
		{
			$this->ihost = null;
			return true;
		}
		elseif (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']')
		{
			if (Net_IPv6::check_ipv6(substr($ihost, 1, -1)))
			{
				$this->ihost = '[' . Net_IPv6::compress(substr($ihost, 1, -1)) . ']';
			}
			else
			{
				$this->ihost = null;
				return false;
			}
		}
		else
		{
			$ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

			// Lowercase, but ignore pct-encoded sections (as they should
			// remain uppercase). This must be done after the previous step
			// as that can add unescaped characters.
			$position = 0;
			$strlen = strlen($ihost);
			while (($position += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $position)) < $strlen)
			{
				if ($ihost[$position] === '%')
				{
					// Skip the whole "%XX" triplet.
					$position += 3;
				}
				else
				{
					$ihost[$position] = strtolower($ihost[$position]);
					$position++;
				}
			}

			$this->ihost = $ihost;
		}

		$this->scheme_normalization();
		return true;
	}

	/**
	 * Set the port. Returns true on success, false on failure (if there are
	 * any invalid characters).
	 *
	 * Null and the empty string both mean "no port".
	 *
	 * @param string $port
	 * @return bool
	 */
	private function set_port($port)
	{
		if ($port === null || $port === '')
		{
			$this->port = null;
			return true;
		}
		elseif (strspn($port, '0123456789') === strlen($port))
		{
			$this->port = (int) $port;
			$this->scheme_normalization();
			return true;
		}
		else
		{
			$this->port = null;
			return false;
		}
	}

	/**
	 * Set the ipath.
	 *
	 * Dot segments are removed only when a scheme is currently set; without
	 * a scheme the path is stored as given (after percent-encoding).
	 *
	 * @param string $ipath
	 * @return bool Always true
	 */
	private function set_path($ipath)
	{
		$ipath = (string) $ipath;

		$valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
		if ($this->scheme !== null)
		{
			$this->ipath = $this->remove_dot_segments($valid);
		}
		else
		{
			$this->ipath = $valid;
		}

		$this->scheme_normalization();
		return true;
	}

	/**
	 * Set the iquery.
	 *
	 * @param string $iquery
	 * @return bool Always true
	 */
	private function set_query($iquery)
	{
		if ($iquery === null)
		{
			$this->iquery = null;
		}
		else
		{
			// The query is the only part where iprivate characters are allowed.
			$this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
			$this->scheme_normalization();
		}
		return true;
	}

	/**
	 * Set the ifragment.
	 *
	 * @param string $ifragment
	 * @return bool Always true
	 */
	private function set_fragment($ifragment)
	{
		if ($ifragment === null)
		{
			$this->ifragment = null;
		}
		else
		{
			$this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
			$this->scheme_normalization();
		}
		return true;
	}

	/**
	 * Convert an IRI to a URI (or parts thereof)
	 *
	 * Every byte in the range 0x80-0xFF is replaced by its percent-encoded
	 * form; ASCII bytes are left untouched.
	 *
	 * @param string $string
	 * @return string
	 */
	private function to_uri($string)
	{
		static $non_ascii;
		if (!$non_ascii)
		{
			$non_ascii = implode('', range("\x80", "\xFF"));
		}

		$position = 0;
		$strlen = strlen($string);
		while (($position += strcspn($string, $non_ascii, $position)) < $strlen)
		{
			$string = substr_replace($string, sprintf('%%%02X', ord($string[$position])), $position, 1);
			$position += 3;
			$strlen += 2;
		}

		return $string;
	}

	/**
	 * Get the complete IRI
	 *
	 * @return string|false The serialized IRI, or false if it is not valid
	 */
	private function get_iri()
	{
		if (!$this->is_valid())
		{
			return false;
		}

		$iri = '';
		if ($this->scheme !== null)
		{
			$iri .= $this->scheme . ':';
		}
		if (($iauthority = $this->get_iauthority()) !== null)
		{
			$iri .= '//' . $iauthority;
		}
		$iri .= $this->ipath;
		if ($this->iquery !== null)
		{
			$iri .= '?' . $this->iquery;
		}
		if ($this->ifragment !== null)
		{
			$iri .= '#' . $this->ifragment;
		}

		return $iri;
	}

	/**
	 * Get the complete URI
	 *
	 * @return string|false The IRI with non-ASCII bytes percent-encoded, or
	 *                      false if the IRI is not valid
	 */
	private function get_uri()
	{
		return $this->to_uri($this->get_iri());
	}

	/**
	 * Get the complete iauthority
	 *
	 * @return string|null Null when user information, host and port are all
	 *                     absent
	 */
	private function get_iauthority()
	{
		if ($this->iuserinfo === null && $this->ihost === null && $this->port === null)
		{
			return null;
		}

		$iauthority = '';
		if ($this->iuserinfo !== null)
		{
			$iauthority .= $this->iuserinfo . '@';
		}
		if ($this->ihost !== null)
		{
			$iauthority .= $this->ihost;
		}
		if ($this->port !== null)
		{
			$iauthority .= ':' . $this->port;
		}
		return $iauthority;
	}

	/**
	 * Get the complete authority
	 *
	 * @return string|null The iauthority with non-ASCII bytes
	 *                     percent-encoded, or null when there is none
	 */
	private function get_authority()
	{
		$iauthority = $this->get_iauthority();
		if (is_string($iauthority))
		{
			return $this->to_uri($iauthority);
		}
		return $iauthority;
	}
}