diff options
| -rw-r--r-- | phpBB/develop/regex_idn.php | 151 | ||||
| -rw-r--r-- | phpBB/includes/functions.php | 26 | ||||
| -rw-r--r-- | phpBB/includes/message_parser.php | 2 | ||||
| -rw-r--r-- | phpBB/phpbb/profilefields/type/type_url.php | 2 | ||||
| -rw-r--r-- | tests/functions/make_clickable_test.php | 84 | ||||
| -rw-r--r-- | tests/profilefields/type_url_test.php | 40 | ||||
| -rw-r--r-- | tests/regex/url_test.php | 2 | 
7 files changed, 294 insertions, 13 deletions
diff --git a/phpBB/develop/regex_idn.php b/phpBB/develop/regex_idn.php new file mode 100644 index 0000000000..d871695c50 --- /dev/null +++ b/phpBB/develop/regex_idn.php @@ -0,0 +1,151 @@ +<?php +// +// Security message: +// +// This script is potentially dangerous. +// Remove or comment the next line (die(".... ) to enable this script. +// Do NOT FORGET to either remove this script or disable it after you have used it. +// +die("Please read the first lines of this script for instructions on how to enable it"); + +// IP regular expressions + +$dec_octet = '(?:\d{1,2}|1\d\d|2[0-4]\d|25[0-5])'; +$h16 = '[\dA-F]{1,4}'; +$ipv4 = "(?:$dec_octet\.){3}$dec_octet"; +$ls32 = "(?:$h16:$h16|$ipv4)"; + +$ipv6_construct = array( +	array(false,	'',		'{6}',	$ls32), +	array(false,	'::',	'{0,5}', "(?:$h16(?::$h16)?|$ipv4)"), +	array('',		':',	'{4}',	$ls32), +	array('{1,2}',	':',	'{3}',	$ls32), +	array('{1,3}',	':',	'{2}',	$ls32), +	array('{1,4}',	':',	'',		$ls32), +	array('{1,5}',	':',	false,	$ls32), +	array('{1,6}',	':',	false,	$h16), +	array('{1,7}',	':',	false,	''), +	array(false, '::', false, '') +); + +$ipv6 = '(?:'; +foreach ($ipv6_construct as $ip_type) +{ +	$ipv6 .= '(?:'; +	if ($ip_type[0] !== false) +	{ +		$ipv6 .= "(?:$h16:)" . $ip_type[0]; +	} +	$ipv6 .= $ip_type[1]; +	if ($ip_type[2] !== false) +	{ +		$ipv6 .= "(?:$h16:)" . $ip_type[2]; +	} +	$ipv6 .= $ip_type[3] . ')|'; +} +$ipv6 = substr($ipv6, 0, -1) . ')'; + +echo 'IPv4: ' . $ipv4 . "<br /><br />\n\nIPv6: " . $ipv6 . "<br /><br />\n\n"; + +// URL regular expressions + +/* IDN2008 characters derivation +** http://unicode.org/faq/idn.html#33	- IDN FAQ: derivation of valid characters in terms of Unicode properties +** http://unicode.org/reports/tr46/		- Unicode Technical Standard #46. Unicode IDNA Compatibility Processing +** http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt	- Unicode Character Database +*/ +/* +** Remove Control Characters and Whitespace (as in IDNA2003) +*/ +$no_cc = '\p{C}\p{Z}'; +/* +** Remove Symbols, Punctuation, non-decimal Numbers, and Enclosing Marks +*/ +$no_symbol = '\p{S}\p{P}\p{Nl}\p{No}\p{Me}'; +/* +** Remove characters used for archaic Hangul (Korean) - \p{HST=L} and \p{HST=V} +** as per http://unicode.org/Public/UNIDATA/HangulSyllableType.txt +*/ +$no_hangul = '\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}'; +/* +** Remove three blocks of technical or archaic symbols. +*/ +$no_cdm = '\x{20D0}-\x{20FF}';						// \p{block=Combining_Diacritical_Marks_For_Symbols} +$no_musical = '\x{1D100}-\x{1D1FF}';					// \p{block=Musical_Symbols} +$no_ancient_greek_musical = '\x{1D200}-\x{1D24F}';	// \p{block=Ancient_Greek_Musical_Notation}	 +/* Remove certain exceptions: +** U+0640 ARABIC TATWEEL +** U+07FA NKO LAJANYALAN +** U+302E HANGUL SINGLE DOT TONE MARK +** U+302F HANGUL DOUBLE DOT TONE MARK +** U+3031 VERTICAL KANA REPEAT MARK +** U+3032 VERTICAL KANA REPEAT WITH VOICED SOUND MARK +** .. +** U+3035 VERTICAL KANA REPEAT MARK LOWER HALF +** U+303B VERTICAL IDEOGRAPHIC ITERATION MARK +*/ +$no_certain_exceptions = '\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}'; +/* Add certain exceptions: +** U+00B7 MIDDLE DOT +** U+0375 GREEK LOWER NUMERAL SIGN +** U+05F3 HEBREW PUNCTUATION GERESH +** U+05F4 HEBREW PUNCTUATION GERSHAYIM +** U+30FB KATAKANA MIDDLE DOT +** U+002D HYPHEN-MINUS +** U+06FD ARABIC SIGN SINDHI AMPERSAND +** U+06FE ARABIC SIGN SINDHI POSTPOSITION MEN +** U+0F0B TIBETAN MARK INTERSYLLABIC TSHEG +** U+3007 IDEOGRAPHIC NUMBER ZERO +*/ +$add_certain_exceptions = '\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}'; +/* Add special exceptions (Deviations): +** U+00DF LATIN SMALL LETTER SHARP S +** U+03C2 GREEK SMALL LETTER FINAL SIGMA +** U+200C ZERO WIDTH NON-JOINER +** U+200D ZERO WIDTH JOINER +*/ +$add_deviations = '\x{00DF}\x{03C2}\x{200C}\x{200D}'; + +// Concatenate remove/add regexes respectively +$remove_chars = "$no_cc$no_symbol$no_hangul$no_cdm$no_musical$no_ancient_greek_musical$no_certain_exceptions"; +$add_chars = "$add_certain_exceptions$add_deviations"; + +// Initialize inline mode +$inline = false; + +do +{ +	$inline = !$inline; + +	$pct_encoded = "%[\dA-F]{2}"; +	$unreserved = "$add_chars\pL0-9\-._~"; +	$sub_delims = ($inline) ? '!$&\'(*+,;=' : '!$&\'()*+,;='; +	$scheme = ($inline) ? '[a-z][a-z\d+]*': '[a-z][a-z\d+\-.]*' ; // avoid automatic parsing of "word" in "last word.http://..." +	$pchar = "(?:[^$remove_chars]*[$unreserved$sub_delims:@|]+|$pct_encoded)"; // rfc: no "|" + +	$reg_name = "(?:[^$remove_chars]*[$unreserved$sub_delims:@|]+|$pct_encoded)+"; // rfc: * instead of + and no "|" and no "@" and no ":" (included instead of userinfo) +	//$userinfo = "(?:(?:[$unreserved$sub_delims:]+|$pct_encoded))*"; +	$ipv4_simple = '[0-9.]+'; +	$ipv6_simple = '\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\]'; +	$host = "(?:$reg_name|$ipv4_simple|$ipv6_simple)"; +	$port = '\d*'; +	//$authority = "(?:$userinfo@)?$host(?::$port)?"; +	$authority = "$host(?::$port)?"; +	$segment = "$pchar*"; +	$path_abempty = "(?:/$segment)*"; +	$hier_part = "/{2}$authority$path_abempty"; +	$query = "(?:[^$remove_chars]*[$unreserved$sub_delims:@/?|]+|$pct_encoded)*"; // pchar | "/" | "?", rfc: no "|" +	$fragment = $query; + +	$url =  "$scheme:$hier_part(?:\?$query)?(?:\#$fragment)?"; +	echo (($inline) ? 'URL inline: ' : 'URL: ') . $url . "<br /><br />\n\n"; + +	// no scheme, shortened authority, but host has to start with www. +	$www_url =  "www\.$reg_name(?::$port)?$path_abempty(?:\?$query)?(?:\#$fragment)?"; +	echo (($inline) ? 'www.URL_inline: ' : 'www.URL: ') . $www_url . "<br /><br />\n\n"; + +	// no schema and no authority +	$relative_url = "$segment$path_abempty(?:\?$query)?(?:\#$fragment)?"; +	echo (($inline) ? 'relative URL inline: ' : 'relative URL: ') . $relative_url . "<br /><br />\n\n"; +} +while ($inline); diff --git a/phpBB/includes/functions.php b/phpBB/includes/functions.php index a016f8e62a..321394639b 100644 --- a/phpBB/includes/functions.php +++ b/phpBB/includes/functions.php @@ -3340,23 +3340,33 @@ function get_preg_expression($mode)  		break;  		case 'url': +			// generated with regex_idn.php file in the develop folder +			return "[a-z][a-z\d+\-.]*:/{2}(?:(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@|]+|%[\dA-F]{2})+|[0-9.]+|\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\])(?::\d*)?(?:/(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@|]+|%[\dA-F]{2})*)*(?:\?(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@/?|]+|%[\dA-F]{2})*)?(?:\#(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@/?|]+|%[\dA-F]{2})*)?"; +		break; +  		case 'url_inline': -			$inline = ($mode == 'url') ? ')' : ''; -			$scheme = ($mode == 'url') ? '[a-z\d+\-.]' : '[a-z\d+]'; // avoid automatic parsing of "word" in "last word.http://..." -			// generated with regex generation file in the develop folder -			return "[a-z]$scheme*:/{2}(?:(?:[a-z0-9\-._~!$&'($inline*+,;=:@|]+|%[\dA-F]{2})+|[0-9.]+|\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\])(?::\d*)?(?:/(?:[a-z0-9\-._~!$&'($inline*+,;=:@|]+|%[\dA-F]{2})*)*(?:\?(?:[a-z0-9\-._~!$&'($inline*+,;=:@/?|]+|%[\dA-F]{2})*)?(?:\#(?:[a-z0-9\-._~!$&'($inline*+,;=:@/?|]+|%[\dA-F]{2})*)?"; +			// generated with regex_idn.php file in the develop folder +			return "[a-z][a-z\d+]*:/{2}(?:(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@|]+|%[\dA-F]{2})+|[0-9.]+|\[[a-z0-9.]+:[a-z0-9.]+:[a-z0-9.:]+\])(?::\d*)?(?:/(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@|]+|%[\dA-F]{2})*)*(?:\?(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@/?|]+|%[\dA-F]{2})*)?(?:\#(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@/?|]+|%[\dA-F]{2})*)?";  		break;  		case 'www_url': +			// generated with regex_idn.php file in the develop folder +			return "www\.(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@|]+|%[\dA-F]{2})+(?::\d*)?(?:/(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@|]+|%[\dA-F]{2})*)*(?:\?(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@/?|]+|%[\dA-F]{2})*)?(?:\#(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@/?|]+|%[\dA-F]{2})*)?"; +		break; +  		case 'www_url_inline': -			$inline = ($mode == 'www_url') ? ')' : ''; -			return "www\.(?:[a-z0-9\-._~!$&'($inline*+,;=:@|]+|%[\dA-F]{2})+(?::\d*)?(?:/(?:[a-z0-9\-._~!$&'($inline*+,;=:@|]+|%[\dA-F]{2})*)*(?:\?(?:[a-z0-9\-._~!$&'($inline*+,;=:@/?|]+|%[\dA-F]{2})*)?(?:\#(?:[a-z0-9\-._~!$&'($inline*+,;=:@/?|]+|%[\dA-F]{2})*)?"; +			// generated with regex_idn.php file in the develop folder +			return "www\.(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@|]+|%[\dA-F]{2})+(?::\d*)?(?:/(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@|]+|%[\dA-F]{2})*)*(?:\?(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@/?|]+|%[\dA-F]{2})*)?(?:\#(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@/?|]+|%[\dA-F]{2})*)?";  		break;  		case 'relative_url': +			// generated with regex_idn.php file in the develop folder +			return "(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@|]+|%[\dA-F]{2})*(?:/(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@|]+|%[\dA-F]{2})*)*(?:\?(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@/?|]+|%[\dA-F]{2})*)?(?:\#(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'()*+,;=:@/?|]+|%[\dA-F]{2})*)?"; +		break; +  		case 'relative_url_inline': -			$inline = ($mode == 'relative_url') ? ')' : ''; -			return "(?:[a-z0-9\-._~!$&'($inline*+,;=:@|]+|%[\dA-F]{2})*(?:/(?:[a-z0-9\-._~!$&'($inline*+,;=:@|]+|%[\dA-F]{2})*)*(?:\?(?:[a-z0-9\-._~!$&'($inline*+,;=:@/?|]+|%[\dA-F]{2})*)?(?:\#(?:[a-z0-9\-._~!$&'($inline*+,;=:@/?|]+|%[\dA-F]{2})*)?"; +			// generated with regex_idn.php file in the develop folder +			return "(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@|]+|%[\dA-F]{2})*(?:/(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@|]+|%[\dA-F]{2})*)*(?:\?(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@/?|]+|%[\dA-F]{2})*)?(?:\#(?:[^\p{C}\p{Z}\p{S}\p{P}\p{Nl}\p{No}\p{Me}\x{1100}-\x{115F}\x{A960}-\x{A97C}\x{1160}-\x{11A7}\x{D7B0}-\x{D7C6}\x{20D0}-\x{20FF}\x{1D100}-\x{1D1FF}\x{1D200}-\x{1D24F}\x{0640}\x{07FA}\x{302E}\x{302F}\x{3031}-\x{3035}\x{303B}]*[\x{00B7}\x{0375}\x{05F3}\x{05F4}\x{30FB}\x{002D}\x{06FD}\x{06FE}\x{0F0B}\x{3007}\x{00DF}\x{03C2}\x{200C}\x{200D}\pL0-9\-._~!$&'(*+,;=:@/?|]+|%[\dA-F]{2})*)?";  		break;  		case 'table_prefix': diff --git a/phpBB/includes/message_parser.php b/phpBB/includes/message_parser.php index 9a67473cf9..04a2726d22 100644 --- a/phpBB/includes/message_parser.php +++ b/phpBB/includes/message_parser.php @@ -313,7 +313,7 @@ class bbcode_firstpass extends bbcode  		$in = str_replace(' ', '%20', $in);  		// Checking urls -		if (!preg_match('#^' . get_preg_expression('url') . '$#i', $in) && !preg_match('#^' . get_preg_expression('www_url') . '$#iu', $in)) +		if (!preg_match('#^' . get_preg_expression('url') . '$#iu', $in) && !preg_match('#^' . get_preg_expression('www_url') . '$#iu', $in))  		{  			return '[img]' . $in . '[/img]';  		} diff --git a/phpBB/phpbb/profilefields/type/type_url.php b/phpBB/phpbb/profilefields/type/type_url.php index bc8ac869d0..fe0bffd582 100644 --- a/phpBB/phpbb/profilefields/type/type_url.php +++ b/phpBB/phpbb/profilefields/type/type_url.php @@ -64,7 +64,7 @@ class type_url extends type_string  			return false;  		} -		if (!preg_match('#^' . get_preg_expression('url') . '$#i', $field_value)) +		if (!preg_match('#^' . get_preg_expression('url') . '$#iu', $field_value))  		{  			return $this->user->lang('FIELD_INVALID_URL', $this->get_field_name($field_data['lang_name']));  		} diff --git a/tests/functions/make_clickable_test.php b/tests/functions/make_clickable_test.php index e61cb2c30e..63beeb06b2 100644 --- a/tests/functions/make_clickable_test.php +++ b/tests/functions/make_clickable_test.php @@ -74,13 +74,77 @@ class phpbb_functions_make_clickable_test extends phpbb_test_case  				'http://www.phpbb.com/community/path/to/long/url/file.ext#section',  				'<!-- m --><a class="postlink" href="http://www.phpbb.com/community/path/to/long/url/file.ext#section">http://www.phpbb.com/community/path/to/ ... xt#section</a><!-- m -->'  			), +		); +	} -			// IDN is not parsed and returned as is -			array('http://домен.рф', 'http://домен.рф'), +	public function data_test_make_clickable_url_idn() +	{ +		return array( +			array( +				'http://www.täst.de/community/', +				'<!-- m --><a class="postlink" href="http://www.täst.de/community/">http://www.täst.de/community/</a><!-- m -->' +			), +			array( +				'http://www.täst.de/path/file.ext#section', +				'<!-- m --><a class="postlink" href="http://www.täst.de/path/file.ext#section">http://www.täst.de/path/file.ext#section</a><!-- m -->' +			), +			array( +				'ftp://ftp.täst.de/', +				'<!-- m --><a class="postlink" href="ftp://ftp.täst.de/">ftp://ftp.täst.de/</a><!-- m -->' +			), +			array( +				'sip://bantu@täst.de', +				'<!-- m --><a class="postlink" href="sip://bantu@täst.de">sip://bantu@täst.de</a><!-- m -->' +			), +			array( +				'www.täst.de/community/', +				'<!-- w --><a class="postlink" href="http://www.täst.de/community/">www.täst.de/community/</a><!-- w -->' +			), +			// Test appending punctuation mark to the URL +			array( +				'http://домен.рф/viewtopic.php?t=1!', +				'<!-- m --><a class="postlink" href="http://домен.рф/viewtopic.php?t=1">http://домен.рф/viewtopic.php?t=1</a><!-- m -->!' +			), +			array( +				'www.домен.рф/сообщество/?', +				'<!-- w --><a class="postlink" href="http://www.домен.рф/сообщество/">www.домен.рф/сообщество/</a><!-- w -->?' +			), +			// Test shortened text for URL > 55 characters long +			// URL text should be turned into: first 39 chars + ' ... ' + last 10 chars +			array( +				'http://www.домен.рф/сообщество/путь/по/длинной/ссылке/file.ext#section', +				'<!-- m --><a class="postlink" href="http://www.домен.рф/сообщество/путь/по/длинной/ссылке/file.ext#section">http://www.домен.рф/сообщество/путь/по/ ... xt#section</a><!-- m -->' +			), + +			// IDN with invalid characters shouldn't be parsed correctly (only 'valid' part) +			array( +				'http://www.täst╫.de', +				'<!-- m --><a class="postlink" href="http://www.täst">http://www.täst</a><!-- m -->╫.de' +			), +			// IDN in emails is unsupported yet  			array('почта@домен.рф', 'почта@домен.рф'),  		);  	} +	public function data_test_make_clickable_local_url_idn() +	{ +		return array( +			array( +				'http://www.домен.рф/viewtopic.php?t=1', +				'<!-- l --><a class="postlink-local" href="http://www.домен.рф/viewtopic.php?t=1">viewtopic.php?t=1</a><!-- l -->' +			), +			// Test appending punctuation mark to the URL +			array( +				'http://www.домен.рф/viewtopic.php?t=1!', +				'<!-- l --><a class="postlink-local" href="http://www.домен.рф/viewtopic.php?t=1">viewtopic.php?t=1</a><!-- l -->!' +			), +			array( +				'http://www.домен.рф/сообщество/?', +				'<!-- l --><a class="postlink-local" href="http://www.домен.рф/сообщество/">сообщество/</a><!-- l -->?' +			), +		); +	} +  	protected function setUp()  	{  		parent::setUp(); @@ -97,4 +161,20 @@ class phpbb_functions_make_clickable_test extends phpbb_test_case  	{  		$this->assertSame($expected, make_clickable($url));  	} + +	/** +	 * @dataProvider data_test_make_clickable_url_idn +	 */ +	public function test_urls_matching_idn($url, $expected) +	{ +		$this->assertSame($expected, make_clickable($url)); +	} + +	/** +	 * @dataProvider data_test_make_clickable_local_url_idn +	 */ +	public function test_local_urls_matching_idn($url, $expected) +	{ +		$this->assertSame($expected, make_clickable($url, "http://www.домен.рф")); +	}  } diff --git a/tests/profilefields/type_url_test.php b/tests/profilefields/type_url_test.php index 372c07418f..cc37f04f30 100644 --- a/tests/profilefields/type_url_test.php +++ b/tests/profilefields/type_url_test.php @@ -89,6 +89,32 @@ class phpbb_profilefield_type_url_test extends phpbb_test_case  				'FIELD_INVALID_URL-field',  				'Field should reject invalid URL having multi value parameters',  			), + +			// IDN url type profilefields +			array( +				'http://www.täst.de', +				array(), +				false, +				'Field should accept valid IDN', +			), +			array( +				'http://täst.de/index.html?param1=test¶m2=awesome', +				array(), +				false, +				'Field should accept valid IDN URL with params', +			), +			array( +				'http://домен.рф/index.html/тест/path?document=get', +				array(), +				false, +				'Field should accept valid IDN URL', +			), +			array( +				'http://домен.рф/index.html/тест/path?document[]=DocType%20test&document[]=AnotherDoc', +				array(), +				'FIELD_INVALID_URL-field', +				'Field should reject invalid IDN URL having multi value parameters', +			),  		);  	} @@ -119,6 +145,20 @@ class phpbb_profilefield_type_url_test extends phpbb_test_case  				'http://example.com',  				'Field should return correct raw value',  			), + +			// IDN tests +			array( +				'http://täst.de', +				array('field_show_novalue' => true), +				'http://täst.de', +				'Field should return the correct raw value', +			), +			array( +				'http://домен.рф', +				array('field_show_novalue' => false), +				'http://домен.рф', +				'Field should return correct raw value', +			),  		);  	} diff --git a/tests/regex/url_test.php b/tests/regex/url_test.php index d3487a9c16..e5d7c3256a 100644 --- a/tests/regex/url_test.php +++ b/tests/regex/url_test.php @@ -32,6 +32,6 @@ class phpbb_regex_url_test extends phpbb_test_case  	*/  	public function test_url($url, $expected)  	{ -		$this->assertEquals($expected, preg_match('#^' . get_preg_expression('url') . '$#i', $url)); +		$this->assertEquals($expected, preg_match('#^' . get_preg_expression('url') . '$#iu', $url));  	}  }  | 
