about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- phpBB/includes/functions.php 30
-rw-r--r-- tests/regex/censor_test.php 14
2 files changed, 27 insertions, 17 deletions
diff --git a/phpBB/includes/functions.php b/phpBB/includes/functions.php
index 5def593bd6..65b9f22d12 100644
--- a/phpBB/includes/functions.php
+++ b/phpBB/includes/functions.php
@@ -3432,30 +3432,26 @@ function get_preg_expression($mode)
* Generate regexp for naughty words censoring
* Depends on whether installed PHP version supports unicode properties
*
-* @param string $word word template to be replaced
+* @param string $word word template to be replaced
+* @param bool $use_unicode whether or not to take advantage of PCRE supporting unicode
*
* @return string $preg_expr regex to use with word censor
*/
-function get_censor_preg_expression($word)
+function get_censor_preg_expression($word, $use_unicode = true)
{
- static $unicode = null;
-
- if (empty($word))
- {
- return '';
- }
+ static $unicode_support = null;
// Check whether PHP version supports unicode properties
- if (is_null($unicode))
+ if (is_null($unicode_support))
{
- $unicode = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false) ? true : false;
+ $unicode_support = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false) ? true : false;
}
- if ($unicode)
- {
- // Unescape the asterisk to simplify further conversions
- $word = str_replace('\*', '*', preg_quote($word, '#'));
+ // Unescape the asterisk to simplify further conversions
+ $word = str_replace('\*', '*', preg_quote($word, '#'));
+ if ($use_unicode && $unicode_support)
+ {
// Replace asterisk(s) inside the pattern, at the start and at the end of it with regexes
$word = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $word);
@@ -3464,7 +3460,11 @@ function get_censor_preg_expression($word)
}
else
{
- $preg_expr = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($word, '#')) . ')(?!\S)#iu';
+ // Replace the asterisk inside the pattern, at the start and at the end of it with regexes
+ $word = preg_replace(array('#(?<=\S)\*+(?=\S)#iu', '#^\*+#', '#\*+$#'), array('(\x20*?\S*?)', '\S*?', '\S*?'), $word);
+
+ // Generate the final substitution
+ $preg_expr = '#(?<!\S)(' . $word . ')(?!\S)#iu';
}
return $preg_expr;
diff --git a/tests/regex/censor_test.php b/tests/regex/censor_test.php
index ae2d86e07e..93c761c8d0 100644
--- a/tests/regex/censor_test.php
+++ b/tests/regex/censor_test.php
@@ -31,9 +31,19 @@ class phpbb_regex_censor_test extends phpbb_test_case
/**
* @dataProvider censor_test_data
*/
- public function test_censor($pattern, $subject)
+ public function test_censor_unicode($pattern, $subject)
{
- $regex = get_censor_preg_expression($pattern);
+ $regex = get_censor_preg_expression($pattern, true);
+
+ $this->assertRegExp($regex, $subject);
+ }
+
+ /**
+ * @dataProvider censor_test_data
+ */
+ public function test_censor_no_unicode($pattern, $subject)
+ {
+ $regex = get_censor_preg_expression($pattern, false);
$this->assertRegExp($regex, $subject);
}