aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Igor Wiedler <igor@wiedler.ch> 2011-01-16 20:28:06 +0100
committer Igor Wiedler <igor@wiedler.ch> 2011-01-16 20:37:39 +0100
commit 656f18d3af1168b15648e77005345698c3a507b5 (patch)
tree fa63c865a72a3570c1ad04fb1aa85f03a88e73a3
parent 78df30f7b76bf3fb0852c70b4e90e1acddc78b05 (diff)
parent e9c584af6aee4d8cec81ad1f96066860ea878644 (diff)
download forums-656f18d3af1168b15648e77005345698c3a507b5.tar
forums-656f18d3af1168b15648e77005345698c3a507b5.tar.gz
forums-656f18d3af1168b15648e77005345698c3a507b5.tar.bz2
forums-656f18d3af1168b15648e77005345698c3a507b5.tar.xz
forums-656f18d3af1168b15648e77005345698c3a507b5.zip
Merge branch 'develop-olympus' into develop
* develop-olympus:
  [ticket/9933] Remove empty word check.
  [ticket/9933] Add $use_unicode parameter to get_censor_preg_expression().
  [ticket/9933] Adjust word censor regex for non-unicode mode.

Conflicts:
  phpBB/includes/functions.php

Also remove static $unicode_support.
-rw-r--r-- phpBB/includes/functions.php | 30
-rw-r--r-- tests/regex/censor_test.php | 14
2 files changed, 23 insertions, 21 deletions
diff --git a/phpBB/includes/functions.php b/phpBB/includes/functions.php
index 12e4ebc597..cbd39891c9 100644
--- a/phpBB/includes/functions.php
+++ b/phpBB/includes/functions.php
@@ -3245,30 +3245,18 @@ function get_preg_expression($mode)
* Generate regexp for naughty words censoring
* Depends on whether installed PHP version supports unicode properties
*
-* @param string $word word template to be replaced
+* @param string $word word template to be replaced
+* @param bool $use_unicode whether or not to take advantage of PCRE supporting unicode
*
* @return string $preg_expr regex to use with word censor
*/
-function get_censor_preg_expression($word)
+function get_censor_preg_expression($word, $use_unicode = true)
{
- static $unicode = null;
+ // Unescape the asterisk to simplify further conversions
+ $word = str_replace('\*', '*', preg_quote($word, '#'));
- if (empty($word))
+ if ($use_unicode && pcre_utf8_support())
{
- return '';
- }
-
- // Check whether PHP version supports unicode properties
- if (is_null($unicode))
- {
- $unicode = pcre_utf8_support();
- }
-
- if ($unicode)
- {
- // Unescape the asterisk to simplify further conversions
- $word = str_replace('\*', '*', preg_quote($word, '#'));
-
// Replace asterisk(s) inside the pattern, at the start and at the end of it with regexes
$word = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $word);
@@ -3277,7 +3265,11 @@ function get_censor_preg_expression($word)
}
else
{
- $preg_expr = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($word, '#')) . ')(?!\S)#iu';
+ // Replace the asterisk inside the pattern, at the start and at the end of it with regexes
+ $word = preg_replace(array('#(?<=\S)\*+(?=\S)#iu', '#^\*+#', '#\*+$#'), array('(\x20*?\S*?)', '\S*?', '\S*?'), $word);
+
+ // Generate the final substitution
+ $preg_expr = '#(?<!\S)(' . $word . ')(?!\S)#iu';
}
return $preg_expr;
diff --git a/tests/regex/censor_test.php b/tests/regex/censor_test.php
index ae2d86e07e..93c761c8d0 100644
--- a/tests/regex/censor_test.php
+++ b/tests/regex/censor_test.php
@@ -31,9 +31,19 @@ class phpbb_regex_censor_test extends phpbb_test_case
/**
* @dataProvider censor_test_data
*/
- public function test_censor($pattern, $subject)
+ public function test_censor_unicode($pattern, $subject)
{
- $regex = get_censor_preg_expression($pattern);
+ $regex = get_censor_preg_expression($pattern, true);
+
+ $this->assertRegExp($regex, $subject);
+ }
+
+ /**
+ * @dataProvider censor_test_data
+ */
+ public function test_censor_no_unicode($pattern, $subject)
+ {
+ $regex = get_censor_preg_expression($pattern, false);
$this->assertRegExp($regex, $subject);
}