diff options
author | Andreas Fischer <bantu@phpbb.com> | 2011-01-07 15:41:12 +0100 |
---|---|---|
committer | Andreas Fischer <bantu@phpbb.com> | 2011-01-07 15:41:12 +0100 |
commit | a469e804930e66c244da27deb4dc94b5e280659f (patch) | |
tree | 3bab2abf053b81af81aab73d97c39d7f622f5130 | |
parent | 7a2348bf7f0ee67856cc5ae15591b7d1b7c48763 (diff) | |
parent | 95cf47c4ef41b1ad7c5dc27eb9a078769851acf8 (diff) | |
download | forums-a469e804930e66c244da27deb4dc94b5e280659f.tar forums-a469e804930e66c244da27deb4dc94b5e280659f.tar.gz forums-a469e804930e66c244da27deb4dc94b5e280659f.tar.bz2 forums-a469e804930e66c244da27deb4dc94b5e280659f.tar.xz forums-a469e804930e66c244da27deb4dc94b5e280659f.zip |
Merge branch 'ticket/rxu/9933' into develop-olympus
* ticket/rxu/9933:
[ticket/9933] Create unit test for word censor regular expression.
[ticket/9933] Move word censor regex into separate function in functions.php
[ticket/9933] Wrong handling consecutive multiple asterisks in word censor
-rw-r--r-- | phpBB/includes/acp/acp_words.php | 3 | ||||
-rw-r--r-- | phpBB/includes/cache.php | 19 | ||||
-rw-r--r-- | phpBB/includes/functions.php | 42 | ||||
-rw-r--r-- | tests/regex/censor.php | 40 |
4 files changed, 86 insertions, 18 deletions
```diff
diff --git a/phpBB/includes/acp/acp_words.php b/phpBB/includes/acp/acp_words.php
index 1cb9545967..88c5bbe592 100644
--- a/phpBB/includes/acp/acp_words.php
+++ b/phpBB/includes/acp/acp_words.php
@@ -95,6 +95,9 @@ class acp_words
 					trigger_error($user->lang['ENTER_WORD'] . adm_back_link($this->u_action), E_USER_WARNING);
 				}
 
+				// Replace multiple consecutive asterisks with single one as those are not needed
+				$word = preg_replace('#\*{2,}#', '*', $word);
+
 				$sql_ary = array(
 					'word' => $word,
 					'replacement' => $replacement
diff --git a/phpBB/includes/cache.php b/phpBB/includes/cache.php
index b50fab4ca2..612adcca4f 100644
--- a/phpBB/includes/cache.php
+++ b/phpBB/includes/cache.php
@@ -82,26 +82,9 @@ class cache extends acm
 			$result = $db->sql_query($sql);
 
 			$censors = array();
-			$unicode = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false) ? true : false;
-
 			while ($row = $db->sql_fetchrow($result))
 			{
-				if ($unicode)
-				{
-					// Unescape the asterisk to simplify further conversions
-					$row['word'] = str_replace('\*', '*', preg_quote($row['word'], '#'));
-
-					// Replace the asterisk inside the pattern, at the start and at the end of it with regexes
-					$row['word'] = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*(?=[\p{Nd}\p{L}_])#iu', '#^\*#', '#\*$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $row['word']);
-
-					// Generate the final substitution
-					$censors['match'][] = '#(?<![\p{Nd}\p{L}_-])(' . $row['word'] . ')(?![\p{Nd}\p{L}_-])#iu';
-				}
-				else
-				{
-					$censors['match'][] = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($row['word'], '#')) . ')(?!\S)#iu';
-				}
-
+				$censors['match'][] = get_censor_preg_expression($row['word']);
 				$censors['replace'][] = $row['replacement'];
 			}
 			$db->sql_freeresult($result);
diff --git a/phpBB/includes/functions.php b/phpBB/includes/functions.php
index c7f19b709d..69be1627cf 100644
--- a/phpBB/includes/functions.php
+++ b/phpBB/includes/functions.php
@@ -3429,6 +3429,48 @@ function get_preg_expression($mode)
 }
 
 /**
+* Generate regexp for naughty words censoring
+* Depends on whether installed PHP version supports unicode properties
+*
+* @param string $word word template to be replaced
+*
+* @return string $preg_expr regex to use with word censor
+*/
+function get_censor_preg_expression($word)
+{
+	static $unicode = null;
+
+	if (empty($word))
+	{
+		return '';
+	}
+
+	// Check whether PHP version supports unicode properties
+	if (is_null($unicode))
+	{
+		$unicode = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false) ? true : false;
+	}
+
+	if ($unicode)
+	{
+		// Unescape the asterisk to simplify further conversions
+		$word = str_replace('\*', '*', preg_quote($word, '#'));
+
+		// Replace asterisk(s) inside the pattern, at the start and at the end of it with regexes
+		$word = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $word);
+
+		// Generate the final substitution
+		$preg_expr = '#(?<![\p{Nd}\p{L}_-])(' . $word . ')(?![\p{Nd}\p{L}_-])#iu';
+	}
+	else
+	{
+		$preg_expr = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($word, '#')) . ')(?!\S)#iu';
+	}
+
+	return $preg_expr;
+}
+
+/**
 * Returns the first block of the specified IPv6 address and as many additional
 * ones as specified in the length paramater.
 * If length is zero, then an empty string is returned.
diff --git a/tests/regex/censor.php b/tests/regex/censor.php
new file mode 100644
index 0000000000..ae2d86e07e
--- /dev/null
+++ b/tests/regex/censor.php
@@ -0,0 +1,40 @@
+<?php
+/**
+*
+* @package testing
+* @copyright (c) 2010 phpBB Group
+* @license http://opensource.org/licenses/gpl-license.php GNU Public License
+*
+*/
+
+require_once __DIR__ . '/../../phpBB/includes/functions.php';
+
+class phpbb_regex_censor_test extends phpbb_test_case
+{
+	public function censor_test_data()
+	{
+		return array(
+			array('bad*word', 'bad word'),
+			array('bad***word', 'bad word'),
+			array('bad**word', 'bad word'),
+			array('*bad*word*', 'bad word'),
+			array('b*d', 'bad'),
+			array('*bad*', 'bad'),
+			array('*b*d*', 'bad'),
+			array('*b*d*', 'b d'),
+			array('b*d*word', 'bad word'),
+			array('**b**d**word**', 'bad word'),
+			array('**b**d**word**', 'the bad word catched'),
+		);
+	}
+
+	/**
+	* @dataProvider censor_test_data
+	*/
+	public function test_censor($pattern, $subject)
+	{
+		$regex = get_censor_preg_expression($pattern);
+
+		$this->assertRegExp($regex, $subject);
+	}
+}
\ No newline at end of file
```