aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndreas Fischer <bantu@phpbb.com>2011-01-07 15:41:12 +0100
committerAndreas Fischer <bantu@phpbb.com>2011-01-07 15:41:12 +0100
commita469e804930e66c244da27deb4dc94b5e280659f (patch)
tree3bab2abf053b81af81aab73d97c39d7f622f5130
parent7a2348bf7f0ee67856cc5ae15591b7d1b7c48763 (diff)
parent95cf47c4ef41b1ad7c5dc27eb9a078769851acf8 (diff)
downloadforums-a469e804930e66c244da27deb4dc94b5e280659f.tar
forums-a469e804930e66c244da27deb4dc94b5e280659f.tar.gz
forums-a469e804930e66c244da27deb4dc94b5e280659f.tar.bz2
forums-a469e804930e66c244da27deb4dc94b5e280659f.tar.xz
forums-a469e804930e66c244da27deb4dc94b5e280659f.zip
Merge branch 'ticket/rxu/9933' into develop-olympus
* ticket/rxu/9933: [ticket/9933] Create unit test for word censor regular expression. [ticket/9933] Move word censor regex into separate function in functions.php [ticket/9933] Wrong handling consecutive multiple asterisks in word censor
-rw-r--r--phpBB/includes/acp/acp_words.php3
-rw-r--r--phpBB/includes/cache.php19
-rw-r--r--phpBB/includes/functions.php42
-rw-r--r--tests/regex/censor.php40
4 files changed, 86 insertions, 18 deletions
diff --git a/phpBB/includes/acp/acp_words.php b/phpBB/includes/acp/acp_words.php
index 1cb9545967..88c5bbe592 100644
--- a/phpBB/includes/acp/acp_words.php
+++ b/phpBB/includes/acp/acp_words.php
@@ -95,6 +95,9 @@ class acp_words
trigger_error($user->lang['ENTER_WORD'] . adm_back_link($this->u_action), E_USER_WARNING);
}
+ // Replace multiple consecutive asterisks with single one as those are not needed
+ $word = preg_replace('#\*{2,}#', '*', $word);
+
$sql_ary = array(
'word' => $word,
'replacement' => $replacement
diff --git a/phpBB/includes/cache.php b/phpBB/includes/cache.php
index b50fab4ca2..612adcca4f 100644
--- a/phpBB/includes/cache.php
+++ b/phpBB/includes/cache.php
@@ -82,26 +82,9 @@ class cache extends acm
$result = $db->sql_query($sql);
$censors = array();
- $unicode = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false) ? true : false;
-
while ($row = $db->sql_fetchrow($result))
{
- if ($unicode)
- {
- // Unescape the asterisk to simplify further conversions
- $row['word'] = str_replace('\*', '*', preg_quote($row['word'], '#'));
-
- // Replace the asterisk inside the pattern, at the start and at the end of it with regexes
- $row['word'] = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*(?=[\p{Nd}\p{L}_])#iu', '#^\*#', '#\*$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $row['word']);
-
- // Generate the final substitution
- $censors['match'][] = '#(?<![\p{Nd}\p{L}_-])(' . $row['word'] . ')(?![\p{Nd}\p{L}_-])#iu';
- }
- else
- {
- $censors['match'][] = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($row['word'], '#')) . ')(?!\S)#iu';
- }
-
+ $censors['match'][] = get_censor_preg_expression($row['word']);
$censors['replace'][] = $row['replacement'];
}
$db->sql_freeresult($result);
diff --git a/phpBB/includes/functions.php b/phpBB/includes/functions.php
index c7f19b709d..69be1627cf 100644
--- a/phpBB/includes/functions.php
+++ b/phpBB/includes/functions.php
@@ -3429,6 +3429,48 @@ function get_preg_expression($mode)
}
/**
+* Generate the regular expression used by the word censor for one word template.
+* Which pattern is built depends on whether the installed PHP/PCRE supports unicode properties.
+*
+* Asterisks in the template act as wildcards. With unicode property support a run of
+* asterisks at the start or the end of the template matches any run of digits, letters,
+* underscores and hyphens, and a run of asterisks between two such characters additionally
+* matches a run of spaces. Without that support every asterisk matches a run of
+* non-whitespace characters.
+*
+* @param string $word word template to be replaced
+*
+* @return string $preg_expr regex to use with word censor, empty string if no word was given
+*/
+function get_censor_preg_expression($word)
+{
+	static $unicode = null;
+
+	// Deliberately not empty(): empty('0') is true, which would make the word "0"
+	// impossible to censor and would hand an empty pattern back to the caller.
+	if (is_null($word) || $word === false || $word === '')
+	{
+		return '';
+	}
+
+	// Check whether PHP version supports unicode properties (done once per request)
+	if (is_null($unicode))
+	{
+		$unicode = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false);
+	}
+
+	if ($unicode)
+	{
+		// Unescape the asterisk to simplify further conversions
+		$word = str_replace('\*', '*', preg_quote($word, '#'));
+
+		// Replace asterisk(s) inside the pattern, at the start and at the end of it with regexes
+		$word = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $word);
+
+		// Generate the final substitution; the lookarounds keep the match on word boundaries
+		$preg_expr = '#(?<![\p{Nd}\p{L}_-])(' . $word . ')(?![\p{Nd}\p{L}_-])#iu';
+	}
+	else
+	{
+		// Fallback without unicode properties: whitespace delimits words, asterisk matches non-whitespace
+		$preg_expr = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($word, '#')) . ')(?!\S)#iu';
+	}
+
+	return $preg_expr;
+}
+
+/**
* Returns the first block of the specified IPv6 address and as many additional
* ones as specified in the length parameter.
* If length is zero, then an empty string is returned.
diff --git a/tests/regex/censor.php b/tests/regex/censor.php
new file mode 100644
index 0000000000..ae2d86e07e
--- /dev/null
+++ b/tests/regex/censor.php
@@ -0,0 +1,40 @@
+<?php
+/**
+*
+* @package testing
+* @copyright (c) 2010 phpBB Group
+* @license http://opensource.org/licenses/gpl-license.php GNU Public License
+*
+*/
+
+require_once __DIR__ . '/../../phpBB/includes/functions.php';
+
+class phpbb_regex_censor_test extends phpbb_test_case
+{
+	/**
+	* Data provider for test_censor().
+	*
+	* @return array list of array(word template, text the generated regex has to match)
+	*/
+	public function censor_test_data()
+	{
+		$cases = array();
+
+		// Single and repeated asterisks between two words
+		$cases[] = array('bad*word', 'bad word');
+		$cases[] = array('bad***word', 'bad word');
+		$cases[] = array('bad**word', 'bad word');
+		$cases[] = array('*bad*word*', 'bad word');
+
+		// Asterisks inside and around a single word
+		$cases[] = array('b*d', 'bad');
+		$cases[] = array('*bad*', 'bad');
+		$cases[] = array('*b*d*', 'bad');
+		$cases[] = array('*b*d*', 'b d');
+
+		// Several wildcards in one template, also inside a longer text
+		$cases[] = array('b*d*word', 'bad word');
+		$cases[] = array('**b**d**word**', 'bad word');
+		$cases[] = array('**b**d**word**', 'the bad word catched');
+
+		return $cases;
+	}
+
+	/**
+	* The regex generated for a word template has to match the given text.
+	*
+	* @dataProvider censor_test_data
+	*/
+	public function test_censor($pattern, $subject)
+	{
+		$this->assertRegExp(get_censor_preg_expression($pattern), $subject);
+	}
+} \ No newline at end of file