From 9a25e4ad8956f46fa41a6057a8af53f2955fb532 Mon Sep 17 00:00:00 2001 From: rxu Date: Tue, 4 Jan 2011 11:54:10 +0700 Subject: [ticket/9933] Wrong handling consecutive multiple asterisks in word censor Fix consecutive asterisks issue in word censor. PHPBB3-9933 --- phpBB/includes/acp/acp_words.php | 3 +++ phpBB/includes/cache.php | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'phpBB/includes') diff --git a/phpBB/includes/acp/acp_words.php b/phpBB/includes/acp/acp_words.php index 1cb9545967..88c5bbe592 100644 --- a/phpBB/includes/acp/acp_words.php +++ b/phpBB/includes/acp/acp_words.php @@ -95,6 +95,9 @@ class acp_words trigger_error($user->lang['ENTER_WORD'] . adm_back_link($this->u_action), E_USER_WARNING); } + // Replace multiple consecutive asterisks with single one as those are not needed + $word = preg_replace('#\*{2,}#', '*', $word); + $sql_ary = array( 'word' => $word, 'replacement' => $replacement diff --git a/phpBB/includes/cache.php b/phpBB/includes/cache.php index b50fab4ca2..9b90483b50 100644 --- a/phpBB/includes/cache.php +++ b/phpBB/includes/cache.php @@ -90,9 +90,9 @@ class cache extends acm { // Unescape the asterisk to simplify further conversions $row['word'] = str_replace('\*', '*', preg_quote($row['word'], '#')); - + // Replace the asterisk inside the pattern, at the start and at the end of it with regexes - $row['word'] = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*(?=[\p{Nd}\p{L}_])#iu', '#^\*#', '#\*$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $row['word']); + $row['word'] = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $row['word']); // Generate the final substitution $censors['match'][] = '#(? Date: Wed, 5 Jan 2011 21:13:33 +0700 Subject: [ticket/9933] Move word censor regex into separate function in functions.php PHPBB3-9933 --- phpBB/includes/cache.php | 19 +------------------ phpBB/includes/functions.php | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 18 deletions(-) (limited to 'phpBB/includes') diff --git a/phpBB/includes/cache.php b/phpBB/includes/cache.php index 9b90483b50..612adcca4f 100644 --- a/phpBB/includes/cache.php +++ b/phpBB/includes/cache.php @@ -82,26 +82,9 @@ class cache extends acm $result = $db->sql_query($sql); $censors = array(); - $unicode = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false) ? true : false; - while ($row = $db->sql_fetchrow($result)) { - if ($unicode) - { - // Unescape the asterisk to simplify further conversions - $row['word'] = str_replace('\*', '*', preg_quote($row['word'], '#')); - - // Replace the asterisk inside the pattern, at the start and at the end of it with regexes - $row['word'] = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $row['word']); - - // Generate the final substitution - $censors['match'][] = '#(?sql_freeresult($result); diff --git a/phpBB/includes/functions.php b/phpBB/includes/functions.php index c7f19b709d..69be1627cf 100644 --- a/phpBB/includes/functions.php +++ b/phpBB/includes/functions.php @@ -3428,6 +3428,48 @@ function get_preg_expression($mode) return ''; } +/** +* Generate regexp for naughty words censoring +* Depends on whether installed PHP version supports unicode properties +* +* @param string $word word template to be replaced +* +* @return string $preg_expr regex to use with word censor +*/ +function get_censor_preg_expression($word) +{ + static $unicode = null; + + if (empty($word)) + { + return ''; + } + + // Check whether PHP version supports unicode properties + if (is_null($unicode)) + { + $unicode = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false) ? true : false; + } + + if ($unicode) + { + // Unescape the asterisk to simplify further conversions + $word = str_replace('\*', '*', preg_quote($word, '#')); + + // Replace asterisk(s) inside the pattern, at the start and at the end of it with regexes + $word = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $word); + + // Generate the final substitution + $preg_expr = '#(?