diff options
| -rw-r--r-- | phpBB/includes/acp/acp_words.php | 3 | ||||
| -rw-r--r-- | phpBB/includes/cache.php | 19 | ||||
| -rw-r--r-- | phpBB/includes/functions.php | 42 | ||||
| -rw-r--r-- | tests/regex/censor.php | 40 | 
4 files changed, 86 insertions, 18 deletions
diff --git a/phpBB/includes/acp/acp_words.php b/phpBB/includes/acp/acp_words.php index 35095d8b71..450a2fad4c 100644 --- a/phpBB/includes/acp/acp_words.php +++ b/phpBB/includes/acp/acp_words.php @@ -95,6 +95,9 @@ class acp_words  					trigger_error($user->lang['ENTER_WORD'] . adm_back_link($this->u_action), E_USER_WARNING);  				} +				// Replace multiple consecutive asterisks with single one as those are not needed +				$word = preg_replace('#\*{2,}#', '*', $word); +  				$sql_ary = array(  					'word'			=> $word,  					'replacement'	=> $replacement diff --git a/phpBB/includes/cache.php b/phpBB/includes/cache.php index 1986f021c6..49b690f1a4 100644 --- a/phpBB/includes/cache.php +++ b/phpBB/includes/cache.php @@ -82,26 +82,9 @@ class cache extends acm  			$result = $db->sql_query($sql);  			$censors = array(); -			$unicode = pcre_utf8_support(); -  			while ($row = $db->sql_fetchrow($result))  			{ -				if ($unicode) -				{ -					// Unescape the asterisk to simplify further conversions -					$row['word'] = str_replace('\*', '*', preg_quote($row['word'], '#')); -					 -					// Replace the asterisk inside the pattern, at the start and at the end of it with regexes -					$row['word'] = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*(?=[\p{Nd}\p{L}_])#iu', '#^\*#', '#\*$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $row['word']); - -					// Generate the final substitution -					$censors['match'][] = '#(?<![\p{Nd}\p{L}_-])(' . $row['word'] . ')(?![\p{Nd}\p{L}_-])#iu'; -				} -				else -				{ -					$censors['match'][] = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($row['word'], '#')) . ')(?!\S)#iu'; -				} - +				$censors['match'][] = get_censor_preg_expression($row['word']);  				$censors['replace'][] = $row['replacement'];  			}  			$db->sql_freeresult($result); diff --git a/phpBB/includes/functions.php b/phpBB/includes/functions.php index 41dad77141..c55931a2fb 100644 --- a/phpBB/includes/functions.php +++ b/phpBB/includes/functions.php @@ -3250,6 +3250,48 @@ function get_preg_expression($mode)  }  /** +* Generate regexp for naughty words censoring +* Depends on whether installed PHP version supports unicode properties +* +* @param string	$word	word template to be replaced +* +* @return string $preg_expr		regex to use with word censor +*/ +function get_censor_preg_expression($word) +{ +	static $unicode = null; + +	if (empty($word)) +	{ +		return ''; +	} + +	// Check whether PHP version supports unicode properties +	if (is_null($unicode)) +	{ +		$unicode = pcre_utf8_support(); +	} + +	if ($unicode) +	{ +		// Unescape the asterisk to simplify further conversions +		$word = str_replace('\*', '*', preg_quote($word, '#')); + +		// Replace asterisk(s) inside the pattern, at the start and at the end of it with regexes +		$word = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $word); + +		// Generate the final substitution +		$preg_expr = '#(?<![\p{Nd}\p{L}_-])(' . $word . ')(?![\p{Nd}\p{L}_-])#iu'; +	} +	else +	{ +		$preg_expr = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($word, '#')) . ')(?!\S)#iu'; +	} + +	return $preg_expr; +} + +/**  * Returns the first block of the specified IPv6 address and as many additional  * ones as specified in the length paramater.  * If length is zero, then an empty string is returned. diff --git a/tests/regex/censor.php b/tests/regex/censor.php new file mode 100644 index 0000000000..ae2d86e07e --- /dev/null +++ b/tests/regex/censor.php @@ -0,0 +1,40 @@ +<?php +/** +* +* @package testing +* @copyright (c) 2010 phpBB Group +* @license http://opensource.org/licenses/gpl-license.php GNU Public License +* +*/ + +require_once __DIR__ . '/../../phpBB/includes/functions.php'; + +class phpbb_regex_censor_test extends phpbb_test_case +{ +	public function censor_test_data() +	{ +		return array( +			array('bad*word', 'bad word'), +			array('bad***word', 'bad word'), +			array('bad**word', 'bad word'), +			array('*bad*word*', 'bad word'), +			array('b*d', 'bad'), +			array('*bad*', 'bad'), +			array('*b*d*', 'bad'), +			array('*b*d*', 'b d'), +			array('b*d*word', 'bad word'), +			array('**b**d**word**', 'bad word'), +			array('**b**d**word**', 'the bad word catched'), +		); +	} + +	/** +	* @dataProvider censor_test_data +	*/ +	public function test_censor($pattern, $subject) +	{ +		$regex = get_censor_preg_expression($pattern); + +		$this->assertRegExp($regex, $subject); +	} +}
\ No newline at end of file  | 
