From 951b7b22e002571f1c304fcd63049aae77551a7b Mon Sep 17 00:00:00 2001 From: rxu Date: Wed, 5 Jan 2011 21:13:33 +0700 Subject: [ticket/9933] Move word censor regex into separate function in functions.php PHPBB3-9933 --- phpBB/includes/functions.php | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'phpBB/includes/functions.php') diff --git a/phpBB/includes/functions.php b/phpBB/includes/functions.php index c7f19b709d..69be1627cf 100644 --- a/phpBB/includes/functions.php +++ b/phpBB/includes/functions.php @@ -3428,6 +3428,48 @@ function get_preg_expression($mode) return ''; } +/** +* Generate regexp for naughty words censoring +* Depends on whether installed PHP version supports unicode properties +* +* @param string $word word template to be replaced +* +* @return string $preg_expr regex to use with word censor +*/ +function get_censor_preg_expression($word) +{ + static $unicode = null; + + if (empty($word)) + { + return ''; + } + + // Check whether PHP version supports unicode properties + if (is_null($unicode)) + { + $unicode = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false) ? true : false; + } + + if ($unicode) + { + // Unescape the asterisk to simplify further conversions + $word = str_replace('\*', '*', preg_quote($word, '#')); + + // Replace asterisk(s) inside the pattern, at the start and at the end of it with regexes + $word = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $word); + + // Generate the final substitution + $preg_expr = '#(?