diff options
Diffstat (limited to 'phpBB/phpbb/search')
| -rw-r--r-- | phpBB/phpbb/search/base.php | 324 | ||||
| -rw-r--r-- | phpBB/phpbb/search/fulltext_mysql.php | 929 | ||||
| -rw-r--r-- | phpBB/phpbb/search/fulltext_native.php | 1807 | ||||
| -rw-r--r-- | phpBB/phpbb/search/fulltext_postgres.php | 955 | ||||
| -rw-r--r-- | phpBB/phpbb/search/fulltext_sphinx.php | 904 | ||||
| -rw-r--r-- | phpBB/phpbb/search/index.htm | 10 | ||||
| -rw-r--r-- | phpBB/phpbb/search/sphinx/config.php | 282 | ||||
| -rw-r--r-- | phpBB/phpbb/search/sphinx/config_comment.php | 43 | ||||
| -rw-r--r-- | phpBB/phpbb/search/sphinx/config_section.php | 156 | ||||
| -rw-r--r-- | phpBB/phpbb/search/sphinx/config_variable.php | 74 | 
10 files changed, 5484 insertions, 0 deletions
| diff --git a/phpBB/phpbb/search/base.php b/phpBB/phpbb/search/base.php new file mode 100644 index 0000000000..9ecf3751d0 --- /dev/null +++ b/phpBB/phpbb/search/base.php @@ -0,0 +1,324 @@ +<?php +/** +* +* @package search +* @copyright (c) 2005 phpBB Group +* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2 +* +*/ + +namespace phpbb\search; + +/** +* @ignore +*/ +define('SEARCH_RESULT_NOT_IN_CACHE', 0); +define('SEARCH_RESULT_IN_CACHE', 1); +define('SEARCH_RESULT_INCOMPLETE', 2); + +/** +* \phpbb\search\base +* optional base class for search plugins providing simple caching based on ACM +* and functions to retrieve ignore_words and synonyms +* @package search +*/ +class base +{ +	var $ignore_words = array(); +	var $match_synonym = array(); +	var $replace_synonym = array(); + +	function search_backend(&$error) +	{ +		// This class cannot be used as a search plugin +		$error = true; +	} + +	/** +	* Retrieves a language dependend list of words that should be ignored by the search +	*/ +	function get_ignore_words() +	{ +		if (!sizeof($this->ignore_words)) +		{ +			global $user, $phpEx; + +			$words = array(); + +			if (file_exists("{$user->lang_path}{$user->lang_name}/search_ignore_words.$phpEx")) +			{ +				// include the file containing ignore words +				include("{$user->lang_path}{$user->lang_name}/search_ignore_words.$phpEx"); +			} + +			$this->ignore_words = $words; +			unset($words); +		} +	} + +	/** +	* Stores a list of synonyms that should be replaced in $this->match_synonym and $this->replace_synonym and caches them +	*/ +	function get_synonyms() +	{ +		if (!sizeof($this->match_synonym)) +		{ +			global $user, $phpEx; + +			$synonyms = array(); + +			if (file_exists("{$user->lang_path}{$user->lang_name}/search_synonyms.$phpEx")) +			{ +				// include the file containing synonyms +				include("{$user->lang_path}{$user->lang_name}/search_synonyms.$phpEx"); +			} + +			$this->match_synonym = array_keys($synonyms); +			
$this->replace_synonym = array_values($synonyms); + +			unset($synonyms); +		} +	} + +	/** +	* Retrieves cached search results +	* +	* @param int &$result_count will contain the number of all results for the search (not only for the current page) +	* @param array &$id_ary is filled with the ids belonging to the requested page that are stored in the cache +	* +	* @return int SEARCH_RESULT_NOT_IN_CACHE or SEARCH_RESULT_IN_CACHE or SEARCH_RESULT_INCOMPLETE +	*/ +	function obtain_ids($search_key, &$result_count, &$id_ary, &$start, $per_page, $sort_dir) +	{ +		global $cache; + +		if (!($stored_ids = $cache->get('_search_results_' . $search_key))) +		{ +			// no search results cached for this search_key +			return SEARCH_RESULT_NOT_IN_CACHE; +		} +		else +		{ +			$result_count = $stored_ids[-1]; +			$reverse_ids = ($stored_ids[-2] != $sort_dir) ? true : false; +			$complete = true; + +			// Change start parameter in case out of bounds +			if ($result_count) +			{ +				if ($start < 0) +				{ +					$start = 0; +				} +				else if ($start >= $result_count) +				{ +					$start = floor(($result_count - 1) / $per_page) * $per_page; +				} +			} + +			// change the start to the actual end of the current request if the sort direction differs +			// from the dirction in the cache and reverse the ids later +			if ($reverse_ids) +			{ +				$start = $result_count - $start - $per_page; + +				// the user requested a page past the last index +				if ($start < 0) +				{ +					return SEARCH_RESULT_NOT_IN_CACHE; +				} +			} + +			for ($i = $start, $n = $start + $per_page; ($i < $n) && ($i < $result_count); $i++) +			{ +				if (!isset($stored_ids[$i])) +				{ +					$complete = false; +				} +				else +				{ +					$id_ary[] = $stored_ids[$i]; +				} +			} +			unset($stored_ids); + +			if ($reverse_ids) +			{ +				$id_ary = array_reverse($id_ary); +			} + +			if (!$complete) +			{ +				return SEARCH_RESULT_INCOMPLETE; +			} +			return SEARCH_RESULT_IN_CACHE; +		} +	} + +	/** +	* Caches 
post/topic ids +	* +	* @param array &$id_ary contains a list of post or topic ids that shall be cached, the first element +	* 	must have the absolute index $start in the result set. +	*/ +	function save_ids($search_key, $keywords, $author_ary, $result_count, &$id_ary, $start, $sort_dir) +	{ +		global $cache, $config, $db, $user; + +		$length = min(sizeof($id_ary), $config['search_block_size']); + +		// nothing to cache so exit +		if (!$length) +		{ +			return; +		} + +		$store_ids = array_slice($id_ary, 0, $length); + +		// create a new resultset if there is none for this search_key yet +		// or add the ids to the existing resultset +		if (!($store = $cache->get('_search_results_' . $search_key))) +		{ +			// add the current keywords to the recent searches in the cache which are listed on the search page +			if (!empty($keywords) || sizeof($author_ary)) +			{ +				$sql = 'SELECT search_time +					FROM ' . SEARCH_RESULTS_TABLE . ' +					WHERE search_key = \'' . $db->sql_escape($search_key) . '\''; +				$result = $db->sql_query($sql); + +				if (!$db->sql_fetchrow($result)) +				{ +					$sql_ary = array( +						'search_key'		=> $search_key, +						'search_time'		=> time(), +						'search_keywords'	=> $keywords, +						'search_authors'	=> ' ' . implode(' ', $author_ary) . ' ' +					); + +					$sql = 'INSERT INTO ' . SEARCH_RESULTS_TABLE . ' ' . $db->sql_build_array('INSERT', $sql_ary); +					$db->sql_query($sql); +				} +				$db->sql_freeresult($result); +			} + +			$sql = 'UPDATE ' . USERS_TABLE . ' +				SET user_last_search = ' . time() . ' +				WHERE user_id = ' . 
$user->data['user_id']; +			$db->sql_query($sql); + +			$store = array(-1 => $result_count, -2 => $sort_dir); +			$id_range = range($start, $start + $length - 1); +		} +		else +		{ +			// we use one set of results for both sort directions so we have to calculate the indizes +			// for the reversed array and we also have to reverse the ids themselves +			if ($store[-2] != $sort_dir) +			{ +				$store_ids = array_reverse($store_ids); +				$id_range = range($store[-1] - $start - $length, $store[-1] - $start - 1); +			} +			else +			{ +				$id_range = range($start, $start + $length - 1); +			} +		} + +		$store_ids = array_combine($id_range, $store_ids); + +		// append the ids +		if (is_array($store_ids)) +		{ +			$store += $store_ids; + +			// if the cache is too big +			if (sizeof($store) - 2 > 20 * $config['search_block_size']) +			{ +				// remove everything in front of two blocks in front of the current start index +				for ($i = 0, $n = $id_range[0] - 2 * $config['search_block_size']; $i < $n; $i++) +				{ +					if (isset($store[$i])) +					{ +						unset($store[$i]); +					} +				} + +				// remove everything after two blocks after the current stop index +				end($id_range); +				for ($i = $store[-1] - 1, $n = current($id_range) + 2 * $config['search_block_size']; $i > $n; $i--) +				{ +					if (isset($store[$i])) +					{ +						unset($store[$i]); +					} +				} +			} +			$cache->put('_search_results_' . $search_key, $store, $config['search_store_results']); + +			$sql = 'UPDATE ' . SEARCH_RESULTS_TABLE . ' +				SET search_time = ' . time() . ' +				WHERE search_key = \'' . $db->sql_escape($search_key) . '\''; +			$db->sql_query($sql); +		} + +		unset($store); +		unset($store_ids); +		unset($id_range); +	} + +	/** +	* Removes old entries from the search results table and removes searches with keywords that contain a word in $words. 
+	*/ +	function destroy_cache($words, $authors = false) +	{ +		global $db, $cache, $config; + +		// clear all searches that searched for the specified words +		if (sizeof($words)) +		{ +			$sql_where = ''; +			foreach ($words as $word) +			{ +				$sql_where .= " OR search_keywords " . $db->sql_like_expression($db->any_char . $word . $db->any_char); +			} + +			$sql = 'SELECT search_key +				FROM ' . SEARCH_RESULTS_TABLE . " +				WHERE search_keywords LIKE '%*%' $sql_where"; +			$result = $db->sql_query($sql); + +			while ($row = $db->sql_fetchrow($result)) +			{ +				$cache->destroy('_search_results_' . $row['search_key']); +			} +			$db->sql_freeresult($result); +		} + +		// clear all searches that searched for the specified authors +		if (is_array($authors) && sizeof($authors)) +		{ +			$sql_where = ''; +			foreach ($authors as $author) +			{ +				$sql_where .= (($sql_where) ? ' OR ' : '') . 'search_authors ' . $db->sql_like_expression($db->any_char . ' ' . (int) $author . ' ' . $db->any_char); +			} + +			$sql = 'SELECT search_key +				FROM ' . SEARCH_RESULTS_TABLE . " +				WHERE $sql_where"; +			$result = $db->sql_query($sql); + +			while ($row = $db->sql_fetchrow($result)) +			{ +				$cache->destroy('_search_results_' . $row['search_key']); +			} +			$db->sql_freeresult($result); +		} + +		$sql = 'DELETE +			FROM ' . SEARCH_RESULTS_TABLE . ' +			WHERE search_time < ' . 
(time() - $config['search_store_results']); +		$db->sql_query($sql); +	} +} diff --git a/phpBB/phpbb/search/fulltext_mysql.php b/phpBB/phpbb/search/fulltext_mysql.php new file mode 100644 index 0000000000..509b73e26e --- /dev/null +++ b/phpBB/phpbb/search/fulltext_mysql.php @@ -0,0 +1,929 @@ +<?php +/** +* +* @package search +* @copyright (c) 2005 phpBB Group +* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2 +* +*/ + +namespace phpbb\search; + +/** +* fulltext_mysql +* Fulltext search for MySQL +* @package search +*/ +class fulltext_mysql extends \phpbb\search\base +{ +	/** +	 * Associative array holding index stats +	 * @var array +	 */ +	protected $stats = array(); + +	/** +	 * Holds the words entered by user, obtained by splitting the entered query on whitespace +	 * @var array +	 */ +	protected $split_words = array(); + +	/** +	 * Config object +	 * @var \phpbb\config\config +	 */ +	protected $config; + +	/** +	 * Database connection +	 * @var \phpbb\db\driver\driver +	 */ +	protected $db; + +	/** +	 * User object +	 * @var \phpbb\user +	 */ +	protected $user; + +	/** +	 * Associative array stores the min and max word length to be searched +	 * @var array +	 */ +	protected $word_length = array(); + +	/** +	 * Contains tidied search query. +	 * Operators are prefixed in search query and common words excluded +	 * @var string +	 */ +	protected $search_query; + +	/** +	 * Contains common words. 
+	 * Common words are words with length less/more than min/max length +	 * @var array +	 */ +	protected $common_words = array(); + +	/** +	 * Constructor +	 * Creates a new \phpbb\search\fulltext_mysql, which is used as a search backend +	 * +	 * @param string|bool $error Any error that occurs is passed on through this reference variable otherwise false +	 */ +	public function __construct(&$error, $phpbb_root_path, $phpEx, $auth, $config, $db, $user) +	{ +		$this->config = $config; +		$this->db = $db; +		$this->user = $user; + +		$this->word_length = array('min' => $this->config['fulltext_mysql_min_word_len'], 'max' => $this->config['fulltext_mysql_max_word_len']); + +		/** +		 * Load the UTF tools +		 */ +		if (!function_exists('utf8_strlen')) +		{ +			include($phpbb_root_path . 'includes/utf/utf_tools.' . $phpEx); +		} + +		$error = false; +	} + +	/** +	* Returns the name of this search backend to be displayed to administrators +	* +	* @return string Name +	*/ +	public function get_name() +	{ +		return 'MySQL Fulltext'; +	} + +	/** +	 * Returns the search_query +	 * +	 * @return string search query +	 */ +	public function get_search_query() +	{ +		return $this->search_query; +	} + +	/** +	 * Returns the common_words array +	 * +	 * @return array common words that are ignored by search backend +	 */ +	public function get_common_words() +	{ +		return $this->common_words; +	} + +	/** +	 * Returns the word_length array +	 * +	 * @return array min and max word length for searching +	 */ +	public function get_word_length() +	{ +		return $this->word_length; +	} + +	/** +	* Checks for correct MySQL version and stores min/max word length in the config +	* +	* @return string|bool Language key of the error/incompatiblity occurred +	*/ +	public function init() +	{ +		if ($this->db->sql_layer != 'mysql4' && $this->db->sql_layer != 'mysqli') +		{ +			return $this->user->lang['FULLTEXT_MYSQL_INCOMPATIBLE_DATABASE']; +		} + +		$result = $this->db->sql_query('SHOW TABLE STATUS 
LIKE \'' . POSTS_TABLE . '\''); +		$info = $this->db->sql_fetchrow($result); +		$this->db->sql_freeresult($result); + +		$engine = ''; +		if (isset($info['Engine'])) +		{ +			$engine = $info['Engine']; +		} +		else if (isset($info['Type'])) +		{ +			$engine = $info['Type']; +		} + +		$fulltext_supported = +			$engine === 'MyISAM' || +			// FULLTEXT is supported on InnoDB since MySQL 5.6.4 according to +			// http://dev.mysql.com/doc/refman/5.6/en/innodb-storage-engine.html +			$engine === 'InnoDB' && +			phpbb_version_compare($this->db->sql_server_info(true), '5.6.4', '>='); + +		if (!$fulltext_supported) +		{ +			return $this->user->lang['FULLTEXT_MYSQL_NOT_SUPPORTED']; +		} + +		$sql = 'SHOW VARIABLES +			LIKE \'ft\_%\''; +		$result = $this->db->sql_query($sql); + +		$mysql_info = array(); +		while ($row = $this->db->sql_fetchrow($result)) +		{ +			$mysql_info[$row['Variable_name']] = $row['Value']; +		} +		$this->db->sql_freeresult($result); + +		set_config('fulltext_mysql_max_word_len', $mysql_info['ft_max_word_len']); +		set_config('fulltext_mysql_min_word_len', $mysql_info['ft_min_word_len']); + +		return false; +	} + +	/** +	* Splits keywords entered by a user into an array of words stored in $this->split_words +	* Stores the tidied search query in $this->search_query +	* +	* @param string &$keywords Contains the keyword as entered by the user +	* @param string $terms is either 'all' or 'any' +	* @return bool false if no valid keywords were found and otherwise true +	*/ +	public function split_keywords(&$keywords, $terms) +	{ +		if ($terms == 'all') +		{ +			$match		= array('#\sand\s#iu', '#\sor\s#iu', '#\snot\s#iu', '#(^|\s)\+#', '#(^|\s)-#', '#(^|\s)\|#'); +			$replace	= array(' +', ' |', ' -', ' +', ' -', ' |'); + +			$keywords = preg_replace($match, $replace, $keywords); +		} + +		// Filter out as above +		$split_keywords = preg_replace("#[\n\r\t]+#", ' ', trim(htmlspecialchars_decode($keywords))); + +		// Split words +		$split_keywords = 
preg_replace('#([^\p{L}\p{N}\'*"()])#u', '$1$1', str_replace('\'\'', '\' \'', trim($split_keywords))); +		$matches = array(); +		preg_match_all('#(?:[^\p{L}\p{N}*"()]|^)([+\-|]?(?:[\p{L}\p{N}*"()]+\'?)*[\p{L}\p{N}*"()])(?:[^\p{L}\p{N}*"()]|$)#u', $split_keywords, $matches); +		$this->split_words = $matches[1]; + +		// We limit the number of allowed keywords to minimize load on the database +		if ($this->config['max_num_search_keywords'] && sizeof($this->split_words) > $this->config['max_num_search_keywords']) +		{ +			trigger_error($this->user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', (int) $this->config['max_num_search_keywords'], sizeof($this->split_words))); +		} + +		// to allow phrase search, we need to concatenate quoted words +		$tmp_split_words = array(); +		$phrase = ''; +		foreach ($this->split_words as $word) +		{ +			if ($phrase) +			{ +				$phrase .= ' ' . $word; +				if (strpos($word, '"') !== false && substr_count($word, '"') % 2 == 1) +				{ +					$tmp_split_words[] = $phrase; +					$phrase = ''; +				} +			} +			else if (strpos($word, '"') !== false && substr_count($word, '"') % 2 == 1) +			{ +				$phrase = $word; +			} +			else +			{ +				$tmp_split_words[] = $word; +			} +		} +		if ($phrase) +		{ +			$tmp_split_words[] = $phrase; +		} + +		$this->split_words = $tmp_split_words; + +		unset($tmp_split_words); +		unset($phrase); + +		foreach ($this->split_words as $i => $word) +		{ +			$clean_word = preg_replace('#^[+\-|"]#', '', $word); + +			// check word length +			$clean_len = utf8_strlen(str_replace('*', '', $clean_word)); +			if (($clean_len < $this->config['fulltext_mysql_min_word_len']) || ($clean_len > $this->config['fulltext_mysql_max_word_len'])) +			{ +				$this->common_words[] = $word; +				unset($this->split_words[$i]); +			} +		} + +		if ($terms == 'any') +		{ +			$this->search_query = ''; +			foreach ($this->split_words as $word) +			{ +				if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0) || (strpos($word, '|') === 0)) +				
{ +					$word = substr($word, 1); +				} +				$this->search_query .= $word . ' '; +			} +		} +		else +		{ +			$this->search_query = ''; +			foreach ($this->split_words as $word) +			{ +				if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0)) +				{ +					$this->search_query .= $word . ' '; +				} +				else if (strpos($word, '|') === 0) +				{ +					$this->search_query .= substr($word, 1) . ' '; +				} +				else +				{ +					$this->search_query .= '+' . $word . ' '; +				} +			} +		} + +		$this->search_query = utf8_htmlspecialchars($this->search_query); + +		if ($this->search_query) +		{ +			$this->split_words = array_values($this->split_words); +			sort($this->split_words); +			return true; +		} +		return false; +	} + +	/** +	* Turns text into an array of words +	* @param string $text contains post text/subject +	*/ +	public function split_message($text) +	{ +		// Split words +		$text = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($text))); +		$matches = array(); +		preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $text, $matches); +		$text = $matches[1]; + +		// remove too short or too long words +		$text = array_values($text); +		for ($i = 0, $n = sizeof($text); $i < $n; $i++) +		{ +			$text[$i] = trim($text[$i]); +			if (utf8_strlen($text[$i]) < $this->config['fulltext_mysql_min_word_len'] || utf8_strlen($text[$i]) > $this->config['fulltext_mysql_max_word_len']) +			{ +				unset($text[$i]); +			} +		} + +		return array_values($text); +	} + +	/** +	* Performs a search on keywords depending on display specific params. 
You have to run split_keywords() first +	* +	* @param	string		$type				contains either posts or topics depending on what should be searched for +	* @param	string		$fields				contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched) +	* @param	string		$terms				is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words) +	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query +	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting +	* @param	string		$sort_dir			is either a or d representing ASC and DESC +	* @param	string		$sort_days			specifies the maximum amount of days a post may be old +	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched +	* @param	string		$post_visibility	specifies which types of posts the user can view in which forums +	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched +	* @param	array		$author_ary			an array of author ids if the author should be ignored during the search the array is empty +	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match +	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered +	* @param	int			$start				indicates the first index of the page +	* @param	int			$per_page			number of ids each page is supposed to contain +	* @return	boolean|int						total number of results +	*/ +	public function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, 
$author_ary, $author_name, &$id_ary, &$start, $per_page) +	{ +		// No keywords? No posts +		if (!$this->search_query) +		{ +			return false; +		} + +		// generate a search_key from all the options to identify the results +		$search_key = md5(implode('#', array( +			implode(', ', $this->split_words), +			$type, +			$fields, +			$terms, +			$sort_days, +			$sort_key, +			$topic_id, +			implode(',', $ex_fid_ary), +			$post_visibility, +			implode(',', $author_ary) +		))); + +		if ($start < 0) +		{ +			$start = 0; +		} + +		// try reading the results from cache +		$result_count = 0; +		if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE) +		{ +			return $result_count; +		} + +		$id_ary = array(); + +		$join_topic = ($type == 'posts') ? false : true; + +		// Build sql strings for sorting +		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC'); +		$sql_sort_table = $sql_sort_join = ''; + +		switch ($sql_sort[0]) +		{ +			case 'u': +				$sql_sort_table	= USERS_TABLE . ' u, '; +				$sql_sort_join	= ($type == 'posts') ? ' AND u.user_id = p.poster_id ' : ' AND u.user_id = t.topic_poster '; +			break; + +			case 't': +				$join_topic = true; +			break; + +			case 'f': +				$sql_sort_table	= FORUMS_TABLE . 
' f, '; +				$sql_sort_join	= ' AND f.forum_id = p.forum_id '; +			break; +		} + +		// Build some display specific sql strings +		switch ($fields) +		{ +			case 'titleonly': +				$sql_match = 'p.post_subject'; +				$sql_match_where = ' AND p.post_id = t.topic_first_post_id'; +				$join_topic = true; +			break; + +			case 'msgonly': +				$sql_match = 'p.post_text'; +				$sql_match_where = ''; +			break; + +			case 'firstpost': +				$sql_match = 'p.post_subject, p.post_text'; +				$sql_match_where = ' AND p.post_id = t.topic_first_post_id'; +				$join_topic = true; +			break; + +			default: +				$sql_match = 'p.post_subject, p.post_text'; +				$sql_match_where = ''; +			break; +		} + +		$sql_select			= (!$result_count) ? 'SQL_CALC_FOUND_ROWS ' : ''; +		$sql_select			= ($type == 'posts') ? $sql_select . 'p.post_id' : 'DISTINCT ' . $sql_select . 't.topic_id'; +		$sql_from			= ($join_topic) ? TOPICS_TABLE . ' t, ' : ''; +		$field				= ($type == 'posts') ? 'post_id' : 'topic_id'; +		if (sizeof($author_ary) && $author_name) +		{ +			// first one matches post of registered users, second one guests and deleted users +			$sql_author = ' AND (' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')'; +		} +		else if (sizeof($author_ary)) +		{ +			$sql_author = ' AND ' . $this->db->sql_in_set('p.poster_id', $author_ary); +		} +		else +		{ +			$sql_author = ''; +		} + +		$sql_where_options = $sql_sort_join; +		$sql_where_options .= ($topic_id) ? ' AND p.topic_id = ' . $topic_id : ''; +		$sql_where_options .= ($join_topic) ? ' AND t.topic_id = p.topic_id' : ''; +		$sql_where_options .= (sizeof($ex_fid_ary)) ? ' AND ' . $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true) : ''; +		$sql_where_options .= ' AND ' . $post_visibility; +		$sql_where_options .= $sql_author; +		$sql_where_options .= ($sort_days) ? ' AND p.post_time >= ' . 
(time() - ($sort_days * 86400)) : ''; +		$sql_where_options .= $sql_match_where; + +		$sql = "SELECT $sql_select +			FROM $sql_from$sql_sort_table" . POSTS_TABLE . " p +			WHERE MATCH ($sql_match) AGAINST ('" . $this->db->sql_escape(htmlspecialchars_decode($this->search_query)) . "' IN BOOLEAN MODE) +				$sql_where_options +			ORDER BY $sql_sort"; +		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start); + +		while ($row = $this->db->sql_fetchrow($result)) +		{ +			$id_ary[] = (int) $row[$field]; +		} +		$this->db->sql_freeresult($result); + +		$id_ary = array_unique($id_ary); + +		// if the total result count is not cached yet, retrieve it from the db +		if (!$result_count) +		{ +			$sql_found_rows = 'SELECT FOUND_ROWS() as result_count'; +			$result = $this->db->sql_query($sql_found_rows); +			$result_count = (int) $this->db->sql_fetchfield('result_count'); +			$this->db->sql_freeresult($result); + +			if (!$result_count) +			{ +				return false; +			} +		} + +		if ($start >= $result_count) +		{ +			$start = floor(($result_count - 1) / $per_page) * $per_page; + +			$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start); + +			while ($row = $this->db->sql_fetchrow($result)) +			{ +				$id_ary[] = (int) $row[$field]; +			} +			$this->db->sql_freeresult($result); + +			$id_ary = array_unique($id_ary); +		} + +		// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page +		$this->save_ids($search_key, implode(' ', $this->split_words), $author_ary, $result_count, $id_ary, $start, $sort_dir); +		$id_ary = array_slice($id_ary, 0, (int) $per_page); + +		return $result_count; +	} + +	/** +	* Performs a search on an author's posts without caring about message contents. 
Depends on display specific params +	* +	* @param	string		$type				contains either posts or topics depending on what should be searched for +	* @param	boolean		$firstpost_only		if true, only topic starting posts will be considered +	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query +	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting +	* @param	string		$sort_dir			is either a or d representing ASC and DESC +	* @param	string		$sort_days			specifies the maximum amount of days a post may be old +	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched +	* @param	string		$post_visibility	specifies which types of posts the user can view in which forums +	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched +	* @param	array		$author_ary			an array of author ids +	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match +	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered +	* @param	int			$start				indicates the first index of the page +	* @param	int			$per_page			number of ids each page is supposed to contain +	* @return	boolean|int						total number of results +	*/ +	public function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page) +	{ +		// No author? No posts +		if (!sizeof($author_ary)) +		{ +			return 0; +		} + +		// generate a search_key from all the options to identify the results +		$search_key = md5(implode('#', array( +			'', +			$type, +			($firstpost_only) ? 
'firstpost' : '', +			'', +			'', +			$sort_days, +			$sort_key, +			$topic_id, +			implode(',', $ex_fid_ary), +			$post_visibility, +			implode(',', $author_ary), +			$author_name, +		))); + +		if ($start < 0) +		{ +			$start = 0; +		} + +		// try reading the results from cache +		$result_count = 0; +		if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE) +		{ +			return $result_count; +		} + +		$id_ary = array(); + +		// Create some display specific sql strings +		if ($author_name) +		{ +			// first one matches post of registered users, second one guests and deleted users +			$sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')'; +		} +		else +		{ +			$sql_author = $this->db->sql_in_set('p.poster_id', $author_ary); +		} +		$sql_fora		= (sizeof($ex_fid_ary)) ? ' AND ' . $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true) : ''; +		$sql_topic_id	= ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : ''; +		$sql_time		= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : ''; +		$sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : ''; + +		// Build sql strings for sorting +		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC'); +		$sql_sort_table = $sql_sort_join = ''; +		switch ($sql_sort[0]) +		{ +			case 'u': +				$sql_sort_table	= USERS_TABLE . ' u, '; +				$sql_sort_join	= ($type == 'posts') ? ' AND u.user_id = p.poster_id ' : ' AND u.user_id = t.topic_poster '; +			break; + +			case 't': +				$sql_sort_table	= ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : ''; +				$sql_sort_join	= ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : ''; +			break; + +			case 'f': +				$sql_sort_table	= FORUMS_TABLE . 
' f, '; +				$sql_sort_join	= ' AND f.forum_id = p.forum_id '; +			break; +		} + +		$m_approve_fid_sql = ' AND ' . $post_visibility; + +		// If the cache was completely empty count the results +		$calc_results = ($result_count) ? '' : 'SQL_CALC_FOUND_ROWS '; + +		// Build the query for really selecting the post_ids +		if ($type == 'posts') +		{ +			$sql = "SELECT {$calc_results}p.post_id +				FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . " +				WHERE $sql_author +					$sql_topic_id +					$sql_firstpost +					$m_approve_fid_sql +					$sql_fora +					$sql_sort_join +					$sql_time +				ORDER BY $sql_sort"; +			$field = 'post_id'; +		} +		else +		{ +			$sql = "SELECT {$calc_results}t.topic_id +				FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p +				WHERE $sql_author +					$sql_topic_id +					$sql_firstpost +					$m_approve_fid_sql +					$sql_fora +					AND t.topic_id = p.topic_id +					$sql_sort_join +					$sql_time +				GROUP BY t.topic_id +				ORDER BY $sql_sort"; +			$field = 'topic_id'; +		} + +		// Only read one block of posts from the db and then cache it +		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start); + +		while ($row = $this->db->sql_fetchrow($result)) +		{ +			$id_ary[] = (int) $row[$field]; +		} +		$this->db->sql_freeresult($result); + +		// retrieve the total result count if needed +		if (!$result_count) +		{ +			$sql_found_rows = 'SELECT FOUND_ROWS() as result_count'; +			$result = $this->db->sql_query($sql_found_rows); +			$result_count = (int) $this->db->sql_fetchfield('result_count'); +			$this->db->sql_freeresult($result); + +			if (!$result_count) +			{ +				return false; +			} +		} + +		if ($start >= $result_count) +		{ +			$start = floor(($result_count - 1) / $per_page) * $per_page; + +			$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start); +			while ($row = $this->db->sql_fetchrow($result)) 
+			{ +				$id_ary[] = (int) $row[$field]; +			} +			$this->db->sql_freeresult($result); + +			$id_ary = array_unique($id_ary); +		} + +		if (sizeof($id_ary)) +		{ +			$this->save_ids($search_key, '', $author_ary, $result_count, $id_ary, $start, $sort_dir); +			$id_ary = array_slice($id_ary, 0, $per_page); + +			return $result_count; +		} +		return false; +	} + +	/** +	* Destroys cached search results, that contained one of the new words in a post so the results won't be outdated +	* +	* @param	string		$mode contains the post mode: edit, post, reply, quote ... +	* @param	int			$post_id	contains the post id of the post to index +	* @param	string		$message	contains the post text of the post +	* @param	string		$subject	contains the subject of the post to index +	* @param	int			$poster_id	contains the user id of the poster +	* @param	int			$forum_id	contains the forum id of parent forum of the post +	*/ +	public function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id) +	{ +		// Split old and new post/subject to obtain array of words +		$split_text = $this->split_message($message); +		$split_title = ($subject) ? 
$this->split_message($subject) : array(); + +		$words = array_unique(array_merge($split_text, $split_title)); + +		unset($split_text); +		unset($split_title); + +		// destroy cached search results containing any of the words removed or added +		$this->destroy_cache($words, array($poster_id)); + +		unset($words); +	} + +	/** +	* Destroy cached results, that might be outdated after deleting a post +	*/ +	public function index_remove($post_ids, $author_ids, $forum_ids) +	{ +		$this->destroy_cache(array(), array_unique($author_ids)); +	} + +	/** +	* Destroy old cache entries +	*/ +	public function tidy() +	{ +		// destroy too old cached search results +		$this->destroy_cache(array()); + +		set_config('search_last_gc', time(), true); +	} + +	/** +	* Create fulltext index +	* +	* @return string|bool error string is returned incase of errors otherwise false +	*/ +	public function create_index($acp_module, $u_action) +	{ +		// Make sure we can actually use MySQL with fulltext indexes +		if ($error = $this->init()) +		{ +			return $error; +		} + +		if (empty($this->stats)) +		{ +			$this->get_stats(); +		} + +		$alter = array(); + +		if (!isset($this->stats['post_subject'])) +		{ +			if ($this->db->sql_layer == 'mysqli' || version_compare($this->db->sql_server_info(true), '4.1.3', '>=')) +			{ +				$alter[] = 'MODIFY post_subject varchar(255) COLLATE utf8_unicode_ci DEFAULT \'\' NOT NULL'; +			} +			else +			{ +				$alter[] = 'MODIFY post_subject text NOT NULL'; +			} +			$alter[] = 'ADD FULLTEXT (post_subject)'; +		} + +		if (!isset($this->stats['post_content'])) +		{ +			if ($this->db->sql_layer == 'mysqli' || version_compare($this->db->sql_server_info(true), '4.1.3', '>=')) +			{ +				$alter[] = 'MODIFY post_text mediumtext COLLATE utf8_unicode_ci NOT NULL'; +			} +			else +			{ +				$alter[] = 'MODIFY post_text mediumtext NOT NULL'; +			} + +			$alter[] = 'ADD FULLTEXT post_content (post_text, post_subject)'; +		} + +		if (sizeof($alter)) +		{ +			
$this->db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ' . implode(', ', $alter)); +		} + +		$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE); + +		return false; +	} + +	/** +	* Drop fulltext index +	* +	* @return string|bool error string is returned incase of errors otherwise false +	*/ +	public function delete_index($acp_module, $u_action) +	{ +		// Make sure we can actually use MySQL with fulltext indexes +		if ($error = $this->init()) +		{ +			return $error; +		} + +		if (empty($this->stats)) +		{ +			$this->get_stats(); +		} + +		$alter = array(); + +		if (isset($this->stats['post_subject'])) +		{ +			$alter[] = 'DROP INDEX post_subject'; +		} + +		if (isset($this->stats['post_content'])) +		{ +			$alter[] = 'DROP INDEX post_content'; +		} + +		if (sizeof($alter)) +		{ +			$this->db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ' . implode(', ', $alter)); +		} + +		$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE); + +		return false; +	} + +	/** +	* Returns true if both FULLTEXT indexes exist +	*/ +	public function index_created() +	{ +		if (empty($this->stats)) +		{ +			$this->get_stats(); +		} + +		return isset($this->stats['post_subject']) && isset($this->stats['post_content']); +	} + +	/** +	* Returns an associative array containing information about the indexes +	*/ +	public function index_stats() +	{ +		if (empty($this->stats)) +		{ +			$this->get_stats(); +		} + +		return array( +			$this->user->lang['FULLTEXT_MYSQL_TOTAL_POSTS']			=> ($this->index_created()) ? $this->stats['total_posts'] : 0, +		); +	} + +	/** +	 * Computes the stats and store them in the $this->stats associative array +	 */ +	protected function get_stats() +	{ +		if (strpos($this->db->sql_layer, 'mysql') === false) +		{ +			$this->stats = array(); +			return; +		} + +		$sql = 'SHOW INDEX +			FROM ' . 
POSTS_TABLE; +		$result = $this->db->sql_query($sql); + +		while ($row = $this->db->sql_fetchrow($result)) +		{ +			// deal with older MySQL versions which didn't use Index_type +			$index_type = (isset($row['Index_type'])) ? $row['Index_type'] : $row['Comment']; + +			if ($index_type == 'FULLTEXT') +			{ +				if ($row['Key_name'] == 'post_subject') +				{ +					$this->stats['post_subject'] = $row; +				} +				else if ($row['Key_name'] == 'post_content') +				{ +					$this->stats['post_content'] = $row; +				} +			} +		} +		$this->db->sql_freeresult($result); + +		$this->stats['total_posts'] = empty($this->stats) ? 0 : $this->db->get_estimated_row_count(POSTS_TABLE); +	} + +	/** +	* Display a note, that UTF-8 support is not available with certain versions of PHP +	* +	* @return associative array containing template and config variables +	*/ +	public function acp() +	{ +		$tpl = ' +		<dl> +			<dt><label>' . $this->user->lang['MIN_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_MYSQL_MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt> +			<dd>' . $this->config['fulltext_mysql_min_word_len'] . '</dd> +		</dl> +		<dl> +			<dt><label>' . $this->user->lang['MAX_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_MYSQL_MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt> +			<dd>' . $this->config['fulltext_mysql_max_word_len'] . 
'</dd> +		</dl> +		'; + +		// These are fields required in the config table +		return array( +			'tpl'		=> $tpl, +			'config'	=> array() +		); +	} +} diff --git a/phpBB/phpbb/search/fulltext_native.php b/phpBB/phpbb/search/fulltext_native.php new file mode 100644 index 0000000000..60180f1728 --- /dev/null +++ b/phpBB/phpbb/search/fulltext_native.php @@ -0,0 +1,1807 @@ +<?php +/** +* +* @package search +* @copyright (c) 2005 phpBB Group +* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2 +* +*/ + +namespace phpbb\search; + +/** +* fulltext_native +* phpBB's own db driven fulltext search, version 2 +* @package search +*/ +class fulltext_native extends \phpbb\search\base +{ +	/** +	 * Associative array holding index stats +	 * @var array +	 */ +	protected $stats = array(); + +	/** +	 * Associative array stores the min and max word length to be searched +	 * @var array +	 */ +	protected $word_length = array(); + +	/** +	 * Contains tidied search query. +	 * Operators are prefixed in search query and common words excluded +	 * @var string +	 */ +	protected $search_query; + +	/** +	 * Contains common words. 
+	 * Common words are words with length less/more than min/max length +	 * @var array +	 */ +	protected $common_words = array(); + +	/** +	 * Post ids of posts containing words that are to be included +	 * @var array +	 */ +	protected $must_contain_ids = array(); + +	/** +	 * Post ids of posts containing words that should not be included +	 * @var array +	 */ +	protected $must_not_contain_ids = array(); + +	/** +	 * Post ids of posts containing atleast one word that needs to be excluded +	 * @var array +	 */ +	protected $must_exclude_one_ids = array(); + +	/** +	 * Relative path to board root +	 * @var string +	 */ +	protected $phpbb_root_path; + +	/** +	 * PHP Extension +	 * @var string +	 */ +	protected $php_ext; + +	/** +	 * Config object +	 * @var \phpbb\config\config +	 */ +	protected $config; + +	/** +	 * Database connection +	 * @var \phpbb\db\driver\driver +	 */ +	protected $db; + +	/** +	 * User object +	 * @var \phpbb\user +	 */ +	protected $user; + +	/** +	* Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded +	* +	* @param	boolean|string	&$error	is passed by reference and should either be set to false on success or an error message on failure +	*/ +	public function __construct(&$error, $phpbb_root_path, $phpEx, $auth, $config, $db, $user) +	{ +		$this->phpbb_root_path = $phpbb_root_path; +		$this->php_ext = $phpEx; +		$this->config = $config; +		$this->db = $db; +		$this->user = $user; + +		$this->word_length = array('min' => $this->config['fulltext_native_min_chars'], 'max' => $this->config['fulltext_native_max_chars']); + +		/** +		* Load the UTF tools +		*/ +		if (!class_exists('utf_normalizer')) +		{ +			include($this->phpbb_root_path . 'includes/utf/utf_normalizer.' . $this->php_ext); +		} +		if (!function_exists('utf8_decode_ncr')) +		{ +			include($this->phpbb_root_path . 'includes/utf/utf_tools.' . 
$this->php_ext); +		} + +		$error = false; +	} + +	/** +	* Returns the name of this search backend to be displayed to administrators +	* +	* @return string Name +	*/ +	public function get_name() +	{ +		return 'phpBB Native Fulltext'; +	} + +	/** +	 * Returns the search_query +	 * +	 * @return string search query +	 */ +	public function get_search_query() +	{ +		return $this->search_query; +	} + +	/** +	 * Returns the common_words array +	 * +	 * @return array common words that are ignored by search backend +	 */ +	public function get_common_words() +	{ +		return $this->common_words; +	} + +	/** +	 * Returns the word_length array +	 * +	 * @return array min and max word length for searching +	 */ +	public function get_word_length() +	{ +		return $this->word_length; +	} + +	/** +	* This function fills $this->search_query with the cleaned user search query +	* +	* If $terms is 'any' then the words will be extracted from the search query +	* and combined with | inside brackets. They will afterwards be treated like +	* an standard search query. +	* +	* Then it analyses the query and fills the internal arrays $must_not_contain_ids, +	* $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search() +	* +	* @param	string	$keywords	contains the search query string as entered by the user +	* @param	string	$terms		is either 'all' (use search query as entered, default words to 'must be contained in post') +	* 	or 'any' (find all posts containing at least one of the given words) +	* @return	boolean				false if no valid keywords were found and otherwise true +	*/ +	public function split_keywords($keywords, $terms) +	{ +		$tokens = '+-|()*'; + +		$keywords = trim($this->cleanup($keywords, $tokens)); + +		// allow word|word|word without brackets +		if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false)) +		{ +			$keywords = '(' . $keywords . 
')'; +		} + +		$open_bracket = $space = false; +		for ($i = 0, $n = strlen($keywords); $i < $n; $i++) +		{ +			if ($open_bracket !== false) +			{ +				switch ($keywords[$i]) +				{ +					case ')': +						if ($open_bracket + 1 == $i) +						{ +							$keywords[$i - 1] = '|'; +							$keywords[$i] = '|'; +						} +						$open_bracket = false; +					break; +					case '(': +						$keywords[$i] = '|'; +					break; +					case '+': +					case '-': +					case ' ': +						$keywords[$i] = '|'; +					break; +					case '*': +						if ($i === 0 || ($keywords[$i - 1] !== '*' && strcspn($keywords[$i - 1], $tokens) === 0)) +						{ +							if ($i === $n - 1 || ($keywords[$i + 1] !== '*' && strcspn($keywords[$i + 1], $tokens) === 0)) +							{ +								$keywords = substr($keywords, 0, $i) . substr($keywords, $i + 1); +							} +						} +					break; +				} +			} +			else +			{ +				switch ($keywords[$i]) +				{ +					case ')': +						$keywords[$i] = ' '; +					break; +					case '(': +						$open_bracket = $i; +						$space = false; +					break; +					case '|': +						$keywords[$i] = ' '; +					break; +					case '-': +					case '+': +						$space = $keywords[$i]; +					break; +					case ' ': +						if ($space !== false) +						{ +							$keywords[$i] = $space; +						} +					break; +					default: +						$space = false; +				} +			} +		} + +		if ($open_bracket) +		{ +			$keywords .= ')'; +		} + +		$match = array( +			'#  +#', +			'#\|\|+#', +			'#(\+|\-)(?:\+|\-)+#', +			'#\(\|#', +			'#\|\)#', +		); +		$replace = array( +			' ', +			'|', +			'$1', +			'(', +			')', +		); + +		$keywords = preg_replace($match, $replace, $keywords); +		$num_keywords = sizeof(explode(' ', $keywords)); + +		// We limit the number of allowed keywords to minimize load on the database +		if ($this->config['max_num_search_keywords'] && $num_keywords > $this->config['max_num_search_keywords']) +		{ +			trigger_error($this->user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', (int) 
$this->config['max_num_search_keywords'], $num_keywords)); +		} + +		// $keywords input format: each word separated by a space, words in a bracket are not separated + +		// the user wants to search for any word, convert the search query +		if ($terms == 'any') +		{ +			$words = array(); + +			preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words); +			if (sizeof($words[1])) +			{ +				$keywords = '(' . implode('|', $words[1]) . ')'; +			} +		} + +		// set the search_query which is shown to the user +		$this->search_query = $keywords; + +		$exact_words = array(); +		preg_match_all('#([^\\s+\\-|*()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words); +		$exact_words = $exact_words[1]; + +		$common_ids = $words = array(); + +		if (sizeof($exact_words)) +		{ +			$sql = 'SELECT word_id, word_text, word_common +				FROM ' . SEARCH_WORDLIST_TABLE . ' +				WHERE ' . $this->db->sql_in_set('word_text', $exact_words) . ' +				ORDER BY word_count ASC'; +			$result = $this->db->sql_query($sql); + +			// store an array of words and ids, remove common words +			while ($row = $this->db->sql_fetchrow($result)) +			{ +				if ($row['word_common']) +				{ +					$this->common_words[] = $row['word_text']; +					$common_ids[$row['word_text']] = (int) $row['word_id']; +					continue; +				} + +				$words[$row['word_text']] = (int) $row['word_id']; +			} +			$this->db->sql_freeresult($result); +		} + +		// Handle +, - without preceeding whitespace character +		$match		= array('#(\S)\+#', '#(\S)-#'); +		$replace	= array('$1 +', '$1 +'); + +		$keywords = preg_replace($match, $replace, $keywords); + +		// now analyse the search query, first split it using the spaces +		$query = explode(' ', $keywords); + +		$this->must_contain_ids = array(); +		$this->must_not_contain_ids = array(); +		$this->must_exclude_one_ids = array(); + +		$mode = ''; +		$ignore_no_id = true; + +		foreach ($query as $word) +		{ +			if (empty($word)) +			{ +				continue; +			} + +			// words which 
should not be included +			if ($word[0] == '-') +			{ +				$word = substr($word, 1); + +				// a group of which at least one may not be in the resulting posts +				if ($word[0] == '(') +				{ +					$word = array_unique(explode('|', substr($word, 1, -1))); +					$mode = 'must_exclude_one'; +				} +				// one word which should not be in the resulting posts +				else +				{ +					$mode = 'must_not_contain'; +				} +				$ignore_no_id = true; +			} +			// words which have to be included +			else +			{ +				// no prefix is the same as a +prefix +				if ($word[0] == '+') +				{ +					$word = substr($word, 1); +				} + +				// a group of words of which at least one word should be in every resulting post +				if ($word[0] == '(') +				{ +					$word = array_unique(explode('|', substr($word, 1, -1))); +				} +				$ignore_no_id = false; +				$mode = 'must_contain'; +			} + +			if (empty($word)) +			{ +				continue; +			} + +			// if this is an array of words then retrieve an id for each +			if (is_array($word)) +			{ +				$non_common_words = array(); +				$id_words = array(); +				foreach ($word as $i => $word_part) +				{ +					if (strpos($word_part, '*') !== false) +					{ +						$id_words[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word_part)) . '\''; +						$non_common_words[] = $word_part; +					} +					else if (isset($words[$word_part])) +					{ +						$id_words[] = $words[$word_part]; +						$non_common_words[] = $word_part; +					} +					else +					{ +						$len = utf8_strlen($word_part); +						if ($len < $this->word_length['min'] || $len > $this->word_length['max']) +						{ +							$this->common_words[] = $word_part; +						} +					} +				} +				if (sizeof($id_words)) +				{ +					sort($id_words); +					if (sizeof($id_words) > 1) +					{ +						$this->{$mode . '_ids'}[] = $id_words; +					} +					else +					{ +						$mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode; +						$this->{$mode . 
'_ids'}[] = $id_words[0];
+					}
+				}
+				// throw an error if we shall not ignore nonexistent words
+				else if (!$ignore_no_id && sizeof($non_common_words))
+				{
+					// language strings are read from the injected user object; this method has no local or global $user
+					trigger_error(sprintf($this->user->lang['WORDS_IN_NO_POST'], implode($this->user->lang['COMMA_SEPARATOR'], $non_common_words)));
+				}
+				unset($non_common_words);
+			}
+			// else we only need one id
+			else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
+			{
+				if ($wildcard)
+				{
+					// the length limits apply to the literal part of the word, so the wildcards are stripped before measuring
+					$len = utf8_strlen(str_replace('*', '', $word));
+					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
+					{
+						// stored as a quoted LIKE pattern (string) instead of a numeric word id
+						$this->{$mode . '_ids'}[] = '\'' . $this->db->sql_escape(str_replace('*', '%', $word)) . '\'';
+					}
+					else
+					{
+						$this->common_words[] = $word;
+					}
+				}
+				else
+				{
+					$this->{$mode . '_ids'}[] = $words[$word];
+				}
+			}
+			else
+			{
+				// word is not in the word list as a common word: report it as common only if its length is out of range
+				if (!isset($common_ids[$word]))
+				{
+					$len = utf8_strlen($word);
+					if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
+					{
+						$this->common_words[] = $word;
+					}
+				}
+			}
+		}
+
+		// Return true only if the exact words outnumber the collected common words
+		if (sizeof($exact_words) - sizeof($this->common_words) > 0)
+		{
+			return true;
+		}
+		return false;
+	}
+
+	/**
+	* Performs a search on keywords depending on display specific params. 
You have to run split_keywords() first +	* +	* @param	string		$type				contains either posts or topics depending on what should be searched for +	* @param	string		$fields				contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched) +	* @param	string		$terms				is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words) +	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query +	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting +	* @param	string		$sort_dir			is either a or d representing ASC and DESC +	* @param	string		$sort_days			specifies the maximum amount of days a post may be old +	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched +	* @param	string		$post_visibility	specifies which types of posts the user can view in which forums +	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched +	* @param	array		$author_ary			an array of author ids if the author should be ignored during the search the array is empty +	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match +	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered +	* @param	int			$start				indicates the first index of the page +	* @param	int			$per_page			number of ids each page is supposed to contain +	* @return	boolean|int						total number of results +	*/ +	public function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, 
$author_ary, $author_name, &$id_ary, &$start, $per_page) +	{ +		// No keywords? No posts. +		if (empty($this->search_query)) +		{ +			return false; +		} + +		// we can't search for negatives only +		if (empty($this->must_contain_ids)) +		{ +			return false; +		} + +		$must_contain_ids = $this->must_contain_ids; +		$must_not_contain_ids = $this->must_not_contain_ids; +		$must_exclude_one_ids = $this->must_exclude_one_ids; + +		sort($must_contain_ids); +		sort($must_not_contain_ids); +		sort($must_exclude_one_ids); + +		// generate a search_key from all the options to identify the results +		$search_key = md5(implode('#', array( +			serialize($must_contain_ids), +			serialize($must_not_contain_ids), +			serialize($must_exclude_one_ids), +			$type, +			$fields, +			$terms, +			$sort_days, +			$sort_key, +			$topic_id, +			implode(',', $ex_fid_ary), +			$post_visibility, +			implode(',', $author_ary), +			$author_name, +		))); + +		// try reading the results from cache +		$total_results = 0; +		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE) +		{ +			return $total_results; +		} + +		$id_ary = array(); + +		$sql_where = array(); +		$group_by = false; +		$m_num = 0; +		$w_num = 0; + +		$sql_array = array( +			'SELECT'	=> ($type == 'posts') ? 
'p.post_id' : 'p.topic_id', +			'FROM'		=> array( +				SEARCH_WORDMATCH_TABLE	=> array(), +				SEARCH_WORDLIST_TABLE	=> array(), +			), +			'LEFT_JOIN' => array(array( +				'FROM'	=> array(POSTS_TABLE => 'p'), +				'ON'	=> 'm0.post_id = p.post_id', +			)), +		); + +		$title_match = ''; +		$left_join_topics = false; +		$group_by = true; +		// Build some display specific sql strings +		switch ($fields) +		{ +			case 'titleonly': +				$title_match = 'title_match = 1'; +				$group_by = false; +			// no break +			case 'firstpost': +				$left_join_topics = true; +				$sql_where[] = 'p.post_id = t.topic_first_post_id'; +			break; + +			case 'msgonly': +				$title_match = 'title_match = 0'; +				$group_by = false; +			break; +		} + +		if ($type == 'topics') +		{ +			$left_join_topics = true; +			$group_by = true; +		} + +		/** +		* @todo Add a query optimizer (handle stuff like "+(4|3) +4") +		*/ + +		foreach ($this->must_contain_ids as $subquery) +		{ +			if (is_array($subquery)) +			{ +				$group_by = true; + +				$word_id_sql = array(); +				$word_ids = array(); +				foreach ($subquery as $id) +				{ +					if (is_string($id)) +					{ +						$sql_array['LEFT_JOIN'][] = array( +							'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num), +							'ON'	=> "w$w_num.word_text LIKE $id" +						); +						$word_ids[] = "w$w_num.word_id"; + +						$w_num++; +					} +					else +					{ +						$word_ids[] = $id; +					} +				} + +				$sql_where[] = $this->db->sql_in_set("m$m_num.word_id", $word_ids); + +				unset($word_id_sql); +				unset($word_ids); +			} +			else if (is_string($subquery)) +			{ +				$sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num; + +				$sql_where[] = "w$w_num.word_text LIKE $subquery"; +				$sql_where[] = "m$m_num.word_id = w$w_num.word_id"; + +				$group_by = true; +				$w_num++; +			} +			else +			{ +				$sql_where[] = "m$m_num.word_id = $subquery"; +			} + +			$sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . 
$m_num; + +			if ($title_match) +			{ +				$sql_where[] = "m$m_num.$title_match"; +			} + +			if ($m_num != 0) +			{ +				$sql_where[] = "m$m_num.post_id = m0.post_id"; +			} +			$m_num++; +		} + +		foreach ($this->must_not_contain_ids as $key => $subquery) +		{ +			if (is_string($subquery)) +			{ +				$sql_array['LEFT_JOIN'][] = array( +					'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num), +					'ON'	=> "w$w_num.word_text LIKE $subquery" +				); + +				$this->must_not_contain_ids[$key] = "w$w_num.word_id"; + +				$group_by = true; +				$w_num++; +			} +		} + +		if (sizeof($this->must_not_contain_ids)) +		{ +			$sql_array['LEFT_JOIN'][] = array( +				'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num), +				'ON'	=> $this->db->sql_in_set("m$m_num.word_id", $this->must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id" +			); + +			$sql_where[] = "m$m_num.word_id IS NULL"; +			$m_num++; +		} + +		foreach ($this->must_exclude_one_ids as $ids) +		{ +			$is_null_joins = array(); +			foreach ($ids as $id) +			{ +				if (is_string($id)) +				{ +					$sql_array['LEFT_JOIN'][] = array( +						'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num), +						'ON'	=> "w$w_num.word_text LIKE $id" +					); +					$id = "w$w_num.word_id"; + +					$group_by = true; +					$w_num++; +				} + +				$sql_array['LEFT_JOIN'][] = array( +					'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num), +					'ON'	=> "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '') +				); +				$is_null_joins[] = "m$m_num.word_id IS NULL"; + +				$m_num++; +			} +			$sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')'; +		} + +		$sql_where[] = $post_visibility; + +		if ($topic_id) +		{ +			$sql_where[] = 'p.topic_id = ' . 
(int) $topic_id; // cast keeps the concatenated condition numeric, as author_search() already does
+		}
+
+		if (sizeof($author_ary))
+		{
+			if ($author_name)
+			{
+				// first one matches post of registered users, second one guests and deleted users
+				$sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
+			}
+			else
+			{
+				$sql_author = $this->db->sql_in_set('p.poster_id', $author_ary);
+			}
+			$sql_where[] = $sql_author;
+		}
+
+		if (sizeof($ex_fid_ary))
+		{
+			// third argument negates the set: excluded forums become a NOT IN condition
+			$sql_where[] = $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true);
+		}
+
+		if ($sort_days)
+		{
+			// 86400 seconds per day
+			$sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
+		}
+
+		$sql_array['WHERE'] = implode(' AND ', $sql_where);
+
+		$is_mysql = false;
+		// if the total result count is not cached yet, retrieve it from the db
+		if (!$total_results)
+		{
+			$sql = '';
+			$sql_array_count = $sql_array;
+
+			if ($left_join_topics)
+			{
+				$sql_array_count['LEFT_JOIN'][] = array(
+					'FROM'	=> array(TOPICS_TABLE => 't'),
+					'ON'	=> 'p.topic_id = t.topic_id'
+				);
+			}
+
+			switch ($this->db->sql_layer)
+			{
+				case 'mysql4':
+				case 'mysqli':
+
+					// 3.x does not support SQL_CALC_FOUND_ROWS
+					// $sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
+					// counting is deferred: the flag is checked again after the page query has run
+					$is_mysql = true;
+
+				break;
+
+				case 'sqlite':
+					$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
+					$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
+							FROM (' . $this->db->sql_build_query('SELECT', $sql_array_count) . ')';
+
+				// no break
+
+				default:
+					$sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
+					$sql = (!$sql) ? 
$this->db->sql_build_query('SELECT', $sql_array_count) : $sql; + +					$result = $this->db->sql_query($sql); +					$total_results = (int) $this->db->sql_fetchfield('total_results'); +					$this->db->sql_freeresult($result); + +					if (!$total_results) +					{ +						return false; +					} +				break; +			} + +			unset($sql_array_count, $sql); +		} + +		// Build sql strings for sorting +		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC'); + +		switch ($sql_sort[0]) +		{ +			case 'u': +				$sql_array['FROM'][USERS_TABLE] = 'u'; +				$sql_where[] = 'u.user_id = p.poster_id '; +			break; + +			case 't': +				$left_join_topics = true; +			break; + +			case 'f': +				$sql_array['FROM'][FORUMS_TABLE] = 'f'; +				$sql_where[] = 'f.forum_id = p.forum_id'; +			break; +		} + +		if ($left_join_topics) +		{ +			$sql_array['LEFT_JOIN'][] = array( +				'FROM'	=> array(TOPICS_TABLE => 't'), +				'ON'	=> 'p.topic_id = t.topic_id' +			); +		} + +		$sql_array['WHERE'] = implode(' AND ', $sql_where); +		$sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : ''; +		$sql_array['ORDER_BY'] = $sql_sort; + +		unset($sql_where, $sql_sort, $group_by); + +		$sql = $this->db->sql_build_query('SELECT', $sql_array); +		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start); + +		while ($row = $this->db->sql_fetchrow($result)) +		{ +			$id_ary[] = (int) $row[(($type == 'posts') ? 'post_id' : 'topic_id')]; +		} +		$this->db->sql_freeresult($result); + +		// if we use mysql and the total result count is not cached yet, retrieve it from the db +		if (!$total_results && $is_mysql) +		{ +			// Count rows for the executed queries. 
Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it
+			$sql_array_copy = $sql_array;
+			$sql_array_copy['SELECT'] = 'SQL_CALC_FOUND_ROWS p.post_id ';
+
+			$sql_calc = $this->db->sql_build_query('SELECT', $sql_array_copy);
+			unset($sql_array_copy);
+
+			// keep the handle of the counting query so that it, and not the already released page query, is freed
+			$result = $this->db->sql_query($sql_calc);
+			$this->db->sql_freeresult($result);
+
+			$sql_count = 'SELECT FOUND_ROWS() as total_results';
+			$result = $this->db->sql_query($sql_count);
+			$total_results = (int) $this->db->sql_fetchfield('total_results');
+			$this->db->sql_freeresult($result);
+
+			if (!$total_results)
+			{
+				return false;
+			}
+		}
+
+		if ($start >= $total_results)
+		{
+			// requested offset lies past the last result: fall back to the start of the last page and query again
+			$start = floor(($total_results - 1) / $per_page) * $per_page;
+
+			$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);
+
+			while ($row = $this->db->sql_fetchrow($result))
+			{
+				$id_ary[] = (int) $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
+			}
+			$this->db->sql_freeresult($result);
+
+		}
+
+		// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
+		$this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
+		$id_ary = array_slice($id_ary, 0, (int) $per_page);
+
+		return $total_results;
+	}
+
+	/**
+	* Performs a search on an author's posts without caring about message contents. 
Depends on display specific params +	* +	* @param	string		$type				contains either posts or topics depending on what should be searched for +	* @param	boolean		$firstpost_only		if true, only topic starting posts will be considered +	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query +	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting +	* @param	string		$sort_dir			is either a or d representing ASC and DESC +	* @param	string		$sort_days			specifies the maximum amount of days a post may be old +	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched +	* @param	string		$post_visibility	specifies which types of posts the user can view in which forums +	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched +	* @param	array		$author_ary			an array of author ids +	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match +	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered +	* @param	int			$start				indicates the first index of the page +	* @param	int			$per_page			number of ids each page is supposed to contain +	* @return	boolean|int						total number of results +	*/ +	public function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page) +	{ +		// No author? No posts +		if (!sizeof($author_ary)) +		{ +			return 0; +		} + +		// generate a search_key from all the options to identify the results +		$search_key = md5(implode('#', array( +			'', +			$type, +			($firstpost_only) ? 
'firstpost' : '', +			'', +			'', +			$sort_days, +			$sort_key, +			$topic_id, +			implode(',', $ex_fid_ary), +			$post_visibility, +			implode(',', $author_ary), +			$author_name, +		))); + +		// try reading the results from cache +		$total_results = 0; +		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE) +		{ +			return $total_results; +		} + +		$id_ary = array(); + +		// Create some display specific sql strings +		if ($author_name) +		{ +			// first one matches post of registered users, second one guests and deleted users +			$sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')'; +		} +		else +		{ +			$sql_author = $this->db->sql_in_set('p.poster_id', $author_ary); +		} +		$sql_fora		= (sizeof($ex_fid_ary)) ? ' AND ' . $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true) : ''; +		$sql_time		= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : ''; +		$sql_topic_id	= ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : ''; +		$sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : ''; +		$post_visibility = ($post_visibility) ? ' AND ' . $post_visibility : ''; + +		// Build sql strings for sorting +		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC'); +		$sql_sort_table = $sql_sort_join = ''; +		switch ($sql_sort[0]) +		{ +			case 'u': +				$sql_sort_table	= USERS_TABLE . ' u, '; +				$sql_sort_join	= ' AND u.user_id = p.poster_id '; +			break; + +			case 't': +				$sql_sort_table	= ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : ''; +				$sql_sort_join	= ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : ''; +			break; + +			case 'f': +				$sql_sort_table	= FORUMS_TABLE . 
' f, '; +				$sql_sort_join	= ' AND f.forum_id = p.forum_id '; +			break; +		} + +		$select = ($type == 'posts') ? 'p.post_id' : 't.topic_id'; +		$is_mysql = false; + +		// If the cache was completely empty count the results +		if (!$total_results) +		{ +			switch ($this->db->sql_layer) +			{ +				case 'mysql4': +				case 'mysqli': +//					$select = 'SQL_CALC_FOUND_ROWS ' . $select; +					$is_mysql = true; +				break; + +				default: +					if ($type == 'posts') +					{ +						$sql = 'SELECT COUNT(p.post_id) as total_results +							FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . " +							WHERE $sql_author +								$sql_topic_id +								$sql_firstpost +								$post_visibility +								$sql_fora +								$sql_time"; +					} +					else +					{ +						if ($this->db->sql_layer == 'sqlite') +						{ +							$sql = 'SELECT COUNT(topic_id) as total_results +								FROM (SELECT DISTINCT t.topic_id'; +						} +						else +						{ +							$sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results'; +						} + +						$sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p +							WHERE $sql_author +								$sql_topic_id +								$sql_firstpost +								$post_visibility +								$sql_fora +								AND t.topic_id = p.topic_id +								$sql_time" . (($this->db->sql_layer == 'sqlite') ? ')' : ''); +					} +					$result = $this->db->sql_query($sql); + +					$total_results = (int) $this->db->sql_fetchfield('total_results'); +					$this->db->sql_freeresult($result); + +					if (!$total_results) +					{ +						return false; +					} +				break; +			} +		} + +		// Build the query for really selecting the post_ids +		if ($type == 'posts') +		{ +			$sql = "SELECT $select +				FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . 
" +				WHERE $sql_author +					$sql_topic_id +					$sql_firstpost +					$post_visibility +					$sql_fora +					$sql_sort_join +					$sql_time +				ORDER BY $sql_sort"; +			$field = 'post_id'; +		} +		else +		{ +			$sql = "SELECT $select +				FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p +				WHERE $sql_author +					$sql_topic_id +					$sql_firstpost +					$post_visibility +					$sql_fora +					AND t.topic_id = p.topic_id +					$sql_sort_join +					$sql_time +				GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . ' +				ORDER BY ' . $sql_sort; +			$field = 'topic_id'; +		} + +		// Only read one block of posts from the db and then cache it +		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start); + +		while ($row = $this->db->sql_fetchrow($result)) +		{ +			$id_ary[] = (int) $row[$field]; +		} +		$this->db->sql_freeresult($result); + +		if (!$total_results && $is_mysql) +		{ +			// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it. +			$sql_calc = str_replace('SELECT ' . 
$select, 'SELECT DISTINCT SQL_CALC_FOUND_ROWS p.post_id', $sql); + +			$this->db->sql_query($sql_calc); +			$this->db->sql_freeresult($result); + +			$sql_count = 'SELECT FOUND_ROWS() as total_results'; +			$result = $this->db->sql_query($sql_count); +			$total_results = (int) $this->db->sql_fetchfield('total_results'); +			$this->db->sql_freeresult($result); + +			if (!$total_results) +			{ +				return false; +			} +		} + +		if ($start >= $total_results) +		{ +			$start = floor(($total_results - 1) / $per_page) * $per_page; + +			$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start); + +			while ($row = $this->db->sql_fetchrow($result)) +			{ +				$id_ary[] = (int) $row[$field]; +			} +			$this->db->sql_freeresult($result); +		} + +		if (sizeof($id_ary)) +		{ +			$this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir); +			$id_ary = array_slice($id_ary, 0, $per_page); + +			return $total_results; +		} +		return false; +	} + +	/** +	* Split a text into words of a given length +	* +	* The text is converted to UTF-8, cleaned up, and split. Then, words that +	* conform to the defined length range are returned in an array. 
+	* +	* NOTE: duplicates are NOT removed from the return array +	* +	* @param	string	$text	Text to split, encoded in UTF-8 +	* @return	array			Array of UTF-8 words +	*/ +	public function split_message($text) +	{ +		$match = $words = array(); + +		/** +		* Taken from the original code +		*/ +		// Do not index code +		$match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is'; +		// BBcode +		$match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#'; + +		$min = $this->word_length['min']; +		$max = $this->word_length['max']; + +		$isset_min = $min - 1; + +		/** +		* Clean up the string, remove HTML tags, remove BBCodes +		*/ +		$word = strtok($this->cleanup(preg_replace($match, ' ', strip_tags($text)), -1), ' '); + +		while (strlen($word)) +		{ +			if (strlen($word) > 255 || strlen($word) <= $isset_min) +			{ +				/** +				* Words longer than 255 bytes are ignored. This will have to be +				* changed whenever we change the length of search_wordlist.word_text +				* +				* Words shorter than $isset_min bytes are ignored, too +				*/ +				$word = strtok(' '); +				continue; +			} + +			$len = utf8_strlen($word); + +			/** +			* Test whether the word is too short to be indexed. +			* +			* Note that this limit does NOT apply to CJK and Hangul +			*/ +			if ($len < $min) +			{ +				/** +				* Note: this could be optimized. 
If the codepoint is lower than Hangul's range +				* we know that it will also be lower than CJK ranges +				*/ +				if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0) +					&& (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0) +					&& (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0)) +				{ +					$word = strtok(' '); +					continue; +				} +			} + +			$words[] = $word; +			$word = strtok(' '); +		} + +		return $words; +	} + +	/** +	* Updates wordlist and wordmatch tables when a message is posted or changed +	* +	* @param	string	$mode		Contains the post mode: edit, post, reply, quote +	* @param	int		$post_id	The id of the post which is modified/created +	* @param	string	&$message	New or updated post content +	* @param	string	&$subject	New or updated post subject +	* @param	int		$poster_id	Post author's user id +	* @param	int		$forum_id	The id of the forum in which the post is located +	*/ +	public function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id) +	{ +		if (!$this->config['fulltext_native_load_upd']) +		{ +			/** +			* The search indexer is disabled, return +			*/ +			return; +		} + +		// Split old and new post/subject to obtain array of 'words' +		$split_text = $this->split_message($message); +		$split_title = $this->split_message($subject); + +		$cur_words = array('post' => array(), 'title' => array()); + +		$words = array(); +		if ($mode == 'edit') +		{ +			$words['add']['post'] = array(); +			$words['add']['title'] = array(); +			$words['del']['post'] = array(); +			$words['del']['title'] = array(); + +			$sql = 'SELECT w.word_id, w.word_text, m.title_match +				FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . 
" m +				WHERE m.post_id = $post_id +					AND w.word_id = m.word_id"; +			$result = $this->db->sql_query($sql); + +			while ($row = $this->db->sql_fetchrow($result)) +			{ +				$which = ($row['title_match']) ? 'title' : 'post'; +				$cur_words[$which][$row['word_text']] = $row['word_id']; +			} +			$this->db->sql_freeresult($result); + +			$words['add']['post'] = array_diff($split_text, array_keys($cur_words['post'])); +			$words['add']['title'] = array_diff($split_title, array_keys($cur_words['title'])); +			$words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text); +			$words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title); +		} +		else +		{ +			$words['add']['post'] = $split_text; +			$words['add']['title'] = $split_title; +			$words['del']['post'] = array(); +			$words['del']['title'] = array(); +		} +		unset($split_text); +		unset($split_title); + +		// Get unique words from the above arrays +		$unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title'])); + +		// We now have unique arrays of all words to be added and removed and +		// individual arrays of added and removed words for text and title. What +		// we need to do now is add the new words (if they don't already exist) +		// and then add (or remove) matches between the words and this post +		if (sizeof($unique_add_words)) +		{ +			$sql = 'SELECT word_id, word_text +				FROM ' . SEARCH_WORDLIST_TABLE . ' +				WHERE ' . 
$this->db->sql_in_set('word_text', $unique_add_words); +			$result = $this->db->sql_query($sql); + +			$word_ids = array(); +			while ($row = $this->db->sql_fetchrow($result)) +			{ +				$word_ids[$row['word_text']] = $row['word_id']; +			} +			$this->db->sql_freeresult($result); +			$new_words = array_diff($unique_add_words, array_keys($word_ids)); + +			$this->db->sql_transaction('begin'); +			if (sizeof($new_words)) +			{ +				$sql_ary = array(); + +				foreach ($new_words as $word) +				{ +					$sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0); +				} +				$this->db->sql_return_on_error(true); +				$this->db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary); +				$this->db->sql_return_on_error(false); +			} +			unset($new_words, $sql_ary); +		} +		else +		{ +			$this->db->sql_transaction('begin'); +		} + +		// now update the search match table, remove links to removed words and add links to new words +		foreach ($words['del'] as $word_in => $word_ary) +		{ +			$title_match = ($word_in == 'title') ? 1 : 0; + +			if (sizeof($word_ary)) +			{ +				$sql_in = array(); +				foreach ($word_ary as $word) +				{ +					$sql_in[] = $cur_words[$word_in][$word]; +				} + +				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . ' +					WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . ' +						AND post_id = ' . intval($post_id) . " +						AND title_match = $title_match"; +				$this->db->sql_query($sql); + +				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . ' +					SET word_count = word_count - 1 +					WHERE ' . $this->db->sql_in_set('word_id', $sql_in) . ' +						AND word_count > 0'; +				$this->db->sql_query($sql); + +				unset($sql_in); +			} +		} + +		$this->db->sql_return_on_error(true); +		foreach ($words['add'] as $word_in => $word_ary) +		{ +			$title_match = ($word_in == 'title') ? 1 : 0; + +			if (sizeof($word_ary)) +			{ +				$sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match) +					SELECT ' . (int) $post_id . 
', word_id, ' . (int) $title_match . ' +					FROM ' . SEARCH_WORDLIST_TABLE . ' +					WHERE ' . $this->db->sql_in_set('word_text', $word_ary); +				$this->db->sql_query($sql); + +				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . ' +					SET word_count = word_count + 1 +					WHERE ' . $this->db->sql_in_set('word_text', $word_ary); +				$this->db->sql_query($sql); +			} +		} +		$this->db->sql_return_on_error(false); + +		$this->db->sql_transaction('commit'); + +		// destroy cached search results containing any of the words removed or added +		$this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id)); + +		unset($unique_add_words); +		unset($words); +		unset($cur_words); +	} + +	/** +	* Removes entries from the wordmatch table for the specified post_ids +	*/ +	public function index_remove($post_ids, $author_ids, $forum_ids) +	{ +		if (sizeof($post_ids)) +		{ +			$sql = 'SELECT w.word_id, w.word_text, m.title_match +				FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w +				WHERE ' . $this->db->sql_in_set('m.post_id', $post_ids) . ' +					AND w.word_id = m.word_id'; +			$result = $this->db->sql_query($sql); + +			$message_word_ids = $title_word_ids = $word_texts = array(); +			while ($row = $this->db->sql_fetchrow($result)) +			{ +				if ($row['title_match']) +				{ +					$title_word_ids[] = $row['word_id']; +				} +				else +				{ +					$message_word_ids[] = $row['word_id']; +				} +				$word_texts[] = $row['word_text']; +			} +			$this->db->sql_freeresult($result); + +			if (sizeof($title_word_ids)) +			{ +				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . ' +					SET word_count = word_count - 1 +					WHERE ' . $this->db->sql_in_set('word_id', $title_word_ids) . ' +						AND word_count > 0'; +				$this->db->sql_query($sql); +			} + +			if (sizeof($message_word_ids)) +			{ +				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . 
' +					SET word_count = word_count - 1 +					WHERE ' . $this->db->sql_in_set('word_id', $message_word_ids) . ' +						AND word_count > 0'; +				$this->db->sql_query($sql); +			} + +			unset($title_word_ids); +			unset($message_word_ids); + +			$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . ' +				WHERE ' . $this->db->sql_in_set('post_id', $post_ids); +			$this->db->sql_query($sql); +		} + +		$this->destroy_cache(array_unique($word_texts), array_unique($author_ids)); +	} + +	/** +	* Tidy up indexes: Tag 'common words' and remove +	* words no longer referenced in the match table +	*/ +	public function tidy() +	{ +		// Is the fulltext indexer disabled? If yes then we need not +		// carry on ... it's okay ... I know when I'm not wanted boo hoo +		if (!$this->config['fulltext_native_load_upd']) +		{ +			set_config('search_last_gc', time(), true); +			return; +		} + +		$destroy_cache_words = array(); + +		// Remove common words +		if ($this->config['num_posts'] >= 100 && $this->config['fulltext_native_common_thres']) +		{ +			$common_threshold = ((double) $this->config['fulltext_native_common_thres']) / 100.0; +			// First, get the IDs of common words +			$sql = 'SELECT word_id, word_text +				FROM ' . SEARCH_WORDLIST_TABLE . ' +				WHERE word_count > ' . floor($this->config['num_posts'] * $common_threshold) . ' +					OR word_common = 1'; +			$result = $this->db->sql_query($sql); + +			$sql_in = array(); +			while ($row = $this->db->sql_fetchrow($result)) +			{ +				$sql_in[] = $row['word_id']; +				$destroy_cache_words[] = $row['word_text']; +			} +			$this->db->sql_freeresult($result); + +			if (sizeof($sql_in)) +			{ +				// Flag the words +				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . ' +					SET word_common = 1 +					WHERE ' . 
$this->db->sql_in_set('word_id', $sql_in); +				$this->db->sql_query($sql); + +				// by setting search_last_gc to the new time here we make sure that if a user reloads because the +				// following query takes too long, he won't run into it again +				set_config('search_last_gc', time(), true); + +				// Delete the matches +				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . ' +					WHERE ' . $this->db->sql_in_set('word_id', $sql_in); +				$this->db->sql_query($sql); +			} +			unset($sql_in); +		} + +		if (sizeof($destroy_cache_words)) +		{ +			// destroy cached search results containing any of the words that are now common or were removed +			$this->destroy_cache(array_unique($destroy_cache_words)); +		} + +		set_config('search_last_gc', time(), true); +	} + +	/** +	* Deletes all words from the index +	*/ +	public function delete_index($acp_module, $u_action) +	{ +		switch ($this->db->sql_layer) +		{ +			case 'sqlite': +			case 'firebird': +				$this->db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE); +				$this->db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE); +				$this->db->sql_query('DELETE FROM ' . SEARCH_RESULTS_TABLE); +			break; + +			default: +				$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDLIST_TABLE); +				$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDMATCH_TABLE); +				$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE); +			break; +		} +	} + +	/** +	* Returns true if both FULLTEXT indexes exist +	*/ +	public function index_created() +	{ +		if (!sizeof($this->stats)) +		{ +			$this->get_stats(); +		} + +		return ($this->stats['total_words'] && $this->stats['total_matches']) ? 
true : false; +	} + +	/** +	* Returns an associative array containing information about the indexes +	*/ +	public function index_stats() +	{ +		if (!sizeof($this->stats)) +		{ +			$this->get_stats(); +		} + +		return array( +			$this->user->lang['TOTAL_WORDS']		=> $this->stats['total_words'], +			$this->user->lang['TOTAL_MATCHES']	=> $this->stats['total_matches']); +	} + +	protected function get_stats() +	{ +		$this->stats['total_words']		= $this->db->get_estimated_row_count(SEARCH_WORDLIST_TABLE); +		$this->stats['total_matches']	= $this->db->get_estimated_row_count(SEARCH_WORDMATCH_TABLE); +	} + +	/** +	* Clean up a text to remove non-alphanumeric characters +	* +	* This method receives a UTF-8 string, normalizes and validates it, replaces all +	* non-alphanumeric characters with strings then returns the result. +	* +	* Any number of "allowed chars" can be passed as a UTF-8 string in NFC. +	* +	* @param	string	$text			Text to split, in UTF-8 (not normalized or sanitized) +	* @param	string	$allowed_chars	String of special chars to allow +	* @param	string	$encoding		Text encoding +	* @return	string					Cleaned up text, only alphanumeric chars are left +	* +	* @todo \normalizer::cleanup being able to be used? 
+	*/ +	protected function cleanup($text, $allowed_chars = null, $encoding = 'utf-8') +	{ +		static $conv = array(), $conv_loaded = array(); +		$words = $allow = array(); + +		// Convert the text to UTF-8 +		$encoding = strtolower($encoding); +		if ($encoding != 'utf-8') +		{ +			$text = utf8_recode($text, $encoding); +		} + +		$utf_len_mask = array( +			"\xC0"	=>	2, +			"\xD0"	=>	2, +			"\xE0"	=>	3, +			"\xF0"	=>	4 +		); + +		/** +		* Replace HTML entities and NCRs +		*/ +		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES); + +		/** +		* Load the UTF-8 normalizer +		* +		* If we use it more widely, an instance of that class should be held in a +		* a global variable instead +		*/ +		\utf_normalizer::nfc($text); + +		/** +		* The first thing we do is: +		* +		* - convert ASCII-7 letters to lowercase +		* - remove the ASCII-7 non-alpha characters +		* - remove the bytes that should not appear in a valid UTF-8 string: 0xC0, +		*   0xC1 and 0xF5-0xFF +		* +		* @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars +		*/ +		$sb_match	= "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"; +		$sb_replace	= 'istcpamelrdojbnhfgvwuqkyxz                                                                              '; + +		/** +		* This is the list of legal ASCII chars, it is automatically extended +		* with ASCII chars from $allowed_chars +		*/ +		$legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z'; + +		/** +		* Prepare an array containing the extra chars to allow +		*/ +		if (isset($allowed_chars[0])) +		{ +			$pos = 0; +			$len = strlen($allowed_chars); +			do +			{ +				$c = $allowed_chars[$pos]; + +				if ($c < "\x80") +				{ +					/** +					* ASCII char +					*/ +					$sb_pos = 
strpos($sb_match, $c); +					if (is_int($sb_pos)) +					{ +						/** +						* Remove the char from $sb_match and its corresponding +						* replacement in $sb_replace +						*/ +						$sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1); +						$sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1); +						$legal_ascii .= $c; +					} + +					++$pos; +				} +				else +				{ +					/** +					* UTF-8 char +					*/ +					$utf_len = $utf_len_mask[$c & "\xF0"]; +					$allow[substr($allowed_chars, $pos, $utf_len)] = 1; +					$pos += $utf_len; +				} +			} +			while ($pos < $len); +		} + +		$text = strtr($text, $sb_match, $sb_replace); +		$ret = ''; + +		$pos = 0; +		$len = strlen($text); + +		do +		{ +			/** +			* Do all consecutive ASCII chars at once +			*/ +			if ($spn = strspn($text, $legal_ascii, $pos)) +			{ +				$ret .= substr($text, $pos, $spn); +				$pos += $spn; +			} + +			if ($pos >= $len) +			{ +				return $ret; +			} + +			/** +			* Capture the UTF char +			*/ +			$utf_len = $utf_len_mask[$text[$pos] & "\xF0"]; +			$utf_char = substr($text, $pos, $utf_len); +			$pos += $utf_len; + +			if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST) +				|| ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST) +				|| ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST)) +			{ +				/** +				* All characters within these ranges are valid +				* +				* We separate them with a space in order to index each character +				* individually +				*/ +				$ret .= ' ' . $utf_char . ' '; +				continue; +			} + +			if (isset($allow[$utf_char])) +			{ +				/** +				* The char is explicitly allowed +				*/ +				$ret .= $utf_char; +				continue; +			} + +			if (isset($conv[$utf_char])) +			{ +				/** +				* The char is mapped to something, maybe to itself actually +				*/ +				$ret .= $conv[$utf_char]; +				continue; +			} + +			/** +			* The char isn't mapped, but did we load its conversion table? 
+			* +			* The search indexer table is split into blocks. The block number of +			* each char is equal to its codepoint right-shifted for 11 bits. It +			* means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or +			* 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus, +			* all UTF chars encoded in 2 bytes are in the same first block. +			*/ +			if (isset($utf_char[2])) +			{ +				if (isset($utf_char[3])) +				{ +					/** +					* 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx +					* 0000 0111 0011 1111 0010 0000 +					*/ +					$idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5); +				} +				else +				{ +					/** +					* 1110 nnnn 10nx xxxx 10xx xxxx +					* 0000 0111 0010 0000 +					*/ +					$idx = ((ord($utf_char[0]) & 0x07) << 1) | ((ord($utf_char[1]) & 0x20) >> 5); +				} +			} +			else +			{ +				/** +				* 110x xxxx 10xx xxxx +				* 0000 0000 0000 0000 +				*/ +				$idx = 0; +			} + +			/** +			* Check if the required conv table has been loaded already +			*/ +			if (!isset($conv_loaded[$idx])) +			{ +				$conv_loaded[$idx] = 1; +				$file = $this->phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $this->php_ext; + +				if (file_exists($file)) +				{ +					$conv += include($file); +				} +			} + +			if (isset($conv[$utf_char])) +			{ +				$ret .= $conv[$utf_char]; +			} +			else +			{ +				/** +				* We add an entry to the conversion table so that we +				* don't have to convert to codepoint and perform the checks +				* that are above this block +				*/ +				$conv[$utf_char] = ' '; +				$ret .= ' '; +			} +		} +		while (1); + +		return $ret; +	} + +	/** +	* Returns a list of options for the ACP to display +	*/ +	public function acp() +	{ +		/** +		* if we need any options, copied from fulltext_native for now, will have to be adjusted or removed +		*/ + +		$tpl = ' +		<dl> +			<dt><label for="fulltext_native_load_upd">' . $this->user->lang['YES_SEARCH_UPDATE'] . 
$this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['YES_SEARCH_UPDATE_EXPLAIN'] . '</span></dt> +			<dd><label><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . (($this->config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $this->user->lang['YES'] . '</label><label><input type="radio" name="config[fulltext_native_load_upd]" value="0"' . ((!$this->config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $this->user->lang['NO'] . '</label></dd> +		</dl> +		<dl> +			<dt><label for="fulltext_native_min_chars">' . $this->user->lang['MIN_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt> +			<dd><input id="fulltext_native_min_chars" type="number" size="3" maxlength="3" min="0" max="255" name="config[fulltext_native_min_chars]" value="' . (int) $this->config['fulltext_native_min_chars'] . '" /></dd> +		</dl> +		<dl> +			<dt><label for="fulltext_native_max_chars">' . $this->user->lang['MAX_SEARCH_CHARS'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt> +			<dd><input id="fulltext_native_max_chars" type="number" size="3" maxlength="3" min="0" max="255" name="config[fulltext_native_max_chars]" value="' . (int) $this->config['fulltext_native_max_chars'] . '" /></dd> +		</dl> +		<dl> +			<dt><label for="fulltext_native_common_thres">' . $this->user->lang['COMMON_WORD_THRESHOLD'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt> +			<dd><input id="fulltext_native_common_thres" type="text" size="3" maxlength="3" name="config[fulltext_native_common_thres]" value="' . (double) $this->config['fulltext_native_common_thres'] . 
'" /> %</dd> +		</dl> +		'; + +		// These are fields required in the config table +		return array( +			'tpl'		=> $tpl, +			'config'	=> array('fulltext_native_load_upd' => 'bool', 'fulltext_native_min_chars' => 'integer:0:255', 'fulltext_native_max_chars' => 'integer:0:255', 'fulltext_native_common_thres' => 'double:0:100') +		); +	} +} diff --git a/phpBB/phpbb/search/fulltext_postgres.php b/phpBB/phpbb/search/fulltext_postgres.php new file mode 100644 index 0000000000..63caeffcc5 --- /dev/null +++ b/phpBB/phpbb/search/fulltext_postgres.php @@ -0,0 +1,955 @@ +<?php +/** +* +* @package search +* @copyright (c) 2005 phpBB Group +* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2 +* +*/ + +namespace phpbb\search; + +/** +* fulltext_postgres +* Fulltext search for PostgreSQL +* @package search +*/ +class fulltext_postgres extends \phpbb\search\base +{ +	/** +	 * Associative array holding index stats +	 * @var array +	 */ +	protected $stats = array(); + +	/** +	 * Holds the words entered by user, obtained by splitting the entered query on whitespace +	 * @var array +	 */ +	protected $split_words = array(); + +	/** +	 * True if PostgreSQL version supports tsearch +	 * @var boolean +	 */ +	protected $tsearch_usable = false; + +	/** +	 * Stores the PostgreSQL version +	 * @var string +	 */ +	protected $version; + +	/** +	 * Stores the tsearch query +	 * @var string +	 */ +	protected $tsearch_query; + +	/** +	 * True if phrase search is supported. +	 * PostgreSQL fulltext currently doesn't support it +	 * @var boolean +	 */ +	protected $phrase_search = false; + +	/** +	 * Config object +	 * @var \phpbb\config\config +	 */ +	protected $config; + +	/** +	 * Database connection +	 * @var \phpbb\db\driver\driver +	 */ +	protected $db; + +	/** +	 * User object +	 * @var \phpbb\user +	 */ +	protected $user; + +	/** +	 * Contains tidied search query. 
+	 * Operators are prefixed in search query and common words excluded +	 * @var string +	 */ +	protected $search_query; + +	/** +	 * Contains common words. +	 * Common words are words with length less/more than min/max length +	 * @var array +	 */ +	protected $common_words = array(); + +	/** +	 * Associative array stores the min and max word length to be searched +	 * @var array +	 */ +	protected $word_length = array(); + +	/** +	 * Constructor +	 * Creates a new \phpbb\search\fulltext_postgres, which is used as a search backend +	 * +	 * @param string|bool $error Any error that occurs is passed on through this reference variable otherwise false +	 */ +	public function __construct(&$error, $phpbb_root_path, $phpEx, $auth, $config, $db, $user) +	{ +		$this->config = $config; +		$this->db = $db; +		$this->user = $user; + +		$this->word_length = array('min' => $this->config['fulltext_postgres_min_word_len'], 'max' => $this->config['fulltext_postgres_max_word_len']); + +		if ($this->db->sql_layer == 'postgres') +		{ +			$pgsql_version = explode(',', substr($this->db->sql_server_info(), 10)); +			$this->version = trim($pgsql_version[0]); +			if (version_compare($this->version, '8.3', '>=')) +			{ +				$this->tsearch_usable = true; +			} +		} + +		/** +		 * Load the UTF tools +		 */ +		if (!function_exists('utf8_strlen')) +		{ +			include($phpbb_root_path . 'includes/utf/utf_tools.' . 
$phpEx); +		} + +		$error = false; +	} + +	/** +	* Returns the name of this search backend to be displayed to administrators +	* +	* @return string Name +	*/ +	public function get_name() +	{ +		return 'PostgreSQL Fulltext'; +	} + +	/** +	 * Returns the search_query +	 * +	 * @return string search query +	 */ +	public function get_search_query() +	{ +		return $this->search_query; +	} + +	/** +	 * Returns the common_words array +	 * +	 * @return array common words that are ignored by search backend +	 */ +	public function get_common_words() +	{ +		return $this->common_words; +	} + +	/** +	 * Returns the word_length array +	 * +	 * @return array min and max word length for searching +	 */ +	public function get_word_length() +	{ +		return $this->word_length; +	} + +	/** +	 * Returns if phrase search is supported or not +	 * +	 * @return bool +	 */ +	public function supports_phrase_search() +	{ +		return $this->phrase_search; +	} + +	/** +	* Checks for correct PostgreSQL version and stores min/max word length in the config +	* +	* @return string|bool Language key of the error/incompatiblity occurred +	*/ +	public function init() +	{ +		if ($this->db->sql_layer != 'postgres') +		{ +			return $this->user->lang['FULLTEXT_POSTGRES_INCOMPATIBLE_DATABASE']; +		} + +		if (!$this->tsearch_usable) +		{ +			return $this->user->lang['FULLTEXT_POSTGRES_TS_NOT_USABLE']; +		} + +		return false; +	} + +	/** +	* Splits keywords entered by a user into an array of words stored in $this->split_words +	* Stores the tidied search query in $this->search_query +	* +	* @param	string	&$keywords	Contains the keyword as entered by the user +	* @param	string	$terms	is either 'all' or 'any' +	* @return	bool	false	if no valid keywords were found and otherwise true +	*/ +	public function split_keywords(&$keywords, $terms) +	{ +		if ($terms == 'all') +		{ +			$match		= array('#\sand\s#iu', '#\sor\s#iu', '#\snot\s#iu', '#(^|\s)\+#', '#(^|\s)-#', '#(^|\s)\|#'); +			$replace	= array(' +', ' |', ' -', ' 
+', ' -', ' |'); + +			$keywords = preg_replace($match, $replace, $keywords); +		} + +		// Filter out as above +		$split_keywords = preg_replace("#[\"\n\r\t]+#", ' ', trim(htmlspecialchars_decode($keywords))); + +		// Split words +		$split_keywords = preg_replace('#([^\p{L}\p{N}\'*"()])#u', '$1$1', str_replace('\'\'', '\' \'', trim($split_keywords))); +		$matches = array(); +		preg_match_all('#(?:[^\p{L}\p{N}*"()]|^)([+\-|]?(?:[\p{L}\p{N}*"()]+\'?)*[\p{L}\p{N}*"()])(?:[^\p{L}\p{N}*"()]|$)#u', $split_keywords, $matches); +		$this->split_words = $matches[1]; + +		foreach ($this->split_words as $i => $word) +		{ +			$clean_word = preg_replace('#^[+\-|"]#', '', $word); + +			// check word length +			$clean_len = utf8_strlen(str_replace('*', '', $clean_word)); +			if (($clean_len < $this->config['fulltext_postgres_min_word_len']) || ($clean_len > $this->config['fulltext_postgres_max_word_len'])) +			{ +				$this->common_words[] = $word; +				unset($this->split_words[$i]); +			} +		} + +		if ($terms == 'any') +		{ +			$this->search_query = ''; +			$this->tsearch_query = ''; +			foreach ($this->split_words as $word) +			{ +				if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0) || (strpos($word, '|') === 0)) +				{ +					$word = substr($word, 1); +				} +				$this->search_query .= $word . ' '; +				$this->tsearch_query .= '|' . $word . ' '; +			} +		} +		else +		{ +			$this->search_query = ''; +			$this->tsearch_query = ''; +			foreach ($this->split_words as $word) +			{ +				if (strpos($word, '+') === 0) +				{ +					$this->search_query .= $word . ' '; +					$this->tsearch_query .= '&' . substr($word, 1) . ' '; +				} +				elseif (strpos($word, '-') === 0) +				{ +					$this->search_query .= $word . ' '; +					$this->tsearch_query .= '&!' . substr($word, 1) . ' '; +				} +				elseif (strpos($word, '|') === 0) +				{ +					$this->search_query .= $word . ' '; +					$this->tsearch_query .= '|' . substr($word, 1) . 
' ';
				}
				else
				{
					$this->search_query .= '+' . $word . ' ';
					$this->tsearch_query .= '&' . $word . ' ';
				}
			}
		}

		$this->tsearch_query = substr($this->tsearch_query, 1);
		$this->search_query = utf8_htmlspecialchars($this->search_query);

		if ($this->search_query)
		{
			$this->split_words = array_values($this->split_words);
			sort($this->split_words);
			return true;
		}
		return false;
	}

	/**
	* Turns text into an array of words
	*
	* Words shorter than fulltext_postgres_min_word_len or longer than
	* fulltext_postgres_max_word_len are dropped.
	*
	* @param	string	$text	contains post text/subject
	* @return	array			re-indexed list of the words that were kept
	*/
	public function split_message($text)
	{
		// Split words
		$text = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($text)));
		$matches = array();
		preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $text, $matches);
		$text = $matches[1];

		// remove too short or too long words
		$text = array_values($text);
		for ($i = 0, $n = sizeof($text); $i < $n; $i++)
		{
			$text[$i] = trim($text[$i]);
			if (utf8_strlen($text[$i]) < $this->config['fulltext_postgres_min_word_len'] || utf8_strlen($text[$i]) > $this->config['fulltext_postgres_max_word_len'])
			{
				unset($text[$i]);
			}
		}

		return array_values($text);
	}

	/**
	* Performs a search on keywords depending on display specific params. You have to run split_keywords() first
	*
	* @param	string		$type				contains either posts or topics depending on what should be searched for
	* @param	string		$fields				contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
	* @param	string		$terms				is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query
	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting
	* @param	string		$sort_dir			is either a or d representing ASC and DESC
	* @param	string		$sort_days			specifies the maximum amount of days a post may be old
	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched
	* @param	string		$post_visibility	specifies which types of posts the user can view in which forums
	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
	* @param	array		$author_ary			an array of author ids if the author should be ignored during the search the array is empty
	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
	* @param	int			&$start				indicates the first index of the page; reset to the last page if it lies beyond the result count
	* @param	int			$per_page			number of ids each page is supposed to contain
	* @return	boolean|int						total number of results, false if there is nothing to search for or nothing was found
	*/
	public function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page)
	{
		// No keywords? No posts
		if (!$this->search_query)
		{
			return false;
		}

		// In 'all' mode split_keywords() stores every required word with a leading '+',
		// so a query without any '+' only consists of exclusions like -foo.
		// In 'any' mode split_keywords() strips all prefixes, hence the check must not
		// be applied there or every 'any' search would be rejected.
		if ($terms != 'any' && strpos($this->search_query, '+') === false)
		{
			return false;
		}

		// generate a search_key from all the options to identify the results
		$search_key = md5(implode('#', array(
			implode(', ', $this->split_words),
			$type,
			$fields,
			$terms,
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			$post_visibility,
			implode(',', $author_ary)
		)));

		if ($start < 0)
		{
			$start = 0;
		}

		// try reading the results from cache
		$result_count = 0;
		if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $result_count;
		}

		$id_ary = array();

		$join_topic = ($type == 'posts') ? false : true;

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
		$sql_sort_table = $sql_sort_join = '';

		// The first character of the sort expression is the alias of the table it sorts on
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_sort_table	= USERS_TABLE . ' u, ';
				$sql_sort_join	= ($type == 'posts') ? ' AND u.user_id = p.poster_id ' : ' AND u.user_id = t.topic_poster ';
			break;

			case 't':
				$join_topic = true;
			break;

			case 'f':
				$sql_sort_table	= FORUMS_TABLE . ' f, ';
				$sql_sort_join	= ' AND f.forum_id = p.forum_id ';
			break;
		}

		// Build some display specific sql strings.
		// Subject + body are listed as "post_text, post_subject" so that the generated
		// expression (post_text || ' ' || post_subject) is the same expression
		// create_index() builds its GIN index on; PostgreSQL only uses an expression
		// index when the query expression matches it.
		switch ($fields)
		{
			case 'titleonly':
				$sql_match = 'p.post_subject';
				$sql_match_where = ' AND p.post_id = t.topic_first_post_id';
				$join_topic = true;
			break;

			case 'msgonly':
				$sql_match = 'p.post_text';
				$sql_match_where = '';
			break;

			case 'firstpost':
				$sql_match = 'p.post_text, p.post_subject';
				$sql_match_where = ' AND p.post_id = t.topic_first_post_id';
				$join_topic = true;
			break;

			default:
				$sql_match = 'p.post_text, p.post_subject';
				$sql_match_where = '';
			break;
		}

		$sql_select			= ($type == 'posts') ? 'p.post_id' : 'DISTINCT t.topic_id';
		$sql_from			= ($join_topic) ? TOPICS_TABLE . ' t, ' : '';
		$field				= ($type == 'posts') ? 'post_id' : 'topic_id';

		// Every variant starts with ' AND ' because it is appended to the other WHERE options
		if (sizeof($author_ary) && $author_name)
		{
			// first one matches post of registered users, second one guests and deleted users
			$sql_author = ' AND (' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
		}
		else if (sizeof($author_ary))
		{
			$sql_author = ' AND ' . $this->db->sql_in_set('p.poster_id', $author_ary);
		}
		else
		{
			$sql_author = '';
		}

		$sql_where_options = $sql_sort_join;
		$sql_where_options .= ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
		$sql_where_options .= ($join_topic) ? ' AND t.topic_id = p.topic_id' : '';
		$sql_where_options .= (sizeof($ex_fid_ary)) ? ' AND ' . $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
		$sql_where_options .= ' AND ' . $post_visibility;
		$sql_where_options .= $sql_author;
		$sql_where_options .= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
		$sql_where_options .= $sql_match_where;

		// Turn the column list into one concatenated text expression for to_tsvector
		$sql_match = str_replace(',', " || ' ' ||", $sql_match);
		$tmp_sql_match = "to_tsvector ('" . $this->db->sql_escape($this->config['fulltext_postgres_ts_name']) . "', " . $sql_match . ") @@ to_tsquery ('" . $this->db->sql_escape($this->config['fulltext_postgres_ts_name']) . "', '" . $this->db->sql_escape($this->tsearch_query) . "')";

		$this->db->sql_transaction('begin');

		$sql_from = "FROM $sql_from$sql_sort_table" . POSTS_TABLE . " p";
		$sql_where = "WHERE (" . $tmp_sql_match . ")
			$sql_where_options";
		$sql = "SELECT $sql_select
			$sql_from
			$sql_where
			ORDER BY $sql_sort";
		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

		while ($row = $this->db->sql_fetchrow($result))
		{
			$id_ary[] = $row[$field];
		}
		$this->db->sql_freeresult($result);

		$id_ary = array_unique($id_ary);

		// if the total result count is not cached yet, retrieve it from the db
		if (!$result_count)
		{
			// topic searches select DISTINCT topic ids, so count topics instead of matching posts
			$sql_count_field = ($type == 'posts') ? '*' : 'DISTINCT t.topic_id';
			$sql_count = "SELECT COUNT($sql_count_field) as result_count
				$sql_from
				$sql_where";
			$result = $this->db->sql_query($sql_count);
			$result_count = (int) $this->db->sql_fetchfield('result_count');
			$this->db->sql_freeresult($result);

			if (!$result_count)
			{
				// do not leave the transaction started above open
				$this->db->sql_transaction('commit');
				return false;
			}
		}

		$this->db->sql_transaction('commit');

		if ($start >= $result_count)
		{
			// requested page lies behind the last result: fall back to the last page
			$start = floor(($result_count - 1) / $per_page) * $per_page;

			$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

			while ($row = $this->db->sql_fetchrow($result))
			{
				$id_ary[] = $row[$field];
			}
			$this->db->sql_freeresult($result);

			$id_ary = array_unique($id_ary);
		}

		// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
		$this->save_ids($search_key, implode(' ', $this->split_words), $author_ary, $result_count, $id_ary, $start, $sort_dir);
		$id_ary = array_slice($id_ary, 0, (int) $per_page);

		return $result_count;
	}

	/**
	* Performs a search on an author's posts without caring about message contents. Depends on display specific params
	*
	* @param	string		$type				contains either posts or topics depending on what should be searched for
	* @param	boolean		$firstpost_only		if true, only topic starting posts will be considered
	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query
	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting
	* @param	string		$sort_dir			is either a or d representing ASC and DESC
	* @param	string		$sort_days			specifies the maximum amount of days a post may be old
	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched
	* @param	string		$post_visibility	specifies which types of posts the user can view in which forums
	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
	* @param	array		$author_ary			an array of author ids
	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match
	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
	* @param	int			&$start				indicates the first index of the page; reset to the last page if it lies beyond the result count
	* @param	int			$per_page			number of ids each page is supposed to contain
	* @return	boolean|int						total number of results, 0 if no author was given, false if nothing was found
	*/
	public function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, &$start, $per_page)
	{
		// No author? No posts
		if (!sizeof($author_ary))
		{
			return 0;
		}

		// generate a search_key from all the options to identify the results
		$search_key = md5(implode('#', array(
			'',
			$type,
			($firstpost_only) ? 'firstpost' : '',
			'',
			'',
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			$post_visibility,
			implode(',', $author_ary),
			$author_name,
		)));

		if ($start < 0)
		{
			$start = 0;
		}

		// try reading the results from cache
		$result_count = 0;
		if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $result_count;
		}

		$id_ary = array();

		// Create some display specific sql strings
		if ($author_name)
		{
			// first one matches post of registered users, second one guests and deleted users
			$sql_author = '(' . $this->db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
		}
		else
		{
			$sql_author = $this->db->sql_in_set('p.poster_id', $author_ary);
		}
		$sql_fora		= (sizeof($ex_fid_ary)) ? ' AND ' . $this->db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
		$sql_topic_id	= ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
		$sql_time		= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
		$sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
		$sql_sort_table = $sql_sort_join = '';

		// The first character of the sort expression is the alias of the table it sorts on
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_sort_table	= USERS_TABLE . ' u, ';
				$sql_sort_join	= ($type == 'posts') ? ' AND u.user_id = p.poster_id ' : ' AND u.user_id = t.topic_poster ';
			break;

			case 't':
				// the topics table is already part of the query for topic and first post searches
				$sql_sort_table	= ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : '';
				$sql_sort_join	= ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : '';
			break;

			case 'f':
				$sql_sort_table	= FORUMS_TABLE . ' f, ';
				$sql_sort_join	= ' AND f.forum_id = p.forum_id ';
			break;
		}

		$m_approve_fid_sql = ' AND ' . $post_visibility;

		// Build the query for really selecting the post_ids
		if ($type == 'posts')
		{
			$sql = "SELECT p.post_id
				FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$m_approve_fid_sql
					$sql_fora
					$sql_sort_join
					$sql_time
				ORDER BY $sql_sort";
			$field = 'post_id';
		}
		else
		{
			$sql = "SELECT t.topic_id
				FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$m_approve_fid_sql
					$sql_fora
					AND t.topic_id = p.topic_id
					$sql_sort_join
					$sql_time
				GROUP BY t.topic_id, $sort_by_sql[$sort_key]
				ORDER BY $sql_sort";
			$field = 'topic_id';
		}

		$this->db->sql_transaction('begin');

		// Only read one block of posts from the db and then cache it
		$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);

		while ($row = $this->db->sql_fetchrow($result))
		{
			$id_ary[] = (int) $row[$field];
		}
		$this->db->sql_freeresult($result);

		// retrieve the total result count if needed
		if (!$result_count)
		{
			if ($type == 'posts')
			{
				$sql_count = "SELECT COUNT(*) as result_count
					FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
					WHERE $sql_author
						$sql_topic_id
						$sql_firstpost
						$m_approve_fid_sql
						$sql_fora
						$sql_sort_join
						$sql_time";
			}
			else
			{
				// COUNT(*) together with GROUP BY yields one row per group, so the first
				// row would only hold the number of posts in one topic. Count the groups
				// themselves to get the number of rows the id query returns.
				$sql_count = "SELECT COUNT(*) as result_count
					FROM (SELECT t.topic_id
						FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
						WHERE $sql_author
							$sql_topic_id
							$sql_firstpost
							$m_approve_fid_sql
							$sql_fora
							AND t.topic_id = p.topic_id
							$sql_sort_join
							$sql_time
						GROUP BY t.topic_id, {$sort_by_sql[$sort_key]}) AS grouped_topics";
			}

			$result = $this->db->sql_query($sql_count);
			$result_count = (int) $this->db->sql_fetchfield('result_count');
			$this->db->sql_freeresult($result);

			if (!$result_count)
			{
				// do not leave the transaction started above open
				$this->db->sql_transaction('commit');
				return false;
			}
		}

		$this->db->sql_transaction('commit');

		if ($start >= $result_count)
		{
			// requested page lies behind the last result: fall back to the last page
			$start = floor(($result_count - 1) / $per_page) * $per_page;

			$result = $this->db->sql_query_limit($sql, $this->config['search_block_size'], $start);
			while ($row = $this->db->sql_fetchrow($result))
			{
				$id_ary[] = (int) $row[$field];
			}
			$this->db->sql_freeresult($result);

			$id_ary = array_unique($id_ary);
		}

		if (sizeof($id_ary))
		{
			$this->save_ids($search_key, '', $author_ary, $result_count, $id_ary, $start, $sort_dir);
			$id_ary = array_slice($id_ary, 0, $per_page);

			return $result_count;
		}
		return false;
	}

	/**
	* Destroys cached search results, that contained one of the new words in a post so the results won't be outdated
	*
	* @param	string		$mode		contains the post mode: edit, post, reply, quote ...
	* @param	int			$post_id	contains the post id of the post to index
	* @param	string		$message	contains the post text of the post
	* @param	string		$subject	contains the subject of the post to index
	* @param	int			$poster_id	contains the user id of the poster
	* @param	int			$forum_id	contains the forum id of parent forum of the post
	*/
	public function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
	{
		// Split old and new post/subject to obtain array of words
		$split_text = $this->split_message($message);
		$split_title = ($subject) ? 
$this->split_message($subject) : array();

		$words = array_unique(array_merge($split_text, $split_title));

		unset($split_text);
		unset($split_title);

		// destroy cached search results containing any of the words removed or added
		$this->destroy_cache($words, array($poster_id));

		unset($words);
	}

	/**
	* Destroy cached results, that might be outdated after deleting a post
	*
	* Only $author_ids is used: the cache entries of these authors are destroyed.
	*
	* @param	array	$post_ids	not used by this backend
	* @param	array	$author_ids	passed to destroy_cache() as the list of authors
	* @param	array	$forum_ids	not used by this backend
	*/
	public function index_remove($post_ids, $author_ids, $forum_ids)
	{
		$this->destroy_cache(array(), $author_ids);
	}

	/**
	* Destroy old cache entries and store the current time in the search_last_gc config value
	*/
	public function tidy()
	{
		// destroy too old cached search results
		$this->destroy_cache(array());

		set_config('search_last_gc', time(), true);
	}

	/**
	* Create fulltext index
	*
	* Creates the GIN indexes on the posts table that get_stats() did not find
	* (one on post_subject, one on post_text || ' ' || post_subject) and empties
	* the search results table.
	*
	* @param	mixed	$acp_module	not used by this backend
	* @param	mixed	$u_action	not used by this backend
	* @return string|bool error string is returned in case of errors otherwise false
	*/
	public function create_index($acp_module, $u_action)
	{
		// Make sure we can actually use PostgreSQL with fulltext indexes
		if ($error = $this->init())
		{
			return $error;
		}

		if (empty($this->stats))
		{
			$this->get_stats();
		}

		// The index names contain the text search configuration name; get_stats() looks for exactly these names
		if (!isset($this->stats['post_subject']))
		{
			$this->db->sql_query("CREATE INDEX " . POSTS_TABLE . "_" . $this->config['fulltext_postgres_ts_name'] . "_post_subject ON " . POSTS_TABLE . " USING gin (to_tsvector ('" . $this->db->sql_escape($this->config['fulltext_postgres_ts_name']) . "', post_subject))");
		}

		if (!isset($this->stats['post_content']))
		{
			$this->db->sql_query("CREATE INDEX " . POSTS_TABLE . "_" . $this->config['fulltext_postgres_ts_name'] . "_post_content ON " . POSTS_TABLE . " USING gin (to_tsvector ('" . $this->db->sql_escape($this->config['fulltext_postgres_ts_name']) . "', post_text || ' ' || post_subject))");
		}

		$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);

		return false;
	}

	/**
	* Drop fulltext index
	*
	* Drops the indexes found by get_stats() under the names stored in their
	* 'relname' entry and empties the search results table.
	*
	* @param	mixed	$acp_module	not used by this backend
	* @param	mixed	$u_action	not used by this backend
	* @return string|bool error string is returned in case of errors otherwise false
	*/
	public function delete_index($acp_module, $u_action)
	{
		// Make sure we can actually use PostgreSQL with fulltext indexes
		if ($error = $this->init())
		{
			return $error;
		}

		if (empty($this->stats))
		{
			$this->get_stats();
		}

		if (isset($this->stats['post_subject']))
		{
			$this->db->sql_query('DROP INDEX ' . $this->stats['post_subject']['relname']);
		}

		if (isset($this->stats['post_content']))
		{
			$this->db->sql_query('DROP INDEX ' . $this->stats['post_content']['relname']);
		}

		$this->db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);

		return false;
	}

	/**
	* Returns true if both FULLTEXT indexes exist
	*
	* @return bool true if get_stats() found the post_subject and the post_content index
	*/
	public function index_created()
	{
		if (empty($this->stats))
		{
			$this->get_stats();
		}

		return (isset($this->stats['post_subject']) && isset($this->stats['post_content'])) ? true : false;
	}

	/**
	* Returns an associative array containing information about the indexes
	*
	* @return array language string for the total posts entry => num_posts config value, or 0 while the indexes are missing
	*/
	public function index_stats()
	{
		if (empty($this->stats))
		{
			$this->get_stats();
		}

		return array(
			$this->user->lang['FULLTEXT_POSTGRES_TOTAL_POSTS']			=> ($this->index_created()) ? $this->stats['total_posts'] : 0,
		);
	}

	/**
	 * Computes the stats and store them in the $this->stats associative array
	 *
	 * Afterwards $this->stats holds 'post_subject' and/or 'post_content' (the
	 * catalog row of the respective index, if it exists) and 'total_posts'.
	 * On any database layer other than postgres the stats are left empty.
	 */
	protected function get_stats()
	{
		if ($this->db->sql_layer != 'postgres')
		{
			$this->stats = array();
			return;
		}

		// List name and definition of every index on the posts table
		$sql = "SELECT c2.relname, pg_catalog.pg_get_indexdef(i.indexrelid, 0, true) AS indexdef
			  FROM pg_catalog.pg_class c1, pg_catalog.pg_index i, pg_catalog.pg_class c2
			 WHERE c1.relname = '" . POSTS_TABLE . "'
			   AND pg_catalog.pg_table_is_visible(c1.oid)
			   AND c1.oid = i.indrelid
			   AND i.indexrelid = c2.oid";
		$result = $this->db->sql_query($sql);

		while ($row = $this->db->sql_fetchrow($result))
		{
			// Only indexes whose definition uses to_tsvector are considered. An index is
			// recognised by its name, either with or without the text search configuration part.
			if (strpos($row['indexdef'], 'to_tsvector') !== false)
			{
				if ($row['relname'] == POSTS_TABLE . '_' . $this->config['fulltext_postgres_ts_name'] . '_post_subject' || $row['relname'] == POSTS_TABLE . '_post_subject')
				{
					$this->stats['post_subject'] = $row;
				}
				else if ($row['relname'] == POSTS_TABLE . '_' . $this->config['fulltext_postgres_ts_name'] . '_post_content' || $row['relname'] == POSTS_TABLE . '_post_content')
				{
					$this->stats['post_content'] = $row;
				}
			}
		}
		$this->db->sql_freeresult($result);

		$this->stats['total_posts'] = $this->config['num_posts'];
	}

	/**
	* Display various options that can be configured for the backend from the acp
	*
	* @return associative array containing template and config variables
	*/
	public function acp()
	{
		$tpl = '
		<dl>
			<dt><label>' . $this->user->lang['FULLTEXT_POSTGRES_VERSION_CHECK'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_POSTGRES_VERSION_CHECK_EXPLAIN'] . '</span></dt>
			<dd>' . (($this->tsearch_usable) ? $this->user->lang['YES'] : $this->user->lang['NO']) . ' (PostgreSQL ' . $this->version . ')</dd>
		</dl>
		<dl>
			<dt><label>' . $this->user->lang['FULLTEXT_POSTGRES_TS_NAME'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_POSTGRES_TS_NAME_EXPLAIN'] . '</span></dt>
			<dd><select name="config[fulltext_postgres_ts_name]">';

		if ($this->db->sql_layer == 'postgres' && $this->tsearch_usable)
		{
			$sql = 'SELECT cfgname AS ts_name
				  FROM pg_ts_config';
			$result = $this->db->sql_query($sql);

			while ($row = $this->db->sql_fetchrow($result))
			{
				$tpl .= '<option value="' . $row['ts_name'] . '"' . 
($row['ts_name'] === $this->config['fulltext_postgres_ts_name'] ? ' selected="selected"' : '') . '>' . $row['ts_name'] . '</option>'; +			} +			$this->db->sql_freeresult($result); +		} +		else +		{ +			$tpl .= '<option value="' . $this->config['fulltext_postgres_ts_name'] . '" selected="selected">' . $this->config['fulltext_postgres_ts_name'] . '</option>'; +		} + +		$tpl .= '</select></dd> +		</dl> +                <dl> +                        <dt><label for="fulltext_postgres_min_word_len">' . $this->user->lang['FULLTEXT_POSTGRES_MIN_WORD_LEN'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_POSTGRES_MIN_WORD_LEN_EXPLAIN'] . '</span></dt> +                        <dd><input id="fulltext_postgres_min_word_len" type="number" size="3" maxlength="3" min="0" max="255" name="config[fulltext_postgres_min_word_len]" value="' . (int) $this->config['fulltext_postgres_min_word_len'] . '" /></dd> +                </dl> +                <dl> +                        <dt><label for="fulltext_postgres_max_word_len">' . $this->user->lang['FULLTEXT_POSTGRES_MAX_WORD_LEN'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_POSTGRES_MAX_WORD_LEN_EXPLAIN'] . '</span></dt> +                        <dd><input id="fulltext_postgres_max_word_len" type="number" size="3" maxlength="3" min="0" max="255" name="config[fulltext_postgres_max_word_len]" value="' . (int) $this->config['fulltext_postgres_max_word_len'] . 
'" /></dd> +                </dl> +		'; + +		// These are fields required in the config table +		return array( +			'tpl'		=> $tpl, +			'config'	=> array('fulltext_postgres_ts_name' => 'string', 'fulltext_postgres_min_word_len' => 'integer:0:255', 'fulltext_postgres_max_word_len' => 'integer:0:255') +		); +	} +} diff --git a/phpBB/phpbb/search/fulltext_sphinx.php b/phpBB/phpbb/search/fulltext_sphinx.php new file mode 100644 index 0000000000..d86a394326 --- /dev/null +++ b/phpBB/phpbb/search/fulltext_sphinx.php @@ -0,0 +1,904 @@ +<?php +/** +* +* @package search +* @copyright (c) 2005 phpBB Group +* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2 +* +*/ + +namespace phpbb\search; + +define('SPHINX_MAX_MATCHES', 20000); +define('SPHINX_CONNECT_RETRIES', 3); +define('SPHINX_CONNECT_WAIT_TIME', 300); + +/** +* fulltext_sphinx +* Fulltext search based on the sphinx search deamon +* @package search +*/ +class fulltext_sphinx +{ +	/** +	 * Associative array holding index stats +	 * @var array +	 */ +	protected $stats = array(); + +	/** +	 * Holds the words entered by user, obtained by splitting the entered query on whitespace +	 * @var array +	 */ +	protected $split_words = array(); + +	/** +	 * Holds unique sphinx id +	 * @var string +	 */ +	protected $id; + +	/** +	 * Stores the names of both main and delta sphinx indexes +	 * separated by a semicolon +	 * @var string +	 */ +	protected $indexes; + +	/** +	 * Sphinx searchd client object +	 * @var SphinxClient +	 */ +	protected $sphinx; + +	/** +	 * Relative path to board root +	 * @var string +	 */ +	protected $phpbb_root_path; + +	/** +	 * PHP Extension +	 * @var string +	 */ +	protected $php_ext; + +	/** +	 * Auth object +	 * @var \phpbb\auth\auth +	 */ +	protected $auth; + +	/** +	 * Config object +	 * @var \phpbb\config\config +	 */ +	protected $config; + +	/** +	 * Database connection +	 * @var \phpbb\db\driver\driver +	 */ +	protected $db; + +	/** +	 * Database Tools object +	 * 
@var \phpbb\db\tools
	 */
	protected $db_tools;

	/**
	 * Sphinx source type for the board database ('mysql' or 'pgsql'),
	 * filled in by config_generate()
	 * @var string
	 */
	protected $dbtype;

	/**
	 * User object
	 * @var \phpbb\user
	 */
	protected $user;

	/**
	 * Generated content of the sphinx config file
	 * (or an error message when generation failed)
	 * @var string
	 */
	protected $config_file_data = '';

	/**
	 * Contains tidied search query.
	 * Operators are prefixed in search query and common words excluded
	 * @var string
	 */
	protected $search_query;

	/**
	 * Constructor
	 * Creates a new \phpbb\search\fulltext_sphinx, which is used as a search backend
	 *
	 * Stores the injected services, makes sure the board has a sphinx id,
	 * derives the delta/main index names from that id and points a
	 * SphinxClient at the configured searchd host and port
	 * (localhost:9312 when the config values are empty).
	 *
	 * @param string|bool $error Set to false by this constructor; passed by reference
	 * @param string $phpbb_root_path Relative path to board root
	 * @param string $phpEx PHP extension
	 * @param \phpbb\auth\auth $auth Auth object
	 * @param \phpbb\config\config $config Config object
	 * @param \phpbb\db\driver\driver $db Database connection
	 * @param \phpbb\user $user User object
	 */
	public function __construct(&$error, $phpbb_root_path, $phpEx, $auth, $config, $db, $user)
	{
		$this->phpbb_root_path = $phpbb_root_path;
		$this->php_ext = $phpEx;
		$this->auth = $auth;
		$this->config = $config;
		$this->db = $db;
		$this->user = $user;

		// Initialize \phpbb\db\tools object
		$this->db_tools = new \phpbb\db\tools($this->db);

		// Assign a unique id the first time this backend is instantiated
		if (!$this->config['fulltext_sphinx_id'])
		{
			set_config('fulltext_sphinx_id', unique_id());
		}
		$this->id = $this->config['fulltext_sphinx_id'];

		// Delta index first, main index second, separated by a semicolon
		$index_prefix = 'index_phpbb_' . $this->id;
		$this->indexes = $index_prefix . '_delta;' . $index_prefix . '_main';

		// Only load includes/sphinxapi when no SphinxClient class exists yet
		if (!class_exists('SphinxClient'))
		{
			require($this->phpbb_root_path . 'includes/sphinxapi.' . $this->php_ext);
		}

		$sphinx_host = $this->config['fulltext_sphinx_host'] ? $this->config['fulltext_sphinx_host'] : 'localhost';
		$sphinx_port = $this->config['fulltext_sphinx_port'] ? (int) $this->config['fulltext_sphinx_port'] : 9312;

		// Initialize sphinx client
		$this->sphinx = new \SphinxClient();
		$this->sphinx->SetServer($sphinx_host, $sphinx_port);

		$error = false;
	}

	/**
	* Returns the name of this search backend to be displayed to administrators
	*
	* @return string Name
	*/
	public function get_name()
	{
		return 'Sphinx Fulltext';
	}

	/**
	 * Returns the tidied search query stored by split_keywords()
	 *
	 * @return string search query
	 */
	public function get_search_query()
	{
		return $this->search_query;
	}

	/**
	 * Returns false as this backend keeps no word length array
	 *
	 * @return false
	 */
	public function get_word_length()
	{
		return false;
	}

	/**
	 * Returns an empty array as this backend does not track common words
	 *
	 * @return array common words that are ignored by search backend
	 */
	public function get_common_words()
	{
		return array();
	}

	/**
	* Checks that the board runs on a database layer sphinx can read from
	* and sets the search garbage collection interval to one hour
	*
	* @return string|bool Language string of the incompatibility encountered, or false if successful
	*/
	public function init()
	{
		$supported_layers = array('mysql', 'mysql4', 'mysqli', 'postgres');

		if (!in_array($this->db->sql_layer, $supported_layers))
		{
			return $this->user->lang['FULLTEXT_SPHINX_WRONG_DATABASE'];
		}

		// Move delta to main index each hour
		set_config('search_gc', 3600);

		return false;
	}

	/**
	 * Generates content of sphinx.conf
	 *
	 * @return bool True if sphinx.conf content is correctly generated, false otherwise
	 */
	protected function config_generate()
	{
		// Check if Database is supported by Sphinx
		if ($this->db->sql_layer =='mysql' || $this->db->sql_layer == 'mysql4' || $this->db->sql_layer == 'mysqli')
		{
			$this->dbtype = 'mysql';
		}
		else if ($this->db->sql_layer == 'postgres')
		{
			$this->dbtype = 'pgsql';
		}
		else
		{
			$this->config_file_data = $this->user->lang('FULLTEXT_SPHINX_WRONG_DATABASE');
			return false;
		}

		// Check if directory paths have been filled
		if
(!$this->config['fulltext_sphinx_data_path']) +		{ +			$this->config_file_data = $this->user->lang('FULLTEXT_SPHINX_NO_CONFIG_DATA'); +			return false; +		} + +		include($this->phpbb_root_path . 'config.' . $this->php_ext); + +		/* Now that we're sure everything was entered correctly, +		generate a config for the index. We use a config value +		fulltext_sphinx_id for this, as it should be unique. */ +		$config_object = new \phpbb\search\sphinx\config($this->config_file_data); +		$config_data = array( +			'source source_phpbb_' . $this->id . '_main' => array( +				array('type',						$this->dbtype . ' # mysql or pgsql'), +				// This config value sql_host needs to be changed incase sphinx and sql are on different servers +				array('sql_host',					$dbhost . ' # SQL server host sphinx connects to'), +				array('sql_user',					$dbuser), +				array('sql_pass',					$dbpasswd), +				array('sql_db',						$dbname), +				array('sql_port',					$dbport . ' # optional, default is 3306 for mysql and 5432 for pgsql'), +				array('sql_query_pre',				'SET NAMES \'utf8\''), +				array('sql_query_pre',				'UPDATE ' . SPHINX_TABLE . ' SET max_doc_id = (SELECT MAX(post_id) FROM ' . POSTS_TABLE . ') WHERE counter_id = 1'), +				array('sql_query_range',			'SELECT MIN(post_id), MAX(post_id) FROM ' . POSTS_TABLE . ''), +				array('sql_range_step',				'5000'), +				array('sql_query',					'SELECT +						p.post_id AS id, +						p.forum_id, +						p.topic_id, +						p.poster_id, +						p.post_visibility, +						CASE WHEN p.post_id = t.topic_first_post_id THEN 1 ELSE 0 END as topic_first_post, +						p.post_time, +						p.post_subject, +						p.post_subject as title, +						p.post_text as data, +						t.topic_last_post_time, +						0 as deleted +					FROM ' . POSTS_TABLE . ' p, ' . TOPICS_TABLE . ' t +					WHERE +						p.topic_id = t.topic_id +						AND p.post_id >= $start AND p.post_id <= $end'), +				array('sql_query_post',				''), +				array('sql_query_post_index',		'UPDATE ' . 
SPHINX_TABLE . ' SET max_doc_id = $maxid WHERE counter_id = 1'), +				array('sql_query_info',				'SELECT * FROM ' . POSTS_TABLE . ' WHERE post_id = $id'), +				array('sql_attr_uint',				'forum_id'), +				array('sql_attr_uint',				'topic_id'), +				array('sql_attr_uint',				'poster_id'), +				array('sql_attr_uint',				'post_visibility'), +				array('sql_attr_bool',				'topic_first_post'), +				array('sql_attr_bool',				'deleted'), +				array('sql_attr_timestamp',			'post_time'), +				array('sql_attr_timestamp',			'topic_last_post_time'), +				array('sql_attr_string',			'post_subject'), +			), +			'source source_phpbb_' . $this->id . '_delta : source_phpbb_' . $this->id . '_main' => array( +				array('sql_query_pre',				''), +				array('sql_query_range',			''), +				array('sql_range_step',				''), +				array('sql_query',					'SELECT +						p.post_id AS id, +						p.forum_id, +						p.topic_id, +						p.poster_id, +						p.post_visibility, +						CASE WHEN p.post_id = t.topic_first_post_id THEN 1 ELSE 0 END as topic_first_post, +						p.post_time, +						p.post_subject, +						p.post_subject as title, +						p.post_text as data, +						t.topic_last_post_time, +						0 as deleted +					FROM ' . POSTS_TABLE . ' p, ' . TOPICS_TABLE . ' t +					WHERE +						p.topic_id = t.topic_id +						AND p.post_id >=  ( SELECT max_doc_id FROM ' . SPHINX_TABLE . ' WHERE counter_id=1 )'), +			), +			'index index_phpbb_' . $this->id . '_main' => array( +				array('path',						$this->config['fulltext_sphinx_data_path'] . 'index_phpbb_' . $this->id . '_main'), +				array('source',						'source_phpbb_' . $this->id . 
'_main'), +				array('docinfo',					'extern'), +				array('morphology',					'none'), +				array('stopwords',					''), +				array('min_word_len',				'2'), +				array('charset_type',				'utf-8'), +				array('charset_table',				'U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z, A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101, U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109, U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F, U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117, U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D, U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135, U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, U+013C, U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, U+0143->U+0144, U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, U+014B, U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, U+0152->U+0153, U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159, U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161, U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167, U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F, U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175, U+0175, U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C, U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F, U+4E00..U+9FFF'), +				array('min_prefix_len',				'0'), +				array('min_infix_len',				'0'), +			), +			'index index_phpbb_' . $this->id . '_delta : index_phpbb_' . $this->id . 
'_main' => array( +				array('path',						$this->config['fulltext_sphinx_data_path'] . 'index_phpbb_' . $this->id . '_delta'), +				array('source',						'source_phpbb_' . $this->id . '_delta'), +			), +			'indexer' => array( +				array('mem_limit',					$this->config['fulltext_sphinx_indexer_mem_limit'] . 'M'), +			), +			'searchd' => array( +				array('compat_sphinxql_magics'	,	'0'), +				array('listen'	,					($this->config['fulltext_sphinx_host'] ? $this->config['fulltext_sphinx_host'] : 'localhost') . ':' . ($this->config['fulltext_sphinx_port'] ? $this->config['fulltext_sphinx_port'] : '9312')), +				array('log',						$this->config['fulltext_sphinx_data_path'] . 'log/searchd.log'), +				array('query_log',					$this->config['fulltext_sphinx_data_path'] . 'log/sphinx-query.log'), +				array('read_timeout',				'5'), +				array('max_children',				'30'), +				array('pid_file',					$this->config['fulltext_sphinx_data_path'] . 'searchd.pid'), +				array('max_matches',				(string) SPHINX_MAX_MATCHES), +				array('binlog_path',				$this->config['fulltext_sphinx_data_path']), +			), +		); + +		$non_unique = array('sql_query_pre' => true, 'sql_attr_uint' => true, 'sql_attr_timestamp' => true, 'sql_attr_str2ordinal' => true, 'sql_attr_bool' => true); +		$delete = array('sql_group_column' => true, 'sql_date_column' => true, 'sql_str2ordinal_column' => true); +		foreach ($config_data as $section_name => $section_data) +		{ +			$section = $config_object->get_section_by_name($section_name); +			if (!$section) +			{ +				$section = $config_object->add_section($section_name); +			} + +			foreach ($delete as $key => $void) +			{ +				$section->delete_variables_by_name($key); +			} + +			foreach ($non_unique as $key => $void) +			{ +				$section->delete_variables_by_name($key); +			} + +			foreach ($section_data as $entry) +			{ +				$key = $entry[0]; +				$value = $entry[1]; + +				if (!isset($non_unique[$key])) +				{ +					$variable = $section->get_variable_by_name($key); +					
if (!$variable)
					{
						$variable = $section->create_variable($key, $value);
					}
					else
					{
						$variable->set_value($value);
					}
				}
				else
				{
					$variable = $section->create_variable($key, $value);
				}
			}
		}
		$this->config_file_data = $config_object->get_data();

		return true;
	}

	/**
	* Tidies the keywords entered by a user and stores the result in $this->search_query
	*
	* When $terms is 'all' the words "and", "or" and "not" and the characters
	* +, - and | are rewritten to space-separated operators, @ is removed and
	* the sphinx client is switched to extended match mode. For every other
	* value of $terms the keywords are left as entered and the client is
	* switched to match-any mode. Line breaks are turned into spaces in both
	* cases. The rewritten string is also written back to $keywords.
	*
	* Note that $this->split_words is not filled by this method.
	*
	* @param string $keywords Contains the keyword as entered by the user, modified in place
	* @param string $terms is either 'all' or 'any'
	* @return bool false if no valid keywords were found and otherwise true
	*/
	public function split_keywords(&$keywords, $terms)
	{
		if ($terms == 'all')
		{
			$match		= array('#\sand\s#i', '#\sor\s#i', '#\snot\s#i', '#\+#', '#-#', '#\|#', '#@#');
			$replace	= array(' & ', ' | ', '  - ', ' +', ' -', ' |', '');

			$keywords = preg_replace($match, $replace, $keywords);
			$this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED);
		}
		else
		{
			$this->sphinx->SetMatchMode(SPH_MATCH_ANY);
		}

		// Trim the keywords and turn line breaks into spaces.
		// NOTE(review): as written the quote replacement maps '"' to '"' and is a
		// no-op; presumably one side was the HTML entity for a double quote before
		// this text was extracted - confirm against the repository before relying on it.
		$keywords = str_replace(array('"', "\n"), array('"', ' '), trim($keywords));

		if (strlen($keywords) > 0)
		{
			// NOTE(review): same no-op quote replacement as above - confirm.
			$this->search_query = str_replace('"', '"', $keywords);
			return true;
		}

		return false;
	}

	/**
	* Performs a search on keywords depending on display specific params.
You have to run split_keywords() first +	* +	* @param	string		$type				contains either posts or topics depending on what should be searched for +	* @param	string		$fields				contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched) +	* @param	string		$terms				is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words) +	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query +	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting +	* @param	string		$sort_dir			is either a or d representing ASC and DESC +	* @param	string		$sort_days			specifies the maximum amount of days a post may be old +	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched +	* @param	string		$post_visibility	specifies which types of posts the user can view in which forums +	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched +	* @param	array		$author_ary			an array of author ids if the author should be ignored during the search the array is empty +	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match +	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered +	* @param	int			$start				indicates the first index of the page +	* @param	int			$per_page			number of ids each page is supposed to contain +	* @return	boolean|int						total number of results +	*/ +	public function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, 
$author_ary, $author_name, &$id_ary, &$start, $per_page) +	{ +		// No keywords? No posts. +		if (!strlen($this->search_query) && !sizeof($author_ary)) +		{ +			return false; +		} + +		$id_ary = array(); + +		$join_topic = ($type != 'posts'); + +		// Sorting + +		if ($type == 'topics') +		{ +			switch ($sort_key) +			{ +				case 'a': +					$this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'poster_id ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); +				break; + +				case 'f': +					$this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'forum_id ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); +				break; + +				case 'i': + +				case 's': +					$this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'post_subject ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); +				break; + +				case 't': + +				default: +					$this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'topic_last_post_time ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); +				break; +			} +		} +		else +		{ +			switch ($sort_key) +			{ +				case 'a': +					$this->sphinx->SetSortMode(($sort_dir == 'a') ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'poster_id'); +				break; + +				case 'f': +					$this->sphinx->SetSortMode(($sort_dir == 'a') ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'forum_id'); +				break; + +				case 'i': + +				case 's': +					$this->sphinx->SetSortMode(($sort_dir == 'a') ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'post_subject'); +				break; + +				case 't': + +				default: +					$this->sphinx->SetSortMode(($sort_dir == 'a') ? 
SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'post_time'); +				break; +			} +		} + +		// Most narrow filters first +		if ($topic_id) +		{ +			$this->sphinx->SetFilter('topic_id', array($topic_id)); +		} + +		$search_query_prefix = ''; + +		switch ($fields) +		{ +			case 'titleonly': +				// Only search the title +				if ($terms == 'all') +				{ +					$search_query_prefix = '@title '; +				} +				// Weight for the title +				$this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1)); +				// 1 is first_post, 0 is not first post +				$this->sphinx->SetFilter('topic_first_post', array(1)); +			break; + +			case 'msgonly': +				// Only search the body +				if ($terms == 'all') +				{ +					$search_query_prefix = '@data '; +				} +				// Weight for the body +				$this->sphinx->SetFieldWeights(array("title" => 1, "data" => 5)); +			break; + +			case 'firstpost': +				// More relative weight for the title, also search the body +				$this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1)); +				// 1 is first_post, 0 is not first post +				$this->sphinx->SetFilter('topic_first_post', array(1)); +			break; + +			default: +				// More relative weight for the title, also search the body +				$this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1)); +			break; +		} + +		if (sizeof($author_ary)) +		{ +			$this->sphinx->SetFilter('poster_id', $author_ary); +		} + +		// As this is not simply possible at the moment, we limit the result to approved posts. +		// This will make it impossible for moderators to search unapproved and softdeleted posts, +		// but at least it will also cause the same for normal users. 
+		$this->sphinx->SetFilter('post_visibility', array(ITEM_APPROVED)); + +		if (sizeof($ex_fid_ary)) +		{ +			// All forums that a user is allowed to access +			$fid_ary = array_unique(array_intersect(array_keys($this->auth->acl_getf('f_read', true)), array_keys($this->auth->acl_getf('f_search', true)))); +			// All forums that the user wants to and can search in +			$search_forums = array_diff($fid_ary, $ex_fid_ary); + +			if (sizeof($search_forums)) +			{ +				$this->sphinx->SetFilter('forum_id', $search_forums); +			} +		} + +		$this->sphinx->SetFilter('deleted', array(0)); + +		$this->sphinx->SetLimits($start, (int) $per_page, SPHINX_MAX_MATCHES); +		$result = $this->sphinx->Query($search_query_prefix . str_replace('"', '"', $this->search_query), $this->indexes); + +		// Could be connection to localhost:9312 failed (errno=111, +		// msg=Connection refused) during rotate, retry if so +		$retries = SPHINX_CONNECT_RETRIES; +		while (!$result && (strpos($this->sphinx->GetLastError(), "errno=111,") !== false) && $retries--) +		{ +			usleep(SPHINX_CONNECT_WAIT_TIME); +			$result = $this->sphinx->Query($search_query_prefix . str_replace('"', '"', $this->search_query), $this->indexes); +		} + +		if ($this->sphinx->GetLastError()) +		{ +			add_log('critical', 'LOG_SPHINX_ERROR', $this->sphinx->GetLastError()); +			if ($this->auth->acl_get('a_')) +			{ +				trigger_error($this->user->lang('SPHINX_SEARCH_FAILED', $this->sphinx->GetLastError())); +			} +			else +			{ +				trigger_error($this->user->lang('SPHINX_SEARCH_FAILED_LOG')); +			} +		} + +		$result_count = $result['total_found']; + +		if ($result_count && $start >= $result_count) +		{ +			$start = floor(($result_count - 1) / $per_page) * $per_page; + +			$this->sphinx->SetLimits((int) $start, (int) $per_page, SPHINX_MAX_MATCHES); +			$result = $this->sphinx->Query($search_query_prefix . 
str_replace('"', '"', $this->search_query), $this->indexes); + +			// Could be connection to localhost:9312 failed (errno=111, +			// msg=Connection refused) during rotate, retry if so +			$retries = SPHINX_CONNECT_RETRIES; +			while (!$result && (strpos($this->sphinx->GetLastError(), "errno=111,") !== false) && $retries--) +			{ +				usleep(SPHINX_CONNECT_WAIT_TIME); +				$result = $this->sphinx->Query($search_query_prefix . str_replace('"', '"', $this->search_query), $this->indexes); +			} +		} + +		$id_ary = array(); +		if (isset($result['matches'])) +		{ +			if ($type == 'posts') +			{ +				$id_ary = array_keys($result['matches']); +			} +			else +			{ +				foreach ($result['matches'] as $key => $value) +				{ +					$id_ary[] = $value['attrs']['topic_id']; +				} +			} +		} +		else +		{ +			return false; +		} + +		$id_ary = array_slice($id_ary, 0, (int) $per_page); + +		return $result_count; +	} + +	/** +	* Performs a search on an author's posts without caring about message contents. 
Depends on display specific params +	* +	* @param	string		$type				contains either posts or topics depending on what should be searched for +	* @param	boolean		$firstpost_only		if true, only topic starting posts will be considered +	* @param	array		$sort_by_sql		contains SQL code for the ORDER BY part of a query +	* @param	string		$sort_key			is the key of $sort_by_sql for the selected sorting +	* @param	string		$sort_dir			is either a or d representing ASC and DESC +	* @param	string		$sort_days			specifies the maximum amount of days a post may be old +	* @param	array		$ex_fid_ary			specifies an array of forum ids which should not be searched +	* @param	string		$post_visibility	specifies which types of posts the user can view in which forums +	* @param	int			$topic_id			is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched +	* @param	array		$author_ary			an array of author ids +	* @param	string		$author_name		specifies the author match, when ANONYMOUS is also a search-match +	* @param	array		&$id_ary			passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered +	* @param	int			$start				indicates the first index of the page +	* @param	int			$per_page			number of ids each page is supposed to contain +	* @return	boolean|int						total number of results +	*/ +	public function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page) +	{ +		$this->search_query = ''; + +		$this->sphinx->SetMatchMode(SPH_MATCH_FULLSCAN); +		$fields = ($firstpost_only) ? 
'firstpost' : 'all';
		$terms = 'all';
		return $this->keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $post_visibility, $topic_id, $author_ary, $author_name, $id_ary, $start, $per_page);
	}

	/**
	 * Keeps the sphinx attributes of indexed posts in sync when a message is posted or changed
	 *
	 * - mode 'edit': the forum_id and poster_id attributes of the edited post are updated.
	 * - any other mode except 'post' (with a non-zero $post_id): topic_last_post_time is set
	 *   to the current time for every post of the topic $post_id belongs to.
	 * - mode 'post': nothing is updated here.
	 *
	 * $message and $subject are accepted for interface compatibility and are not read.
	 *
	 * @param	string	$mode	Contains the post mode: edit, post, reply, quote
	 * @param	int	$post_id	The id of the post which is modified/created
	 * @param	string	&$message	New or updated post content
	 * @param	string	&$subject	New or updated post subject
	 * @param	int	$poster_id	Post author's user id
	 * @param	int	$forum_id	The id of the forum in which the post is located
	 */
	public function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
	{
		if ($mode == 'edit')
		{
			$this->sphinx->UpdateAttributes($this->indexes, array('forum_id', 'poster_id'), array((int)$post_id => array((int)$forum_id, (int)$poster_id)));
		}
		else if ($mode != 'post' && $post_id)
		{
			// Update topic_last_post_time for full topic:
			// p2 is the post that was just written, p1 are all posts sharing its topic.
			// Without the WHERE clause the self-join matches every post on the board
			// and all of them would be stamped with the current time.
			$sql_array = array(
				'SELECT'	=> 'p1.post_id',
				'FROM'		=> array(
					POSTS_TABLE	=> 'p1',
				),
				'LEFT_JOIN'	=> array(array(
					'FROM'	=> array(
						POSTS_TABLE	=> 'p2'
					),
					'ON'	=> 'p1.topic_id = p2.topic_id',
				)),
				'WHERE'		=> 'p2.post_id = ' . ((int) $post_id),
			);

			$sql = $this->db->sql_build_query('SELECT', $sql_array);
			$result = $this->db->sql_query($sql);

			$post_updates = array();
			$post_time = time();
			while ($row = $this->db->sql_fetchrow($result))
			{
				$post_updates[(int)$row['post_id']] = array($post_time);
			}
			$this->db->sql_freeresult($result);

			// Only talk to searchd when there is something to update
			if (sizeof($post_updates))
			{
				$this->sphinx->UpdateAttributes($this->indexes, array('topic_last_post_time'), $post_updates);
			}
		}
	}

	/**
	* Delete a post from the index after it was deleted
	*/
	public function index_remove($post_ids, $author_ids, $forum_ids)
	{
		$values = array();
foreach ($post_ids as $post_id)
		{
			$values[$post_id] = array(1);
		}

		$this->sphinx->UpdateAttributes($this->indexes, array('deleted'), $values);
	}

	/**
	* Stores the current time as the time of the last search garbage collection;
	* nothing needs to be destroyed
	*/
	public function tidy($create = false)
	{
		set_config('search_last_gc', time(), true);
	}

	/**
	* Create sphinx table
	*
	* Creates the counter table and seeds it with the single row
	* (counter_id = 1, max_doc_id = 0). Does nothing when the table already exists.
	*
	* @return string|bool error string is returned in case of errors otherwise false
	*/
	public function create_index($acp_module, $u_action)
	{
		// Table is already there, nothing to create
		if ($this->index_created())
		{
			return false;
		}

		$this->db_tools->sql_create_table(SPHINX_TABLE, array(
			'COLUMNS'	=> array(
				'counter_id'	=> array('UINT', 0),
				'max_doc_id'	=> array('UINT', 0),
			),
			'PRIMARY_KEY'	=> 'counter_id',
		));

		$this->db->sql_query('TRUNCATE TABLE ' . SPHINX_TABLE);

		$counter_row = array(
			'counter_id'	=> '1',
			'max_doc_id'	=> '0',
		);
		$this->db->sql_query('INSERT INTO ' . SPHINX_TABLE . ' ' . $this->db->sql_build_array('INSERT', $counter_row));

		return false;
	}

	/**
	* Drop sphinx table
	*
	* Does nothing when the table does not exist.
	*
	* @return string|bool error string is returned in case of errors otherwise false
	*/
	public function delete_index($acp_module, $u_action)
	{
		if ($this->index_created())
		{
			$this->db_tools->sql_table_drop(SPHINX_TABLE);
		}

		return false;
	}

	/**
	* Returns true if the sphinx table was created
	*
	* @return bool true if sphinx table was created
	*/
	public function index_created($allow_new_files = true)
	{
		return ($this->db_tools->sql_table_exists(SPHINX_TABLE)) ? true : false;
	}

	/**
	* Returns an associative array containing information about the indexes
	*
	* @return array Post counts keyed by their language strings
	*/
	public function index_stats()
	{
		if (empty($this->stats))
		{
			$this->get_stats();
		}

		return array(
$this->user->lang['FULLTEXT_SPHINX_MAIN_POSTS']			=> ($this->index_created()) ? $this->stats['main_posts'] : 0, +			$this->user->lang['FULLTEXT_SPHINX_DELTA_POSTS']			=> ($this->index_created()) ? $this->stats['total_posts'] - $this->stats['main_posts'] : 0, +			$this->user->lang['FULLTEXT_MYSQL_TOTAL_POSTS']			=> ($this->index_created()) ? $this->stats['total_posts'] : 0, +		); +	} + +	/** +	* Collects stats that can be displayed on the index maintenance page +	*/ +	protected function get_stats() +	{ +		if ($this->index_created()) +		{ +			$sql = 'SELECT COUNT(post_id) as total_posts +				FROM ' . POSTS_TABLE; +			$result = $this->db->sql_query($sql); +			$this->stats['total_posts'] = (int) $this->db->sql_fetchfield('total_posts'); +			$this->db->sql_freeresult($result); + +			$sql = 'SELECT COUNT(p.post_id) as main_posts +				FROM ' . POSTS_TABLE . ' p, ' . SPHINX_TABLE . ' m +				WHERE p.post_id <= m.max_doc_id +					AND m.counter_id = 1'; +			$result = $this->db->sql_query($sql); +			$this->stats['main_posts'] = (int) $this->db->sql_fetchfield('main_posts'); +			$this->db->sql_freeresult($result); +		} +	} + +	/** +	* Returns a list of options for the ACP to display +	* +	* @return associative array containing template and config variables +	*/ +	public function acp() +	{ +		$config_vars = array( +			'fulltext_sphinx_data_path' => 'string', +			'fulltext_sphinx_host' => 'string', +			'fulltext_sphinx_port' => 'string', +			'fulltext_sphinx_indexer_mem_limit' => 'int', +		); + +		$tpl = ' +		<span class="error">' . $this->user->lang['FULLTEXT_SPHINX_CONFIGURE']. '</span> +		<dl> +			<dt><label for="fulltext_sphinx_data_path">' . $this->user->lang['FULLTEXT_SPHINX_DATA_PATH'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_DATA_PATH_EXPLAIN'] . '</span></dt> +			<dd><input id="fulltext_sphinx_data_path" type="text" size="40" maxlength="255" name="config[fulltext_sphinx_data_path]" value="' . 
$this->config['fulltext_sphinx_data_path'] . '" /></dd> +		</dl> +		<dl> +			<dt><label for="fulltext_sphinx_host">' . $this->user->lang['FULLTEXT_SPHINX_HOST'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_HOST_EXPLAIN'] . '</span></dt> +			<dd><input id="fulltext_sphinx_host" type="text" size="40" maxlength="255" name="config[fulltext_sphinx_host]" value="' . $this->config['fulltext_sphinx_host'] . '" /></dd> +		</dl> +		<dl> +			<dt><label for="fulltext_sphinx_port">' . $this->user->lang['FULLTEXT_SPHINX_PORT'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_PORT_EXPLAIN'] . '</span></dt> +			<dd><input id="fulltext_sphinx_port" type="number" size="4" maxlength="10" name="config[fulltext_sphinx_port]" value="' . $this->config['fulltext_sphinx_port'] . '" /></dd> +		</dl> +		<dl> +			<dt><label for="fulltext_sphinx_indexer_mem_limit">' . $this->user->lang['FULLTEXT_SPHINX_INDEXER_MEM_LIMIT'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_INDEXER_MEM_LIMIT_EXPLAIN'] . '</span></dt> +			<dd><input id="fulltext_sphinx_indexer_mem_limit" type="number" size="4" maxlength="10" name="config[fulltext_sphinx_indexer_mem_limit]" value="' . $this->config['fulltext_sphinx_indexer_mem_limit'] . '" /> ' . $this->user->lang['MIB'] . '</dd> +		</dl> +		<dl> +			<dt><label for="fulltext_sphinx_config_file">' . $this->user->lang['FULLTEXT_SPHINX_CONFIG_FILE'] . $this->user->lang['COLON'] . '</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_CONFIG_FILE_EXPLAIN'] . '</span></dt> +			<dd>' . (($this->config_generate()) ? '<textarea readonly="readonly" rows="6" id="sphinx_config_data">' . htmlspecialchars($this->config_file_data) . '</textarea>' : $this->config_file_data) . 
'</dd> +		<dl> +		'; + +		// These are fields required in the config table +		return array( +			'tpl'		=> $tpl, +			'config'	=> $config_vars +		); +	} +} diff --git a/phpBB/phpbb/search/index.htm b/phpBB/phpbb/search/index.htm new file mode 100644 index 0000000000..ee1f723a7d --- /dev/null +++ b/phpBB/phpbb/search/index.htm @@ -0,0 +1,10 @@ +<html> +<head> +<title></title> +<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> +</head> + +<body bgcolor="#FFFFFF" text="#000000"> + +</body> +</html> diff --git a/phpBB/phpbb/search/sphinx/config.php b/phpBB/phpbb/search/sphinx/config.php new file mode 100644 index 0000000000..cb8e4524df --- /dev/null +++ b/phpBB/phpbb/search/sphinx/config.php @@ -0,0 +1,282 @@
<?php
/**
*
* @package search
* @copyright (c) 2005 phpBB Group
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2
*
*/

namespace phpbb\search\sphinx;

/**
* \phpbb\search\sphinx\config
* An object representing the sphinx configuration
* Can read it from file and write it back out after modification
*
* The configuration is kept as an ordered list that mixes config_section
* objects with config_comment objects (comments and blank lines found outside
* of any section), so that get_data() can reproduce them.
* @package search
*/
class config
{
	/** @var array Ordered list of config_section and config_comment objects */
	private $sections = array();

	/**
	* Constructor which optionally loads data from a variable
	*
	* @param	string|array	$config_data	The sphinx configuration data, either as a list of
	*											lines that still carry their line endings or as
	*											one raw string. Nothing is parsed when it is ''.
	*
	* @access	public
	*/
	public function __construct($config_data)
	{
		if ($config_data != '')
		{
			$this->read($config_data);
		}
	}

	/**
	* Get a section object by its name
	*
	* @param	string 								$name	The name of the section that shall be returned
	* @return	\phpbb\search\sphinx\config_section|null	The first section with that name or null if none was found
	*
	* @access	public
	*/
	public function get_section_by_name($name)
	{
		foreach ($this->sections as $section)
		{
			// Comments live in the same list, so make sure this is really a section object
			if (($section instanceof \phpbb\search\sphinx\config_section) && $section->get_name() == $name)
			{
				return $section;
			}
		}

		return null;
	}

	/**
	* Appends a new empty section to the end of the config
	*
	* @param	string								$name	The name for the new section
	* @return	\phpbb\search\sphinx\config_section			The newly created section object
	*
	* @access	public
	*/
	public function add_section($name)
	{
		$section = new \phpbb\search\sphinx\config_section($name, '');
		$this->sections[] = $section;

		return $section;
	}

	/**
	* Reads the config file data
	*
	* Replaces whatever sections were stored before. Comments and blank lines
	* are stored as config_comment objects holding the untouched input line, so
	* they survive a read()/get_data() round trip.
	*
	* @param	string|array	$config_data	The config file data: a list of lines including
	*											their line endings, or the raw file contents
	*
	* @access	private
	*/
	public function read($config_data)
	{
		$this->sections = array();

		if (!is_array($config_data))
		{
			// Accept the raw contents too: split behind every newline so each
			// element keeps its line ending, just like the line list does
			$config_data = preg_split('/(?<=\n)/', (string) $config_data, -1, PREG_SPLIT_NO_EMPTY);
		}

		$section = null;
		$found_opening_bracket = false;
		$in_value = false;

		foreach ($config_data as $i => $line)
		{
			// If the value of a variable continues to the next line because the line
			// break was escaped then we don't trim leading space but treat it as a part of the value
			$line = ($in_value) ? rtrim($line) : trim($line);

			// If we're not inside a section look for one
			if (!$section)
			{
				// Add empty lines and comments as comment objects to the section list
				// that way they're not deleted when reassembling the file from the sections
				if (!$line || $line[0] == '#')
				{
					$this->sections[] = new \phpbb\search\sphinx\config_comment($config_data[$i]);
					continue;
				}

				// Otherwise we scan the line reading the section name until we find
				// an opening curly bracket or a comment
				$section_name = '';
				$section_name_comment = '';
				$found_opening_bracket = false;
				for ($j = 0, $length = strlen($line); $j < $length; $j++)
				{
					if ($line[$j] == '#')
					{
						$section_name_comment = substr($line, $j);
						break;
					}

					// Anything between the bracket and a comment is ignored
					if ($found_opening_bracket)
					{
						continue;
					}

					if ($line[$j] == '{')
					{
						$found_opening_bracket = true;
						continue;
					}

					$section_name .= $line[$j];
				}

				// And then we create the new section object
				$section_name = trim($section_name);
				$section = new \phpbb\search\sphinx\config_section($section_name, $section_name_comment);
			}
			else
			{
				// If we're looking for variables inside a section
				$skip_first = false;

				// If we're not in a value continuing over the line feed
				if (!$in_value)
				{
					// Then add empty lines and comments as comment objects to the variable list
					// of this section so they're not deleted on reassembly
					if (!$line || $line[0] == '#')
					{
						$section->add_variable(new \phpbb\search\sphinx\config_comment($config_data[$i]));
						continue;
					}

					// As long as we haven't yet actually found an opening bracket for this section
					// we treat everything as comments so it's not deleted either
					if (!$found_opening_bracket)
					{
						if ($line[0] == '{')
						{
							$skip_first = true;
							$line = substr($line, 1);
							$found_opening_bracket = true;
						}
						else
						{
							$section->add_variable(new \phpbb\search\sphinx\config_comment($config_data[$i]));
							continue;
						}
					}
				}

				// If we did not find a comment in this line or still add to the previous
				// line's value ...
				if ($line || $in_value)
				{
					if (!$in_value)
					{
						$name = '';
						$value = '';
						$comment = '';
						$found_assignment = false;
					}
					$in_value = false;
					$end_section = false;

					/* ... then we should parse this line char by char:
					 - first there's the variable name
					 - then an equal sign
					 - the variable value
					 - possibly a backslash before the linefeed in this case we need to continue
					   parsing the value in the next line
					 - a # indicating that the rest of the line is a comment
					 - a closing curly bracket indicating the end of this section*/
					for ($j = 0, $length = strlen($line); $j < $length; $j++)
					{
						if ($line[$j] == '#')
						{
							$comment = substr($line, $j);
							break;
						}
						else if ($line[$j] == '}')
						{
							$comment = substr($line, $j + 1);
							$end_section = true;
							break;
						}
						else if (!$found_assignment)
						{
							if ($line[$j] == '=')
							{
								$found_assignment = true;
							}
							else
							{
								$name .= $line[$j];
							}
						}
						else
						{
							if ($line[$j] == '\\' && $j == $length - 1)
							{
								$value .= "\n";
								$in_value = true;
								// Go to the next line and keep processing the value in there
								continue 2;
							}
							$value .= $line[$j];
						}
					}

					// If a name and an equal sign were found then we append a
					// new variable object to the section
					if ($name && $found_assignment)
					{
						$section->add_variable(new \phpbb\search\sphinx\config_variable(trim($name), trim($value), ($end_section) ? '' : $comment));

						// When the closing bracket sits on the same line the section
						// still has to be completed below, so only skip ahead otherwise
						if (!$end_section)
						{
							continue;
						}
					}

					/* If we found a closing curly bracket this section has been completed
					and we can append it to the section list and continue with looking for
					the next section */
					if ($end_section)
					{
						$section->set_end_comment($comment);
						$this->sections[] = $section;
						$section = null;
						continue;
					}
				}

				// If we did not find anything meaningful up to here, then just treat it
				// as a comment. When the opening bracket was consumed from this line it
				// is cut off the stored text and replaced by a tab.
				$comment = ($skip_first) ? "\t" . substr(ltrim($config_data[$i]), 1) : $config_data[$i];
				$section->add_variable(new \phpbb\search\sphinx\config_comment($comment));
			}
		}

	}

	/**
	* Returns the config data
	*
	* @return	string	$data	The config data that is generated by concatenating
	*							the textual form of every stored section and comment
	*
	* @access	public
	*/
	public function get_data()
	{
		$data = "";
		foreach ($this->sections as $section)
		{
			$data .= $section->to_string();
		}

		return $data;
	}
}
diff --git a/phpBB/phpbb/search/sphinx/config_comment.php b/phpBB/phpbb/search/sphinx/config_comment.php new file mode 100644 index 0000000000..20b1c19af1 --- /dev/null +++ b/phpBB/phpbb/search/sphinx/config_comment.php @@ -0,0 +1,43 @@
<?php
/**
*
* @package search
* @copyright (c) 2005 phpBB Group
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2
*
*/

namespace phpbb\search\sphinx;

/**
* \phpbb\search\sphinx\config_comment
* Represents a comment inside the sphinx configuration
*
* The text is stored and returned unchanged; nothing is trimmed or appended.
*/
class config_comment
{
	/** @var string The text exactly as it was passed to the constructor */
	private $exact_string;

	/**
	* Create a new comment
	*
	* @param	string	$exact_string	The content of the comment including newlines, leading whitespace, etc.
	*
	* @access	public
	*/
	public function __construct($exact_string)
	{
		$this->exact_string = $exact_string;
	}

	/**
	* Simply returns the comment as it was created
	*
	* @return	string	The exact string that was specified in the constructor
	*
	* @access	public
	*/
	public function to_string()
	{
		return $this->exact_string;
	}
}
diff --git a/phpBB/phpbb/search/sphinx/config_section.php b/phpBB/phpbb/search/sphinx/config_section.php new file mode 100644 index 0000000000..8f9253ec56 --- /dev/null +++ b/phpBB/phpbb/search/sphinx/config_section.php @@ -0,0 +1,156 @@
<?php
/**
*
* @package search
* @copyright (c) 2005 phpBB Group
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2
*
*/

namespace phpbb\search\sphinx;

/**
* \phpbb\search\sphinx\config_section
* Represents a single section inside the sphinx configuration
*
* A section holds an ordered list of entries. Each entry only needs a
* to_string() method; config_variable and config_comment objects are stored
* side by side.
*/
class config_section
{
	/** @var string Name of the section */
	private $name;

	/** @var string Comment printed after the name */
	private $comment;

	/** @var string Text printed after the closing bracket */
	private $end_comment;

	/** @var array Ordered list of config_variable and config_comment objects */
	private $variables = array();

	/**
	* Construct a new section
	*
	* @param	string	$name		Name of the section
	* @param	string	$comment	Comment that should be appended after the name in the
	*								textual format.
	*
	* @access	public
	*/
	public function __construct($name, $comment)
	{
		$this->name = $name;
		$this->comment = $comment;
		$this->end_comment = '';
	}

	/**
	* Add a variable object to the list of variables in this section
	*
	* @param	\phpbb\search\sphinx\config_variable	$variable	The variable object
	*
	* @access	public
	*/
	public function add_variable($variable)
	{
		$this->variables[] = $variable;
	}

	/**
	* Adds a comment after the closing bracket in the textual representation
	*
	* @param	string	$end_comment
	*
	* @access	public
	*/
	public function set_end_comment($end_comment)
	{
		$this->end_comment = $end_comment;
	}

	/**
	* Getter for the name of this section
	*
	* @return	string	Section's name
	*
	* @access	public
	*/
	public function get_name()
	{
		return $this->name;
	}

	/**
	* Get a variable object by its name
	*
	* @param	string 								$name	The name of the variable that shall be returned
	* @return	\phpbb\search\sphinx\config_variable|null	The first variable object from this section with the
	*														given name or null if none was found
	*
	* @access	public
	*/
	public function get_variable_by_name($name)
	{
		foreach ($this->variables as $variable)
		{
			// Make sure this is a variable object and not a comment
			if (($variable instanceof \phpbb\search\sphinx\config_variable) && $variable->get_name() == $name)
			{
				return $variable;
			}
		}

		return null;
	}

	/**
	* Deletes all variables with the given name
	*
	* Comments and variables with other names keep their relative order and
	* the list is re-indexed from zero.
	*
	* @param	string	$name	The name of the variable objects that are supposed to be removed
	*
	* @access	public
	*/
	public function delete_variables_by_name($name)
	{
		$remaining = array();

		// Rebuild the list instead of splicing it while iterating, which would
		// walk past the end of the shortened array
		foreach ($this->variables as $variable)
		{
			// Make sure this is a variable object and not a comment
			if (($variable instanceof \phpbb\search\sphinx\config_variable) && $variable->get_name() == $name)
			{
				continue;
			}

			$remaining[] = $variable;
		}

		$this->variables = $remaining;
	}

	/**
	* Create a new variable object and append it to the variable list of this section
	*
	* @param	string								$name	The name for the new variable
	* @param	string								$value	The value for the new variable
	* @return	\phpbb\search\sphinx\config_variable			Variable object that was created
	*
	* @access	public
	*/
	public function create_variable($name, $value)
	{
		$variable = new \phpbb\search\sphinx\config_variable($name, $value, '');
		$this->variables[] = $variable;

		return $variable;
	}

	/**
	* Turns this object into a string which can be written to a config file
	*
	* Entries at the start of the list whose text is only whitespace are
	* removed from the section for good before the text is assembled.
	*
	* @return	string	Config data in textual form, parsable for sphinx
	*
	* @access	public
	*/
	public function to_string()
	{
		$content = $this->name . ' ' . $this->comment . "\n{\n";

		// Make sure we don't get too many newlines after the opening bracket.
		// The list may be empty or run empty here, so check before looking at index 0
		while (!empty($this->variables) && trim($this->variables[0]->to_string()) == '')
		{
			array_shift($this->variables);
		}

		foreach ($this->variables as $variable)
		{
			$content .= $variable->to_string();
		}
		$content .= '}' . $this->end_comment . "\n";

		return $content;
	}
}
diff --git a/phpBB/phpbb/search/sphinx/config_variable.php b/phpBB/phpbb/search/sphinx/config_variable.php new file mode 100644 index 0000000000..c0f6d28dcc --- /dev/null +++ b/phpBB/phpbb/search/sphinx/config_variable.php @@ -0,0 +1,74 @@
<?php
/**
*
* @package search
* @copyright (c) 2005 phpBB Group
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2
*
*/

namespace phpbb\search\sphinx;

/**
* \phpbb\search\sphinx\config_variable
* Represents a single variable inside the sphinx configuration
*/
class config_variable
{
	/** @var string Name of the variable */
	private $name;

	/** @var string Value of the variable; may contain newlines */
	private $value;

	/** @var string Comment printed after the value */
	private $comment;

	/**
	* Constructs a new variable object
	*
	* @param	string	$name		Name of the variable
	* @param	string	$value		Value of the variable
	* @param	string	$comment	Optional comment after the variable in the
	*								config file
	*
	* @access	public
	*/
	public function __construct($name, $value, $comment)
	{
		$this->name = $name;
		$this->value = $value;
		$this->comment = $comment;
	}

	/**
	* Getter for the variable's name
	*
	* @return	string	The variable object's name
	*
	* @access	public
	*/
	public function get_name()
	{
		return $this->name;
	}

	/**
	* Allows changing the variable's value
	*
	* @param	string	$value	New value for this variable
	*
	* @access	public
	*/
	public function set_value($value)
	{
		$this->value = $value;
	}

	/**
	* Turns this object into a string readable by sphinx
	*
	* The result is a tab, "name = value", a space, the comment and a
	* newline. Newlines inside the value are written as " \" followed by the
	* newline, i.e. as escaped line breaks.
	*
	* @return	string	Config data in textual form
	*
	* @access	public
	*/
	public function to_string()
	{
		$escaped_value = str_replace("\n", " \\\n", $this->value);

		return "\t" . $this->name . ' = ' . $escaped_value . ' ' . $this->comment . "\n";
	}
}
|
