aboutsummaryrefslogtreecommitdiffstats
path: root/phpBB/includes/search/fulltext_native.php
diff options
context:
space:
mode:
Diffstat (limited to 'phpBB/includes/search/fulltext_native.php')
-rwxr-xr-xphpBB/includes/search/fulltext_native.php1058
1 files changed, 1058 insertions, 0 deletions
diff --git a/phpBB/includes/search/fulltext_native.php b/phpBB/includes/search/fulltext_native.php
new file mode 100755
index 0000000000..b69dc18061
--- /dev/null
+++ b/phpBB/includes/search/fulltext_native.php
@@ -0,0 +1,1058 @@
+<?php
+/**
+*
+* @package search
+* @version $Id$
+* @copyright (c) 2005 phpBB Group
+* @license http://opensource.org/licenses/gpl-license.php GNU Public License
+*
+*/
+
+/**
+* Refuse to run when this file is requested directly instead of being included:
+* the script exits unless the constant IN_PHPBB has been defined beforehand.
+*/
+if (!defined('IN_PHPBB'))
+{
+ exit;
+}
+
+/**
+* Load includes/search/search.<phpEx> once, relative to the phpBB root path.
+* NOTE(review): presumably this file declares search_backend, the parent of the
+* class defined below - confirm.
+*
+* @ignore
+*/
+include_once($phpbb_root_path . 'includes/search/search.' . $phpEx);
+
+/**
+* @package search
+* fulltext_native
+* phpBB's own db driven fulltext search
+*/
+class fulltext_native extends search_backend
+{
+ var $stats;
+ var $word_length;
+
+	/**
+	* Constructor: stores the configured minimum/maximum word length
+	*
+	* @param mixed $error passed by reference, always set to false by this backend
+	*/
+	function fulltext_native(&$error)
+	{
+		global $config;
+
+		// Nothing is checked here, so no error is ever reported
+		$error = false;
+
+		$this->word_length = array(
+			'min'	=> $config['fulltext_native_min_chars'],
+			'max'	=> $config['fulltext_native_max_chars']
+		);
+	}
+
+	/**
+	* Splits keywords entered by a user into an array of words stored in $this->split_words
+	*
+	* The operators "+", "-" and "|" are kept as separate elements directly in front of
+	* the word they belong to. Words which are on the ignore list, too short, too long or
+	* flagged as common in the word table are moved to $this->common_words, together with
+	* the operator in front of them.
+	*
+	* @param string $keywords Contains the keyword as entered by the user, passed by reference and overwritten with the normalised string
+	* @param string $terms is either 'all' or 'any'
+	* @return false if no valid keywords were found and otherwise true
+	*/
+	function split_keywords(&$keywords, $terms)
+	{
+		global $db, $config;
+
+		$drop_char_match = array('^', '$', ';', '#', '&', '(', ')', '<', '>', '`', '\'', '"', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '!');
+		$drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ');
+
+		$this->get_ignore_words();
+		$this->get_synonyms();
+
+		if ($terms == 'all')
+		{
+			// Turn the textual and symbolic operators into stand-alone "+", "|" and "-" tokens
+			$match = array('#\sand\s#i', '#\sor\s#i', '#\snot\s#i', '#\+#', '#-#', '#\|#');
+			$replace = array(' + ', ' | ', ' - ', ' + ', ' - ', ' | ');
+
+			$keywords = preg_replace($match, $replace, $keywords);
+		}
+
+		$match = array();
+		// New lines, carriage returns
+		$match[] = "#[\n\r]+#";
+		// NCRs like &nbsp; etc.
+		$match[] = '#(&amp;|&)[\#a-z0-9]+?;#i';
+
+		// Filter out as above
+		$keywords = preg_replace($match, ' ', strtolower(trim($keywords)));
+		$keywords = str_replace($drop_char_match, $drop_char_replace, $keywords);
+
+		// Split words
+		$this->split_words = explode(' ', preg_replace('#\s+#', ' ', $keywords));
+
+		if (sizeof($this->ignore_words))
+		{
+			$this->common_words = array_intersect($this->split_words, $this->ignore_words);
+			$this->split_words = array_diff($this->split_words, $this->ignore_words);
+		}
+
+		if (sizeof($this->replace_synonym))
+		{
+			$this->split_words = str_replace($this->replace_synonym, $this->match_synonym, $this->split_words);
+		}
+
+		// array_diff() preserves keys and may have left gaps. Reindex so that the
+		// element at "$i - 1" really is the one in front of element $i.
+		$this->split_words = array_values($this->split_words);
+
+		$prefixes = array('+', '-', '|');
+		$prefixed = false;
+		$in_words = '';
+		foreach ($this->split_words as $i => $word)
+		{
+			if (in_array($word, $prefixes))
+			{
+				$prefixed = true;
+				continue;
+			}
+
+			// check word length, wildcards do not count
+			$clean_len = strlen(str_replace('*', '', $word));
+			if (($clean_len < $config['fulltext_native_min_chars']) || ($clean_len > $config['fulltext_native_max_chars']))
+			{
+				// drop the operator together with its word
+				if ($prefixed && isset($this->split_words[$i - 1]))
+				{
+					$this->common_words[] = $this->split_words[$i - 1];
+					unset($this->split_words[$i - 1]);
+				}
+				$this->common_words[] = $this->split_words[$i];
+				unset($this->split_words[$i]);
+			}
+			else if (strpos($word, '*') === false)
+			{
+				// only words without wildcard can be looked up literally
+				$in_words .= (($in_words) ? ', ' : '') . '\'' . $db->sql_escape($word) . '\'';
+			}
+
+			$prefixed = false;
+		}
+
+		if ($in_words)
+		{
+			// The loop above may have removed elements, close the gaps again
+			$this->split_words = array_values($this->split_words);
+
+			// identify common words and ignore them
+			$sql = 'SELECT word_text
+				FROM ' . SEARCH_WORD_TABLE . "
+				WHERE word_text IN ($in_words)
+					AND word_common = 1";
+			$result = $db->sql_query($sql);
+
+			while ($row = $db->sql_fetchrow($result))
+			{
+				$common_word = (string) $row['word_text'];
+
+				// Iterate over a snapshot: every occurrence of the word is removed and a
+				// word that is not part of the list (any more) is simply skipped
+				$candidates = $this->split_words;
+				foreach ($candidates as $key => $word)
+				{
+					if ($word !== $common_word)
+					{
+						continue;
+					}
+
+					if (isset($this->split_words[$key - 1]) && in_array($this->split_words[$key - 1], $prefixes))
+					{
+						$this->common_words[] = $this->split_words[$key - 1];
+						unset($this->split_words[$key - 1]);
+					}
+					$this->common_words[] = $common_word;
+					unset($this->split_words[$key]);
+				}
+				unset($candidates);
+			}
+			$db->sql_freeresult($result);
+		}
+
+		if (sizeof($this->split_words))
+		{
+			$this->split_words = array_values($this->split_words);
+			return true;
+		}
+		return false;
+	}
+
+	/**
+	* Turns text into an array of words that can be stored in the word list table
+	*
+	* @param string $text the message or subject to split
+	* @return array lower-cased words whose length is within the configured limits; the keys are not guaranteed to be consecutive
+	*/
+	function split_message($text)
+	{
+		global $config;
+
+		// The replacement tables never change, so they are only built on the first call
+		static $drop_char_match, $drop_char_replace;
+
+		$this->get_ignore_words();
+		$this->get_synonyms();
+
+		if (!is_array($drop_char_match))
+		{
+			$drop_char_match = array('-', '^', '$', ';', '#', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '\'', '!', '*', '+');
+			$drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' ');
+		}
+
+		// Everything matched by one of these patterns is replaced by a single space
+		$strip_patterns = array(
+			// Comments for hardcoded bbcode elements (urls, smilies, html)
+			'#<!\-\- .* \-\->(.*?)<!\-\- .* \-\->#is',
+			// New lines, carriage returns
+			"#[\n\r]+#",
+			// NCRs like &nbsp; etc.
+			'#(&amp;|&)[\#a-z0-9]+?;#i',
+			// Do not index code
+			'#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is',
+			// BBcode
+			'#\[\/?[a-z\*\+\-]+(?:=.*?)?(\:?[0-9a-z]{5,})\]#'
+			// Filter out ; and # but not &#[0-9]+;
+			// (disabled) '#(&\#[0-9]+;)|;|\#|&#'
+		);
+
+		$padded = ' ' . strtolower(trim($text)) . ' ';
+		$cleaned = preg_replace($strip_patterns, ' ', $padded);
+
+		// Filter out non-alphabetical chars
+		$cleaned = str_replace($drop_char_match, $drop_char_replace, $cleaned);
+
+		// Split words on (collapsed) whitespace
+		$words = explode(' ', preg_replace('#\s+#', ' ', trim($cleaned)));
+
+		if (sizeof($this->ignore_words))
+		{
+			$words = array_diff($words, $this->ignore_words);
+		}
+
+		if (sizeof($this->replace_synonym))
+		{
+			$words = str_replace($this->replace_synonym, $this->match_synonym, $words);
+		}
+
+		// remove too short or too long words
+		$words = array_values($words);
+		foreach ($words as $idx => $word)
+		{
+			$word = trim($word);
+			$length = strlen($word);
+
+			if ($length < $config['fulltext_native_min_chars'] || $length > $config['fulltext_native_max_chars'])
+			{
+				unset($words[$idx]);
+			}
+			else
+			{
+				$words[$idx] = $word;
+			}
+		}
+
+		return $words;
+	}
+
+	/**
+	* Performs a search on keywords depending on display specific params.
+	*
+	* Works on $this->split_words as prepared by split_keywords(): every word is put into
+	* an AND-, OR- or NOT-group (the operators "+", "|" and "-" are only honoured when
+	* $terms is 'all'). A result has to contain every AND-word, at least one OR-word if
+	* OR-words produced matches, and none of the NOT-words.
+	*
+	* @param string $type 'posts' collects post ids, otherwise topic ids are collected
+	* @param string $fields 'titleonly', 'msgonly' or 'firstpost' restrict where a word has to match, any other value applies no restriction
+	* @param string $terms 'all' if every word is required, otherwise one matching word is sufficient
+	* @param array $sort_by_sql maps sort keys to the SQL column used for sorting
+	* @param mixed $sort_key key into $sort_by_sql
+	* @param string $sort_dir 'a' sorts ascending, any other value descending
+	* @param int $sort_days if not zero only posts of the last $sort_days days are searched
+	* @param array $ex_fid_ary ids of forums which are excluded from the search
+	* @param array $m_approve_fid_ary empty: only approved posts, array(-1): approval is not checked, otherwise unapproved posts are only found outside of the listed forums
+	* @param int $topic_id if not zero only posts of this topic are searched
+	* @param array $author_ary if not empty only posts written by these user ids are found
+	* @param array $id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
+	* @param int $start indicates the first index of the page
+	* @param int $per_page number of ids each page is supposed to contain
+	* @return total number of results, false if there are no keywords or no results
+	*/
+	function keyword_search($type, &$fields, &$terms, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
+	{
+		global $db;
+
+		// No keywords? No posts.
+		if (!sizeof($this->split_words))
+		{
+			return false;
+		}
+
+		// generate a search_key from all the options to identify the results
+		$search_key = md5(implode('#', array(
+			implode(',', $this->split_words),
+			$type,
+			$fields,
+			$terms,
+			$sort_days,
+			$sort_key,
+			$topic_id,
+			implode(',', $ex_fid_ary),
+			implode(',', $m_approve_fid_ary),
+			implode(',', $author_ary)
+		)));
+
+		// try reading the results from cache
+		$result_count = 0;
+		if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
+		{
+			return $result_count;
+		}
+
+		$result_count = 0;
+		$id_ary = array();
+
+		$join_topic = ($type == 'posts') ? false : true;
+
+		// Build sql strings for sorting. The first character of the sort column is its
+		// table alias and decides which additional table has to be joined.
+		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
+		$sql_sort_table = $sql_sort_join = '';
+		switch ($sql_sort[0])
+		{
+			case 'u':
+				$sql_sort_table	= USERS_TABLE . ' u, ';
+				$sql_sort_join	= ' AND u.user_id = p.poster_id ';
+			break;
+
+			case 't':
+				$join_topic = true;
+			break;
+
+			case 'f':
+				$sql_sort_table	= FORUMS_TABLE . ' f, ';
+				$sql_sort_join	= ' AND f.forum_id = p.forum_id ';
+			break;
+		}
+
+		// Build some display specific sql strings
+		switch ($fields)
+		{
+			case 'titleonly':
+				$sql_match = ' AND m.title_match = 1 AND p.post_id = t.topic_first_post_id';
+				$join_topic = true;
+			break;
+
+			case 'msgonly':
+				$sql_match = ' AND m.title_match = 0';
+			break;
+
+			case 'firstpost':
+				$sql_match = ' AND p.post_id = t.topic_first_post_id';
+				$join_topic = true;
+			break;
+
+			default:
+				$sql_match = '';
+		}
+
+		if (!sizeof($m_approve_fid_ary))
+		{
+			$m_approve_fid_sql = ' AND p.post_approved = 1';
+		}
+		else if ($m_approve_fid_ary === array(-1))
+		{
+			$m_approve_fid_sql = '';
+		}
+		else
+		{
+			$m_approve_fid_sql = ' AND (p.post_approved = 1 OR p.forum_id NOT IN (' . implode(', ', $m_approve_fid_ary) . '))';
+		}
+
+		$sql_select	= ($type == 'posts') ? 'm.post_id' : 'DISTINCT t.topic_id';
+		$sql_from	= ($join_topic) ? TOPICS_TABLE . ' t, ' : '';
+		$field		= ($type == 'posts') ? 'm.post_id' : 't.topic_id';
+		$sql_author	= (sizeof($author_ary) == 1) ? ' = ' . $author_ary[0] : 'IN (' . implode(',', $author_ary) . ')';
+
+		$sql_where_options = $sql_sort_join;
+		// the topic id ends up in the query unquoted, so force it to be a number (author_search() does the same)
+		$sql_where_options .= ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
+		$sql_where_options .= ($join_topic) ? ' AND t.topic_id = p.topic_id' : '';
+		$sql_where_options .= (sizeof($ex_fid_ary)) ? ' AND p.forum_id NOT IN (' . implode(',', $ex_fid_ary) . ')' : '';
+		$sql_where_options .= $m_approve_fid_sql;
+		$sql_where_options .= (sizeof($author_ary)) ? ' AND p.poster_id ' . $sql_author : '';
+		$sql_where_options .= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
+		$sql_where_options .= $sql_match;
+
+		// split the words into three arrays (AND, OR, NOT); every entry is an escaped
+		// and quoted SQL string in which wildcards have been turned into "%"
+		$sql_words = array('AND' => array(), 'OR' => array(), 'NOT' => array());
+		$bool = ($terms == 'all') ? 'AND' : 'OR';
+
+		foreach ($this->split_words as $word)
+		{
+			switch ($word)
+			{
+				case '-':
+					$bool = 'NOT';
+				break;
+
+				case '+':
+					$bool = 'AND';
+				break;
+
+				case '|':
+					$bool = 'OR';
+				break;
+
+				default:
+					// operators are ignored unless all terms are required
+					$bool = ($terms != 'all') ? 'OR' : $bool;
+					$sql_words[$bool][] = "'" . $db->sql_escape(preg_replace('#\*+#', '%', trim($word))) . "'";
+					// an operator only applies to the word directly following it
+					$bool = ($terms == 'all') ? 'AND' : 'OR';
+			}
+		}
+
+		// Select all ids that contain all AND-words. $have_and_ids tells an "empty
+		// intersection" apart from "no AND-word has been evaluated yet".
+		$and_ids = $or_ids = $not_ids = array();
+		$have_and_ids = false;
+
+		if (sizeof($sql_words['AND']))
+		{
+			// words without wildcard, keyed by themselves so duplicates are only counted once
+			$sql_in_words = array();
+
+			foreach ($sql_words['AND'] as $word)
+			{
+				if (!strstr($word, '%'))
+				{
+					$sql_in_words[$word] = $word;
+					continue;
+				}
+
+				// every word containing a wildcard needs a query of its own
+				$sql = "SELECT $sql_select
+					FROM $sql_from$sql_sort_table" . POSTS_TABLE . ' p, ' . SEARCH_MATCH_TABLE . ' m, ' . SEARCH_WORD_TABLE . " w
+					WHERE w.word_text LIKE $word
+						AND m.word_id = w.word_id
+						AND w.word_common <> 1
+						AND p.post_id = m.post_id
+						$sql_where_options
+					GROUP BY $field
+					ORDER BY $sql_sort";
+				$result = $db->sql_query($sql);
+
+				$ids = array();
+				while ($row = $db->sql_fetchrow($result))
+				{
+					$ids[] = ($type == 'topics') ? $row['topic_id'] : $row['post_id'];
+				}
+				$db->sql_freeresult($result);
+
+				// remove ids that are not present in all AND-word results
+				$and_ids = ($have_and_ids) ? array_intersect($and_ids, $ids) : $ids;
+				$have_and_ids = true;
+				unset($ids);
+
+				// one AND-word without (common) match: nothing can be found any more
+				if (!sizeof($and_ids))
+				{
+					$id_ary = array();
+					return false;
+				}
+			}
+
+			if (sizeof($sql_in_words))
+			{
+				$sql_in = implode(', ', $sql_in_words);
+
+				$sql = "SELECT $sql_select, COUNT(DISTINCT m.word_id) as matches, " . $sort_by_sql[$sort_key] . "
+					FROM $sql_from$sql_sort_table" . POSTS_TABLE . ' p, ' . SEARCH_MATCH_TABLE . ' m, ' . SEARCH_WORD_TABLE . " w
+					WHERE w.word_text IN ($sql_in)
+						AND m.word_id = w.word_id
+						AND w.word_common <> 1
+						AND p.post_id = m.post_id
+						$sql_where_options
+					GROUP BY $field, " . $sort_by_sql[$sort_key] . '
+					ORDER BY ' . $sql_sort;
+				$result = $db->sql_query($sql);
+
+				// A little trick so we only need one query: using DISTINCT makes every word unique so if the
+				// number of all words for one id equals the number of words in the IN-list it has to contain
+				// all of them. Wildcard words are not part of this query and therefore must not be counted.
+				$required_matches = sizeof($sql_in_words);
+
+				$ids = array();
+				while ($row = $db->sql_fetchrow($result))
+				{
+					if ($row['matches'] == $required_matches)
+					{
+						$ids[] = ($type == 'topics') ? $row['topic_id'] : $row['post_id'];
+					}
+				}
+				$db->sql_freeresult($result);
+
+				// remove ids that are not present in all AND-word results
+				$and_ids = ($have_and_ids) ? array_intersect($and_ids, $ids) : $ids;
+				$have_and_ids = true;
+				unset($ids);
+
+				if (!sizeof($and_ids))
+				{
+					$id_ary = array();
+					return false;
+				}
+			}
+		}
+
+		// Select all ids that contain one of the OR-words
+		if (sizeof($sql_words['OR']))
+		{
+			$sql_where = $sql_in = '';
+			foreach ($sql_words['OR'] as $word)
+			{
+				if (strstr($word, '%'))
+				{
+					$sql_where .= (($sql_where) ? ' OR w.word_text ' : 'w.word_text ') . "LIKE $word";
+				}
+				else
+				{
+					$sql_in .= (($sql_in) ? ', ' : '') . $word;
+				}
+			}
+			$sql_where = ($sql_in) ? $sql_where . (($sql_where) ? ' OR ' : '') . 'w.word_text IN (' . $sql_in . ')' : $sql_where;
+
+			$sql = "SELECT $sql_select
+				FROM $sql_from$sql_sort_table" . POSTS_TABLE . ' p, ' . SEARCH_MATCH_TABLE . ' m, ' . SEARCH_WORD_TABLE . " w
+				WHERE ($sql_where)
+					AND m.word_id = w.word_id
+					AND w.word_common <> 1
+					AND p.post_id = m.post_id
+					$sql_where_options
+				ORDER BY $sql_sort";
+			$result = $db->sql_query($sql);
+
+			while ($row = $db->sql_fetchrow($result))
+			{
+				$or_ids[] = ($type == 'topics') ? $row['topic_id'] : $row['post_id'];
+			}
+			$db->sql_freeresult($result);
+
+			// a post matching several OR-words is returned once per match, keep the first occurrence only
+			$or_ids = array_unique($or_ids);
+		}
+
+		// remove ids that do not contain any OR-word
+		if (sizeof($or_ids))
+		{
+			$id_ary = ($have_and_ids) ? array_intersect($and_ids, $or_ids) : $or_ids;
+		}
+		else
+		{
+			$id_ary = ($have_and_ids) ? $and_ids : array();
+		}
+
+		unset($and_ids);
+		unset($or_ids);
+
+		// remove all ids that contain a NOT-word
+		if (sizeof($sql_words['NOT']) && sizeof($id_ary))
+		{
+			$sql_where = $sql_in = '';
+			foreach ($sql_words['NOT'] as $word)
+			{
+				if (strstr($word, '%'))
+				{
+					$sql_where .= (($sql_where) ? ' OR w.word_text ' : 'w.word_text ') . "LIKE $word";
+				}
+				else
+				{
+					$sql_in .= (($sql_in) ? ', ' : '') . $word;
+				}
+			}
+			$sql_where = ($sql_in) ? $sql_where . (($sql_where) ? ' OR ' : '') . 'w.word_text IN (' . $sql_in . ')' : $sql_where;
+
+			// $sql_where_options contains the join condition of the sort table, so that
+			// table has to be listed here as well although the result is not sorted
+			$sql = "SELECT $sql_select
+				FROM $sql_from$sql_sort_table" . POSTS_TABLE . ' p, ' . SEARCH_MATCH_TABLE . ' m, ' . SEARCH_WORD_TABLE . " w
+				WHERE ($sql_where)
+					AND m.word_id = w.word_id
+					AND w.word_common <> 1
+					AND p.post_id = m.post_id
+					$sql_where_options";
+			$result = $db->sql_query($sql);
+
+			while ($row = $db->sql_fetchrow($result))
+			{
+				$not_ids[] = ($type == 'topics') ? $row['topic_id'] : $row['post_id'];
+			}
+			$db->sql_freeresult($result);
+		}
+
+		if (sizeof($not_ids))
+		{
+			$id_ary = (sizeof($id_ary)) ? array_diff($id_ary, $not_ids) : array();
+		}
+		unset($not_ids);
+
+		if (!sizeof($id_ary))
+		{
+			return false;
+		}
+
+		$result_count = sizeof($id_ary);
+
+		// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
+		$id_ary = array_slice($id_ary, $start);
+		$this->save_ids($search_key, implode(' ', $this->split_words), $author_ary, $result_count, $id_ary, $start, $sort_dir);
+		$id_ary = array_slice($id_ary, 0, (int) $per_page);
+
+		return $result_count;
+	}
+
+	/**
+	* Performs a search on an author's posts without caring about message contents. Depends on display specific params
+	*
+	* @param string $type 'posts' collects post ids, otherwise topic ids are collected
+	* @param array $sort_by_sql maps sort keys to the SQL column used for sorting
+	* @param mixed $sort_key key into $sort_by_sql
+	* @param string $sort_dir 'a' sorts ascending, any other value descending
+	* @param int $sort_days if not zero only posts of the last $sort_days days are searched
+	* @param array $ex_fid_ary ids of forums which are excluded from the search
+	* @param array $m_approve_fid_ary empty: only approved posts, array(-1): approval is not checked, otherwise see the review note in the code
+	* @param int $topic_id if not zero only posts of this topic are searched
+	* @param array $author_ary user ids of the authors to search for
+	* @param array $id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
+	* @param int $start indicates the first index of the page
+	* @param int $per_page number of ids each page is supposed to contain
+	* @return total number of results, 0 if no author was given, false if no ids were found
+	*/
+	function author_search($type, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
+	{
+		global $config, $db;
+
+		// No author? No posts.
+		if (!sizeof($author_ary))
+		{
+			return 0;
+		}
+
+		// generate a search_key from all the options to identify the results
+		$search_key = md5(implode('#', array(
+			'',
+			$type,
+			'',
+			'',
+			$sort_days,
+			$sort_key,
+			$topic_id,
+			implode(',', $ex_fid_ary),
+			implode(',', $m_approve_fid_ary),
+			implode(',', $author_ary)
+		)));
+
+		// try reading the results from cache
+		$result_count = 0;
+		if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
+		{
+			return $result_count;
+		}
+
+		$id_ary = array();
+
+		// Create some display specific sql strings
+		$sql_author		= 'p.poster_id ' . ((sizeof($author_ary) > 1) ? 'IN (' . implode(',', $author_ary) . ')' : '= ' . $author_ary[0]);
+		$sql_fora		= (sizeof($ex_fid_ary)) ? ' AND p.forum_id NOT IN (' . implode(',', $ex_fid_ary) . ')' : '';
+		$sql_time		= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
+		$sql_topic_id	= ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
+
+		// Build sql strings for sorting. The first character of the sort column is its
+		// table alias and decides which additional table has to be joined.
+		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
+		$sql_sort_table = $sql_sort_join = '';
+		switch ($sql_sort[0])
+		{
+			case 'u':
+				$sql_sort_table	= USERS_TABLE . ' u, ';
+				$sql_sort_join	= ' AND u.user_id = p.poster_id ';
+			break;
+
+			case 't':
+				// the topic query joins the topics table anyway
+				$sql_sort_table	= ($type == 'posts') ? TOPICS_TABLE . ' t, ' : '';
+				$sql_sort_join	= ($type == 'posts') ? ' AND t.topic_id = p.topic_id ' : '';
+			break;
+
+			case 'f':
+				$sql_sort_table	= FORUMS_TABLE . ' f, ';
+				$sql_sort_join	= ' AND f.forum_id = p.forum_id ';
+			break;
+		}
+
+		if (!sizeof($m_approve_fid_ary))
+		{
+			$m_approve_fid_sql = ' AND p.post_approved = 1';
+		}
+		else if ($m_approve_fid_ary == array(-1))
+		{
+			$m_approve_fid_sql = '';
+		}
+		else
+		{
+			// NOTE(review): keyword_search() uses NOT IN for the same array - confirm which
+			// of the two conditions matches what the caller puts into $m_approve_fid_ary
+			$m_approve_fid_sql = ' AND (p.post_approved = 1 OR p.forum_id IN (' . implode(', ', $m_approve_fid_ary) . '))';
+		}
+
+		// If the cache was completely empty count the results
+		if (!$result_count)
+		{
+			if ($type == 'posts')
+			{
+				$sql = 'SELECT COUNT(p.post_id) as result_count
+					FROM ' . POSTS_TABLE . " p
+					WHERE $sql_author
+						$sql_topic_id
+						$m_approve_fid_sql
+						$sql_fora
+						$sql_time";
+			}
+			else
+			{
+				$sql = 'SELECT COUNT(DISTINCT t.topic_id) as result_count
+					FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
+					WHERE $sql_author
+						$sql_topic_id
+						$m_approve_fid_sql
+						$sql_fora
+						AND t.topic_id = p.topic_id
+						$sql_time";
+			}
+			$result = $db->sql_query($sql);
+
+			if ($row = $db->sql_fetchrow($result))
+			{
+				$result_count = $row['result_count'];
+			}
+			$db->sql_freeresult($result);
+		}
+
+		// Build the query for really selecting the ids
+		if ($type == 'posts')
+		{
+			// The topic restriction only uses p.topic_id, so the topics table is not needed
+			// for it; if a topic column is sorted on, $sql_sort_table already joins it.
+			$sql = 'SELECT p.post_id
+				FROM ' . $sql_sort_table . POSTS_TABLE . " p
+				WHERE $sql_author
+					$sql_topic_id
+					$m_approve_fid_sql
+					$sql_fora
+					$sql_sort_join
+					$sql_time
+				ORDER BY $sql_sort";
+			$field = 'post_id';
+		}
+		else
+		{
+			$sql = 'SELECT t.topic_id
+				FROM ' . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
+				WHERE $sql_author
+					$sql_topic_id
+					$m_approve_fid_sql
+					$sql_fora
+					AND t.topic_id = p.topic_id
+					$sql_sort_join
+					$sql_time
+				GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
+				ORDER BY ' . $sql_sort;
+			$field = 'topic_id';
+		}
+
+		// Only read one block of posts from the db and then cache it
+		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
+
+		while ($row = $db->sql_fetchrow($result))
+		{
+			$id_ary[] = $row[$field];
+		}
+		$db->sql_freeresult($result);
+
+		if (sizeof($id_ary))
+		{
+			$this->save_ids($search_key, '', $author_ary, $result_count, $id_ary, $start, $sort_dir);
+			$id_ary = array_slice($id_ary, 0, (int) $per_page);
+
+			return $result_count;
+		}
+		return false;
+	}
+
+	/**
+	* Turns a list of words into escaped and quoted SQL string literals
+	*
+	* @param array $words plain words
+	* @return array one "'word'" literal per input word, in the same order
+	* @access private
+	*/
+	function _sql_quote_words($words)
+	{
+		global $db;
+
+		$quoted = array();
+		foreach ($words as $word)
+		{
+			$quoted[] = "'" . $db->sql_escape($word) . "'";
+		}
+
+		return $quoted;
+	}
+
+	/**
+	* Updates wordlist and wordmatch tables when a message is posted or changed
+	*
+	* Does nothing if the config option fulltext_native_load_upd is disabled.
+	*
+	* @param string $mode contains the post mode: edit, post, reply, quote ... Only 'edit' compares against the words indexed so far
+	* @param int $post_id id of the post the words belong to
+	* @param string $message text of the post
+	* @param string $subject subject of the post, may be empty
+	* @param int $poster_id user id passed on to destroy_cache()
+	*/
+	function index($mode, $post_id, &$message, &$subject, $poster_id)
+	{
+		global $config, $db;
+
+		// Is the fulltext indexer disabled? If yes then we need not
+		// carry on ... it's okay ... I know when I'm not wanted boo hoo
+		if (!$config['fulltext_native_load_upd'])
+		{
+			return;
+		}
+
+		// the id is put into several queries unquoted
+		$post_id = (int) $post_id;
+
+		// Split old and new post/subject to obtain array of 'words'
+		$split_text = $this->split_message($message);
+		$split_title = ($subject) ? $this->split_message($subject) : array();
+
+		// words currently indexed for this post: word_text => word_id
+		$cur_words = array('post' => array(), 'title' => array());
+
+		$words = array();
+		if ($mode == 'edit')
+		{
+			$sql = 'SELECT w.word_id, w.word_text, m.title_match
+				FROM ' . SEARCH_WORD_TABLE . ' w, ' . SEARCH_MATCH_TABLE . " m
+				WHERE m.post_id = $post_id
+					AND w.word_id = m.word_id";
+			$result = $db->sql_query($sql);
+
+			while ($row = $db->sql_fetchrow($result))
+			{
+				$which = ($row['title_match']) ? 'title' : 'post';
+				$cur_words[$which][$row['word_text']] = $row['word_id'];
+			}
+			$db->sql_freeresult($result);
+
+			// only the difference between the old and the new word list has to be written
+			$words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
+			$words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
+			$words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
+			$words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
+		}
+		else
+		{
+			$words['add']['post'] = $split_text;
+			$words['add']['title'] = $split_title;
+			$words['del']['post'] = array();
+			$words['del']['title'] = array();
+		}
+		unset($split_text);
+		unset($split_title);
+
+		// Get unique words from the above arrays
+		$unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));
+
+		// We now have unique arrays of all words to be added and removed and
+		// individual arrays of added and removed words for text and title. What
+		// we need to do now is add the new words (if they don't already exist)
+		// and then add (or remove) matches between the words and this post
+		if (sizeof($unique_add_words))
+		{
+			$sql = 'SELECT word_id, word_text
+				FROM ' . SEARCH_WORD_TABLE . '
+				WHERE word_text IN (' . implode(', ', $this->_sql_quote_words($unique_add_words)) . ')';
+			$result = $db->sql_query($sql);
+
+			$word_ids = array();
+			while ($row = $db->sql_fetchrow($result))
+			{
+				$word_ids[$row['word_text']] = $row['word_id'];
+			}
+			$db->sql_freeresult($result);
+
+			$new_words = array_diff($unique_add_words, array_keys($word_ids));
+
+			if (sizeof($new_words))
+			{
+				$quoted_new_words = $this->_sql_quote_words($new_words);
+
+				switch (SQL_LAYER)
+				{
+					case 'mysql':
+						// one statement with a value list per word
+						$sql = 'INSERT INTO ' . SEARCH_WORD_TABLE . ' (word_text)
+							VALUES (' . implode('), (', $quoted_new_words) . ')';
+						$db->sql_query($sql);
+					break;
+
+					case 'mysql4':
+					case 'mysqli':
+					case 'mssql':
+					case 'mssql_odbc':
+					case 'sqlite':
+						// one statement inserting the union of single-row selects
+						$sql = 'INSERT INTO ' . SEARCH_WORD_TABLE . ' (word_text) SELECT ' . implode(' UNION ALL SELECT ', $quoted_new_words);
+						$db->sql_query($sql);
+					break;
+
+					default:
+						foreach ($quoted_new_words as $quoted_word)
+						{
+							$sql = 'INSERT INTO ' . SEARCH_WORD_TABLE . " (word_text)
+								VALUES ($quoted_word)";
+							$db->sql_query($sql);
+						}
+				}
+				unset($quoted_new_words);
+			}
+			unset($new_words);
+		}
+
+		// now update the search match table, remove links to removed words and add links to new words
+		foreach ($words['del'] as $word_in => $word_ary)
+		{
+			$title_match = ($word_in == 'title') ? 1 : 0;
+
+			if (sizeof($word_ary))
+			{
+				$sql_in = array();
+				foreach ($word_ary as $word)
+				{
+					$sql_in[] = $cur_words[$word_in][$word];
+				}
+
+				$sql = 'DELETE FROM ' . SEARCH_MATCH_TABLE . '
+					WHERE word_id IN (' . implode(', ', $sql_in) . ')
+						AND post_id = ' . $post_id . "
+						AND title_match = $title_match";
+				$db->sql_query($sql);
+				unset($sql_in);
+			}
+		}
+
+		foreach ($words['add'] as $word_in => $word_ary)
+		{
+			$title_match = ($word_in == 'title') ? 1 : 0;
+
+			if (sizeof($word_ary))
+			{
+				$sql = 'INSERT INTO ' . SEARCH_MATCH_TABLE . " (post_id, word_id, title_match)
+					SELECT $post_id, word_id, $title_match
+					FROM " . SEARCH_WORD_TABLE . '
+					WHERE word_text IN (' . implode(', ', $this->_sql_quote_words($word_ary)) . ')';
+				$db->sql_query($sql);
+			}
+		}
+
+		// destroy cached search results containing any of the words removed or added
+		$this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));
+
+		unset($unique_add_words);
+		unset($words);
+		unset($cur_words);
+	}
+
+	/**
+	* Removes entries from the wordmatch table for the specified post_ids
+	*
+	* @param array $post_ids ids of the posts whose word matches are deleted, may be empty
+	* @param array $author_ids passed on to destroy_cache()
+	*/
+	function index_remove($post_ids, $author_ids)
+	{
+		global $db;
+
+		// An empty list would produce "IN ()" which is not valid SQL
+		if (sizeof($post_ids))
+		{
+			$sql = 'DELETE FROM ' . SEARCH_MATCH_TABLE . '
+				WHERE post_id IN (' . implode(', ', array_map('intval', $post_ids)) . ')';
+			$db->sql_query($sql);
+		}
+
+		// SEARCH_WORD_TABLE will be updated by tidy()
+
+		$this->destroy_cache(array(), $author_ids);
+	}
+
+	/**
+	* Tidy up indexes: Tag 'common words' and remove
+	* words no longer referenced in the match table
+	*
+	* Does nothing if the config option fulltext_native_load_upd is disabled. Cached
+	* search results for all affected words are destroyed at the end.
+	*/
+	function tidy()
+	{
+		global $db, $config;
+
+		// Is the fulltext indexer disabled? If yes then we need not
+		// carry on ... it's okay ... I know when I'm not wanted boo hoo
+		if (!$config['fulltext_native_load_upd'])
+		{
+			return;
+		}
+
+		// Texts (not ids) of all words touched below: index() also hands the words
+		// themselves to destroy_cache()
+		$destroy_cache_words = array();
+
+		// Remove common (> 60% of posts ) words
+		if ($config['num_posts'] >= 100)
+		{
+			$sql = 'SELECT word_id
+				FROM ' . SEARCH_MATCH_TABLE . '
+				GROUP BY word_id
+				HAVING COUNT(word_id) > ' . floor($config['num_posts'] * 0.6);
+			$result = $db->sql_query($sql);
+
+			$common_ids = array();
+			while ($row = $db->sql_fetchrow($result))
+			{
+				$common_ids[] = $row['word_id'];
+			}
+			$db->sql_freeresult($result);
+
+			if (sizeof($common_ids))
+			{
+				$sql_in = implode(', ', $common_ids);
+
+				// the match table only knows ids, fetch the matching texts for the cache
+				$sql = 'SELECT word_text
+					FROM ' . SEARCH_WORD_TABLE . "
+					WHERE word_id IN ($sql_in)";
+				$result = $db->sql_query($sql);
+
+				while ($row = $db->sql_fetchrow($result))
+				{
+					$destroy_cache_words[] = $row['word_text'];
+				}
+				$db->sql_freeresult($result);
+
+				$sql = 'UPDATE ' . SEARCH_WORD_TABLE . "
+					SET word_common = 1
+					WHERE word_id IN ($sql_in)";
+				$db->sql_query($sql);
+
+				$sql = 'DELETE FROM ' . SEARCH_MATCH_TABLE . "
+					WHERE word_id IN ($sql_in)";
+				$db->sql_query($sql);
+				unset($sql_in);
+			}
+			unset($common_ids);
+		}
+
+		// Remove words with no matches ... this is a potentially nasty query
+		$sql = 'SELECT w.word_id, w.word_text
+			FROM ' . SEARCH_WORD_TABLE . ' w
+			LEFT JOIN ' . SEARCH_MATCH_TABLE . ' m ON (w.word_id = m.word_id)
+			WHERE w.word_common = 0 AND m.word_id IS NULL
+			GROUP BY w.word_id, w.word_text';
+		$result = $db->sql_query($sql);
+
+		$unused_ids = array();
+		while ($row = $db->sql_fetchrow($result))
+		{
+			$unused_ids[] = $row['word_id'];
+			$destroy_cache_words[] = $row['word_text'];
+		}
+		$db->sql_freeresult($result);
+
+		if (sizeof($unused_ids))
+		{
+			$sql = 'DELETE FROM ' . SEARCH_WORD_TABLE . '
+				WHERE word_id IN (' . implode(', ', $unused_ids) . ')';
+			$db->sql_query($sql);
+		}
+		unset($unused_ids);
+
+		// destroy cached search results containing any of the words that are now common or were removed
+		$this->destroy_cache(array_unique($destroy_cache_words));
+	}
+
+	/**
+	* Deletes all words from the index
+	*
+	* Empties the word, word match and search (result) tables. Neither parameter is
+	* used by this backend.
+	*/
+	function delete_index($acp_module, $u_action)
+	{
+		global $db;
+
+		// sqlite is emptied with a plain DELETE, every other layer with TRUNCATE
+		$sql_empty = (SQL_LAYER != 'sqlite') ? 'TRUNCATE ' : 'DELETE FROM ';
+
+		foreach (array(SEARCH_WORD_TABLE, SEARCH_MATCH_TABLE, SEARCH_TABLE) as $table)
+		{
+			$db->sql_query($sql_empty . $table);
+		}
+	}
+
+	/**
+	* Returns true if the index contains data, i.e. both the word table and the
+	* word match table have at least one row
+	*/
+	function index_created()
+	{
+		// the row counts are only determined once per instance
+		if (!is_array($this->stats))
+		{
+			$this->get_stats();
+		}
+
+		$has_words = $this->stats['total_words'];
+		$has_matches = $this->stats['total_matches'];
+
+		return ($has_words && $has_matches);
+	}
+
+	/**
+	* Returns an associative array containing information about the indexes
+	*
+	* @return array language string => number of rows, for the word and the word match table
+	*/
+	function index_stats()
+	{
+		global $user;
+
+		// the row counts are only determined once per instance
+		if (!is_array($this->stats))
+		{
+			$this->get_stats();
+		}
+
+		$info = array();
+		$info[$user->lang['TOTAL_WORDS']] = $this->stats['total_words'];
+		$info[$user->lang['TOTAL_MATCHES']] = $this->stats['total_matches'];
+
+		return $info;
+	}
+
+	/**
+	* Counts the rows of the word and the word match table and stores the numbers
+	* in $this->stats ('total_words', 'total_matches')
+	*/
+	function get_stats()
+	{
+		global $db;
+
+		// stats key (also used as column alias) => table to count
+		$counted_tables = array(
+			'total_words'	=> SEARCH_WORD_TABLE,
+			'total_matches'	=> SEARCH_MATCH_TABLE
+		);
+
+		foreach ($counted_tables as $stat => $table)
+		{
+			$sql = 'SELECT COUNT(*) as ' . $stat . "\n FROM " . $table;
+			$result = $db->sql_query($sql);
+			$this->stats[$stat] = (int) $db->sql_fetchfield($stat);
+			$db->sql_freeresult($result);
+		}
+	}
+
+	/**
+	* Returns a list of options for the ACP to display
+	*
+	* @return array 'tpl' holds the HTML for the three settings, 'config' maps each config name to its type
+	*/
+	function acp()
+	{
+		global $user, $config;
+
+		// exactly one of the two radio buttons is pre-selected
+		$update_yes = ($config['fulltext_native_load_upd']) ? ' checked="checked"' : '';
+		$update_no = (!$config['fulltext_native_load_upd']) ? ' checked="checked"' : '';
+
+		$min_chars = (int) $config['fulltext_native_min_chars'];
+		$max_chars = (int) $config['fulltext_native_max_chars'];
+
+		$tpl = '
+ <dl>
+ <dt><label for="fulltext_native_load_upd">' . $user->lang['YES_SEARCH_UPDATE'] . ':</label><br /><span>' . $user->lang['YES_SEARCH_UPDATE_EXPLAIN'] . '</span></dt>
+ <dd><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . $update_yes . ' class="radio" />&nbsp;' . $user->lang['YES'] . '&nbsp;&nbsp;<input type="radio" name="config[fulltext_native_load_upd]" value="0"' . $update_no . ' class="radio" />&nbsp;' . $user->lang['NO'] . '</dd>
+ </dl>
+ <dl>
+ <dt><label for="fulltext_native_min_chars">' . $user->lang['MIN_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
+ <dd><input id="fulltext_native_min_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_min_chars]" value="' . $min_chars . '" /></dd>
+ </dl>
+ <dl>
+ <dt><label for="fulltext_native_max_chars">' . $user->lang['MAX_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
+ <dd><input id="fulltext_native_max_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_max_chars]" value="' . $max_chars . '" /></dd>
+ </dl>
+ ';
+
+		// These are fields required in the config table
+		$config_vars = array(
+			'fulltext_native_load_upd'	=> 'bool',
+			'fulltext_native_min_chars'	=> 'integer',
+			'fulltext_native_max_chars'	=> 'integer'
+		);
+
+		return array(
+			'tpl'		=> $tpl,
+			'config'	=> $config_vars
+		);
+	}
+}
+
+?> \ No newline at end of file