diff options
author | Oleg Pudeyev <oleg@bsdpower.com> | 2012-07-27 22:27:26 -0400 |
---|---|---|
committer | Oleg Pudeyev <oleg@bsdpower.com> | 2012-07-27 22:27:26 -0400 |
commit | 7fd1a166da4c040532bb8ff35584b330348366d2 (patch) | |
tree | eb804d8cdea2fee1ef694edbd11b97cd2e4cf3e2 /phpBB/includes | |
parent | d1e56868667234b4d8ea61a79463c3e3e47efa80 (diff) | |
parent | f1729281e6f69202730c0926f3799da516fd3ae9 (diff) | |
download | forums-7fd1a166da4c040532bb8ff35584b330348366d2.tar forums-7fd1a166da4c040532bb8ff35584b330348366d2.tar.gz forums-7fd1a166da4c040532bb8ff35584b330348366d2.tar.bz2 forums-7fd1a166da4c040532bb8ff35584b330348366d2.tar.xz forums-7fd1a166da4c040532bb8ff35584b330348366d2.zip |
Merge PR #865 branch 'dhruvgoel92/feature/sphinx-fulltext-search' into develop
* dhruvgoel92/feature/sphinx-fulltext-search: (57 commits)
[feature/sphinx-fulltext-search] add sphinx to Authors file
[feature/sphinx-fulltext-search] add sphinxapi.php file
[feature/sphinx-fulltext-search] fix auth bug
[feature/sphinx-fulltext-search] remove unused property
[feature/sphinx-fulltext-search] use 9312 as default port
[feature/sphinx-fulltext-search] fix language of host config
[feature/sphinx-fulltext-search] fix sphinx for arbitary host
[feature/sphinx-fulltext-search] coding changes acc to phbb conventions
[feature/sphinx-fulltext-search] fixing comments
[feature/sphinx-fulltext-search] add trailing slash in language
[feature/sphinx-fulltext-search] improve port option
[feature/sphinx-fulltext-search] remove stopwords and config path
[feature/sphinx-fulltext-search] makes sql host configurable
[feature/sphinx-fulltext-search] use readonly instead of disabled
[feature/sphinx-fulltext-search] fix language keys' typo
[feature/sphinx-fulltext-search] remove note from db_tools
[feature/sphinx-fulltext-search] add support for postgres
[feature/sphinx-fulltext-search] add pgsql functionality
[feature/sphinx-fulltext-search] use Update in sphinx query
[feature/sphinx-fulltext-search] use CASE instead of IF
...
Diffstat (limited to 'phpBB/includes')
-rw-r--r-- | phpBB/includes/acp/acp_search.php | 2 | ||||
-rw-r--r-- | phpBB/includes/constants.php | 1 | ||||
-rw-r--r-- | phpBB/includes/db/db_tools.php | 1 | ||||
-rw-r--r-- | phpBB/includes/search/fulltext_sphinx.php | 804 | ||||
-rw-r--r-- | phpBB/includes/search/sphinx/config.php | 288 | ||||
-rw-r--r-- | phpBB/includes/search/sphinx/config_comment.php | 49 | ||||
-rw-r--r-- | phpBB/includes/search/sphinx/config_section.php | 162 | ||||
-rw-r--r-- | phpBB/includes/search/sphinx/config_variable.php | 80 | ||||
-rw-r--r-- | phpBB/includes/sphinxapi.php | 1712 |
9 files changed, 3097 insertions, 2 deletions
diff --git a/phpBB/includes/acp/acp_search.php b/phpBB/includes/acp/acp_search.php index 54a3e7aaa1..82d9b021fe 100644 --- a/phpBB/includes/acp/acp_search.php +++ b/phpBB/includes/acp/acp_search.php @@ -598,7 +598,7 @@ class acp_search { global $phpbb_root_path, $phpEx, $user; - if (!class_exists($type) || !method_exists($type, 'get_name')) + if (!class_exists($type) || !method_exists($type, 'keyword_search')) { $error = $user->lang['NO_SUCH_SEARCH_MODULE']; return $error; diff --git a/phpBB/includes/constants.php b/phpBB/includes/constants.php index 66d2a003c6..68af41ab20 100644 --- a/phpBB/includes/constants.php +++ b/phpBB/includes/constants.php @@ -260,6 +260,7 @@ define('SESSIONS_TABLE', $table_prefix . 'sessions'); define('SESSIONS_KEYS_TABLE', $table_prefix . 'sessions_keys'); define('SITELIST_TABLE', $table_prefix . 'sitelist'); define('SMILIES_TABLE', $table_prefix . 'smilies'); +define('SPHINX_TABLE', $table_prefix . 'sphinx'); define('STYLES_TABLE', $table_prefix . 'styles'); define('STYLES_TEMPLATE_TABLE', $table_prefix . 'styles_template'); define('STYLES_TEMPLATE_DATA_TABLE',$table_prefix . 'styles_template_data'); diff --git a/phpBB/includes/db/db_tools.php b/phpBB/includes/db/db_tools.php index 73eae4e967..6df3aac9ce 100644 --- a/phpBB/includes/db/db_tools.php +++ b/phpBB/includes/db/db_tools.php @@ -20,7 +20,6 @@ if (!defined('IN_PHPBB')) * Currently not supported is returning SQL for creating tables. * * @package dbal -* @note currently not used within phpBB3, but may be utilized later. */ class phpbb_db_tools { diff --git a/phpBB/includes/search/fulltext_sphinx.php b/phpBB/includes/search/fulltext_sphinx.php new file mode 100644 index 0000000000..8371f6b377 --- /dev/null +++ b/phpBB/includes/search/fulltext_sphinx.php @@ -0,0 +1,804 @@ +<?php +/** +* +* @package search +* @copyright (c) 2005 phpBB Group +* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2 +* +*/ + +/** +*/ +if (!defined('IN_PHPBB')) +{ + exit; +} + +/** +* @ignore +*/ +/** +* This statement is necessary as this file is sometimes included from within a +* function and the variables used are in global space. +*/ +global $phpbb_root_path, $phpEx, $table_prefix; +require($phpbb_root_path . 'includes/sphinxapi.' . $phpEx); + +define('SPHINX_MAX_MATCHES', 20000); +define('SPHINX_CONNECT_RETRIES', 3); +define('SPHINX_CONNECT_WAIT_TIME', 300); + +/** +* fulltext_sphinx +* Fulltext search based on the sphinx search deamon +* @package search +*/ +class phpbb_search_fulltext_sphinx +{ + private $stats = array(); + private $split_words = array(); + private $id; + private $indexes; + private $sphinx; + private $auth; + private $config; + private $db; + private $db_tools; + private $dbtype; + private $user; + private $config_file_data = ''; + public $search_query; + public $common_words = array(); + + /** + * Constructor + * Creates a new phpbb_search_fulltext_postgres, which is used as a search backend. + * + * @param string|bool $error Any error that occurs is passed on through this reference variable otherwise false + */ + public function __construct(&$error) + { + global $config, $db, $user, $auth, $phpbb_root_path, $phpEx; + $this->config = $config; + $this->user = $user; + $this->db = $db; + $this->auth = $auth; + + if (!class_exists('phpbb_db_tools')) + { + require($phpbb_root_path . 'includes/db/db_tools.' . $phpEx); + } + + // Initialize phpbb_db_tools object + $this->db_tools = new phpbb_db_tools($this->db); + + if(!$this->config['fulltext_sphinx_id']) + { + set_config('fulltext_sphinx_id', unique_id()); + } + $this->id = $this->config['fulltext_sphinx_id']; + $this->indexes = 'index_phpbb_' . $this->id . '_delta;index_phpbb_' . $this->id . '_main'; + + $this->sphinx = new SphinxClient(); + + $this->sphinx->SetServer(($this->config['fulltext_sphinx_host'] ? $this->config['fulltext_sphinx_host'] : 'localhost'), ($this->config['fulltext_sphinx_port'] ? (int) $this->config['fulltext_sphinx_port'] : 9312)); + + $error = false; + } + + /** + * Returns the name of this search backend to be displayed to administrators + * + * @return string Name + * + * @access public + */ + public function get_name() + { + return 'Sphinx Fulltext'; + } + + /** + * Checks permissions and paths, if everything is correct it generates the config file + * + * @return string|bool Language key of the error/incompatiblity encountered, or false if successful + * + * @access public + */ + function init() + { + if ($this->db->sql_layer != 'mysql' && $this->db->sql_layer != 'mysql4' && $this->db->sql_layer != 'mysqli' && $this->db->sql_layer != 'postgres') + { + return $this->user->lang['FULLTEXT_SPHINX_WRONG_DATABASE']; + } + + // Move delta to main index each hour + set_config('search_gc', 3600); + + return false; + } + + /** + * Generates content of sphinx.conf + * + * @return bool True if sphinx.conf content is correctly generated, false otherwise + * + * @access private + */ + function config_generate() + { + global $phpbb_root_path, $phpEx; + + // Check if Database is supported by Sphinx + if ($this->db->sql_layer =='mysql' || $this->db->sql_layer == 'mysql4' || $this->db->sql_layer == 'mysqli') + { + $this->dbtype = 'mysql'; + } + else if ($this->db->sql_layer == 'postgres') + { + $this->dbtype = 'pgsql'; + } + else + { + $this->config_file_data = $this->user->lang('FULLTEXT_SPHINX_WRONG_DATABASE'); + return false; + } + + // Check if directory paths have been filled + if (!$this->config['fulltext_sphinx_data_path']) + { + $this->config_file_data = $this->user->lang('FULLTEXT_SPHINX_NO_CONFIG_DATA'); + return false; + } + + include($phpbb_root_path . 'config.' . $phpEx); + + /* Now that we're sure everything was entered correctly, + generate a config for the index. We use a config value + fulltext_sphinx_id for this, as it should be unique. */ + $config_object = new phpbb_search_sphinx_config($this->config_file_data); + $config_data = array( + 'source source_phpbb_' . $this->id . '_main' => array( + array('type', $this->dbtype), + // This config value sql_host needs to be changed incase sphinx and sql are on different servers + array('sql_host', $dbhost), + array('sql_user', $dbuser), + array('sql_pass', $dbpasswd), + array('sql_db', $dbname), + array('sql_port', $dbport), + array('sql_query_pre', 'SET NAMES \'utf8\''), + array('sql_query_pre', 'UPDATE ' . SPHINX_TABLE . ' SET max_doc_id = (SELECT MAX(post_id) FROM ' . POSTS_TABLE . ') WHERE counter_id = 1'), + array('sql_query_range', 'SELECT MIN(post_id), MAX(post_id) FROM ' . POSTS_TABLE . ''), + array('sql_range_step', '5000'), + array('sql_query', 'SELECT + p.post_id AS id, + p.forum_id, + p.topic_id, + p.poster_id, + CASE WHEN p.post_id = t.topic_first_post_id THEN 1 ELSE 0 END as topic_first_post, + p.post_time, + p.post_subject, + p.post_subject as title, + p.post_text as data, + t.topic_last_post_time, + 0 as deleted + FROM ' . POSTS_TABLE . ' p, ' . TOPICS_TABLE . ' t + WHERE + p.topic_id = t.topic_id + AND p.post_id >= $start AND p.post_id <= $end'), + array('sql_query_post', ''), + array('sql_query_post_index', 'UPDATE ' . SPHINX_TABLE . ' SET max_doc_id = $maxid WHERE counter_id = 1'), + array('sql_query_info', 'SELECT * FROM ' . POSTS_TABLE . ' WHERE post_id = $id'), + array('sql_attr_uint', 'forum_id'), + array('sql_attr_uint', 'topic_id'), + array('sql_attr_uint', 'poster_id'), + array('sql_attr_bool', 'topic_first_post'), + array('sql_attr_bool', 'deleted'), + array('sql_attr_timestamp' , 'post_time'), + array('sql_attr_timestamp' , 'topic_last_post_time'), + array('sql_attr_str2ordinal', 'post_subject'), + ), + 'source source_phpbb_' . $this->id . '_delta : source_phpbb_' . $this->id . '_main' => array( + array('sql_query_pre', ''), + array('sql_query_range', ''), + array('sql_range_step', ''), + array('sql_query', 'SELECT + p.post_id AS id, + p.forum_id, + p.topic_id, + p.poster_id, + CASE WHEN p.post_id = t.topic_first_post_id THEN 1 ELSE 0 END as topic_first_post, + p.post_time, + p.post_subject, + p.post_subject as title, + p.post_text as data, + t.topic_last_post_time, + 0 as deleted + FROM ' . POSTS_TABLE . ' p, ' . TOPICS_TABLE . ' t + WHERE + p.topic_id = t.topic_id + AND p.post_id >= ( SELECT max_doc_id FROM ' . SPHINX_TABLE . ' WHERE counter_id=1 )'), + ), + 'index index_phpbb_' . $this->id . '_main' => array( + array('path', $this->config['fulltext_sphinx_data_path'] . 'index_phpbb_' . $this->id . '_main'), + array('source', 'source_phpbb_' . $this->id . '_main'), + array('docinfo', 'extern'), + array('morphology', 'none'), + array('stopwords', ''), + array('min_word_len', '2'), + array('charset_type', 'utf-8'), + array('charset_table', 'U+FF10..U+FF19->0..9, 0..9, U+FF41..U+FF5A->a..z, U+FF21..U+FF3A->a..z, A..Z->a..z, a..z, U+0149, U+017F, U+0138, U+00DF, U+00FF, U+00C0..U+00D6->U+00E0..U+00F6, U+00E0..U+00F6, U+00D8..U+00DE->U+00F8..U+00FE, U+00F8..U+00FE, U+0100->U+0101, U+0101, U+0102->U+0103, U+0103, U+0104->U+0105, U+0105, U+0106->U+0107, U+0107, U+0108->U+0109, U+0109, U+010A->U+010B, U+010B, U+010C->U+010D, U+010D, U+010E->U+010F, U+010F, U+0110->U+0111, U+0111, U+0112->U+0113, U+0113, U+0114->U+0115, U+0115, U+0116->U+0117, U+0117, U+0118->U+0119, U+0119, U+011A->U+011B, U+011B, U+011C->U+011D, U+011D, U+011E->U+011F, U+011F, U+0130->U+0131, U+0131, U+0132->U+0133, U+0133, U+0134->U+0135, U+0135, U+0136->U+0137, U+0137, U+0139->U+013A, U+013A, U+013B->U+013C, U+013C, U+013D->U+013E, U+013E, U+013F->U+0140, U+0140, U+0141->U+0142, U+0142, U+0143->U+0144, U+0144, U+0145->U+0146, U+0146, U+0147->U+0148, U+0148, U+014A->U+014B, U+014B, U+014C->U+014D, U+014D, U+014E->U+014F, U+014F, U+0150->U+0151, U+0151, U+0152->U+0153, U+0153, U+0154->U+0155, U+0155, U+0156->U+0157, U+0157, U+0158->U+0159, U+0159, U+015A->U+015B, U+015B, U+015C->U+015D, U+015D, U+015E->U+015F, U+015F, U+0160->U+0161, U+0161, U+0162->U+0163, U+0163, U+0164->U+0165, U+0165, U+0166->U+0167, U+0167, U+0168->U+0169, U+0169, U+016A->U+016B, U+016B, U+016C->U+016D, U+016D, U+016E->U+016F, U+016F, U+0170->U+0171, U+0171, U+0172->U+0173, U+0173, U+0174->U+0175, U+0175, U+0176->U+0177, U+0177, U+0178->U+00FF, U+00FF, U+0179->U+017A, U+017A, U+017B->U+017C, U+017C, U+017D->U+017E, U+017E, U+0410..U+042F->U+0430..U+044F, U+0430..U+044F, U+4E00..U+9FFF'), + array('min_prefix_len', '0'), + array('min_infix_len', '0'), + ), + 'index index_phpbb_' . $this->id . '_delta : index_phpbb_' . $this->id . '_main' => array( + array('path', $this->config['fulltext_sphinx_data_path'] . 'index_phpbb_' . $this->id . '_delta'), + array('source', 'source_phpbb_' . $this->id . '_delta'), + ), + 'indexer' => array( + array('mem_limit', $this->config['fulltext_sphinx_indexer_mem_limit'] . 'M'), + ), + 'searchd' => array( + array('compat_sphinxql_magics' , '0'), + array('listen' , ($this->config['fulltext_sphinx_host'] ? $this->config['fulltext_sphinx_host'] : 'localhost') . ':' . ($this->config['fulltext_sphinx_port'] ? $this->config['fulltext_sphinx_port'] : '9312')), + array('log', $this->config['fulltext_sphinx_data_path'] . 'log/searchd.log'), + array('query_log', $this->config['fulltext_sphinx_data_path'] . 'log/sphinx-query.log'), + array('read_timeout', '5'), + array('max_children', '30'), + array('pid_file', $this->config['fulltext_sphinx_data_path'] . 'searchd.pid'), + array('max_matches', (string) SPHINX_MAX_MATCHES), + array('binlog_path', $this->config['fulltext_sphinx_data_path']), + ), + ); + + $non_unique = array('sql_query_pre' => true, 'sql_attr_uint' => true, 'sql_attr_timestamp' => true, 'sql_attr_str2ordinal' => true, 'sql_attr_bool' => true); + $delete = array('sql_group_column' => true, 'sql_date_column' => true, 'sql_str2ordinal_column' => true); + foreach ($config_data as $section_name => $section_data) + { + $section = $config_object->get_section_by_name($section_name); + if (!$section) + { + $section = $config_object->add_section($section_name); + } + + foreach ($delete as $key => $void) + { + $section->delete_variables_by_name($key); + } + + foreach ($non_unique as $key => $void) + { + $section->delete_variables_by_name($key); + } + + foreach ($section_data as $entry) + { + $key = $entry[0]; + $value = $entry[1]; + + if (!isset($non_unique[$key])) + { + $variable = $section->get_variable_by_name($key); + if (!$variable) + { + $variable = $section->create_variable($key, $value); + } + else + { + $variable->set_value($value); + } + } + else + { + $variable = $section->create_variable($key, $value); + } + } + } + $this->config_file_data = $config_object->get_data(); + + return true; + } + + /** + * Splits keywords entered by a user into an array of words stored in $this->split_words + * Stores the tidied search query in $this->search_query + * + * @param string $keywords Contains the keyword as entered by the user + * @param string $terms is either 'all' or 'any' + * @return false if no valid keywords were found and otherwise true + * + * @access public + */ + function split_keywords(&$keywords, $terms) + { + if ($terms == 'all') + { + $match = array('#\sand\s#i', '#\sor\s#i', '#\snot\s#i', '#\+#', '#-#', '#\|#', '#@#'); + $replace = array(' & ', ' | ', ' - ', ' +', ' -', ' |', ''); + + $replacements = 0; + $keywords = preg_replace($match, $replace, $keywords); + $this->sphinx->SetMatchMode(SPH_MATCH_EXTENDED); + } + else + { + $this->sphinx->SetMatchMode(SPH_MATCH_ANY); + } + + // Keep quotes and new lines + $keywords = str_replace(array('"', "\n"), array('"', ' '), trim($keywords)); + + if (strlen($keywords) > 0) + { + $this->search_query = str_replace('"', '"', $keywords); + return true; + } + + return false; + } + + /** + * Performs a search on keywords depending on display specific params. You have to run split_keywords() first. + * + * @param string $type contains either posts or topics depending on what should be searched for + * @param string $fields contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched) + * @param string $terms is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words) + * @param array $sort_by_sql contains SQL code for the ORDER BY part of a query + * @param string $sort_key is the key of $sort_by_sql for the selected sorting + * @param string $sort_dir is either a or d representing ASC and DESC + * @param string $sort_days specifies the maximum amount of days a post may be old + * @param array $ex_fid_ary specifies an array of forum ids which should not be searched + * @param array $m_approve_fid_ary specifies an array of forum ids in which the searcher is allowed to view unapproved posts + * @param int $topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched + * @param array $author_ary an array of author ids if the author should be ignored during the search the array is empty + * @param string $author_name specifies the author match, when ANONYMOUS is also a search-match + * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered + * @param int $start indicates the first index of the page + * @param int $per_page number of ids each page is supposed to contain + * @return boolean|int total number of results + * + * @access public + */ + function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page) + { + // No keywords? No posts. + if (!strlen($this->search_query) && !sizeof($author_ary)) + { + return false; + } + + $id_ary = array(); + + $join_topic = ($type != 'posts'); + + // Sorting + + if ($type == 'topics') + { + switch ($sort_key) + { + case 'a': + $this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'poster_id ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); + break; + + case 'f': + $this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'forum_id ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); + break; + + case 'i': + + case 's': + $this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'post_subject ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); + break; + + case 't': + + default: + $this->sphinx->SetGroupBy('topic_id', SPH_GROUPBY_ATTR, 'topic_last_post_time ' . (($sort_dir == 'a') ? 'ASC' : 'DESC')); + break; + } + } + else + { + switch ($sort_key) + { + case 'a': + $this->sphinx->SetSortMode(($sort_dir == 'a') ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'poster_id'); + break; + + case 'f': + $this->sphinx->SetSortMode(($sort_dir == 'a') ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'forum_id'); + break; + + case 'i': + + case 's': + $this->sphinx->SetSortMode(($sort_dir == 'a') ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'post_subject'); + break; + + case 't': + + default: + $this->sphinx->SetSortMode(($sort_dir == 'a') ? SPH_SORT_ATTR_ASC : SPH_SORT_ATTR_DESC, 'post_time'); + break; + } + } + + // Most narrow filters first + if ($topic_id) + { + $this->sphinx->SetFilter('topic_id', array($topic_id)); + } + + $search_query_prefix = ''; + + switch ($fields) + { + case 'titleonly': + // Only search the title + if ($terms == 'all') + { + $search_query_prefix = '@title '; + } + // Weight for the title + $this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1)); + // 1 is first_post, 0 is not first post + $this->sphinx->SetFilter('topic_first_post', array(1)); + break; + + case 'msgonly': + // Only search the body + if ($terms == 'all') + { + $search_query_prefix = '@data '; + } + // Weight for the body + $this->sphinx->SetFieldWeights(array("title" => 1, "data" => 5)); + break; + + case 'firstpost': + // More relative weight for the title, also search the body + $this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1)); + // 1 is first_post, 0 is not first post + $this->sphinx->SetFilter('topic_first_post', array(1)); + break; + + default: + // More relative weight for the title, also search the body + $this->sphinx->SetFieldWeights(array("title" => 5, "data" => 1)); + break; + } + + if (sizeof($author_ary)) + { + $this->sphinx->SetFilter('poster_id', $author_ary); + } + + if (sizeof($ex_fid_ary)) + { + // All forums that a user is allowed to access + $fid_ary = array_unique(array_intersect(array_keys($this->auth->acl_getf('f_read', true)), array_keys($this->auth->acl_getf('f_search', true)))); + // All forums that the user wants to and can search in + $search_forums = array_diff($fid_ary, $ex_fid_ary); + + if (sizeof($search_forums)) + { + $this->sphinx->SetFilter('forum_id', $search_forums); + } + } + + $this->sphinx->SetFilter('deleted', array(0)); + + $this->sphinx->SetLimits($start, (int) $per_page, SPHINX_MAX_MATCHES); + $result = $this->sphinx->Query($search_query_prefix . str_replace('"', '"', $this->search_query), $this->indexes); + + // Could be connection to localhost:9312 failed (errno=111, + // msg=Connection refused) during rotate, retry if so + $retries = SPHINX_CONNECT_RETRIES; + while (!$result && (strpos($this->sphinx->_error, "errno=111,") !== false) && $retries--) + { + usleep(SPHINX_CONNECT_WAIT_TIME); + $result = $this->sphinx->Query($search_query_prefix . str_replace('"', '"', $this->search_query), $this->indexes); + } + + $id_ary = array(); + if (isset($result['matches'])) + { + if ($type == 'posts') + { + $id_ary = array_keys($result['matches']); + } + else + { + foreach ($result['matches'] as $key => $value) + { + $id_ary[] = $value['attrs']['topic_id']; + } + } + } + else + { + return false; + } + + $result_count = $result['total_found']; + + $id_ary = array_slice($id_ary, 0, (int) $per_page); + + return $result_count; + } + + /** + * Performs a search on an author's posts without caring about message contents. Depends on display specific params + * + * @param string $type contains either posts or topics depending on what should be searched for + * @param boolean $firstpost_only if true, only topic starting posts will be considered + * @param array $sort_by_sql contains SQL code for the ORDER BY part of a query + * @param string $sort_key is the key of $sort_by_sql for the selected sorting + * @param string $sort_dir is either a or d representing ASC and DESC + * @param string $sort_days specifies the maximum amount of days a post may be old + * @param array $ex_fid_ary specifies an array of forum ids which should not be searched + * @param array $m_approve_fid_ary specifies an array of forum ids in which the searcher is allowed to view unapproved posts + * @param int $topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched + * @param array $author_ary an array of author ids + * @param string $author_name specifies the author match, when ANONYMOUS is also a search-match + * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered + * @param int $start indicates the first index of the page + * @param int $per_page number of ids each page is supposed to contain + * @return boolean|int total number of results + * + * @access public + */ + function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page) + { + $this->search_query = ''; + + $this->sphinx->SetMatchMode(SPH_MATCH_FULLSCAN); + $fields = ($firstpost_only) ? 'firstpost' : 'all'; + $terms = 'all'; + return $this->keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, $id_ary, $start, $per_page); + } + + /** + * Updates wordlist and wordmatch tables when a message is posted or changed + * + * @param string $mode Contains the post mode: edit, post, reply, quote + * @param int $post_id The id of the post which is modified/created + * @param string &$message New or updated post content + * @param string &$subject New or updated post subject + * @param int $poster_id Post author's user id + * @param int $forum_id The id of the forum in which the post is located + * + * @access public + */ + function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id) + { + if ($mode == 'edit') + { + $this->sphinx->UpdateAttributes($this->indexes, array('forum_id', 'poster_id'), array((int)$post_id => array((int)$forum_id, (int)$poster_id))); + } + else if ($mode != 'post' && $post_id) + { + // Update topic_last_post_time for full topic + $sql_array = array( + 'SELECT' => 'p1.post_id', + 'FROM' => array( + POSTS_TABLE => 'p1', + ), + 'LEFT_JOIN' => array(array( + 'FROM' => array( + POSTS_TABLE => 'p2' + ), + 'ON' => 'p1.topic_id = p2.topic_id', + )), + ); + + $sql = $this->db->sql_build_query('SELECT', $sql_array); + $result = $this->db->sql_query($sql); + + $post_updates = array(); + $post_time = time(); + while ($row = $this->db->sql_fetchrow($result)) + { + $post_updates[(int)$row['post_id']] = array($post_time); + } + $this->db->sql_freeresult($result); + + if (sizeof($post_updates)) + { + $this->sphinx->UpdateAttributes($this->indexes, array('topic_last_post_time'), $post_updates); + } + } + } + + /** + * Delete a post from the index after it was deleted + * + * @access public + */ + function index_remove($post_ids, $author_ids, $forum_ids) + { + $values = array(); + foreach ($post_ids as $post_id) + { + $values[$post_id] = array(1); + } + + $this->sphinx->UpdateAttributes($this->indexes, array('deleted'), $values); + } + + /** + * Nothing needs to be destroyed + * + * @access public + */ + function tidy($create = false) + { + set_config('search_last_gc', time(), true); + } + + /** + * Create sphinx table + * + * @return string|bool error string is returned incase of errors otherwise false + * + * @access public + */ + function create_index($acp_module, $u_action) + { + if (!$this->index_created()) + { + $table_data = array( + 'COLUMNS' => array( + 'counter_id' => array('UINT', 0), + 'max_doc_id' => array('UINT', 0), + ), + 'PRIMARY_KEY' => 'counter_id', + ); + $this->db_tools->sql_create_table(SPHINX_TABLE, $table_data); + + $sql = 'TRUNCATE TABLE ' . SPHINX_TABLE; + $this->db->sql_query($sql); + + $data = array( + 'counter_id' => '1', + 'max_doc_id' => '0', + ); + $sql = 'INSERT INTO ' . SPHINX_TABLE . ' ' . $this->db->sql_build_array('INSERT', $data); + $this->db->sql_query($sql); + } + + return false; + } + + /** + * Drop sphinx table + * + * @return string|bool error string is returned incase of errors otherwise false + * + * @access public + */ + function delete_index($acp_module, $u_action) + { + if (!$this->index_created()) + { + return false; + } + + $this->db_tools->sql_table_drop(SPHINX_TABLE); + + return false; + } + + /** + * Returns true if the sphinx table was created + * + * @return bool true if sphinx table was created + * + * @access public + */ + function index_created($allow_new_files = true) + { + $created = false; + + if ($this->db_tools->sql_table_exists(SPHINX_TABLE)) + { + $created = true; + } + + return $created; + } + + /** + * Returns an associative array containing information about the indexes + * + * @return string|bool Language string of error false otherwise + * + * @access public + */ + function index_stats() + { + if (empty($this->stats)) + { + $this->get_stats(); + } + + return array( + $this->user->lang['FULLTEXT_SPHINX_MAIN_POSTS'] => ($this->index_created()) ? $this->stats['main_posts'] : 0, + $this->user->lang['FULLTEXT_SPHINX_DELTA_POSTS'] => ($this->index_created()) ? $this->stats['total_posts'] - $this->stats['main_posts'] : 0, + $this->user->lang['FULLTEXT_MYSQL_TOTAL_POSTS'] => ($this->index_created()) ? $this->stats['total_posts'] : 0, + ); + } + + /** + * Collects stats that can be displayed on the index maintenance page + * + * @access private + */ + function get_stats() + { + if ($this->index_created()) + { + $sql = 'SELECT COUNT(post_id) as total_posts + FROM ' . POSTS_TABLE; + $result = $this->db->sql_query($sql); + $this->stats['total_posts'] = (int) $this->db->sql_fetchfield('total_posts'); + $this->db->sql_freeresult($result); + + $sql = 'SELECT COUNT(p.post_id) as main_posts + FROM ' . POSTS_TABLE . ' p, ' . SPHINX_TABLE . ' m + WHERE p.post_id <= m.max_doc_id + AND m.counter_id = 1'; + $result = $this->db->sql_query($sql); + $this->stats['main_posts'] = (int) $this->db->sql_fetchfield('main_posts'); + $this->db->sql_freeresult($result); + } + } + + /** + * Returns a list of options for the ACP to display + * + * @return associative array containing template and config variables + * + * @access public + */ + function acp() + { + $config_vars = array( + 'fulltext_sphinx_data_path' => 'string', + 'fulltext_sphinx_host' => 'string', + 'fulltext_sphinx_port' => 'string', + 'fulltext_sphinx_indexer_mem_limit' => 'int', + ); + + $tpl = ' + <span class="error">' . $this->user->lang['FULLTEXT_SPHINX_CONFIGURE']. '</span> + <dl> + <dt><label for="fulltext_sphinx_data_path">' . $this->user->lang['FULLTEXT_SPHINX_DATA_PATH'] . ':</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_DATA_PATH_EXPLAIN'] . '</span></dt> + <dd><input id="fulltext_sphinx_data_path" type="text" size="40" maxlength="255" name="config[fulltext_sphinx_data_path]" value="' . $this->config['fulltext_sphinx_data_path'] . '" /></dd> + </dl> + <dl> + <dt><label for="fulltext_sphinx_host">' . $this->user->lang['FULLTEXT_SPHINX_HOST'] . ':</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_HOST_EXPLAIN'] . '</span></dt> + <dd><input id="fulltext_sphinx_host" type="text" size="40" maxlength="255" name="config[fulltext_sphinx_host]" value="' . $this->config['fulltext_sphinx_host'] . '" /></dd> + </dl> + <dl> + <dt><label for="fulltext_sphinx_port">' . $this->user->lang['FULLTEXT_SPHINX_PORT'] . ':</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_PORT_EXPLAIN'] . '</span></dt> + <dd><input id="fulltext_sphinx_port" type="text" size="4" maxlength="10" name="config[fulltext_sphinx_port]" value="' . $this->config['fulltext_sphinx_port'] . '" /></dd> + </dl> + <dl> + <dt><label for="fulltext_sphinx_indexer_mem_limit">' . $this->user->lang['FULLTEXT_SPHINX_INDEXER_MEM_LIMIT'] . ':</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_INDEXER_MEM_LIMIT_EXPLAIN'] . '</span></dt> + <dd><input id="fulltext_sphinx_indexer_mem_limit" type="text" size="4" maxlength="10" name="config[fulltext_sphinx_indexer_mem_limit]" value="' . $this->config['fulltext_sphinx_indexer_mem_limit'] . '" />' . $this->user->lang['MIB'] . '</dd> + </dl> + <dl> + <dt><label for="fulltext_sphinx_config_file">' . $this->user->lang['FULLTEXT_SPHINX_CONFIG_FILE'] . ':</label><br /><span>' . $this->user->lang['FULLTEXT_SPHINX_CONFIG_FILE_EXPLAIN'] . '</dt> + <dd>' . (($this->config_generate()) ? '<textarea readonly="readonly" rows="6">' . $this->config_file_data . '</textarea>' : $this->config_file_data) . '</dd> + <dl> + '; + + // These are fields required in the config table + return array( + 'tpl' => $tpl, + 'config' => $config_vars + ); + } +} diff --git a/phpBB/includes/search/sphinx/config.php b/phpBB/includes/search/sphinx/config.php new file mode 100644 index 0000000000..f1864f0c8c --- /dev/null +++ b/phpBB/includes/search/sphinx/config.php @@ -0,0 +1,288 @@ +<?php +/** +* +* @package search +* @copyright (c) 2005 phpBB Group +* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2 +* +*/ + +/** +* @ignore +*/ +if (!defined('IN_PHPBB')) +{ + exit; +} + +/** +* phpbb_search_sphinx_config +* An object representing the sphinx configuration +* Can read it from file and write it back out after modification +* @package search +*/ +class phpbb_search_sphinx_config +{ + private $sections = array(); + + /** + * Constructor which optionally loads data from a variable + * + * @param string $config_data Variable containing the sphinx configuration data + * + * @access public + */ + function __construct($config_data) + { + if ($config_data != '') + { + $this->read($config_data); + } + } + + /** + * Get a section object by its name + * + * @param string $name The name of the section that shall be returned + * @return phpbb_search_sphinx_config_section The section object or null if none was found + * + * @access public + */ + function get_section_by_name($name) + { + for ($i = 0, $size = sizeof($this->sections); $i < $size; $i++) + { + // Make sure this is really a section object and not a comment + if (($this->sections[$i] instanceof phpbb_search_sphinx_config_section) && $this->sections[$i]->get_name() == $name) + { + return $this->sections[$i]; + } + } + } + + /** + * Appends a new empty section to the end of the config + * + * @param string $name The name for the new section + * @return phpbb_search_sphinx_config_section The newly created section object + * + * @access public + */ + function add_section($name) + { + $this->sections[] = new phpbb_search_sphinx_config_section($name, ''); + return $this->sections[sizeof($this->sections) - 1]; + } + + /** + * Reads the config file data + * + * @param string $config_data The config file data + * + * @access private + */ + function read($config_data) + { + $this->sections = array(); + + $section = null; + $found_opening_bracket = false; + $in_value = false; + + foreach ($config_data as $i => $line) + { + // If the value of a variable continues to the next line because the line + // break was escaped then we don't trim leading space but treat it as a part of the value + if ($in_value) + { + $line = rtrim($line); + } + else + { + $line = trim($line); + } + + // If we're not inside a section look for one + if (!$section) + { + // Add empty lines and comments as comment objects to the section list + // that way they're not deleted when reassembling the file from the sections + if (!$line || $line[0] == '#') + { + $this->sections[] = new phpbb_search_sphinx_config_comment($config_file[$i]); + continue; + } + else + { + // Otherwise we scan the line reading the section name until we find + // an opening curly bracket or a comment + $section_name = ''; + $section_name_comment = ''; + $found_opening_bracket = false; + for ($j = 0, $length = strlen($line); $j < $length; $j++) + { + if ($line[$j] == '#') + { + $section_name_comment = substr($line, $j); + break; + } + + if ($found_opening_bracket) + { + continue; + } + + if ($line[$j] == '{') + { + $found_opening_bracket = true; + continue; + } + + $section_name .= $line[$j]; + } + + // And then we create the new section object + $section_name = trim($section_name); + $section = new phpbb_search_sphinx_config_section($section_name, $section_name_comment); + } + } + else + { + // If we're looking for variables inside a section + $skip_first = false; + + // If we're not in a value continuing over the line feed + if (!$in_value) + { + // Then add empty lines and comments as comment objects to the variable list + // of this section so they're not deleted on reassembly + if (!$line || $line[0] == '#') + { + $section->add_variable(new phpbb_search_sphinx_config_comment($config_file[$i])); + continue; + } + + // As long as we haven't yet actually found an opening bracket for this section + // we treat everything as comments so it's not deleted either + if (!$found_opening_bracket) + { + if ($line[0] == '{') + { + $skip_first = true; + $line = substr($line, 1); + $found_opening_bracket = true; + } + else + { + $section->add_variable(new phpbb_search_sphinx_config_comment($config_file[$i])); + continue; + } + } + } + + // If we did not find a comment in this line or still add to the previous + // line's value ... + if ($line || $in_value) + { + if (!$in_value) + { + $name = ''; + $value = ''; + $comment = ''; + $found_assignment = false; + } + $in_value = false; + $end_section = false; + + /* ... then we should prase this line char by char: + - first there's the variable name + - then an equal sign + - the variable value + - possibly a backslash before the linefeed in this case we need to continue + parsing the value in the next line + - a # indicating that the rest of the line is a comment + - a closing curly bracket indicating the end of this section*/ + for ($j = 0, $length = strlen($line); $j < $length; $j++) + { + if ($line[$j] == '#') + { + $comment = substr($line, $j); + break; + } + else if ($line[$j] == '}') + { + $comment = substr($line, $j + 1); + $end_section = true; + break; + } + else if (!$found_assignment) + { + if ($line[$j] == '=') + { + $found_assignment = true; + } + else + { + $name .= $line[$j]; + } + } + else + { + if ($line[$j] == '\\' && $j == $length - 1) + { + $value .= "\n"; + $in_value = true; + // Go to the next line and keep processing the value in there + continue 2; + } + $value .= $line[$j]; + } + } + + // If a name and an equal sign were found then we have append a + // new variable object to the section + if ($name && $found_assignment) + { + $section->add_variable(new phpbb_search_sphinx_config_variable(trim($name), trim($value), ($end_section) ? '' : $comment)); + continue; + } + + /* If we found a closing curly bracket this section has been completed + and we can append it to the section list and continue with looking for + the next section */ + if ($end_section) + { + $section->set_end_comment($comment); + $this->sections[] = $section; + $section = null; + continue; + } + } + + // If we did not find anything meaningful up to here, then just treat it + // as a comment + $comment = ($skip_first) ? "\t" . substr(ltrim($config_file[$i]), 1) : $config_file[$i]; + $section->add_variable(new phpbb_search_sphinx_config_comment($comment)); + } + } + + } + + /** + * Returns the config data + * + * @return string $data The config data that is generated + * + * @access public + */ + function get_data() + { + $data = ""; + foreach ($this->sections as $section) + { + $data .= $section->to_string(); + } + + return $data; + } +} diff --git a/phpBB/includes/search/sphinx/config_comment.php b/phpBB/includes/search/sphinx/config_comment.php new file mode 100644 index 0000000000..7f695dbf0c --- /dev/null +++ b/phpBB/includes/search/sphinx/config_comment.php @@ -0,0 +1,49 @@ +<?php +/** +* +* @package search +* @copyright (c) 2005 phpBB Group +* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2 +* +*/ + +/** +* @ignore +*/ +if (!defined('IN_PHPBB')) +{ + exit; +} + +/** +* phpbb_search_sphinx_config_comment +* Represents a comment inside the sphinx configuration +*/ +class phpbb_search_sphinx_config_comment +{ + private $exact_string; + + /** + * Create a new comment + * + * @param string $exact_string The content of the comment including newlines, leading whitespace, etc. + * + * @access public + */ + function __construct($exact_string) + { + $this->exact_string = $exact_string; + } + + /** + * Simply returns the comment as it was created + * + * @return string The exact string that was specified in the constructor + * + * @access public + */ + function to_string() + { + return $this->exact_string; + } +} diff --git a/phpBB/includes/search/sphinx/config_section.php b/phpBB/includes/search/sphinx/config_section.php new file mode 100644 index 0000000000..79c9c8563d --- /dev/null +++ b/phpBB/includes/search/sphinx/config_section.php @@ -0,0 +1,162 @@ +<?php +/** +* +* @package search +* @copyright (c) 2005 phpBB Group +* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2 +* +*/ + +/** +* @ignore +*/ +if (!defined('IN_PHPBB')) +{ + exit; +} + +/** +* phpbb_search_sphinx_config_section +* Represents a single section inside the sphinx configuration +*/ +class phpbb_search_sphinx_config_section +{ + private $name; + private $comment; + private $end_comment; + private $variables = array(); + + /** + * Construct a new section + * + * @param string $name Name of the section + * @param string $comment Comment that should be appended after the name in the + * textual format. + * + * @access public + */ + function __construct($name, $comment) + { + $this->name = $name; + $this->comment = $comment; + $this->end_comment = ''; + } + + /** + * Add a variable object to the list of variables in this section + * + * @param phpbb_search_sphinx_config_variable $variable The variable object + * + * @access public + */ + function add_variable($variable) + { + $this->variables[] = $variable; + } + + /** + * Adds a comment after the closing bracket in the textual representation + * + * @param string $end_comment + * + * @access public + */ + function set_end_comment($end_comment) + { + $this->end_comment = $end_comment; + } + + /** + * Getter for the name of this section + * + * @return string Section's name + * + * @access public + */ + function get_name() + { + return $this->name; + } + + /** + * Get a variable object by its name + * + * @param string $name The name of the variable that shall be returned + * @return phpbb_search_sphinx_config_section The first variable object from this section with the + * given name or null if none was found + * + * @access public + */ + function get_variable_by_name($name) + { + for ($i = 0, $size = sizeof($this->variables); $i < $size; $i++) + { + // Make sure this is a variable object and not a comment + if (($this->variables[$i] instanceof phpbb_search_sphinx_config_variable) && $this->variables[$i]->get_name() == $name) + { + return $this->variables[$i]; + } + } + } + + /** + * Deletes all variables with the given name + * + * @param string $name The name of the variable objects that are supposed to be removed + * + * @access public + */ + function delete_variables_by_name($name) + { + for ($i = 0, $size = sizeof($this->variables); $i < $size; $i++) + { + // Make sure this is a variable object and not a comment + if (($this->variables[$i] instanceof phpbb_search_sphinx_config_variable) && $this->variables[$i]->get_name() == $name) + { + array_splice($this->variables, $i, 1); + $i--; + } + } + } + + /** + * Create a new variable object and append it to the variable list of this section + * + * @param string $name The name for the new variable + * @param string $value The value for the new variable + * @return phpbb_search_sphinx_config_variable Variable object that was created + * + * @access public + */ + function create_variable($name, $value) + { + $this->variables[] = new phpbb_search_sphinx_config_variable($name, $value, ''); + return $this->variables[sizeof($this->variables) - 1]; + } + + /** + * Turns this object into a string which can be written to a config file + * + * @return string Config data in textual form, parsable for sphinx + * + * @access public + */ + function to_string() + { + $content = $this->name . ' ' . $this->comment . "\n{\n"; + + // Make sure we don't get too many newlines after the opening bracket + while (trim($this->variables[0]->to_string()) == '') + { + array_shift($this->variables); + } + + foreach ($this->variables as $variable) + { + $content .= $variable->to_string(); + } + $content .= '}' . $this->end_comment . "\n"; + + return $content; + } +} diff --git a/phpBB/includes/search/sphinx/config_variable.php b/phpBB/includes/search/sphinx/config_variable.php new file mode 100644 index 0000000000..35abe281cb --- /dev/null +++ b/phpBB/includes/search/sphinx/config_variable.php @@ -0,0 +1,80 @@ +<?php +/** +* +* @package search +* @copyright (c) 2005 phpBB Group +* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License v2 +* +*/ + +/** +* @ignore +*/ +if (!defined('IN_PHPBB')) +{ + exit; +} + +/** +* phpbb_search_sphinx_config_variable +* Represents a single variable inside the sphinx configuration +*/ +class phpbb_search_sphinx_config_variable +{ + private $name; + private $value; + private $comment; + + /** + * Constructs a new variable object + * + * @param string $name Name of the variable + * @param string $value Value of the variable + * @param string $comment Optional comment after the variable in the + * config file + * + * @access public + */ + function __construct($name, $value, $comment) + { + $this->name = $name; + $this->value = $value; + $this->comment = $comment; + } + + /** + * Getter for the variable's name + * + * @return string The variable object's name + * + * @access public + */ + function get_name() + { + return $this->name; + } + + /** + * Allows changing the variable's value + * + * @param string $value New value for this variable + * + * @access public + */ + function set_value($value) + { + $this->value = $value; + } + + /** + * Turns this object into a string readable by sphinx + * + * @return string Config data in textual form + * + * @access public + */ + function to_string() + { + return "\t" . $this->name . ' = ' . str_replace("\n", "\\\n", $this->value) . ' ' . $this->comment . "\n"; + } +} diff --git a/phpBB/includes/sphinxapi.php b/phpBB/includes/sphinxapi.php new file mode 100644 index 0000000000..bd83b1d2e0 --- /dev/null +++ b/phpBB/includes/sphinxapi.php @@ -0,0 +1,1712 @@ +<?php
+
+//
+// $Id: sphinxapi.php 3087 2012-01-30 23:07:35Z shodan $
+//
+
+//
+// Copyright (c) 2001-2012, Andrew Aksyonoff
+// Copyright (c) 2008-2012, Sphinx Technologies Inc
+// All rights reserved
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License. You should have
+// received a copy of the GPL license along with this program; if you
+// did not, you can find it at http://www.gnu.org/
+//
+
+/////////////////////////////////////////////////////////////////////////////
+// PHP version of Sphinx searchd client (PHP API)
+/////////////////////////////////////////////////////////////////////////////
+
+/// known searchd commands
+define ( "SEARCHD_COMMAND_SEARCH", 0 );
+define ( "SEARCHD_COMMAND_EXCERPT", 1 );
+define ( "SEARCHD_COMMAND_UPDATE", 2 );
+define ( "SEARCHD_COMMAND_KEYWORDS", 3 );
+define ( "SEARCHD_COMMAND_PERSIST", 4 );
+define ( "SEARCHD_COMMAND_STATUS", 5 );
+define ( "SEARCHD_COMMAND_FLUSHATTRS", 7 );
+
+/// current client-side command implementation versions
+define ( "VER_COMMAND_SEARCH", 0x119 );
+define ( "VER_COMMAND_EXCERPT", 0x104 );
+define ( "VER_COMMAND_UPDATE", 0x102 );
+define ( "VER_COMMAND_KEYWORDS", 0x100 );
+define ( "VER_COMMAND_STATUS", 0x100 );
+define ( "VER_COMMAND_QUERY", 0x100 );
+define ( "VER_COMMAND_FLUSHATTRS", 0x100 );
+
+/// known searchd status codes
+define ( "SEARCHD_OK", 0 );
+define ( "SEARCHD_ERROR", 1 );
+define ( "SEARCHD_RETRY", 2 );
+define ( "SEARCHD_WARNING", 3 );
+
+/// known match modes
+define ( "SPH_MATCH_ALL", 0 );
+define ( "SPH_MATCH_ANY", 1 );
+define ( "SPH_MATCH_PHRASE", 2 );
+define ( "SPH_MATCH_BOOLEAN", 3 );
+define ( "SPH_MATCH_EXTENDED", 4 );
+define ( "SPH_MATCH_FULLSCAN", 5 );
+define ( "SPH_MATCH_EXTENDED2", 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)
+
+/// known ranking modes (ext2 only)
+define ( "SPH_RANK_PROXIMITY_BM25", 0 ); ///< default mode, phrase proximity major factor and BM25 minor one
+define ( "SPH_RANK_BM25", 1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
+define ( "SPH_RANK_NONE", 2 ); ///< no ranking, all matches get a weight of 1
+define ( "SPH_RANK_WORDCOUNT", 3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts
+define ( "SPH_RANK_PROXIMITY", 4 );
+define ( "SPH_RANK_MATCHANY", 5 );
+define ( "SPH_RANK_FIELDMASK", 6 );
+define ( "SPH_RANK_SPH04", 7 );
+define ( "SPH_RANK_EXPR", 8 );
+define ( "SPH_RANK_TOTAL", 9 );
+
+/// known sort modes
+define ( "SPH_SORT_RELEVANCE", 0 );
+define ( "SPH_SORT_ATTR_DESC", 1 );
+define ( "SPH_SORT_ATTR_ASC", 2 );
+define ( "SPH_SORT_TIME_SEGMENTS", 3 );
+define ( "SPH_SORT_EXTENDED", 4 );
+define ( "SPH_SORT_EXPR", 5 );
+
+/// known filter types
+define ( "SPH_FILTER_VALUES", 0 );
+define ( "SPH_FILTER_RANGE", 1 );
+define ( "SPH_FILTER_FLOATRANGE", 2 );
+
+/// known attribute types
+define ( "SPH_ATTR_INTEGER", 1 );
+define ( "SPH_ATTR_TIMESTAMP", 2 );
+define ( "SPH_ATTR_ORDINAL", 3 );
+define ( "SPH_ATTR_BOOL", 4 );
+define ( "SPH_ATTR_FLOAT", 5 );
+define ( "SPH_ATTR_BIGINT", 6 );
+define ( "SPH_ATTR_STRING", 7 );
+define ( "SPH_ATTR_MULTI", 0x40000001 );
+define ( "SPH_ATTR_MULTI64", 0x40000002 );
+
+/// known grouping functions
+define ( "SPH_GROUPBY_DAY", 0 );
+define ( "SPH_GROUPBY_WEEK", 1 );
+define ( "SPH_GROUPBY_MONTH", 2 );
+define ( "SPH_GROUPBY_YEAR", 3 );
+define ( "SPH_GROUPBY_ATTR", 4 );
+define ( "SPH_GROUPBY_ATTRPAIR", 5 );
+
+// important properties of PHP's integers:
+// - always signed (one bit short of PHP_INT_SIZE)
+// - conversion from string to int is saturated
+// - float is double
+// - div converts arguments to floats
+// - mod converts arguments to ints
+
+// the packing code below works as follows:
+// - when we got an int, just pack it
+// if performance is a problem, this is the branch users should aim for
+//
+// - otherwise, we got a number in string form
+// this might be due to different reasons, but we assume that this is
+// because it didn't fit into PHP int
+//
+// - factor the string into high and low ints for packing
+// - if we have bcmath, then it is used
+// - if we don't, we have to do it manually (this is the fun part)
+//
+// - x64 branch does factoring using ints
+// - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int
+//
+// unpacking routines are pretty much the same.
+// - return ints if we can
+// - otherwise format number into a string
+
+/// pack 64-bit signed
+function sphPackI64 ( $v )
+{
+ assert ( is_numeric($v) );
+
+ // x64
+ if ( PHP_INT_SIZE>=8 )
+ {
+ $v = (int)$v;
+ return pack ( "NN", $v>>32, $v&0xFFFFFFFF );
+ }
+
+ // x32, int
+ if ( is_int($v) )
+ return pack ( "NN", $v < 0 ? -1 : 0, $v );
+
+ // x32, bcmath
+ if ( function_exists("bcmul") )
+ {
+ if ( bccomp ( $v, 0 ) == -1 )
+ $v = bcadd ( "18446744073709551616", $v );
+ $h = bcdiv ( $v, "4294967296", 0 );
+ $l = bcmod ( $v, "4294967296" );
+ return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit
+ }
+
+ // x32, no-bcmath
+ $p = max(0, strlen($v) - 13);
+ $lo = abs((float)substr($v, $p));
+ $hi = abs((float)substr($v, 0, $p));
+
+ $m = $lo + $hi*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912
+ $q = floor($m/4294967296.0);
+ $l = $m - ($q*4294967296.0);
+ $h = $hi*2328.0 + $q; // (10 ^ 13) / (1 << 32) = 2328
+
+ if ( $v<0 )
+ {
+ if ( $l==0 )
+ $h = 4294967296.0 - $h;
+ else
+ {
+ $h = 4294967295.0 - $h;
+ $l = 4294967296.0 - $l;
+ }
+ }
+ return pack ( "NN", $h, $l );
+}
+
+/// pack 64-bit unsigned
+function sphPackU64 ( $v )
+{
+ assert ( is_numeric($v) );
+
+ // x64
+ if ( PHP_INT_SIZE>=8 )
+ {
+ assert ( $v>=0 );
+
+ // x64, int
+ if ( is_int($v) )
+ return pack ( "NN", $v>>32, $v&0xFFFFFFFF );
+
+ // x64, bcmath
+ if ( function_exists("bcmul") )
+ {
+ $h = bcdiv ( $v, 4294967296, 0 );
+ $l = bcmod ( $v, 4294967296 );
+ return pack ( "NN", $h, $l );
+ }
+
+ // x64, no-bcmath
+ $p = max ( 0, strlen($v) - 13 );
+ $lo = (int)substr ( $v, $p );
+ $hi = (int)substr ( $v, 0, $p );
+
+ $m = $lo + $hi*1316134912;
+ $l = $m % 4294967296;
+ $h = $hi*2328 + (int)($m/4294967296);
+
+ return pack ( "NN", $h, $l );
+ }
+
+ // x32, int
+ if ( is_int($v) )
+ return pack ( "NN", 0, $v );
+
+ // x32, bcmath
+ if ( function_exists("bcmul") )
+ {
+ $h = bcdiv ( $v, "4294967296", 0 );
+ $l = bcmod ( $v, "4294967296" );
+ return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit
+ }
+
+ // x32, no-bcmath
+ $p = max(0, strlen($v) - 13);
+ $lo = (float)substr($v, $p);
+ $hi = (float)substr($v, 0, $p);
+
+ $m = $lo + $hi*1316134912.0;
+ $q = floor($m / 4294967296.0);
+ $l = $m - ($q * 4294967296.0);
+ $h = $hi*2328.0 + $q;
+
+ return pack ( "NN", $h, $l );
+}
+
+// unpack 64-bit unsigned
+function sphUnpackU64 ( $v )
+{
+ list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );
+
+ if ( PHP_INT_SIZE>=8 )
+ {
+ if ( $hi<0 ) $hi += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again
+ if ( $lo<0 ) $lo += (1<<32);
+
+ // x64, int
+ if ( $hi<=2147483647 )
+ return ($hi<<32) + $lo;
+
+ // x64, bcmath
+ if ( function_exists("bcmul") )
+ return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );
+
+ // x64, no-bcmath
+ $C = 100000;
+ $h = ((int)($hi / $C) << 32) + (int)($lo / $C);
+ $l = (($hi % $C) << 32) + ($lo % $C);
+ if ( $l>$C )
+ {
+ $h += (int)($l / $C);
+ $l = $l % $C;
+ }
+
+ if ( $h==0 )
+ return $l;
+ return sprintf ( "%d%05d", $h, $l );
+ }
+
+ // x32, int
+ if ( $hi==0 )
+ {
+ if ( $lo>0 )
+ return $lo;
+ return sprintf ( "%u", $lo );
+ }
+
+ $hi = sprintf ( "%u", $hi );
+ $lo = sprintf ( "%u", $lo );
+
+ // x32, bcmath
+ if ( function_exists("bcmul") )
+ return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );
+
+ // x32, no-bcmath
+ $hi = (float)$hi;
+ $lo = (float)$lo;
+
+ $q = floor($hi/10000000.0);
+ $r = $hi - $q*10000000.0;
+ $m = $lo + $r*4967296.0;
+ $mq = floor($m/10000000.0);
+ $l = $m - $mq*10000000.0;
+ $h = $q*4294967296.0 + $r*429.0 + $mq;
+
+ $h = sprintf ( "%.0f", $h );
+ $l = sprintf ( "%07.0f", $l );
+ if ( $h=="0" )
+ return sprintf( "%.0f", (float)$l );
+ return $h . $l;
+}
+
+// unpack 64-bit signed
+function sphUnpackI64 ( $v )
+{
+ list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );
+
+ // x64
+ if ( PHP_INT_SIZE>=8 )
+ {
+ if ( $hi<0 ) $hi += (1<<32); // because php 5.2.2 to 5.2.5 is totally fucked up again
+ if ( $lo<0 ) $lo += (1<<32);
+
+ return ($hi<<32) + $lo;
+ }
+
+ // x32, int
+ if ( $hi==0 )
+ {
+ if ( $lo>0 )
+ return $lo;
+ return sprintf ( "%u", $lo );
+ }
+ // x32, int
+ elseif ( $hi==-1 )
+ {
+ if ( $lo<0 )
+ return $lo;
+ return sprintf ( "%.0f", $lo - 4294967296.0 );
+ }
+
+ $neg = "";
+ $c = 0;
+ if ( $hi<0 )
+ {
+ $hi = ~$hi;
+ $lo = ~$lo;
+ $c = 1;
+ $neg = "-";
+ }
+
+ $hi = sprintf ( "%u", $hi );
+ $lo = sprintf ( "%u", $lo );
+
+ // x32, bcmath
+ if ( function_exists("bcmul") )
+ return $neg . bcadd ( bcadd ( $lo, bcmul ( $hi, "4294967296" ) ), $c );
+
+ // x32, no-bcmath
+ $hi = (float)$hi;
+ $lo = (float)$lo;
+
+ $q = floor($hi/10000000.0);
+ $r = $hi - $q*10000000.0;
+ $m = $lo + $r*4967296.0;
+ $mq = floor($m/10000000.0);
+ $l = $m - $mq*10000000.0 + $c;
+ $h = $q*4294967296.0 + $r*429.0 + $mq;
+ if ( $l==10000000 )
+ {
+ $l = 0;
+ $h += 1;
+ }
+
+ $h = sprintf ( "%.0f", $h );
+ $l = sprintf ( "%07.0f", $l );
+ if ( $h=="0" )
+ return $neg . sprintf( "%.0f", (float)$l );
+ return $neg . $h . $l;
+}
+
+
+function sphFixUint ( $value )
+{
+ if ( PHP_INT_SIZE>=8 )
+ {
+ // x64 route, workaround broken unpack() in 5.2.2+
+ if ( $value<0 ) $value += (1<<32);
+ return $value;
+ }
+ else
+ {
+ // x32 route, workaround php signed/unsigned braindamage
+ return sprintf ( "%u", $value );
+ }
+}
+
+
+/// sphinx searchd client class
+class SphinxClient
+{
+ var $_host; ///< searchd host (default is "localhost")
+ var $_port; ///< searchd port (default is 9312)
+ var $_offset; ///< how many records to seek from result-set start (default is 0)
+ var $_limit; ///< how many records to return from result-set starting at offset (default is 20)
+ var $_mode; ///< query matching mode (default is SPH_MATCH_ALL)
+ var $_weights; ///< per-field weights (default is 1 for all fields)
+ var $_sort; ///< match sorting mode (default is SPH_SORT_RELEVANCE)
+ var $_sortby; ///< attribute to sort by (defualt is "")
+ var $_min_id; ///< min ID to match (default is 0, which means no limit)
+ var $_max_id; ///< max ID to match (default is 0, which means no limit)
+ var $_filters; ///< search filters
+ var $_groupby; ///< group-by attribute name
+ var $_groupfunc; ///< group-by function (to pre-process group-by attribute value with)
+ var $_groupsort; ///< group-by sorting clause (to sort groups in result set with)
+ var $_groupdistinct;///< group-by count-distinct attribute
+ var $_maxmatches; ///< max matches to retrieve
+ var $_cutoff; ///< cutoff to stop searching at (default is 0)
+ var $_retrycount; ///< distributed retries count
+ var $_retrydelay; ///< distributed retries delay
+ var $_anchor; ///< geographical anchor point
+ var $_indexweights; ///< per-index weights
+ var $_ranker; ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
+ var $_rankexpr; ///< ranking mode expression (for SPH_RANK_EXPR)
+ var $_maxquerytime; ///< max query time, milliseconds (default is 0, do not limit)
+ var $_fieldweights; ///< per-field-name weights
+ var $_overrides; ///< per-query attribute values overrides
+ var $_select; ///< select-list (attributes or expressions, with optional aliases)
+
+ var $_error; ///< last error message
+ var $_warning; ///< last warning message
+ var $_connerror; ///< connection error vs remote error flag
+
+ var $_reqs; ///< requests array for multi-query
+ var $_mbenc; ///< stored mbstring encoding
+ var $_arrayresult; ///< whether $result["matches"] should be a hash or an array
+ var $_timeout; ///< connect timeout
+
+ /////////////////////////////////////////////////////////////////////////////
+ // common stuff
+ /////////////////////////////////////////////////////////////////////////////
+
+ /// create a new client object and fill defaults
+ function SphinxClient ()
+ {
+ // per-client-object settings
+ $this->_host = "localhost";
+ $this->_port = 9312;
+ $this->_path = false;
+ $this->_socket = false;
+
+ // per-query settings
+ $this->_offset = 0;
+ $this->_limit = 20;
+ $this->_mode = SPH_MATCH_ALL;
+ $this->_weights = array ();
+ $this->_sort = SPH_SORT_RELEVANCE;
+ $this->_sortby = "";
+ $this->_min_id = 0;
+ $this->_max_id = 0;
+ $this->_filters = array ();
+ $this->_groupby = "";
+ $this->_groupfunc = SPH_GROUPBY_DAY;
+ $this->_groupsort = "@group desc";
+ $this->_groupdistinct= "";
+ $this->_maxmatches = 1000;
+ $this->_cutoff = 0;
+ $this->_retrycount = 0;
+ $this->_retrydelay = 0;
+ $this->_anchor = array ();
+ $this->_indexweights= array ();
+ $this->_ranker = SPH_RANK_PROXIMITY_BM25;
+ $this->_rankexpr = "";
+ $this->_maxquerytime= 0;
+ $this->_fieldweights= array();
+ $this->_overrides = array();
+ $this->_select = "*";
+
+ $this->_error = ""; // per-reply fields (for single-query case)
+ $this->_warning = "";
+ $this->_connerror = false;
+
+ $this->_reqs = array (); // requests storage (for multi-query case)
+ $this->_mbenc = "";
+ $this->_arrayresult = false;
+ $this->_timeout = 0;
+ }
+
+ function __destruct()
+ {
+ if ( $this->_socket !== false )
+ fclose ( $this->_socket );
+ }
+
+ /// get last error message (string)
+ function GetLastError ()
+ {
+ return $this->_error;
+ }
+
+ /// get last warning message (string)
+ function GetLastWarning ()
+ {
+ return $this->_warning;
+ }
+
+ /// get last error flag (to tell network connection errors from searchd errors or broken responses)
+ function IsConnectError()
+ {
+ return $this->_connerror;
+ }
+
+ /// set searchd host name (string) and port (integer)
+ function SetServer ( $host, $port = 0 )
+ {
+ assert ( is_string($host) );
+ if ( $host[0] == '/')
+ {
+ $this->_path = 'unix://' . $host;
+ return;
+ }
+ if ( substr ( $host, 0, 7 )=="unix://" )
+ {
+ $this->_path = $host;
+ return;
+ }
+
+ assert ( is_int($port) );
+ $this->_host = $host;
+ $this->_port = $port;
+ $this->_path = '';
+
+ }
+
+ /// set server connection timeout (0 to remove)
+ function SetConnectTimeout ( $timeout )
+ {
+ assert ( is_numeric($timeout) );
+ $this->_timeout = $timeout;
+ }
+
+
+ function _Send ( $handle, $data, $length )
+ {
+ if ( feof($handle) || fwrite ( $handle, $data, $length ) !== $length )
+ {
+ $this->_error = 'connection unexpectedly closed (timed out?)';
+ $this->_connerror = true;
+ return false;
+ }
+ return true;
+ }
+
+ /////////////////////////////////////////////////////////////////////////////
+
+ /// enter mbstring workaround mode
+ function _MBPush ()
+ {
+ $this->_mbenc = "";
+ if ( ini_get ( "mbstring.func_overload" ) & 2 )
+ {
+ $this->_mbenc = mb_internal_encoding();
+ mb_internal_encoding ( "latin1" );
+ }
+ }
+
+ /// leave mbstring workaround mode
+ function _MBPop ()
+ {
+ if ( $this->_mbenc )
+ mb_internal_encoding ( $this->_mbenc );
+ }
+
+ /// connect to searchd server
+ function _Connect ()
+ {
+ if ( $this->_socket!==false )
+ {
+ // we are in persistent connection mode, so we have a socket
+ // however, need to check whether it's still alive
+ if ( !@feof ( $this->_socket ) )
+ return $this->_socket;
+
+ // force reopen
+ $this->_socket = false;
+ }
+
+ $errno = 0;
+ $errstr = "";
+ $this->_connerror = false;
+
+ if ( $this->_path )
+ {
+ $host = $this->_path;
+ $port = 0;
+ }
+ else
+ {
+ $host = $this->_host;
+ $port = $this->_port;
+ }
+
+ if ( $this->_timeout<=0 )
+ $fp = @fsockopen ( $host, $port, $errno, $errstr );
+ else
+ $fp = @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout );
+
+ if ( !$fp )
+ {
+ if ( $this->_path )
+ $location = $this->_path;
+ else
+ $location = "{$this->_host}:{$this->_port}";
+
+ $errstr = trim ( $errstr );
+ $this->_error = "connection to $location failed (errno=$errno, msg=$errstr)";
+ $this->_connerror = true;
+ return false;
+ }
+
+ // send my version
+ // this is a subtle part. we must do it before (!) reading back from searchd.
+ // because otherwise under some conditions (reported on FreeBSD for instance)
+ // TCP stack could throttle write-write-read pattern because of Nagle.
+ if ( !$this->_Send ( $fp, pack ( "N", 1 ), 4 ) )
+ {
+ fclose ( $fp );
+ $this->_error = "failed to send client protocol version";
+ return false;
+ }
+
+ // check version
+ list(,$v) = unpack ( "N*", fread ( $fp, 4 ) );
+ $v = (int)$v;
+ if ( $v<1 )
+ {
+ fclose ( $fp );
+ $this->_error = "expected searchd protocol version 1+, got version '$v'";
+ return false;
+ }
+
+ return $fp;
+ }
+
+ /// get and check response packet from searchd server
+ function _GetResponse ( $fp, $client_ver )
+ {
+ $response = "";
+ $len = 0;
+
+ $header = fread ( $fp, 8 );
+ if ( strlen($header)==8 )
+ {
+ list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
+ $left = $len;
+ while ( $left>0 && !feof($fp) )
+ {
+ $chunk = fread ( $fp, min ( 8192, $left ) );
+ if ( $chunk )
+ {
+ $response .= $chunk;
+ $left -= strlen($chunk);
+ }
+ }
+ }
+ if ( $this->_socket === false )
+ fclose ( $fp );
+
+ // check response
+ $read = strlen ( $response );
+ if ( !$response || $read!=$len )
+ {
+ $this->_error = $len
+ ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
+ : "received zero-sized searchd response";
+ return false;
+ }
+
+ // check status
+ if ( $status==SEARCHD_WARNING )
+ {
+ list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
+ $this->_warning = substr ( $response, 4, $wlen );
+ return substr ( $response, 4+$wlen );
+ }
+ if ( $status==SEARCHD_ERROR )
+ {
+ $this->_error = "searchd error: " . substr ( $response, 4 );
+ return false;
+ }
+ if ( $status==SEARCHD_RETRY )
+ {
+ $this->_error = "temporary searchd error: " . substr ( $response, 4 );
+ return false;
+ }
+ if ( $status!=SEARCHD_OK )
+ {
+ $this->_error = "unknown status code '$status'";
+ return false;
+ }
+
+ // check version
+ if ( $ver<$client_ver )
+ {
+ $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
+ $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
+ }
+
+ return $response;
+ }
+
+ /////////////////////////////////////////////////////////////////////////////
+ // searching
+ /////////////////////////////////////////////////////////////////////////////
+
+ /// set offset and count into result set,
+ /// and optionally set max-matches and cutoff limits
+ function SetLimits ( $offset, $limit, $max=0, $cutoff=0 )
+ {
+ assert ( is_int($offset) );
+ assert ( is_int($limit) );
+ assert ( $offset>=0 );
+ assert ( $limit>0 );
+ assert ( $max>=0 );
+ $this->_offset = $offset;
+ $this->_limit = $limit;
+ if ( $max>0 )
+ $this->_maxmatches = $max;
+ if ( $cutoff>0 )
+ $this->_cutoff = $cutoff;
+ }
+
+ /// set maximum query time, in milliseconds, per-index
+ /// integer, 0 means "do not limit"
+ function SetMaxQueryTime ( $max )
+ {
+ assert ( is_int($max) );
+ assert ( $max>=0 );
+ $this->_maxquerytime = $max;
+ }
+
+ /// set matching mode
+ function SetMatchMode ( $mode )
+ {
+ assert ( $mode==SPH_MATCH_ALL
+ || $mode==SPH_MATCH_ANY
+ || $mode==SPH_MATCH_PHRASE
+ || $mode==SPH_MATCH_BOOLEAN
+ || $mode==SPH_MATCH_EXTENDED
+ || $mode==SPH_MATCH_FULLSCAN
+ || $mode==SPH_MATCH_EXTENDED2 );
+ $this->_mode = $mode;
+ }
+
+ /// set ranking mode
+ function SetRankingMode ( $ranker, $rankexpr="" )
+ {
+ assert ( $ranker>=0 && $ranker<SPH_RANK_TOTAL );
+ assert ( is_string($rankexpr) );
+ $this->_ranker = $ranker;
+ $this->_rankexpr = $rankexpr;
+ }
+
+ /// set matches sorting mode
+ function SetSortMode ( $mode, $sortby="" )
+ {
+ assert (
+ $mode==SPH_SORT_RELEVANCE ||
+ $mode==SPH_SORT_ATTR_DESC ||
+ $mode==SPH_SORT_ATTR_ASC ||
+ $mode==SPH_SORT_TIME_SEGMENTS ||
+ $mode==SPH_SORT_EXTENDED ||
+ $mode==SPH_SORT_EXPR );
+ assert ( is_string($sortby) );
+ assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );
+
+ $this->_sort = $mode;
+ $this->_sortby = $sortby;
+ }
+
+ /// bind per-field weights by order
+ /// DEPRECATED; use SetFieldWeights() instead
+ function SetWeights ( $weights )
+ {
+ assert ( is_array($weights) );
+ foreach ( $weights as $weight )
+ assert ( is_int($weight) );
+
+ $this->_weights = $weights;
+ }
+
+ /// bind per-field weights by name
+ function SetFieldWeights ( $weights )
+ {
+ assert ( is_array($weights) );
+ foreach ( $weights as $name=>$weight )
+ {
+ assert ( is_string($name) );
+ assert ( is_int($weight) );
+ }
+ $this->_fieldweights = $weights;
+ }
+
+ /// bind per-index weights by name
+ function SetIndexWeights ( $weights )
+ {
+ assert ( is_array($weights) );
+ foreach ( $weights as $index=>$weight )
+ {
+ assert ( is_string($index) );
+ assert ( is_int($weight) );
+ }
+ $this->_indexweights = $weights;
+ }
+
+ /// set IDs range to match
+ /// only match records if document ID is beetwen $min and $max (inclusive)
+ function SetIDRange ( $min, $max )
+ {
+ assert ( is_numeric($min) );
+ assert ( is_numeric($max) );
+ assert ( $min<=$max );
+ $this->_min_id = $min;
+ $this->_max_id = $max;
+ }
+
+ /// set values set filter
+ /// only match records where $attribute value is in given set
+ function SetFilter ( $attribute, $values, $exclude=false )
+ {
+ assert ( is_string($attribute) );
+ assert ( is_array($values) );
+ assert ( count($values) );
+
+ if ( is_array($values) && count($values) )
+ {
+ foreach ( $values as $value )
+ assert ( is_numeric($value) );
+
+ $this->_filters[] = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values );
+ }
+ }
+
+ /// set range filter
+ /// only match records if $attribute value is beetwen $min and $max (inclusive)
+ function SetFilterRange ( $attribute, $min, $max, $exclude=false )
+ {
+ assert ( is_string($attribute) );
+ assert ( is_numeric($min) );
+ assert ( is_numeric($max) );
+ assert ( $min<=$max );
+
+ $this->_filters[] = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
+ }
+
+ /// set float range filter
+ /// only match records if $attribute value is beetwen $min and $max (inclusive)
+ function SetFilterFloatRange ( $attribute, $min, $max, $exclude=false )
+ {
+ assert ( is_string($attribute) );
+ assert ( is_float($min) );
+ assert ( is_float($max) );
+ assert ( $min<=$max );
+
+ $this->_filters[] = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
+ }
+
+ /// setup anchor point for geosphere distance calculations
+ /// required to use @geodist in filters and sorting
+ /// latitude and longitude must be in radians
+ function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
+ {
+ assert ( is_string($attrlat) );
+ assert ( is_string($attrlong) );
+ assert ( is_float($lat) );
+ assert ( is_float($long) );
+
+ $this->_anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long );
+ }
+
+ /// set grouping attribute and function
+ function SetGroupBy ( $attribute, $func, $groupsort="@group desc" )
+ {
+ assert ( is_string($attribute) );
+ assert ( is_string($groupsort) );
+ assert ( $func==SPH_GROUPBY_DAY
+ || $func==SPH_GROUPBY_WEEK
+ || $func==SPH_GROUPBY_MONTH
+ || $func==SPH_GROUPBY_YEAR
+ || $func==SPH_GROUPBY_ATTR
+ || $func==SPH_GROUPBY_ATTRPAIR );
+
+ $this->_groupby = $attribute;
+ $this->_groupfunc = $func;
+ $this->_groupsort = $groupsort;
+ }
+
+ /// set count-distinct attribute for group-by queries
+ function SetGroupDistinct ( $attribute )
+ {
+ assert ( is_string($attribute) );
+ $this->_groupdistinct = $attribute;
+ }
+
+ /// set distributed retries count and delay
+ function SetRetries ( $count, $delay=0 )
+ {
+ assert ( is_int($count) && $count>=0 );
+ assert ( is_int($delay) && $delay>=0 );
+ $this->_retrycount = $count;
+ $this->_retrydelay = $delay;
+ }
+
+ /// set result set format (hash or array; hash by default)
+ /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
+ function SetArrayResult ( $arrayresult )
+ {
+ assert ( is_bool($arrayresult) );
+ $this->_arrayresult = $arrayresult;
+ }
+
+ /// set attribute values override
+ /// there can be only one override per attribute
+ /// $values must be a hash that maps document IDs to attribute values
+ function SetOverride ( $attrname, $attrtype, $values )
+ {
+ assert ( is_string ( $attrname ) );
+ assert ( in_array ( $attrtype, array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT ) ) );
+ assert ( is_array ( $values ) );
+
+ $this->_overrides[$attrname] = array ( "attr"=>$attrname, "type"=>$attrtype, "values"=>$values );
+ }
+
+ /// set select-list (attributes or expressions), SQL-like syntax
+ function SetSelect ( $select )
+ {
+ assert ( is_string ( $select ) );
+ $this->_select = $select;
+ }
+
+ //////////////////////////////////////////////////////////////////////////////
+
+ /// clear all filters (for multi-queries)
+ function ResetFilters ()
+ {
+ $this->_filters = array();
+ $this->_anchor = array();
+ }
+
+ /// clear groupby settings (for multi-queries)
+ function ResetGroupBy ()
+ {
+ $this->_groupby = "";
+ $this->_groupfunc = SPH_GROUPBY_DAY;
+ $this->_groupsort = "@group desc";
+ $this->_groupdistinct= "";
+ }
+
+ /// clear all attribute value overrides (for multi-queries)
+ function ResetOverrides ()
+ {
+ $this->_overrides = array ();
+ }
+
+ //////////////////////////////////////////////////////////////////////////////
+
+ /// connect to searchd server, run given search query through given indexes,
+ /// and return the search results
+ function Query ( $query, $index="*", $comment="" )
+ {
+ assert ( empty($this->_reqs) );
+
+ $this->AddQuery ( $query, $index, $comment );
+ $results = $this->RunQueries ();
+ $this->_reqs = array (); // just in case it failed too early
+
+ if ( !is_array($results) )
+ return false; // probably network error; error message should be already filled
+
+ $this->_error = $results[0]["error"];
+ $this->_warning = $results[0]["warning"];
+ if ( $results[0]["status"]==SEARCHD_ERROR )
+ return false;
+ else
+ return $results[0];
+ }
+
+ /// helper to pack floats in network byte order
+ function _PackFloat ( $f )
+ {
+ $t1 = pack ( "f", $f ); // machine order
+ list(,$t2) = unpack ( "L*", $t1 ); // int in machine order
+ return pack ( "N", $t2 );
+ }
+
+ /// add query to multi-query batch
+ /// returns index into results array from RunQueries() call
+ function AddQuery ( $query, $index="*", $comment="" )
+ {
+ // mbstring workaround
+ $this->_MBPush ();
+
+ // build request
+ $req = pack ( "NNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker );
+ if ( $this->_ranker==SPH_RANK_EXPR )
+ $req .= pack ( "N", strlen($this->_rankexpr) ) . $this->_rankexpr;
+ $req .= pack ( "N", $this->_sort ); // (deprecated) sort mode
+ $req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
+ $req .= pack ( "N", strlen($query) ) . $query; // query itself
+ $req .= pack ( "N", count($this->_weights) ); // weights
+ foreach ( $this->_weights as $weight )
+ $req .= pack ( "N", (int)$weight );
+ $req .= pack ( "N", strlen($index) ) . $index; // indexes
+ $req .= pack ( "N", 1 ); // id64 range marker
+ $req .= sphPackU64 ( $this->_min_id ) . sphPackU64 ( $this->_max_id ); // id64 range
+
+ // filters
+ $req .= pack ( "N", count($this->_filters) );
+ foreach ( $this->_filters as $filter )
+ {
+ $req .= pack ( "N", strlen($filter["attr"]) ) . $filter["attr"];
+ $req .= pack ( "N", $filter["type"] );
+ switch ( $filter["type"] )
+ {
+ case SPH_FILTER_VALUES:
+ $req .= pack ( "N", count($filter["values"]) );
+ foreach ( $filter["values"] as $value )
+ $req .= sphPackI64 ( $value );
+ break;
+
+ case SPH_FILTER_RANGE:
+ $req .= sphPackI64 ( $filter["min"] ) . sphPackI64 ( $filter["max"] );
+ break;
+
+ case SPH_FILTER_FLOATRANGE:
+ $req .= $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] );
+ break;
+
+ default:
+ assert ( 0 && "internal error: unhandled filter type" );
+ }
+ $req .= pack ( "N", $filter["exclude"] );
+ }
+
+ // group-by clause, max-matches count, group-sort clause, cutoff count
+ $req .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
+ $req .= pack ( "N", $this->_maxmatches );
+ $req .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
+ $req .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
+ $req .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;
+
+ // anchor point
+ if ( empty($this->_anchor) )
+ {
+ $req .= pack ( "N", 0 );
+ } else
+ {
+ $a =& $this->_anchor;
+ $req .= pack ( "N", 1 );
+ $req .= pack ( "N", strlen($a["attrlat"]) ) . $a["attrlat"];
+ $req .= pack ( "N", strlen($a["attrlong"]) ) . $a["attrlong"];
+ $req .= $this->_PackFloat ( $a["lat"] ) . $this->_PackFloat ( $a["long"] );
+ }
+
+ // per-index weights
+ $req .= pack ( "N", count($this->_indexweights) );
+ foreach ( $this->_indexweights as $idx=>$weight )
+ $req .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight );
+
+ // max query time
+ $req .= pack ( "N", $this->_maxquerytime );
+
+ // per-field weights
+ $req .= pack ( "N", count($this->_fieldweights) );
+ foreach ( $this->_fieldweights as $field=>$weight )
+ $req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight );
+
+ // comment
+ $req .= pack ( "N", strlen($comment) ) . $comment;
+
+ // attribute overrides
+ $req .= pack ( "N", count($this->_overrides) );
+ foreach ( $this->_overrides as $key => $entry )
+ {
+ $req .= pack ( "N", strlen($entry["attr"]) ) . $entry["attr"];
+ $req .= pack ( "NN", $entry["type"], count($entry["values"]) );
+ foreach ( $entry["values"] as $id=>$val )
+ {
+ assert ( is_numeric($id) );
+ assert ( is_numeric($val) );
+
+ $req .= sphPackU64 ( $id );
+ switch ( $entry["type"] )
+ {
+ case SPH_ATTR_FLOAT: $req .= $this->_PackFloat ( $val ); break;
+ case SPH_ATTR_BIGINT: $req .= sphPackI64 ( $val ); break;
+ default: $req .= pack ( "N", $val ); break;
+ }
+ }
+ }
+
+ // select-list
+ $req .= pack ( "N", strlen($this->_select) ) . $this->_select;
+
+ // mbstring workaround
+ $this->_MBPop ();
+
+ // store request to requests array
+ $this->_reqs[] = $req;
+ return count($this->_reqs)-1;
+ }
+
+ /// connect to searchd, run queries batch, and return an array of result sets
+ function RunQueries ()
+ {
+ if ( empty($this->_reqs) )
+ {
+ $this->_error = "no queries defined, issue AddQuery() first";
+ return false;
+ }
+
+ // mbstring workaround
+ $this->_MBPush ();
+
+ if (!( $fp = $this->_Connect() ))
+ {
+ $this->_MBPop ();
+ return false;
+ }
+
+ // send query, get response
+ $nreqs = count($this->_reqs);
+ $req = join ( "", $this->_reqs );
+ $len = 8+strlen($req);
+ $req = pack ( "nnNNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, 0, $nreqs ) . $req; // add header
+
+ if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
+ !( $response = $this->_GetResponse ( $fp, VER_COMMAND_SEARCH ) ) )
+ {
+ $this->_MBPop ();
+ return false;
+ }
+
+ // query sent ok; we can reset reqs now
+ $this->_reqs = array ();
+
+ // parse and return response
+ return $this->_ParseSearchResponse ( $response, $nreqs );
+ }
+
+ /// parse and return search query (or queries) response
+ function _ParseSearchResponse ( $response, $nreqs )
+ {
+ $p = 0; // current position
+ $max = strlen($response); // max position for checks, to protect against broken responses
+
+ $results = array ();
+ for ( $ires=0; $ires<$nreqs && $p<$max; $ires++ )
+ {
+ $results[] = array();
+ $result =& $results[$ires];
+
+ $result["error"] = "";
+ $result["warning"] = "";
+
+ // extract status
+ list(,$status) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ $result["status"] = $status;
+ if ( $status!=SEARCHD_OK )
+ {
+ list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ $message = substr ( $response, $p, $len ); $p += $len;
+
+ if ( $status==SEARCHD_WARNING )
+ {
+ $result["warning"] = $message;
+ } else
+ {
+ $result["error"] = $message;
+ continue;
+ }
+ }
+
+ // read schema
+ $fields = array ();
+ $attrs = array ();
+
+ list(,$nfields) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ while ( $nfields-->0 && $p<$max )
+ {
+ list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ $fields[] = substr ( $response, $p, $len ); $p += $len;
+ }
+ $result["fields"] = $fields;
+
+ list(,$nattrs) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ while ( $nattrs-->0 && $p<$max )
+ {
+ list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ $attr = substr ( $response, $p, $len ); $p += $len;
+ list(,$type) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ $attrs[$attr] = $type;
+ }
+ $result["attrs"] = $attrs;
+
+ // read match count
+ list(,$count) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ list(,$id64) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+
+ // read matches
+ $idx = -1;
+ while ( $count-->0 && $p<$max )
+ {
+ // index into result array
+ $idx++;
+
+ // parse document id and weight
+ if ( $id64 )
+ {
+ $doc = sphUnpackU64 ( substr ( $response, $p, 8 ) ); $p += 8;
+ list(,$weight) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ }
+ else
+ {
+ list ( $doc, $weight ) = array_values ( unpack ( "N*N*",
+ substr ( $response, $p, 8 ) ) );
+ $p += 8;
+ $doc = sphFixUint($doc);
+ }
+ $weight = sprintf ( "%u", $weight );
+
+ // create match entry
+ if ( $this->_arrayresult )
+ $result["matches"][$idx] = array ( "id"=>$doc, "weight"=>$weight );
+ else
+ $result["matches"][$doc]["weight"] = $weight;
+
+ // parse and create attributes
+ $attrvals = array ();
+ foreach ( $attrs as $attr=>$type )
+ {
+ // handle 64bit ints
+ if ( $type==SPH_ATTR_BIGINT )
+ {
+ $attrvals[$attr] = sphUnpackI64 ( substr ( $response, $p, 8 ) ); $p += 8;
+ continue;
+ }
+
+ // handle floats
+ if ( $type==SPH_ATTR_FLOAT )
+ {
+ list(,$uval) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ list(,$fval) = unpack ( "f*", pack ( "L", $uval ) );
+ $attrvals[$attr] = $fval;
+ continue;
+ }
+
+ // handle everything else as unsigned ints
+ list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ if ( $type==SPH_ATTR_MULTI )
+ {
+ $attrvals[$attr] = array ();
+ $nvalues = $val;
+ while ( $nvalues-->0 && $p<$max )
+ {
+ list(,$val) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ $attrvals[$attr][] = sphFixUint($val);
+ }
+ } else if ( $type==SPH_ATTR_MULTI64 )
+ {
+ $attrvals[$attr] = array ();
+ $nvalues = $val;
+ while ( $nvalues>0 && $p<$max )
+ {
+ $attrvals[$attr][] = sphUnpackU64 ( substr ( $response, $p, 8 ) ); $p += 8;
+ $nvalues -= 2;
+ }
+ } else if ( $type==SPH_ATTR_STRING )
+ {
+ $attrvals[$attr] = substr ( $response, $p, $val );
+ $p += $val;
+ } else
+ {
+ $attrvals[$attr] = sphFixUint($val);
+ }
+ }
+
+ if ( $this->_arrayresult )
+ $result["matches"][$idx]["attrs"] = $attrvals;
+ else
+ $result["matches"][$doc]["attrs"] = $attrvals;
+ }
+
+ list ( $total, $total_found, $msecs, $words ) =
+ array_values ( unpack ( "N*N*N*N*", substr ( $response, $p, 16 ) ) );
+ $result["total"] = sprintf ( "%u", $total );
+ $result["total_found"] = sprintf ( "%u", $total_found );
+ $result["time"] = sprintf ( "%.3f", $msecs/1000 );
+ $p += 16;
+
+ while ( $words-->0 && $p<$max )
+ {
+ list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ $word = substr ( $response, $p, $len ); $p += $len;
+ list ( $docs, $hits ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;
+ $result["words"][$word] = array (
+ "docs"=>sprintf ( "%u", $docs ),
+ "hits"=>sprintf ( "%u", $hits ) );
+ }
+ }
+
+ $this->_MBPop ();
+ return $results;
+ }
+
+ /////////////////////////////////////////////////////////////////////////////
+ // excerpts generation
+ /////////////////////////////////////////////////////////////////////////////
+
+ /// connect to searchd server, and generate exceprts (snippets)
+ /// of given documents for given query. returns false on failure,
+ /// an array of snippets on success
+ function BuildExcerpts ( $docs, $index, $words, $opts=array() )
+ {
+ assert ( is_array($docs) );
+ assert ( is_string($index) );
+ assert ( is_string($words) );
+ assert ( is_array($opts) );
+
+ $this->_MBPush ();
+
+ if (!( $fp = $this->_Connect() ))
+ {
+ $this->_MBPop();
+ return false;
+ }
+
+ /////////////////
+ // fixup options
+ /////////////////
+
+ if ( !isset($opts["before_match"]) ) $opts["before_match"] = "<b>";
+ if ( !isset($opts["after_match"]) ) $opts["after_match"] = "</b>";
+ if ( !isset($opts["chunk_separator"]) ) $opts["chunk_separator"] = " ... ";
+ if ( !isset($opts["limit"]) ) $opts["limit"] = 256;
+ if ( !isset($opts["limit_passages"]) ) $opts["limit_passages"] = 0;
+ if ( !isset($opts["limit_words"]) ) $opts["limit_words"] = 0;
+ if ( !isset($opts["around"]) ) $opts["around"] = 5;
+ if ( !isset($opts["exact_phrase"]) ) $opts["exact_phrase"] = false;
+ if ( !isset($opts["single_passage"]) ) $opts["single_passage"] = false;
+ if ( !isset($opts["use_boundaries"]) ) $opts["use_boundaries"] = false;
+ if ( !isset($opts["weight_order"]) ) $opts["weight_order"] = false;
+ if ( !isset($opts["query_mode"]) ) $opts["query_mode"] = false;
+ if ( !isset($opts["force_all_words"]) ) $opts["force_all_words"] = false;
+ if ( !isset($opts["start_passage_id"]) ) $opts["start_passage_id"] = 1;
+ if ( !isset($opts["load_files"]) ) $opts["load_files"] = false;
+ if ( !isset($opts["html_strip_mode"]) ) $opts["html_strip_mode"] = "index";
+ if ( !isset($opts["allow_empty"]) ) $opts["allow_empty"] = false;
+ if ( !isset($opts["passage_boundary"]) ) $opts["passage_boundary"] = "none";
+ if ( !isset($opts["emit_zones"]) ) $opts["emit_zones"] = false;
+ if ( !isset($opts["load_files_scattered"]) ) $opts["load_files_scattered"] = false;
+
+
+ /////////////////
+ // build request
+ /////////////////
+
+ // v.1.2 req
+ $flags = 1; // remove spaces
+ if ( $opts["exact_phrase"] ) $flags |= 2;
+ if ( $opts["single_passage"] ) $flags |= 4;
+ if ( $opts["use_boundaries"] ) $flags |= 8;
+ if ( $opts["weight_order"] ) $flags |= 16;
+ if ( $opts["query_mode"] ) $flags |= 32;
+ if ( $opts["force_all_words"] ) $flags |= 64;
+ if ( $opts["load_files"] ) $flags |= 128;
+ if ( $opts["allow_empty"] ) $flags |= 256;
+ if ( $opts["emit_zones"] ) $flags |= 512;
+ if ( $opts["load_files_scattered"] ) $flags |= 1024;
+ $req = pack ( "NN", 0, $flags ); // mode=0, flags=$flags
+ $req .= pack ( "N", strlen($index) ) . $index; // req index
+ $req .= pack ( "N", strlen($words) ) . $words; // req words
+
+ // options
+ $req .= pack ( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
+ $req .= pack ( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
+ $req .= pack ( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
+ $req .= pack ( "NN", (int)$opts["limit"], (int)$opts["around"] );
+ $req .= pack ( "NNN", (int)$opts["limit_passages"], (int)$opts["limit_words"], (int)$opts["start_passage_id"] ); // v.1.2
+ $req .= pack ( "N", strlen($opts["html_strip_mode"]) ) . $opts["html_strip_mode"];
+ $req .= pack ( "N", strlen($opts["passage_boundary"]) ) . $opts["passage_boundary"];
+
+ // documents
+ $req .= pack ( "N", count($docs) );
+ foreach ( $docs as $doc )
+ {
+ assert ( is_string($doc) );
+ $req .= pack ( "N", strlen($doc) ) . $doc;
+ }
+
+ ////////////////////////////
+ // send query, get response
+ ////////////////////////////
+
+ $len = strlen($req);
+ $req = pack ( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
+ if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
+ !( $response = $this->_GetResponse ( $fp, VER_COMMAND_EXCERPT ) ) )
+ {
+ $this->_MBPop ();
+ return false;
+ }
+
+ //////////////////
+ // parse response
+ //////////////////
+
+ $pos = 0;
+ $res = array ();
+ $rlen = strlen($response);
+ for ( $i=0; $i<count($docs); $i++ )
+ {
+ list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) );
+ $pos += 4;
+
+ if ( $pos+$len > $rlen )
+ {
+ $this->_error = "incomplete reply";
+ $this->_MBPop ();
+ return false;
+ }
+ $res[] = $len ? substr ( $response, $pos, $len ) : "";
+ $pos += $len;
+ }
+
+ $this->_MBPop ();
+ return $res;
+ }
+
+
+ /////////////////////////////////////////////////////////////////////////////
+ // keyword generation
+ /////////////////////////////////////////////////////////////////////////////
+
+ /// connect to searchd server, and generate keyword list for a given query
+ /// returns false on failure,
+ /// an array of words on success
+ function BuildKeywords ( $query, $index, $hits )
+ {
+ assert ( is_string($query) );
+ assert ( is_string($index) );
+ assert ( is_bool($hits) );
+
+ $this->_MBPush ();
+
+ if (!( $fp = $this->_Connect() ))
+ {
+ $this->_MBPop();
+ return false;
+ }
+
+ /////////////////
+ // build request
+ /////////////////
+
+ // v.1.0 req
+ $req = pack ( "N", strlen($query) ) . $query; // req query
+ $req .= pack ( "N", strlen($index) ) . $index; // req index
+ $req .= pack ( "N", (int)$hits );
+
+ ////////////////////////////
+ // send query, get response
+ ////////////////////////////
+
+ $len = strlen($req);
+ $req = pack ( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header
+ if ( !( $this->_Send ( $fp, $req, $len+8 ) ) ||
+ !( $response = $this->_GetResponse ( $fp, VER_COMMAND_KEYWORDS ) ) )
+ {
+ $this->_MBPop ();
+ return false;
+ }
+
+ //////////////////
+ // parse response
+ //////////////////
+
+ $pos = 0;
+ $res = array ();
+ $rlen = strlen($response);
+ list(,$nwords) = unpack ( "N*", substr ( $response, $pos, 4 ) );
+ $pos += 4;
+ for ( $i=0; $i<$nwords; $i++ )
+ {
+ list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
+ $tokenized = $len ? substr ( $response, $pos, $len ) : "";
+ $pos += $len;
+
+ list(,$len) = unpack ( "N*", substr ( $response, $pos, 4 ) ); $pos += 4;
+ $normalized = $len ? substr ( $response, $pos, $len ) : "";
+ $pos += $len;
+
+ $res[] = array ( "tokenized"=>$tokenized, "normalized"=>$normalized );
+
+ if ( $hits )
+ {
+ list($ndocs,$nhits) = array_values ( unpack ( "N*N*", substr ( $response, $pos, 8 ) ) );
+ $pos += 8;
+ $res [$i]["docs"] = $ndocs;
+ $res [$i]["hits"] = $nhits;
+ }
+
+ if ( $pos > $rlen )
+ {
+ $this->_error = "incomplete reply";
+ $this->_MBPop ();
+ return false;
+ }
+ }
+
+ $this->_MBPop ();
+ return $res;
+ }
+
+ function EscapeString ( $string )
+ {
+ $from = array ( '\\', '(',')','|','-','!','@','~','"','&', '/', '^', '$', '=' );
+ $to = array ( '\\\\', '\(','\)','\|','\-','\!','\@','\~','\"', '\&', '\/', '\^', '\$', '\=' );
+
+ return str_replace ( $from, $to, $string );
+ }
+
+ /////////////////////////////////////////////////////////////////////////////
+ // attribute updates
+ /////////////////////////////////////////////////////////////////////////////
+
+ /// batch update given attributes in given rows in given indexes
+ /// returns amount of updated documents (0 or more) on success, or -1 on failure
+ function UpdateAttributes ( $index, $attrs, $values, $mva=false )
+ {
+ // verify everything
+ assert ( is_string($index) );
+ assert ( is_bool($mva) );
+
+ assert ( is_array($attrs) );
+ foreach ( $attrs as $attr )
+ assert ( is_string($attr) );
+
+ assert ( is_array($values) );
+ foreach ( $values as $id=>$entry )
+ {
+ assert ( is_numeric($id) );
+ assert ( is_array($entry) );
+ assert ( count($entry)==count($attrs) );
+ foreach ( $entry as $v )
+ {
+ if ( $mva )
+ {
+ assert ( is_array($v) );
+ foreach ( $v as $vv )
+ assert ( is_int($vv) );
+ } else
+ assert ( is_int($v) );
+ }
+ }
+
+ // build request
+ $this->_MBPush ();
+ $req = pack ( "N", strlen($index) ) . $index;
+
+ $req .= pack ( "N", count($attrs) );
+ foreach ( $attrs as $attr )
+ {
+ $req .= pack ( "N", strlen($attr) ) . $attr;
+ $req .= pack ( "N", $mva ? 1 : 0 );
+ }
+
+ $req .= pack ( "N", count($values) );
+ foreach ( $values as $id=>$entry )
+ {
+ $req .= sphPackU64 ( $id );
+ foreach ( $entry as $v )
+ {
+ $req .= pack ( "N", $mva ? count($v) : $v );
+ if ( $mva )
+ foreach ( $v as $vv )
+ $req .= pack ( "N", $vv );
+ }
+ }
+
+ // connect, send query, get response
+ if (!( $fp = $this->_Connect() ))
+ {
+ $this->_MBPop ();
+ return -1;
+ }
+
+ $len = strlen($req);
+ $req = pack ( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
+ if ( !$this->_Send ( $fp, $req, $len+8 ) )
+ {
+ $this->_MBPop ();
+ return -1;
+ }
+
+ if (!( $response = $this->_GetResponse ( $fp, VER_COMMAND_UPDATE ) ))
+ {
+ $this->_MBPop ();
+ return -1;
+ }
+
+ // parse response
+ list(,$updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
+ $this->_MBPop ();
+ return $updated;
+ }
+
+ /////////////////////////////////////////////////////////////////////////////
+ // persistent connections
+ /////////////////////////////////////////////////////////////////////////////
+
+ function Open()
+ {
+ if ( $this->_socket !== false )
+ {
+ $this->_error = 'already connected';
+ return false;
+ }
+ if ( !$fp = $this->_Connect() )
+ return false;
+
+ // command, command version = 0, body length = 4, body = 1
+ $req = pack ( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 );
+ if ( !$this->_Send ( $fp, $req, 12 ) )
+ return false;
+
+ $this->_socket = $fp;
+ return true;
+ }
+
+ function Close()
+ {
+ if ( $this->_socket === false )
+ {
+ $this->_error = 'not connected';
+ return false;
+ }
+
+ fclose ( $this->_socket );
+ $this->_socket = false;
+
+ return true;
+ }
+
+ //////////////////////////////////////////////////////////////////////////
+ // status
+ //////////////////////////////////////////////////////////////////////////
+
+ function Status ()
+ {
+ $this->_MBPush ();
+ if (!( $fp = $this->_Connect() ))
+ {
+ $this->_MBPop();
+ return false;
+ }
+
+ $req = pack ( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1
+ if ( !( $this->_Send ( $fp, $req, 12 ) ) ||
+ !( $response = $this->_GetResponse ( $fp, VER_COMMAND_STATUS ) ) )
+ {
+ $this->_MBPop ();
+ return false;
+ }
+
+ $res = substr ( $response, 4 ); // just ignore length, error handling, etc
+ $p = 0;
+ list ( $rows, $cols ) = array_values ( unpack ( "N*N*", substr ( $response, $p, 8 ) ) ); $p += 8;
+
+ $res = array();
+ for ( $i=0; $i<$rows; $i++ )
+ for ( $j=0; $j<$cols; $j++ )
+ {
+ list(,$len) = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
+ $res[$i][] = substr ( $response, $p, $len ); $p += $len;
+ }
+
+ $this->_MBPop ();
+ return $res;
+ }
+
+ //////////////////////////////////////////////////////////////////////////
+ // flush
+ //////////////////////////////////////////////////////////////////////////
+
+ function FlushAttributes ()
+ {
+ $this->_MBPush ();
+ if (!( $fp = $this->_Connect() ))
+ {
+ $this->_MBPop();
+ return -1;
+ }
+
+ $req = pack ( "nnN", SEARCHD_COMMAND_FLUSHATTRS, VER_COMMAND_FLUSHATTRS, 0 ); // len=0
+ if ( !( $this->_Send ( $fp, $req, 8 ) ) ||
+ !( $response = $this->_GetResponse ( $fp, VER_COMMAND_FLUSHATTRS ) ) )
+ {
+ $this->_MBPop ();
+ return -1;
+ }
+
+ $tag = -1;
+ if ( strlen($response)==4 )
+ list(,$tag) = unpack ( "N*", $response );
+ else
+ $this->_error = "unexpected response length";
+
+ $this->_MBPop ();
+ return $tag;
+ }
+}
+
+//
+// $Id: sphinxapi.php 3087 2012-01-30 23:07:35Z shodan $
+//
|