From 7c8d0191d2a7430cbd10cd187e215193404f4716 Mon Sep 17 00:00:00 2001 From: David Bailey Date: Mon, 6 Jan 2025 22:35:30 +0100 Subject: [PATCH] feat(search): :sparkles: add actual searching --- www/src/db_handler/db_interface.php | 24 +++- www/src/db_handler/mysql_handler.php | 169 ++++++++++++++++++++++----- 2 files changed, 160 insertions(+), 33 deletions(-) diff --git a/www/src/db_handler/db_interface.php b/www/src/db_handler/db_interface.php index f143b81..2ca64d1 100644 --- a/www/src/db_handler/db_interface.php +++ b/www/src/db_handler/db_interface.php @@ -83,13 +83,25 @@ interface PostdataInterface { // Returns an array of PostData information - // based on the tag search list + // based on various search parameters. // - // Tag searchlist is comprised of space-separated - // tags. Each tag can have a weighting prefix, - // and some special tags exist (such as limit:N, - // order:S). - public function search_posts($taglist); + // search_options can either be: + // - An Array + // - Or a String + // + // In case of it being an Array, it may include + // the keys: + // - "query" (which will be processed similar + // to how $search_options will be processed), + // - "text", which is searched for in text fields + // (title, brief, fulltext), + // - "tags", which is matched IN BOOLEAN MODE against + // the post tags + // - "path", which is used as filter + // - "order_by": determines which column to order by.
NULL + // will order by FULLTEXT match scores + // - "limit" and "offset", self-explanatory + public function search_posts($search_options); } ?> \ No newline at end of file diff --git a/www/src/db_handler/mysql_handler.php b/www/src/db_handler/mysql_handler.php index 90f9adf..bac80c9 100644 --- a/www/src/db_handler/mysql_handler.php +++ b/www/src/db_handler/mysql_handler.php @@ -10,7 +10,7 @@ class MySQLHandler CONST SQL_READ_COLUMNS = [ 'id', 'path', 'created_at', 'updated_at', - 'title', 'view_count', 'brief']; + 'title', 'view_count', 'brief', 'search_score']; CONST SQL_WRITE_COLUMNS = ['path', 'title', 'brief']; @@ -327,42 +327,157 @@ class MySQLHandler return $data['post_markdown']; } - public function search_posts($taglist) { - $qry = " - SELECT * - FROM posts - WHERE MATCH(post_tags) AGAINST (? IN BOOLEAN MODE) - "; + public function parse_search_query_string($text) { + $element_array = explode(' ', $text); - $search_data = TagList\create_db_search($taglist); + $return_text = ''; + $return_tags = []; + $return_options = []; - $order_by = $search_data['modifiers']['order_by'] ?? 'updated_at'; - $limit = intval($search_data['modifiers']['limit'] ?? 20); - $offset = intval($search_data['modifiers']['offset'] ?? 0); + foreach($element_array as $element) { + if(strlen($element) == 0) + continue; - if($limit > 100) { - throw new Exception('Search limit above maximum (max 100 results per search)'); + if(preg_match('/^(\w+):(.+)$/', $element, $match)) { + if($match[1] == 'tags') { + $return_tags = array_merge($return_tags, explode(',', $match[2])); + } else { + $return_options[$match[1]] = $match[2]; + } + } else { + $return_text .= $element . 
' '; + } } - $allowed_ordering = [ - 'path' => true, - 'path DESC' => true, - 'created_at' => true, - 'created_at DESC' => true, - 'updated_at' => true, - 'updated_at DESC' => true + return [ + 'text' => $return_text, + 'tags' => $return_tags, + 'options' => $return_options ]; - // TODO move this to a class var + } - if(!isset($allowed_ordering[$order_by])) { - throw new Exception('Search order not allowed'); + public function search_posts($options) { + // Function to perform an arbitrary search across + // the database. + // + // "options" input is a Hash with the following + // possible keys: + // - query: This text will be interpreted + // as a combination of text to search as well as + // tags, order-by requirements, etc. + // - text: This text will be used as unmodified + // input to the FULLTEXT matching + // - tags: This may be either a list or a string of tags + // to use for searching + // - path: Which path to search within + // - order_by: What column (if any) to search by + // - limit: Number of results to return, at most + // - offset: Number of results to skip before returning + + if(gettype($options) == 'string') { + $options = [ + 'query' => $options + ]; } - $order_by = 'post_' . $order_by; - $qry = $qry . " ORDER BY " . $order_by . " LIMIT ? OFFSET ?"; + // Arrays to construct the query selection later + $qry_selects = ['posts.*']; + $qry_select_data = []; + $qry_select_types = ''; - $search_results = $this->_exec($qry, "sii", $search_data['parameter_string'], - $limit, $offset)->fetch_all(MYSQLI_ASSOC); + $qry_wheres = []; + $qry_where_data = []; + $qry_where_types = ''; + + $options['text'] ??= ''; + + if(gettype($options['tags'] ?? null) == 'string') { + $options['tags'] = TagList\_str_to_raw_taglist($options['tags']); + } else { + $options['tags'] ??= []; + } + + $options['limit'] = min($options['limit'] ?? 
100, 100); + + // This code will take a generic user-input string, and will process it + // to see if there are any special options to consider. + // + // These options will always be overridden by the original "options" + // array. Text and Tags will be merged. For the limit, the minimum will + // be chosen. + if(isset($options['query'])) { + $search_options = $this->parse_search_query_string($options['query']); + + if(strlen($search_options['text']) > 0) { + $options['text'] ??= ''; + $options['text'] .= ' ' . $search_options['text']; + } + + $options['tags'] = array_merge($options['tags'], $search_options['tags']); + + if(isset($search_options['limit'])) { + $options['limit'] = min($options['limit'], intval($search_options['limit'])); + } + if(isset($search_options['offset'])) { + $options['offset'] = intval($options['offset']); + } + + $options = array_merge($options, $search_options['options']); + } + + // If we have any tags, construct a tag-matching query + if(count($options['tags']) > 0) { + $tag_search_string = TagList\create_db_search($options['tags'])['parameter_string']; + + $qry_wheres []= "MATCH(post_tags) AGAINST (? IN BOOLEAN MODE)"; + $qry_where_data []= $tag_search_string; + $qry_where_types .= 's'; + } + + // If we have any text query strings, we get to construct a rather fun, complex + // array of MATCH() AGAINST() text queries. + if(strlen($options['text']) > 0) { + $text_search_scores = [0]; + $text_search_wheres = []; + foreach([['title', 6], ['brief', 4], ['markdown', 1]] as $arg) { + $text_search_scores []= "((MATCH(post_" . $arg[0] . ") AGAINST (?)) * " . $arg[1] . ')'; + $qry_select_data []= $options['text']; + $qry_select_types .= 's'; + + $text_search_wheres []= "(MATCH(post_" . $arg[0] . ") AGAINST (?))"; + $qry_where_data []= $options['text']; + $qry_where_types .= 's'; + } + + $qry_selects []= '(' . implode('+', $text_search_scores) . ') AS post_search_score'; + $qry_wheres []= '(' . implode(' OR ', $text_search_wheres) . 
')'; + } else { + $qry_selects []= '0 AS post_search_score'; + } + + if(isset($options['path']) && strlen($options['path']) > 0) { + $qry_wheres []= "post_path LIKE ?"; + $qry_where_data []= $options['path'] . '%'; + $qry_where_types .= 's'; + } + + if(count($qry_wheres) == 0) { + throw new Exception("No search filtering options supplied!"); + } + + $options['offset'] ??= 0; + + $qry = + "SELECT " . implode(', ', $qry_selects) . " + FROM posts + LEFT JOIN post_markdown ON posts.post_id = post_markdown.post_id + WHERE " . implode(' and ', $qry_wheres) . " + ORDER BY post_search_score DESC + LIMIT " . $options['limit'] . " + OFFSET " . $options['offset']; + + $search_results = $this->_exec($qry, $qry_select_types . $qry_where_types, + ...array_merge($qry_select_data, $qry_where_data))->fetch_all(MYSQLI_ASSOC); $outdata = []; foreach($search_results AS $post_element) {