feat(search): add actual searching

This commit is contained in:
David Bailey 2025-01-06 22:35:30 +01:00
parent 02054d418d
commit 7c8d0191d2
2 changed files with 160 additions and 33 deletions

View file

@ -83,13 +83,25 @@ interface PostdataInterface {
// Returns an array of PostData information
// based on the tag search list
// based on various search parameters.
//
// Tag searchlist is comprised of space-separated
// tags. Each tag can have a weighting prefix,
// and some special tags exist (such as limit:N,
// order:S).
public function search_posts($taglist);
// search_options can either be:
// - An Array
// - Or a String
//
// In case of it being an Array, it may include
// the keys:
// - "query" (which will be processed similar
// to how $search_options will be processed),
// - "text", which is searched for in text fields
// (title, brief, fulltext),
// - "tags", which is matched IN BOOLEAN MODE against
// the post tags
// - "path", which is used as filter
// - "order_by": determines which column to order by. NULL
// will order by FULLTEXT match scores
// - "limit" and "offset", self-explanatory
public function search_posts($search_options);
}
?>

View file

@ -10,7 +10,7 @@ class MySQLHandler
CONST SQL_READ_COLUMNS = [
'id', 'path', 'created_at', 'updated_at',
'title', 'view_count', 'brief'];
'title', 'view_count', 'brief', 'search_score'];
CONST SQL_WRITE_COLUMNS = ['path', 'title', 'brief'];
@ -327,42 +327,157 @@ class MySQLHandler
return $data['post_markdown'];
}
public function search_posts($taglist) {
$qry = "
SELECT *
FROM posts
WHERE MATCH(post_tags) AGAINST (? IN BOOLEAN MODE)
";
// Splits a free-form search string into plain search text, tags and
// "key:value" options.
//
// $text is split on single spaces. An element of the form "word:value"
// is treated as an option: "tags:a,b" appends the comma-separated values
// to the tag list, any other key is stored in the options map (a repeated
// key keeps the last value). Every other element is appended to the text,
// each followed by one space.
//
// Returns an array with the keys "text" (string), "tags" (list of strings)
// and "options" (key => value strings).
//
// NOTE(review): this listing looks like a rendered diff. The statements
// below that use $taglist, $search_data, $limit and $allowed_ordering are
// not fed by $text and presumably belong to the removed
// search_posts($taglist) body - confirm against the actual file.
public function parse_search_query_string($text) {
$element_array = explode(' ', $text);
$search_data = TagList\create_db_search($taglist);
$return_text = '';
$return_tags = [];
$return_options = [];
$order_by = $search_data['modifiers']['order_by'] ?? 'updated_at';
$limit = intval($search_data['modifiers']['limit'] ?? 20);
$offset = intval($search_data['modifiers']['offset'] ?? 0);
foreach($element_array as $element) {
// Consecutive spaces yield empty elements; skip them.
if(strlen($element) == 0)
continue;
if($limit > 100) {
throw new Exception('Search limit above maximum (max 100 results per search)');
// "key:value" element: key is one or more word characters, value is
// everything after the first colon that follows them.
if(preg_match('/^(\w+):(.+)$/', $element, $match)) {
if($match[1] == 'tags') {
$return_tags = array_merge($return_tags, explode(',', $match[2]));
} else {
$return_options[$match[1]] = $match[2];
}
} else {
$return_text .= $element . ' ';
}
}
$allowed_ordering = [
'path' => true,
'path DESC' => true,
'created_at' => true,
'created_at DESC' => true,
'updated_at' => true,
'updated_at DESC' => true
return [
'text' => $return_text,
'tags' => $return_tags,
'options' => $return_options
];
// TODO move this to a class var
}
if(!isset($allowed_ordering[$order_by])) {
throw new Exception('Search order not allowed');
public function search_posts($options) {
// Function to perform an arbitrary search across
// the database.
//
// "options" input is a Hash with the following
// possible keys:
// - query: This text will be interpreted
// as a combination of text to search as well as
// tags, order-by requirements, etc.
// - text: This text will be used as unmodified
// input to the FULLTEXT matching
// - tags: This may be either a list or a string of tags
// to use for searching
// - path: Which path to search within
// - order_by: What column (if any) to order by
// - limit: Number of results to return, at most
// - offset: Number of results to skip before returning
if(gettype($options) == 'string') {
$options = [
'query' => $options
];
}
$order_by = 'post_' . $order_by;
$qry = $qry . " ORDER BY " . $order_by . " LIMIT ? OFFSET ?";
// Arrays to construct the query selection later
$qry_selects = ['posts.*'];
$qry_select_data = [];
$qry_select_types = '';
$search_results = $this->_exec($qry, "sii", $search_data['parameter_string'],
$limit, $offset)->fetch_all(MYSQLI_ASSOC);
$qry_wheres = [];
$qry_where_data = [];
$qry_where_types = '';
$options['text'] ??= '';
if(gettype($options['tags'] ?? null) == 'string') {
$options['tags'] = TagList\_str_to_raw_taglist($options['tags']);
} else {
$options['tags'] ??= [];
}
$options['limit'] = min($options['limit'] ?? 100, 100);
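// This code will take a generic user-input string, and will process it
// to see if there are any special options to consider.
//
// Intended behaviour: these options are overridden by the original
// "options" array, text and tags are merged, and for the limit the
// minimum is chosen.
// NOTE(review): the array_merge() at the end of this block passes the
// parsed options second, so for duplicate keys the query-string values
// replace the caller's values (including the clamped "limit"). The parsed
// "limit"/"offset" also live under $search_options['options'], not at the
// top level that the isset() checks below look at. Confirm the intended
// precedence, since limit and offset are later concatenated into the SQL.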
if(isset($options['query'])) {
$search_options = $this->parse_search_query_string($options['query']);
if(strlen($search_options['text']) > 0) {
$options['text'] ??= '';
$options['text'] .= ' ' . $search_options['text'];
}
$options['tags'] = array_merge($options['tags'], $search_options['tags']);
if(isset($search_options['limit'])) {
$options['limit'] = min($options['limit'], intval($search_options['limit']));
}
if(isset($search_options['offset'])) {
$options['offset'] = intval($options['offset']);
}
$options = array_merge($options, $search_options['options']);
}
// If we have any tags, construct a tag-matching query
if(count($options['tags']) > 0) {
$tag_search_string = TagList\create_db_search($options['tags'])['parameter_string'];
$qry_wheres []= "MATCH(post_tags) AGAINST (? IN BOOLEAN MODE)";
$qry_where_data []= $tag_search_string;
$qry_where_types .= 's';
}
// If we have any text query strings, we get to construct a rather fun, complex
// array of MATCH() AGAINST() text queries.
if(strlen($options['text']) > 0) {
$text_search_scores = [0];
$text_search_wheres = [];
foreach([['title', 6], ['brief', 4], ['markdown', 1]] as $arg) {
$text_search_scores []= "((MATCH(post_" . $arg[0] . ") AGAINST (?)) * " . $arg[1] . ')';
$qry_select_data []= $options['text'];
$qry_select_types .= 's';
$text_search_wheres []= "(MATCH(post_" . $arg[0] . ") AGAINST (?))";
$qry_where_data []= $options['text'];
$qry_where_types .= 's';
}
$qry_selects []= '(' . implode('+', $text_search_scores) . ') AS post_search_score';
$qry_wheres []= '(' . implode(' OR ', $text_search_wheres) . ')';
} else {
$qry_selects []= '0 AS post_search_score';
}
if(isset($options['path']) && strlen($options['path']) > 0) {
$qry_wheres []= "post_path LIKE ?";
$qry_where_data []= $options['path'] . '%';
$qry_where_types .= 's';
}
if(count($qry_wheres) == 0) {
throw new Exception("No search filtering options supplied!");
}
$options['offset'] ??= 0;
$qry =
"SELECT " . implode(', ', $qry_selects) . "
FROM posts
LEFT JOIN post_markdown ON posts.post_id = post_markdown.post_id
WHERE " . implode(' and ', $qry_wheres) . "
ORDER BY post_search_score DESC
LIMIT " . $options['limit'] . "
OFFSET " . $options['offset'];
$search_results = $this->_exec($qry, $qry_select_types . $qry_where_types,
...array_merge($qry_select_data, $qry_where_data))->fetch_all(MYSQLI_ASSOC);
$outdata = [];
foreach($search_results AS $post_element) {