From a65801969733d688a86ac3de7ebbe6cbce039b5f Mon Sep 17 00:00:00 2001 From: shish Date: Mon, 19 May 2008 15:59:58 +0000 Subject: [PATCH] Slightly major rework of the search code internals, makes things more sane and fixes a couple of bugs git-svn-id: file:///home/shish/svn/shimmie2/trunk@867 7f39781d-f577-437e-ae19-be835c7a54ca --- contrib/danbooru_api/main.php | 2 +- contrib/numeric_score/main.php | 2 +- contrib/rating/main.php | 2 +- core/database.class.php | 80 +++++++++++++++++++++++++++------- ext/index/main.php | 12 ++--- ext/user/main.php | 4 +- 6 files changed, 75 insertions(+), 27 deletions(-) diff --git a/contrib/danbooru_api/main.php b/contrib/danbooru_api/main.php index 12a269c1..991361fd 100644 --- a/contrib/danbooru_api/main.php +++ b/contrib/danbooru_api/main.php @@ -55,7 +55,7 @@ class DanbooruApi extends Extension if(preg_match("/md5:([0-9a-fA-F]*)/i", $event->term, $matches)) { $hash = strtolower($matches[1]); - $event->set_querylet(new Querylet("AND (images.hash = '$hash')")); + $event->set_querylet(new Querylet("images.hash = '$hash'")); } } } diff --git a/contrib/numeric_score/main.php b/contrib/numeric_score/main.php index f1e1c8d6..32291f39 100644 --- a/contrib/numeric_score/main.php +++ b/contrib/numeric_score/main.php @@ -64,7 +64,7 @@ class NumericScore extends Extension { if(preg_match("/score(<|=|>)(\d+)/", $event->term, $matches)) { $cmp = $matches[1]; $score = $matches[2]; - $event->set_querylet(new Querylet("AND (numeric_score $cmp $score)")); + $event->set_querylet(new Querylet("numeric_score $cmp $score")); } } } diff --git a/contrib/rating/main.php b/contrib/rating/main.php index 138039e9..e8623af4 100644 --- a/contrib/rating/main.php +++ b/contrib/rating/main.php @@ -76,7 +76,7 @@ class Ratings extends Extension { $arr[] = "'" . $sqes[$i] . "'"; } $set = join(', ', $arr); - $event->set_querylet(new Querylet("AND (rating IN ($set))")); + $event->set_querylet(new Querylet("rating IN ($set)")); } } } diff --git a/core/database.class.php b/core/database.class.php index f68425d7..254dbffa 100644 --- a/core/database.class.php +++ b/core/database.class.php @@ -28,7 +28,26 @@ class Querylet { public function add_variable($var) { $this->variables[] = $var; } -} // }}} +} +class TagQuerylet { + var $tag; + var $positive; + + public function TagQuerylet($tag, $positive) { + $this->tag = $tag; + $this->positive = $positive; + } +} +class ImgQuerylet { + var $qlet; + var $positive; + + public function ImgQuerylet($qlet, $positive) { + $this->qlet = $qlet; + $this->positive = $positive; + } +} +// }}} // {{{ dbengines class DBEngine { var $name = null; @@ -204,11 +223,13 @@ class Database { } private function build_search_querylet($terms) { - $tag_search = new Querylet("0"); + $tag_querylets = array(); + $img_querylets = array(); $positive_tag_count = 0; $negative_tag_count = 0; - $img_search = new Querylet(""); + + // turn each term into a specific type of querylet foreach($terms as $term) { $negative = false; if((strlen($term) > 0) && ($term[0] == '-')) { @@ -221,28 +242,54 @@ class Database { $stpe = new SearchTermParseEvent($term); send_event($stpe); if($stpe->is_querylet_set()) { - $img_search->append($stpe->get_querylet()); + $img_querylets[] = new ImgQuerylet($stpe->get_querylet(), !$negative); } else { $term = str_replace("*", "%", $term); $term = str_replace("?", "_", $term); if(!preg_match("/^[%_]+$/", $term)) { - $sign = $negative ? "-" : "+"; - if($sign == "+") $positive_tag_count++; - else $negative_tag_count++; - $tag_search->append(new Querylet(" $sign (tag LIKE ?)", array($term))); + $tag_querylets[] = new TagQuerylet($term, !$negative); } } } + // merge all the tag querylets into one generic one + $sql = "0"; + $terms = array(); + foreach($tag_querylets as $tq) { + $sign = $tq->positive ? "+" : "-"; + $sql .= " $sign (tag LIKE ?)"; + $terms[] = $tq->tag; + + if($sign == "+") $positive_tag_count++; + else $negative_tag_count++; + } + $tag_search = new Querylet($sql, $terms); + + // merge all the image metadata searches into one generic querylet + $n = 0; + $sql = ""; + $terms = array(); + foreach($img_querylets as $iq) { + if($n++ > 0) $sql .= " AND"; + if(!$iq->positive) $sql .= " NOT"; + $sql .= " (" . $iq->qlet->sql . ")"; + $terms = array_merge($terms, $iq->qlet->variables); + } + $img_search = new Querylet($sql, $terms); + + + // no tags, do a simple search (+image metadata if we have any) if($positive_tag_count + $negative_tag_count == 0) { $query = new Querylet($this->get_images); if(strlen($img_search->sql) > 0) { - $query->append_sql("WHERE 1=1 "); + $query->append_sql(" WHERE "); $query->append($img_search); } } + + // one positive tag (a common case), do an optimised search else if($positive_tag_count == 1 && $negative_tag_count == 0) { $query = new Querylet( // MySQL is braindead, and does a full table scan on images, running the subquery once for each row -_- @@ -258,9 +305,12 @@ class Database { $tag_search->variables); if(strlen($img_search->sql) > 0) { + $query->append_sql(" AND "); $query->append($img_search); } } + + // more than one positive tag, or more than zero negative tags else { $s_tag_array = array_map("sql_escape", $tag_search->variables); $s_tag_list = join(', ', $s_tag_array); @@ -292,22 +342,20 @@ class Database { $query = new Querylet(" SELECT *, UNIX_TIMESTAMP(posted) AS posted_timestamp FROM ({$subquery->sql}) AS images ", $subquery->variables); + + if(strlen($img_search->sql) > 0) { + $query->append_sql(" WHERE "); + $query->append($img_search); + } } else { # there are no results, "where 1=0" should shortcut things $query = new Querylet(" SELECT images.* FROM images - LEFT JOIN image_tags ON image_tags.image_id = images.id - JOIN tags ON image_tags.tag_id = tags.id WHERE 1=0 "); } - - if(strlen($img_search->sql) > 0) { - $query->append_sql("WHERE 1=1 "); - $query->append($img_search); - } } return $query; diff --git a/ext/index/main.php b/ext/index/main.php index 0633752a..db483903 100644 --- a/ext/index/main.php +++ b/ext/index/main.php @@ -85,30 +85,30 @@ class Index extends Extension { if(preg_match("/size(<|>|<=|>=|=)(\d+)x(\d+)/", $event->term, $matches)) { $cmp = $matches[1]; $args = array(int_escape($matches[2]), int_escape($matches[3])); - $event->set_querylet(new Querylet("AND (width $cmp ? AND height $cmp ?)", $args)); + $event->set_querylet(new Querylet("width $cmp ? AND height $cmp ?", $args)); } else if(preg_match("/ratio(<|>|<=|>=|=)(\d+):(\d+)/", $event->term, $matches)) { $cmp = $matches[1]; $args = array(int_escape($matches[2]), int_escape($matches[3])); - $event->set_querylet(new Querylet("AND (width / height $cmp ? / ?)", $args)); + $event->set_querylet(new Querylet("width / height $cmp ? / ?", $args)); } else if(preg_match("/(filesize|id)(<|>|<=|>=|=)(\d+[kmg]?b?)/i", $event->term, $matches)) { $col = $matches[1]; $cmp = $matches[2]; $val = parse_shorthand_int($matches[3]); - $event->set_querylet(new Querylet("AND (images.$col $cmp ?)", array($val))); + $event->set_querylet(new Querylet("images.$col $cmp ?", array($val))); } else if(preg_match("/hash=([0-9a-fA-F]*)/i", $event->term, $matches)) { $hash = strtolower($matches[2]); - $event->set_querylet(new Querylet("AND (images.hash = '$hash')")); + $event->set_querylet(new Querylet("images.hash = '$hash'")); } else if(preg_match("/(filetype|ext)=([a-zA-Z0-9]*)/i", $event->term, $matches)) { $ext = strtolower($matches[2]); - $event->set_querylet(new Querylet("AND (images.ext = '$ext')")); + $event->set_querylet(new Querylet("images.ext = '$ext'")); } else if(preg_match("/(filename|name)=([a-zA-Z0-9]*)/i", $event->term, $matches)) { $filename = strtolower($matches[2]); - $event->set_querylet(new Querylet("AND (images.filename LIKE '%$filename%')")); + $event->set_querylet(new Querylet("images.filename LIKE '%$filename%'")); } } } diff --git a/ext/user/main.php b/ext/user/main.php index a63e9ac1..21a2796c 100644 --- a/ext/user/main.php +++ b/ext/user/main.php @@ -169,11 +169,11 @@ class UserPage extends Extension { else { $user_id = -1; } - $event->set_querylet(new Querylet("AND (images.owner_id = $user_id)")); + $event->set_querylet(new Querylet("images.owner_id = $user_id")); } else if(preg_match("/(poster|user)_id=([0-9]+)/i", $event->term, $matches)) { $user_id = int_escape($matches[2]); - $event->set_querylet(new Querylet("AND (images.owner_id = $user_id)")); + $event->set_querylet(new Querylet("images.owner_id = $user_id")); } } }