Slightly major rework of the search code internals, makes things more sane and fixes a couple of bugs

git-svn-id: file:///home/shish/svn/shimmie2/trunk@867 7f39781d-f577-437e-ae19-be835c7a54ca
This commit is contained in:
shish 2008-05-19 15:59:58 +00:00
parent 2c1c605f12
commit a658019697
6 changed files with 75 additions and 27 deletions

View File

@ -55,7 +55,7 @@ class DanbooruApi extends Extension
if(preg_match("/md5:([0-9a-fA-F]*)/i", $event->term, $matches)) if(preg_match("/md5:([0-9a-fA-F]*)/i", $event->term, $matches))
{ {
$hash = strtolower($matches[1]); $hash = strtolower($matches[1]);
$event->set_querylet(new Querylet("AND (images.hash = '$hash')")); $event->set_querylet(new Querylet("images.hash = '$hash'"));
} }
} }
} }

View File

@ -64,7 +64,7 @@ class NumericScore extends Extension {
if(preg_match("/score(<|=|>)(\d+)/", $event->term, $matches)) { if(preg_match("/score(<|=|>)(\d+)/", $event->term, $matches)) {
$cmp = $matches[1]; $cmp = $matches[1];
$score = $matches[2]; $score = $matches[2];
$event->set_querylet(new Querylet("AND (numeric_score $cmp $score)")); $event->set_querylet(new Querylet("numeric_score $cmp $score"));
} }
} }
} }

View File

@ -76,7 +76,7 @@ class Ratings extends Extension {
$arr[] = "'" . $sqes[$i] . "'"; $arr[] = "'" . $sqes[$i] . "'";
} }
$set = join(', ', $arr); $set = join(', ', $arr);
$event->set_querylet(new Querylet("AND (rating IN ($set))")); $event->set_querylet(new Querylet("rating IN ($set)"));
} }
} }
} }

View File

@ -28,7 +28,26 @@ class Querylet {
public function add_variable($var) { public function add_variable($var) {
$this->variables[] = $var; $this->variables[] = $var;
} }
} // }}} }
/**
 * A single tag term from a search, paired with its sign.
 *
 * The search builder creates one of these for every term that no
 * extension claimed as a metadata search, then merges them all into
 * one "(tag LIKE ?)" expression.
 */
class TagQuerylet {
	/** The tag pattern, bound as the parameter of a "tag LIKE ?" clause. */
	var $tag;
	/** True if the tag must be present, false if the term was negated. */
	var $positive;

	/**
	 * Uses __construct rather than a method named after the class:
	 * same-name constructors are deprecated since PHP 7.0 and are not
	 * invoked at all from PHP 8.0, which would leave both fields null.
	 *
	 * @param string $tag      the tag pattern to match
	 * @param bool   $positive true to require the tag, false to exclude it
	 */
	public function __construct($tag, $positive) {
		$this->tag = $tag;
		$this->positive = $positive;
	}
}
/**
 * An image-metadata search condition, paired with its sign.
 *
 * The search builder wraps each querylet supplied by a search-term
 * parse event in one of these, then joins them with AND, prefixing
 * NOT when the condition is negated.
 */
class ImgQuerylet {
	/** The wrapped querylet; its ->sql and ->variables are read when merging. */
	var $qlet;
	/** True if the condition must hold, false if the term was negated. */
	var $positive;

	/**
	 * Uses __construct rather than a method named after the class:
	 * same-name constructors are deprecated since PHP 7.0 and are not
	 * invoked at all from PHP 8.0, which would leave both fields null.
	 *
	 * @param object $qlet     the querylet holding the SQL fragment and its variables
	 * @param bool   $positive true to require the condition, false to negate it
	 */
	public function __construct($qlet, $positive) {
		$this->qlet = $qlet;
		$this->positive = $positive;
	}
}
// }}}
// {{{ dbengines // {{{ dbengines
class DBEngine { class DBEngine {
var $name = null; var $name = null;
@ -204,11 +223,13 @@ class Database {
} }
private function build_search_querylet($terms) { private function build_search_querylet($terms) {
$tag_search = new Querylet("0"); $tag_querylets = array();
$img_querylets = array();
$positive_tag_count = 0; $positive_tag_count = 0;
$negative_tag_count = 0; $negative_tag_count = 0;
$img_search = new Querylet("");
// turn each term into a specific type of querylet
foreach($terms as $term) { foreach($terms as $term) {
$negative = false; $negative = false;
if((strlen($term) > 0) && ($term[0] == '-')) { if((strlen($term) > 0) && ($term[0] == '-')) {
@ -221,28 +242,54 @@ class Database {
$stpe = new SearchTermParseEvent($term); $stpe = new SearchTermParseEvent($term);
send_event($stpe); send_event($stpe);
if($stpe->is_querylet_set()) { if($stpe->is_querylet_set()) {
$img_search->append($stpe->get_querylet()); $img_querylets[] = new ImgQuerylet($stpe->get_querylet(), !$negative);
} }
else { else {
$term = str_replace("*", "%", $term); $term = str_replace("*", "%", $term);
$term = str_replace("?", "_", $term); $term = str_replace("?", "_", $term);
if(!preg_match("/^[%_]+$/", $term)) { if(!preg_match("/^[%_]+$/", $term)) {
$sign = $negative ? "-" : "+"; $tag_querylets[] = new TagQuerylet($term, !$negative);
if($sign == "+") $positive_tag_count++;
else $negative_tag_count++;
$tag_search->append(new Querylet(" $sign (tag LIKE ?)", array($term)));
} }
} }
} }
// merge all the tag querylets into one generic one
$sql = "0";
$terms = array();
foreach($tag_querylets as $tq) {
$sign = $tq->positive ? "+" : "-";
$sql .= " $sign (tag LIKE ?)";
$terms[] = $tq->tag;
if($sign == "+") $positive_tag_count++;
else $negative_tag_count++;
}
$tag_search = new Querylet($sql, $terms);
// merge all the image metadata searches into one generic querylet
$n = 0;
$sql = "";
$terms = array();
foreach($img_querylets as $iq) {
if($n++ > 0) $sql .= " AND";
if(!$iq->positive) $sql .= " NOT";
$sql .= " (" . $iq->qlet->sql . ")";
$terms = array_merge($terms, $iq->qlet->variables);
}
$img_search = new Querylet($sql, $terms);
// no tags, do a simple search (+image metadata if we have any)
if($positive_tag_count + $negative_tag_count == 0) { if($positive_tag_count + $negative_tag_count == 0) {
$query = new Querylet($this->get_images); $query = new Querylet($this->get_images);
if(strlen($img_search->sql) > 0) { if(strlen($img_search->sql) > 0) {
$query->append_sql("WHERE 1=1 "); $query->append_sql(" WHERE ");
$query->append($img_search); $query->append($img_search);
} }
} }
// one positive tag (a common case), do an optimised search
else if($positive_tag_count == 1 && $negative_tag_count == 0) { else if($positive_tag_count == 1 && $negative_tag_count == 0) {
$query = new Querylet( $query = new Querylet(
// MySQL is braindead, and does a full table scan on images, running the subquery once for each row -_- // MySQL is braindead, and does a full table scan on images, running the subquery once for each row -_-
@ -258,9 +305,12 @@ class Database {
$tag_search->variables); $tag_search->variables);
if(strlen($img_search->sql) > 0) { if(strlen($img_search->sql) > 0) {
$query->append_sql(" AND ");
$query->append($img_search); $query->append($img_search);
} }
} }
// more than one positive tag, or more than zero negative tags
else { else {
$s_tag_array = array_map("sql_escape", $tag_search->variables); $s_tag_array = array_map("sql_escape", $tag_search->variables);
$s_tag_list = join(', ', $s_tag_array); $s_tag_list = join(', ', $s_tag_array);
@ -292,22 +342,20 @@ class Database {
$query = new Querylet(" $query = new Querylet("
SELECT *, UNIX_TIMESTAMP(posted) AS posted_timestamp SELECT *, UNIX_TIMESTAMP(posted) AS posted_timestamp
FROM ({$subquery->sql}) AS images ", $subquery->variables); FROM ({$subquery->sql}) AS images ", $subquery->variables);
if(strlen($img_search->sql) > 0) {
$query->append_sql(" WHERE ");
$query->append($img_search);
}
} }
else { else {
# there are no results, "where 1=0" should shortcut things # there are no results, "where 1=0" should shortcut things
$query = new Querylet(" $query = new Querylet("
SELECT images.* SELECT images.*
FROM images FROM images
LEFT JOIN image_tags ON image_tags.image_id = images.id
JOIN tags ON image_tags.tag_id = tags.id
WHERE 1=0 WHERE 1=0
"); ");
} }
if(strlen($img_search->sql) > 0) {
$query->append_sql("WHERE 1=1 ");
$query->append($img_search);
}
} }
return $query; return $query;

View File

@ -85,30 +85,30 @@ class Index extends Extension {
if(preg_match("/size(<|>|<=|>=|=)(\d+)x(\d+)/", $event->term, $matches)) { if(preg_match("/size(<|>|<=|>=|=)(\d+)x(\d+)/", $event->term, $matches)) {
$cmp = $matches[1]; $cmp = $matches[1];
$args = array(int_escape($matches[2]), int_escape($matches[3])); $args = array(int_escape($matches[2]), int_escape($matches[3]));
$event->set_querylet(new Querylet("AND (width $cmp ? AND height $cmp ?)", $args)); $event->set_querylet(new Querylet("width $cmp ? AND height $cmp ?", $args));
} }
else if(preg_match("/ratio(<|>|<=|>=|=)(\d+):(\d+)/", $event->term, $matches)) { else if(preg_match("/ratio(<|>|<=|>=|=)(\d+):(\d+)/", $event->term, $matches)) {
$cmp = $matches[1]; $cmp = $matches[1];
$args = array(int_escape($matches[2]), int_escape($matches[3])); $args = array(int_escape($matches[2]), int_escape($matches[3]));
$event->set_querylet(new Querylet("AND (width / height $cmp ? / ?)", $args)); $event->set_querylet(new Querylet("width / height $cmp ? / ?", $args));
} }
else if(preg_match("/(filesize|id)(<|>|<=|>=|=)(\d+[kmg]?b?)/i", $event->term, $matches)) { else if(preg_match("/(filesize|id)(<|>|<=|>=|=)(\d+[kmg]?b?)/i", $event->term, $matches)) {
$col = $matches[1]; $col = $matches[1];
$cmp = $matches[2]; $cmp = $matches[2];
$val = parse_shorthand_int($matches[3]); $val = parse_shorthand_int($matches[3]);
$event->set_querylet(new Querylet("AND (images.$col $cmp ?)", array($val))); $event->set_querylet(new Querylet("images.$col $cmp ?", array($val)));
} }
else if(preg_match("/hash=([0-9a-fA-F]*)/i", $event->term, $matches)) { else if(preg_match("/hash=([0-9a-fA-F]*)/i", $event->term, $matches)) {
$hash = strtolower($matches[2]); $hash = strtolower($matches[2]);
$event->set_querylet(new Querylet("AND (images.hash = '$hash')")); $event->set_querylet(new Querylet("images.hash = '$hash'"));
} }
else if(preg_match("/(filetype|ext)=([a-zA-Z0-9]*)/i", $event->term, $matches)) { else if(preg_match("/(filetype|ext)=([a-zA-Z0-9]*)/i", $event->term, $matches)) {
$ext = strtolower($matches[2]); $ext = strtolower($matches[2]);
$event->set_querylet(new Querylet("AND (images.ext = '$ext')")); $event->set_querylet(new Querylet("images.ext = '$ext'"));
} }
else if(preg_match("/(filename|name)=([a-zA-Z0-9]*)/i", $event->term, $matches)) { else if(preg_match("/(filename|name)=([a-zA-Z0-9]*)/i", $event->term, $matches)) {
$filename = strtolower($matches[2]); $filename = strtolower($matches[2]);
$event->set_querylet(new Querylet("AND (images.filename LIKE '%$filename%')")); $event->set_querylet(new Querylet("images.filename LIKE '%$filename%'"));
} }
} }
} }

View File

@ -169,11 +169,11 @@ class UserPage extends Extension {
else { else {
$user_id = -1; $user_id = -1;
} }
$event->set_querylet(new Querylet("AND (images.owner_id = $user_id)")); $event->set_querylet(new Querylet("images.owner_id = $user_id"));
} }
else if(preg_match("/(poster|user)_id=([0-9]+)/i", $event->term, $matches)) { else if(preg_match("/(poster|user)_id=([0-9]+)/i", $event->term, $matches)) {
$user_id = int_escape($matches[2]); $user_id = int_escape($matches[2]);
$event->set_querylet(new Querylet("AND (images.owner_id = $user_id)")); $event->set_querylet(new Querylet("images.owner_id = $user_id"));
} }
} }
} }