lots of deduping for searching

This commit is contained in:
Shish 2016-06-07 01:34:38 +01:00
parent 7f2609f727
commit dc83d4ace7

View File

@ -756,18 +756,16 @@ class Image {
private static function build_search_querylet($terms) { private static function build_search_querylet($terms) {
assert('is_array($terms)'); assert('is_array($terms)');
global $database; global $database;
if($database->get_driver_name() === "mysql")
return Image::build_ugly_search_querylet($terms);
else
return Image::build_accurate_search_querylet($terms);
}
/** $tag_querylets = array();
* @param string[] $terms
* @return ImgQuerylet[]
*/
private static function parse_meta_terms($terms) {
$img_querylets = array(); $img_querylets = array();
$positive_tag_count = 0;
$negative_tag_count = 0;
/*
* Turn a bunch of strings into a bunch of TagQuerylet
* and ImgQuerylet objects
*/
$stpe = new SearchTermParseEvent(null, $terms); $stpe = new SearchTermParseEvent(null, $terms);
send_event($stpe); send_event($stpe);
if ($stpe->is_querylet_set()) { if ($stpe->is_querylet_set()) {
@ -775,25 +773,106 @@ class Image {
$img_querylets[] = new ImgQuerylet($querylet, true); $img_querylets[] = new ImgQuerylet($querylet, true);
} }
} }
return $img_querylets;
$terms = Tag::resolve_aliases($terms);
foreach ($terms as $term) {
$positive = true;
if (is_string($term) && !empty($term) && ($term[0] == '-')) {
$positive = false;
$term = substr($term, 1);
}
if (strlen($term) === 0) {
continue;
} }
/** $stpe = new SearchTermParseEvent($term, $terms);
* @param ImgQuerylet[] $img_querylets send_event($stpe);
* @return Querylet if ($stpe->is_querylet_set()) {
*/ foreach ($stpe->get_querylets() as $querylet) {
private static function build_img_search($img_querylets) { $img_querylets[] = new ImgQuerylet($querylet, $positive);
// merge all the image metadata searches into one generic querylet
$n = 0;
$sql = "";
$terms = array();
foreach ($img_querylets as $iq) {
if ($n++ > 0) $sql .= " AND";
if (!$iq->positive) $sql .= " NOT";
$sql .= " (" . $iq->qlet->sql . ")";
$terms = array_merge($terms, $iq->qlet->variables);
} }
return new Querylet($sql, $terms); } else {
$expansions = Tag::resolve_wildcard($term);
if ($expansions) {
if ($positive) $positive_tag_count++;
else $negative_tag_count++;
}
foreach ($expansions as $expanded_term) {
$tag_querylets[] = new TagQuerylet($expanded_term, $positive);
}
}
}
/*
* Turn a bunch of Querylet objects into a base query
*
* Must follow the format
*
* SELECT images.*
* FROM (...) AS images
* WHERE (...)
*
* ie, return a set of images.* columns, and end with a WHERE
*/
// no tags, do a simple search
if($positive_tag_count + $negative_tag_count == 0) {
$query = new Querylet("
SELECT images.*
FROM images
WHERE 1=1
");
}
// one positive tag (a common case), do an optimised search
else if($positive_tag_count === 1 && $negative_tag_count === 0) {
$query = new Querylet($database->scoreql_to_sql("
SELECT *
FROM (
SELECT images.*
FROM images
JOIN image_tags ON images.id=image_tags.image_id
JOIN tags ON image_tags.tag_id=tags.id
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
GROUP BY images.id
) AS images
WHERE 1=1
"), array("tag"=>$tag_querylets[0]->tag));
}
// more than one positive tag, or more than zero negative tags
else {
if($database->get_driver_name() === "mysql")
$query = Image::build_ugly_search_querylet(
$tag_querylets,
$positive_tag_count
);
else
$query = Image::build_accurate_search_querylet(
$tag_querylets,
$positive_tag_count
);
}
/*
* Merge all the image metadata searches into one generic querylet
* and append to the base querylet with "AND blah"
*/
if($img_querylets) {
$n = 0;
$img_sql = "";
$img_vars = array();
foreach ($img_querylets as $iq) {
if ($n++ > 0) $img_sql .= " AND";
if (!$iq->positive) $img_sql .= " NOT";
$img_sql .= " (" . $iq->qlet->sql . ")";
$img_vars = array_merge($img_vars, $iq->qlet->variables);
}
$query->append_sql(" AND ");
$query->append(new Querylet($img_sql, $img_vars));
}
return $query;
} }
/** /**
@ -817,74 +896,18 @@ class Image {
* All the subqueries are executed every time for every row in the * All the subqueries are executed every time for every row in the
* images table. Yes, MySQL does suck this much. * images table. Yes, MySQL does suck this much.
* *
* @param string[] $terms * @param array $tag_querylets
* @return \Querylet * @param int $positive_tag_count
* @return Querylet
*/ */
private static function build_accurate_search_querylet($terms) { private static function build_accurate_search_querylet(
$tag_querylets,
$positive_tag_count
) {
global $database; global $database;
$tag_querylets = array();
$img_querylets = self::parse_meta_terms($terms);
$positive_tag_count = 0;
// parse the words that are searched for into
// various types of querylet
$terms = Tag::resolve_aliases($terms);
foreach($terms as $term) {
$positive = true;
if(is_string($term) && !empty($term) && ($term[0] == '-')) {
$positive = false;
$term = substr($term, 1);
}
if(strlen($term) === 0) {
continue;
}
$stpe = new SearchTermParseEvent($term, $terms);
send_event($stpe);
if($stpe->is_querylet_set()) {
foreach($stpe->get_querylets() as $querylet) {
$img_querylets[] = new ImgQuerylet($querylet, $positive);
}
}
else {
$expansions = Tag::resolve_wildcard($term);
if($expansions && $positive) $positive_tag_count++;
foreach($expansions as $expanded_term) {
$tag_querylets[] = new TagQuerylet($expanded_term, $positive);
}
}
}
$img_search = self::build_img_search($img_querylets);
// How many tag querylets are there?
$count_tag_querylets = count($tag_querylets);
// no tags, do a simple search (+image metadata if we have any)
if($count_tag_querylets === 0) {
$query = new Querylet("
SELECT images.*
FROM images
WHERE 1=1
");
}
// one positive tag (a common case), do an optimised search
else if($count_tag_querylets === 1 && $tag_querylets[0]->positive) {
$query = new Querylet($database->scoreql_to_sql("
SELECT images.*
FROM images
JOIN image_tags ON images.id=image_tags.image_id
JOIN tags ON image_tags.tag_id=tags.id
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
"), array("tag"=>$tag_querylets[0]->tag));
}
// more than one positive tag, or more than zero negative tags
else {
$positive_tag_id_array = array(); $positive_tag_id_array = array();
$negative_tag_id_array = array(); $negative_tag_id_array = array();
$tags_ok = true;
foreach ($tag_querylets as $tq) { foreach ($tag_querylets as $tq) {
$tag_ids = $database->get_col( $tag_ids = $database->get_col(
@ -892,18 +915,25 @@ class Image {
SELECT id SELECT id
FROM tags FROM tags
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag) WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
"), array("tag" => $tq->tag) "),
array("tag" => $tq->tag)
); );
if ($tq->positive) { if ($tq->positive) {
$positive_tag_id_array = array_merge($positive_tag_id_array, $tag_ids); $positive_tag_id_array = array_merge($positive_tag_id_array, $tag_ids);
$tags_ok = count($tag_ids) > 0; if (count($tag_ids) == 0) {
if (!$tags_ok) break; # one of the positive tags had zero results, therefore there
# can be no results; "where 1=0" should shortcut things
return new Querylet("
SELECT images.*
FROM images
WHERE 1=0
");
}
} else { } else {
$negative_tag_id_array = array_merge($negative_tag_id_array, $tag_ids); $negative_tag_id_array = array_merge($negative_tag_id_array, $tag_ids);
} }
} }
if ($tags_ok) {
$have_pos = count($positive_tag_id_array) > 0; $have_pos = count($positive_tag_id_array) > 0;
$have_neg = count($negative_tag_id_array) > 0; $have_neg = count($negative_tag_id_array) > 0;
@ -929,192 +959,93 @@ class Image {
WHERE tag_id IN ($negative_tag_id_list) WHERE tag_id IN ($negative_tag_id_list)
"; ";
} }
$query = new Querylet(" return new Querylet("
SELECT images.* SELECT images.*
FROM images FROM images
WHERE images.id IN ($sql) WHERE images.id IN ($sql)
"); ");
} else {
# one of the positive tags had zero results, therefore there
# can be no results; "where 1=0" should shortcut things
$query = new Querylet("
SELECT images.*
FROM images
WHERE 1=0
");
}
}
if (!empty($img_search->sql)) {
$query->append_sql(" AND ");
$query->append($img_search);
return $query;
}
return $query;
} }
/** /**
* this function exists because mysql is a turd, see the docs for * this function exists because mysql is a turd, see the docs for
* build_accurate_search_querylet() for a full explanation * build_accurate_search_querylet() for a full explanation
* *
* @param array $terms * @param array $tag_querylets
* @param int $positive_tag_count
* @return Querylet * @return Querylet
*/ */
private static function build_ugly_search_querylet($terms) { private static function build_ugly_search_querylet(
$tag_querylets,
$positive_tag_count
) {
global $database; global $database;
$tag_querylets = array();
$img_querylets = self::parse_meta_terms($terms);
$positive_tag_count = 0;
$negative_tag_count = 0;
$wildcard_count = 0;
$terms = Tag::resolve_aliases($terms);
reset($terms); // rewind to first element in array.
// turn each term into a specific type of querylet
foreach($terms as $term) {
$negative = false;
if( !empty($term) && ($term[0] == '-')) {
$negative = true;
$term = substr($term, 1);
}
$stpe = new SearchTermParseEvent($term, $terms);
send_event($stpe);
if($stpe->is_querylet_set()) {
foreach($stpe->get_querylets() as $querylet) {
$img_querylets[] = new ImgQuerylet($querylet, !$negative);
}
}
else {
$term = str_replace("*", "%", $term);
$term = str_replace("?", "_", $term);
if(!preg_match("/^[%_]+$/", $term)) {
$tag_querylets[] = new TagQuerylet($term, !$negative);
}
}
if(strpos($term, '%') !== FALSE) $wildcard_count++;
}
// merge all the tag querylets into one generic one // merge all the tag querylets into one generic one
$sql = "0"; $sql = "0";
$terms = array(); $terms = array();
foreach($tag_querylets as $tq) { foreach($tag_querylets as $tq) {
$sign = $tq->positive ? "+" : "-"; $sign = $tq->positive ? "+" : "-";
if(!$wildcard_count) {
$sql .= ' '.$sign.' (tag LIKE :tag'.Image::$tag_n.')';
} else {
$sql .= ' '.$sign.' IF(SUM(tag LIKE :tag'.Image::$tag_n.'), 1, 0)'; $sql .= ' '.$sign.' IF(SUM(tag LIKE :tag'.Image::$tag_n.'), 1, 0)';
}
$terms['tag'.Image::$tag_n] = $tq->tag; $terms['tag'.Image::$tag_n] = $tq->tag;
Image::$tag_n++; Image::$tag_n++;
if($sign === "+") $positive_tag_count++;
else $negative_tag_count++;
} }
$tag_search = new Querylet($sql, $terms); $tag_search = new Querylet($sql, $terms);
$img_search = self::build_img_search($img_querylets);
// no tags, do a simple search (+image metadata if we have any) // only negative tags - shortcut to fail
if($positive_tag_count + $negative_tag_count == 0) { if($positive_tag_count == 0) {
$query = new Querylet(" // TODO: This isn't currently implemented.
// SEE: https://github.com/shish/shimmie2/issues/66
return new Querylet("
SELECT images.* SELECT images.*
FROM images FROM images
WHERE 1=1 WHERE 1=0
"); ");
} }
// one positive tag (a common case), do an optimised search
else if($positive_tag_count === 1 && $negative_tag_count === 0) {
// MySQL is braindead, and does a full table scan on images, running the subquery once for each row -_-
// "{$this->get_images} WHERE images.id IN (SELECT image_id FROM tags WHERE tag LIKE ?) ",
$group_by = (!$wildcard_count ? "" : "GROUP BY images.id");
$query = new Querylet("
SELECT images.*
FROM images
JOIN image_tags ON images.id=image_tags.image_id
JOIN tags ON image_tags.tag_id=tags.id
WHERE tag LIKE :tag0
{$group_by}
", $tag_search->variables);
}
// more than one positive tag, and zero or more negative tags
else if($positive_tag_count >= 1) {
$tag_id_array = array(); $tag_id_array = array();
$tags_ok = true;
$x = 0; $x = 0;
foreach($tag_search->variables as $tag) { foreach($tag_search->variables as $tag) {
$tag_ids = $database->get_col( $tag_ids = $database->get_col(
"SELECT id FROM tags WHERE tag LIKE :tag", $database->scoreql_to_sql("
SELECT id
FROM tags
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
"),
array("tag" => $tag) array("tag" => $tag)
); );
$tag_id_array = array_merge($tag_id_array, $tag_ids); $tag_id_array = array_merge($tag_id_array, $tag_ids);
$tags_ok = count($tag_ids) > 0 || !$tag_querylets[$x]->positive; if($tag_querylets[$x]->positive && count($tag_ids) == 0) {
if(!$tags_ok) break; # one of the positive tags had zero results, therefore there
# can be no results; "where 1=0" should shortcut things
return new Querylet("
SELECT images.*
FROM images
WHERE 1=0
");
}
$x++; $x++;
} }
if($tags_ok) { Image::$tag_n = 0;
$tag_id_list = join(', ', $tag_id_array); return new Querylet('
SELECT *
$sum = (!$wildcard_count ? "SUM" : ""); FROM (
$subquery = new Querylet(' SELECT images.*, SUM('.$tag_search->sql.') AS score
SELECT images.*, '.$sum.'('.$tag_search->sql.') AS score
FROM images FROM images
LEFT JOIN image_tags ON image_tags.image_id = images.id LEFT JOIN image_tags ON image_tags.image_id = images.id
JOIN tags ON image_tags.tag_id = tags.id JOIN tags ON image_tags.tag_id = tags.id
WHERE tags.id IN ('.$tag_id_list.') WHERE tags.id IN (' . join(', ', $tag_id_array) . ')
GROUP BY images.id GROUP BY images.id
HAVING score = :score', HAVING score = :score
array_merge( ) AS images
WHERE 1=1
', array_merge(
$tag_search->variables, $tag_search->variables,
array("score"=>$positive_tag_count) array("score"=>$positive_tag_count)
) ));
);
$query = new Querylet('
SELECT *
FROM ('.$subquery->sql.') AS images
WHERE 1=1
', $subquery->variables);
}
else {
# there are no results, "where 1=0" should shortcut things
$query = new Querylet("
SELECT images.*
FROM images
WHERE 1=0
");
}
}
//zero positive tags and one or more negative tags
//TODO: This isn't currently implemented. SEE: https://github.com/shish/shimmie2/issues/66
else {
$query = new Querylet("
SELECT images.*
FROM images
WHERE 1=0
");
}
if (!empty($img_search->sql)) {
$query->append_sql(" AND ");
$query->append($img_search);
return $query;
}
Image::$tag_n = 0;
return $query;
} }
} }