lots of deduping for searching
This commit is contained in:
parent
7f2609f727
commit
dc83d4ace7
@ -756,18 +756,16 @@ class Image {
|
|||||||
private static function build_search_querylet($terms) {
|
private static function build_search_querylet($terms) {
|
||||||
assert('is_array($terms)');
|
assert('is_array($terms)');
|
||||||
global $database;
|
global $database;
|
||||||
if($database->get_driver_name() === "mysql")
|
|
||||||
return Image::build_ugly_search_querylet($terms);
|
|
||||||
else
|
|
||||||
return Image::build_accurate_search_querylet($terms);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
$tag_querylets = array();
|
||||||
* @param string[] $terms
|
|
||||||
* @return ImgQuerylet[]
|
|
||||||
*/
|
|
||||||
private static function parse_meta_terms($terms) {
|
|
||||||
$img_querylets = array();
|
$img_querylets = array();
|
||||||
|
$positive_tag_count = 0;
|
||||||
|
$negative_tag_count = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Turn a bunch of strings into a bunch of TagQuerylet
|
||||||
|
* and ImgQuerylet objects
|
||||||
|
*/
|
||||||
$stpe = new SearchTermParseEvent(null, $terms);
|
$stpe = new SearchTermParseEvent(null, $terms);
|
||||||
send_event($stpe);
|
send_event($stpe);
|
||||||
if ($stpe->is_querylet_set()) {
|
if ($stpe->is_querylet_set()) {
|
||||||
@ -775,25 +773,106 @@ class Image {
|
|||||||
$img_querylets[] = new ImgQuerylet($querylet, true);
|
$img_querylets[] = new ImgQuerylet($querylet, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return $img_querylets;
|
|
||||||
|
$terms = Tag::resolve_aliases($terms);
|
||||||
|
foreach ($terms as $term) {
|
||||||
|
$positive = true;
|
||||||
|
if (is_string($term) && !empty($term) && ($term[0] == '-')) {
|
||||||
|
$positive = false;
|
||||||
|
$term = substr($term, 1);
|
||||||
|
}
|
||||||
|
if (strlen($term) === 0) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
$stpe = new SearchTermParseEvent($term, $terms);
|
||||||
* @param ImgQuerylet[] $img_querylets
|
send_event($stpe);
|
||||||
* @return Querylet
|
if ($stpe->is_querylet_set()) {
|
||||||
*/
|
foreach ($stpe->get_querylets() as $querylet) {
|
||||||
private static function build_img_search($img_querylets) {
|
$img_querylets[] = new ImgQuerylet($querylet, $positive);
|
||||||
// merge all the image metadata searches into one generic querylet
|
|
||||||
$n = 0;
|
|
||||||
$sql = "";
|
|
||||||
$terms = array();
|
|
||||||
foreach ($img_querylets as $iq) {
|
|
||||||
if ($n++ > 0) $sql .= " AND";
|
|
||||||
if (!$iq->positive) $sql .= " NOT";
|
|
||||||
$sql .= " (" . $iq->qlet->sql . ")";
|
|
||||||
$terms = array_merge($terms, $iq->qlet->variables);
|
|
||||||
}
|
}
|
||||||
return new Querylet($sql, $terms);
|
} else {
|
||||||
|
$expansions = Tag::resolve_wildcard($term);
|
||||||
|
if ($expansions) {
|
||||||
|
if ($positive) $positive_tag_count++;
|
||||||
|
else $negative_tag_count++;
|
||||||
|
}
|
||||||
|
foreach ($expansions as $expanded_term) {
|
||||||
|
$tag_querylets[] = new TagQuerylet($expanded_term, $positive);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Turn a bunch of Querylet objects into a base query
|
||||||
|
*
|
||||||
|
* Must follow the format
|
||||||
|
*
|
||||||
|
* SELECT images.*
|
||||||
|
* FROM (...) AS images
|
||||||
|
* WHERE (...)
|
||||||
|
*
|
||||||
|
* ie, return a set of images.* columns, and end with a WHERE
|
||||||
|
*/
|
||||||
|
|
||||||
|
// no tags, do a simple search
|
||||||
|
if($positive_tag_count + $negative_tag_count == 0) {
|
||||||
|
$query = new Querylet("
|
||||||
|
SELECT images.*
|
||||||
|
FROM images
|
||||||
|
WHERE 1=1
|
||||||
|
");
|
||||||
|
}
|
||||||
|
|
||||||
|
// one positive tag (a common case), do an optimised search
|
||||||
|
else if($positive_tag_count === 1 && $negative_tag_count === 0) {
|
||||||
|
$query = new Querylet($database->scoreql_to_sql("
|
||||||
|
SELECT *
|
||||||
|
FROM (
|
||||||
|
SELECT images.*
|
||||||
|
FROM images
|
||||||
|
JOIN image_tags ON images.id=image_tags.image_id
|
||||||
|
JOIN tags ON image_tags.tag_id=tags.id
|
||||||
|
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
|
||||||
|
GROUP BY images.id
|
||||||
|
) AS images
|
||||||
|
WHERE 1=1
|
||||||
|
"), array("tag"=>$tag_querylets[0]->tag));
|
||||||
|
}
|
||||||
|
|
||||||
|
// more than one positive tag, or more than zero negative tags
|
||||||
|
else {
|
||||||
|
if($database->get_driver_name() === "mysql")
|
||||||
|
$query = Image::build_ugly_search_querylet(
|
||||||
|
$tag_querylets,
|
||||||
|
$positive_tag_count
|
||||||
|
);
|
||||||
|
else
|
||||||
|
$query = Image::build_accurate_search_querylet(
|
||||||
|
$tag_querylets,
|
||||||
|
$positive_tag_count
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Merge all the image metadata searches into one generic querylet
|
||||||
|
* and append to the base querylet with "AND blah"
|
||||||
|
*/
|
||||||
|
if($img_querylets) {
|
||||||
|
$n = 0;
|
||||||
|
$img_sql = "";
|
||||||
|
$img_vars = array();
|
||||||
|
foreach ($img_querylets as $iq) {
|
||||||
|
if ($n++ > 0) $img_sql .= " AND";
|
||||||
|
if (!$iq->positive) $img_sql .= " NOT";
|
||||||
|
$img_sql .= " (" . $iq->qlet->sql . ")";
|
||||||
|
$img_vars = array_merge($img_vars, $iq->qlet->variables);
|
||||||
|
}
|
||||||
|
$query->append_sql(" AND ");
|
||||||
|
$query->append(new Querylet($img_sql, $img_vars));
|
||||||
|
}
|
||||||
|
|
||||||
|
return $query;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -817,74 +896,18 @@ class Image {
|
|||||||
* All the subqueries are executed every time for every row in the
|
* All the subqueries are executed every time for every row in the
|
||||||
* images table. Yes, MySQL does suck this much.
|
* images table. Yes, MySQL does suck this much.
|
||||||
*
|
*
|
||||||
* @param string[] $terms
|
* @param array $tag_querylets
|
||||||
* @return \Querylet
|
* @param int $positive_tag_count
|
||||||
|
* @return Querylet
|
||||||
*/
|
*/
|
||||||
private static function build_accurate_search_querylet($terms) {
|
private static function build_accurate_search_querylet(
|
||||||
|
$tag_querylets,
|
||||||
|
$positive_tag_count
|
||||||
|
) {
|
||||||
global $database;
|
global $database;
|
||||||
|
|
||||||
$tag_querylets = array();
|
|
||||||
$img_querylets = self::parse_meta_terms($terms);
|
|
||||||
$positive_tag_count = 0;
|
|
||||||
|
|
||||||
// parse the words that are searched for into
|
|
||||||
// various types of querylet
|
|
||||||
$terms = Tag::resolve_aliases($terms);
|
|
||||||
foreach($terms as $term) {
|
|
||||||
$positive = true;
|
|
||||||
if(is_string($term) && !empty($term) && ($term[0] == '-')) {
|
|
||||||
$positive = false;
|
|
||||||
$term = substr($term, 1);
|
|
||||||
}
|
|
||||||
if(strlen($term) === 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
$stpe = new SearchTermParseEvent($term, $terms);
|
|
||||||
send_event($stpe);
|
|
||||||
if($stpe->is_querylet_set()) {
|
|
||||||
foreach($stpe->get_querylets() as $querylet) {
|
|
||||||
$img_querylets[] = new ImgQuerylet($querylet, $positive);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
$expansions = Tag::resolve_wildcard($term);
|
|
||||||
if($expansions && $positive) $positive_tag_count++;
|
|
||||||
foreach($expansions as $expanded_term) {
|
|
||||||
$tag_querylets[] = new TagQuerylet($expanded_term, $positive);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$img_search = self::build_img_search($img_querylets);
|
|
||||||
|
|
||||||
// How many tag querylets are there?
|
|
||||||
$count_tag_querylets = count($tag_querylets);
|
|
||||||
|
|
||||||
// no tags, do a simple search (+image metadata if we have any)
|
|
||||||
if($count_tag_querylets === 0) {
|
|
||||||
$query = new Querylet("
|
|
||||||
SELECT images.*
|
|
||||||
FROM images
|
|
||||||
WHERE 1=1
|
|
||||||
");
|
|
||||||
}
|
|
||||||
|
|
||||||
// one positive tag (a common case), do an optimised search
|
|
||||||
else if($count_tag_querylets === 1 && $tag_querylets[0]->positive) {
|
|
||||||
$query = new Querylet($database->scoreql_to_sql("
|
|
||||||
SELECT images.*
|
|
||||||
FROM images
|
|
||||||
JOIN image_tags ON images.id=image_tags.image_id
|
|
||||||
JOIN tags ON image_tags.tag_id=tags.id
|
|
||||||
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
|
|
||||||
"), array("tag"=>$tag_querylets[0]->tag));
|
|
||||||
}
|
|
||||||
|
|
||||||
// more than one positive tag, or more than zero negative tags
|
|
||||||
else {
|
|
||||||
$positive_tag_id_array = array();
|
$positive_tag_id_array = array();
|
||||||
$negative_tag_id_array = array();
|
$negative_tag_id_array = array();
|
||||||
$tags_ok = true;
|
|
||||||
|
|
||||||
foreach ($tag_querylets as $tq) {
|
foreach ($tag_querylets as $tq) {
|
||||||
$tag_ids = $database->get_col(
|
$tag_ids = $database->get_col(
|
||||||
@ -892,18 +915,25 @@ class Image {
|
|||||||
SELECT id
|
SELECT id
|
||||||
FROM tags
|
FROM tags
|
||||||
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
|
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
|
||||||
"), array("tag" => $tq->tag)
|
"),
|
||||||
|
array("tag" => $tq->tag)
|
||||||
);
|
);
|
||||||
if ($tq->positive) {
|
if ($tq->positive) {
|
||||||
$positive_tag_id_array = array_merge($positive_tag_id_array, $tag_ids);
|
$positive_tag_id_array = array_merge($positive_tag_id_array, $tag_ids);
|
||||||
$tags_ok = count($tag_ids) > 0;
|
if (count($tag_ids) == 0) {
|
||||||
if (!$tags_ok) break;
|
# one of the positive tags had zero results, therefore there
|
||||||
|
# can be no results; "where 1=0" should shortcut things
|
||||||
|
return new Querylet("
|
||||||
|
SELECT images.*
|
||||||
|
FROM images
|
||||||
|
WHERE 1=0
|
||||||
|
");
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
$negative_tag_id_array = array_merge($negative_tag_id_array, $tag_ids);
|
$negative_tag_id_array = array_merge($negative_tag_id_array, $tag_ids);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($tags_ok) {
|
|
||||||
$have_pos = count($positive_tag_id_array) > 0;
|
$have_pos = count($positive_tag_id_array) > 0;
|
||||||
$have_neg = count($negative_tag_id_array) > 0;
|
$have_neg = count($negative_tag_id_array) > 0;
|
||||||
|
|
||||||
@ -929,192 +959,93 @@ class Image {
|
|||||||
WHERE tag_id IN ($negative_tag_id_list)
|
WHERE tag_id IN ($negative_tag_id_list)
|
||||||
";
|
";
|
||||||
}
|
}
|
||||||
$query = new Querylet("
|
return new Querylet("
|
||||||
SELECT images.*
|
SELECT images.*
|
||||||
FROM images
|
FROM images
|
||||||
WHERE images.id IN ($sql)
|
WHERE images.id IN ($sql)
|
||||||
");
|
");
|
||||||
} else {
|
|
||||||
# one of the positive tags had zero results, therefore there
|
|
||||||
# can be no results; "where 1=0" should shortcut things
|
|
||||||
$query = new Querylet("
|
|
||||||
SELECT images.*
|
|
||||||
FROM images
|
|
||||||
WHERE 1=0
|
|
||||||
");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!empty($img_search->sql)) {
|
|
||||||
$query->append_sql(" AND ");
|
|
||||||
$query->append($img_search);
|
|
||||||
return $query;
|
|
||||||
}
|
|
||||||
|
|
||||||
return $query;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* this function exists because mysql is a turd, see the docs for
|
* this function exists because mysql is a turd, see the docs for
|
||||||
* build_accurate_search_querylet() for a full explanation
|
* build_accurate_search_querylet() for a full explanation
|
||||||
*
|
*
|
||||||
* @param array $terms
|
* @param array $tag_querylets
|
||||||
|
* @param int $positive_tag_count
|
||||||
* @return Querylet
|
* @return Querylet
|
||||||
*/
|
*/
|
||||||
private static function build_ugly_search_querylet($terms) {
|
private static function build_ugly_search_querylet(
|
||||||
|
$tag_querylets,
|
||||||
|
$positive_tag_count
|
||||||
|
) {
|
||||||
global $database;
|
global $database;
|
||||||
|
|
||||||
$tag_querylets = array();
|
|
||||||
$img_querylets = self::parse_meta_terms($terms);
|
|
||||||
$positive_tag_count = 0;
|
|
||||||
$negative_tag_count = 0;
|
|
||||||
$wildcard_count = 0;
|
|
||||||
|
|
||||||
$terms = Tag::resolve_aliases($terms);
|
|
||||||
|
|
||||||
reset($terms); // rewind to first element in array.
|
|
||||||
|
|
||||||
// turn each term into a specific type of querylet
|
|
||||||
foreach($terms as $term) {
|
|
||||||
$negative = false;
|
|
||||||
if( !empty($term) && ($term[0] == '-')) {
|
|
||||||
$negative = true;
|
|
||||||
$term = substr($term, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
$stpe = new SearchTermParseEvent($term, $terms);
|
|
||||||
send_event($stpe);
|
|
||||||
if($stpe->is_querylet_set()) {
|
|
||||||
foreach($stpe->get_querylets() as $querylet) {
|
|
||||||
$img_querylets[] = new ImgQuerylet($querylet, !$negative);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
$term = str_replace("*", "%", $term);
|
|
||||||
$term = str_replace("?", "_", $term);
|
|
||||||
if(!preg_match("/^[%_]+$/", $term)) {
|
|
||||||
$tag_querylets[] = new TagQuerylet($term, !$negative);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(strpos($term, '%') !== FALSE) $wildcard_count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
// merge all the tag querylets into one generic one
|
// merge all the tag querylets into one generic one
|
||||||
$sql = "0";
|
$sql = "0";
|
||||||
$terms = array();
|
$terms = array();
|
||||||
foreach($tag_querylets as $tq) {
|
foreach($tag_querylets as $tq) {
|
||||||
$sign = $tq->positive ? "+" : "-";
|
$sign = $tq->positive ? "+" : "-";
|
||||||
|
|
||||||
if(!$wildcard_count) {
|
|
||||||
$sql .= ' '.$sign.' (tag LIKE :tag'.Image::$tag_n.')';
|
|
||||||
} else {
|
|
||||||
$sql .= ' '.$sign.' IF(SUM(tag LIKE :tag'.Image::$tag_n.'), 1, 0)';
|
$sql .= ' '.$sign.' IF(SUM(tag LIKE :tag'.Image::$tag_n.'), 1, 0)';
|
||||||
}
|
|
||||||
|
|
||||||
$terms['tag'.Image::$tag_n] = $tq->tag;
|
$terms['tag'.Image::$tag_n] = $tq->tag;
|
||||||
Image::$tag_n++;
|
Image::$tag_n++;
|
||||||
|
|
||||||
if($sign === "+") $positive_tag_count++;
|
|
||||||
else $negative_tag_count++;
|
|
||||||
}
|
}
|
||||||
$tag_search = new Querylet($sql, $terms);
|
$tag_search = new Querylet($sql, $terms);
|
||||||
$img_search = self::build_img_search($img_querylets);
|
|
||||||
|
|
||||||
// no tags, do a simple search (+image metadata if we have any)
|
// only negative tags - shortcut to fail
|
||||||
if($positive_tag_count + $negative_tag_count == 0) {
|
if($positive_tag_count == 0) {
|
||||||
$query = new Querylet("
|
// TODO: This isn't currently implemented.
|
||||||
|
// SEE: https://github.com/shish/shimmie2/issues/66
|
||||||
|
return new Querylet("
|
||||||
SELECT images.*
|
SELECT images.*
|
||||||
FROM images
|
FROM images
|
||||||
WHERE 1=1
|
WHERE 1=0
|
||||||
");
|
");
|
||||||
}
|
}
|
||||||
|
|
||||||
// one positive tag (a common case), do an optimised search
|
|
||||||
else if($positive_tag_count === 1 && $negative_tag_count === 0) {
|
|
||||||
// MySQL is braindead, and does a full table scan on images, running the subquery once for each row -_-
|
|
||||||
// "{$this->get_images} WHERE images.id IN (SELECT image_id FROM tags WHERE tag LIKE ?) ",
|
|
||||||
$group_by = (!$wildcard_count ? "" : "GROUP BY images.id");
|
|
||||||
$query = new Querylet("
|
|
||||||
SELECT images.*
|
|
||||||
FROM images
|
|
||||||
JOIN image_tags ON images.id=image_tags.image_id
|
|
||||||
JOIN tags ON image_tags.tag_id=tags.id
|
|
||||||
WHERE tag LIKE :tag0
|
|
||||||
{$group_by}
|
|
||||||
", $tag_search->variables);
|
|
||||||
}
|
|
||||||
|
|
||||||
// more than one positive tag, and zero or more negative tags
|
|
||||||
else if($positive_tag_count >= 1) {
|
|
||||||
$tag_id_array = array();
|
$tag_id_array = array();
|
||||||
$tags_ok = true;
|
|
||||||
|
|
||||||
$x = 0;
|
$x = 0;
|
||||||
foreach($tag_search->variables as $tag) {
|
foreach($tag_search->variables as $tag) {
|
||||||
$tag_ids = $database->get_col(
|
$tag_ids = $database->get_col(
|
||||||
"SELECT id FROM tags WHERE tag LIKE :tag",
|
$database->scoreql_to_sql("
|
||||||
|
SELECT id
|
||||||
|
FROM tags
|
||||||
|
WHERE SCORE_STRNORM(tag) = SCORE_STRNORM(:tag)
|
||||||
|
"),
|
||||||
array("tag" => $tag)
|
array("tag" => $tag)
|
||||||
);
|
);
|
||||||
$tag_id_array = array_merge($tag_id_array, $tag_ids);
|
$tag_id_array = array_merge($tag_id_array, $tag_ids);
|
||||||
|
|
||||||
$tags_ok = count($tag_ids) > 0 || !$tag_querylets[$x]->positive;
|
if($tag_querylets[$x]->positive && count($tag_ids) == 0) {
|
||||||
if(!$tags_ok) break;
|
# one of the positive tags had zero results, therefore there
|
||||||
|
# can be no results; "where 1=0" should shortcut things
|
||||||
|
return new Querylet("
|
||||||
|
SELECT images.*
|
||||||
|
FROM images
|
||||||
|
WHERE 1=0
|
||||||
|
");
|
||||||
|
}
|
||||||
|
|
||||||
$x++;
|
$x++;
|
||||||
}
|
}
|
||||||
|
|
||||||
if($tags_ok) {
|
Image::$tag_n = 0;
|
||||||
$tag_id_list = join(', ', $tag_id_array);
|
return new Querylet('
|
||||||
|
SELECT *
|
||||||
$sum = (!$wildcard_count ? "SUM" : "");
|
FROM (
|
||||||
$subquery = new Querylet('
|
SELECT images.*, SUM('.$tag_search->sql.') AS score
|
||||||
SELECT images.*, '.$sum.'('.$tag_search->sql.') AS score
|
|
||||||
FROM images
|
FROM images
|
||||||
LEFT JOIN image_tags ON image_tags.image_id = images.id
|
LEFT JOIN image_tags ON image_tags.image_id = images.id
|
||||||
JOIN tags ON image_tags.tag_id = tags.id
|
JOIN tags ON image_tags.tag_id = tags.id
|
||||||
WHERE tags.id IN ('.$tag_id_list.')
|
WHERE tags.id IN (' . join(', ', $tag_id_array) . ')
|
||||||
GROUP BY images.id
|
GROUP BY images.id
|
||||||
HAVING score = :score',
|
HAVING score = :score
|
||||||
array_merge(
|
) AS images
|
||||||
|
WHERE 1=1
|
||||||
|
', array_merge(
|
||||||
$tag_search->variables,
|
$tag_search->variables,
|
||||||
array("score"=>$positive_tag_count)
|
array("score"=>$positive_tag_count)
|
||||||
)
|
));
|
||||||
);
|
|
||||||
$query = new Querylet('
|
|
||||||
SELECT *
|
|
||||||
FROM ('.$subquery->sql.') AS images
|
|
||||||
WHERE 1=1
|
|
||||||
', $subquery->variables);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
# there are no results, "where 1=0" should shortcut things
|
|
||||||
$query = new Querylet("
|
|
||||||
SELECT images.*
|
|
||||||
FROM images
|
|
||||||
WHERE 1=0
|
|
||||||
");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//zero positive tags and one or more negative tags
|
|
||||||
//TODO: This isn't currently implemented. SEE: https://github.com/shish/shimmie2/issues/66
|
|
||||||
else {
|
|
||||||
$query = new Querylet("
|
|
||||||
SELECT images.*
|
|
||||||
FROM images
|
|
||||||
WHERE 1=0
|
|
||||||
");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!empty($img_search->sql)) {
|
|
||||||
$query->append_sql(" AND ");
|
|
||||||
$query->append($img_search);
|
|
||||||
return $query;
|
|
||||||
}
|
|
||||||
|
|
||||||
Image::$tag_n = 0;
|
|
||||||
return $query;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user